CUTLASS: CUDA Templates for Linear Algebra Subroutines and Solvers
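Partial specialization of DefaultMmaCore for int8_t operands with row-major A and row-major B, using the SIMT operator class (dp4a). More...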
#include <default_mma_core_simt.h>
| Static Public Attributes | |
| static int const | PartitionsK = Shape::kK / WarpShape::kK | 
| static int const | kWarpSize = warp::WarpSize<arch::OpClassSimt>::value | 
| Number of threads per warp.  More... | |
| static int const | kThreads = WarpCount::kCount * kWarpSize | 
| Number of threads total.  More... | |
| static const int | WarpNumThreadsM = detail::simt_get_warp_threads_m<WarpShape>() | 
| static const int | WarpNumThreadsN = kWarpSize / WarpNumThreadsM | 
| static const int | ThreadTileM = WarpShape::kM / WarpNumThreadsM | 
| static const int | ThreadTileN = WarpShape::kN / WarpNumThreadsN | 
| static const int | LaneLayout = ThreadTileM > 4 && ThreadTileN > 4 ? 2 : 1 | 
| static const int | numElementsA = 128 / sizeof_bits<ElementA>::value | 
| static const int | numElementsB = 128 / sizeof_bits<ElementB>::value | 
| static const int | LaneM = cutlass::const_min(4, ThreadTileM) | 
| static const int | LaneN = cutlass::const_min(4, ThreadTileN) | 
| static int const | kPaddingM = detail::simt_transpose_padding(kWarpSize, Shape::kK, sizeof_bits<ElementA>::value) | 
| static int const | kPaddingN = detail::simt_transpose_padding(kWarpSize, Shape::kK, sizeof_bits<ElementB>::value) | 
A: Row-major; B: Row-major; Operator: SIMT class, for dp4a.
This uses the default warp-level operator given tile sizes.
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementA = int8_t | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementB = int8_t | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::ElementC = ElementC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::InstructionShape = GemmShape<1, 1, 4> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::IteratorThreadMapA = transform::PitchLinear2DThreadTileStripminedThreadMap< layout::PitchLinearShape<Shape::kK, Shape::kM>, kThreads, layout::PitchLinearShape<4, 4> > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::IteratorThreadMapB = transform::PitchLinear2DThreadTileStripminedThreadMap< layout::PitchLinearShape<Shape::kN, Shape::kK>, kThreads, layout::PitchLinearShape<4, 4> > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LaneMmaShape = cutlass::gemm::GemmShape< LaneM, LaneN, 4> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutA = layout::RowMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutB = layout::RowMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::LayoutC = LayoutC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaPolicy = MmaPolicy< MmaWarpSimt, MatrixShape<kPaddingM, 0>, MatrixShape<0, 0>, WarpCount::kK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::MmaWarpSimt = cutlass::gemm::warp::MmaSimt< WarpShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, Policy, PartitionsK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Operator = Operator_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::OperatorClass = arch::OpClassSimt | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Policy = cutlass::gemm::warp::MmaSimtPolicy< cutlass::MatrixShape<WarpNumThreadsM, WarpNumThreadsN>, cutlass::layout::ColumnMajorInterleaved<LaneLayout>, LaneMmaShape > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::Shape = Shape_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::SmemIteratorA = transform::threadblock::RegularTileIterator2dThreadTile< MatrixShape<Shape::kM, Shape::kK>, ElementA, SmemLayoutA, 1, SmemThreadMapA > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::SmemIteratorB = transform::threadblock::RegularTileIterator2dThreadTile< MatrixShape<Shape::kK, Shape::kN>, ElementB, SmemLayoutB, 0, IteratorThreadMapB > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::SmemLayoutA = layout::ColumnMajorInterleaved<4> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::SmemLayoutB = layout::RowMajorInterleaved<4> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::SmemThreadMapA = transform::TransposePitchLinearThreadMap2DThreadTile<IteratorThreadMapA> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpCount = GemmShape< Shape::kM / WarpShape::kM, Shape::kN / WarpShape::kN, PartitionsK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, GemmShape< 1, 1, 4 >, int8_t, layout::RowMajor, int8_t, layout::RowMajor, ElementC_, LayoutC_, arch::OpClassSimt, 2, Operator_ >::WarpShape = WarpShape_ | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
Generated by Doxygen 1.8.11