|  | CUTLASS
    CUDA Templates for Linear Algebra Subroutines and Solvers | 
#include <default_mma_core_sm75.h>
| Static Public Attributes | |
| static int const | kWarpSize = warp::WarpSize<arch::OpClassTensorOp>::value | 
| Number of threads per warp.  More... | |
| static int const | kThreads = WarpCount::kCount * kWarpSize | 
| Number of threads total.  More... | |
| static int const | kAccessSizeInBits = 128 | 
| Size of a threadblock-scoped access.  More... | |
| static int const | kWarpThreadArrangementContiguousA | 
| static int const | kWarpThreadArrangementStridedA | 
| static int const | kWarpThreadArrangementContiguousB | 
| static int const | kWarpThreadArrangementStridedB | 
Partial specialization:
A: row-major B: column-major Operator: tensor op class
This uses the default warp-level operator given tile sizes
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementA = ElementA_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementB = ElementB_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::ElementC = ElementC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::InstructionShape = InstructionShape_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::IteratorThreadMapA = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape<Shape::kK, Shape::kM>, kThreads, layout::PitchLinearShape<kWarpThreadArrangementContiguousA, kWarpThreadArrangementStridedA>, kAccessSizeInBits / sizeof_bits<ElementA>::value> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::IteratorThreadMapB = transform::PitchLinearWarpRakedThreadMap< layout::PitchLinearShape<Shape::kK, Shape::kN>, kThreads, layout::PitchLinearShape<kWarpThreadArrangementContiguousB, kWarpThreadArrangementStridedB>, kAccessSizeInBits / sizeof_bits<ElementB>::value> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutA = layout::RowMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutB = layout::ColumnMajor | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::LayoutC = LayoutC_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaPolicy = MmaPolicy< MmaTensorOp, MatrixShape<0, 0>, MatrixShape<0, 0>, WarpCount::kK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::MmaTensorOp = typename cutlass::gemm::warp::DefaultMmaTensorOp< WarpShape, InstructionShape, ElementA, SmemLayoutA, ElementB, SmemLayoutB, ElementC, LayoutC, Operator, WarpCount::kK>::Type | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Operator = Operator_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::OperatorClass = arch::OpClassTensorOp | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::Shape = Shape_ | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::SmemIteratorA = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kM, Shape::kK>, ElementA, SmemLayoutA, 0, IteratorThreadMapA > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::SmemIteratorB = transform::threadblock::RegularTileIterator< MatrixShape<Shape::kK, Shape::kN>, ElementB, SmemLayoutB, 1, IteratorThreadMapB > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::SmemLayoutA = layout::RowMajorTensorOpMultiplicandCrosswise< sizeof_bits<ElementA>::value, Shape::kK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::SmemLayoutB = layout::ColumnMajorTensorOpMultiplicandCrosswise< sizeof_bits<ElementB>::value, Shape::kK> | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpCount = GemmShape< Shape::kM / WarpShape::kM, Shape::kN / WarpShape::kN, Shape::kK / WarpShape::kK > | 
| using cutlass::gemm::threadblock::DefaultMmaCore< Shape_, WarpShape_, InstructionShape_, ElementA_, layout::RowMajor, ElementB_, layout::ColumnMajor, ElementC_, LayoutC_, arch::OpClassTensorOp, 2, Operator_ >::WarpShape = WarpShape_ | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
| 
 | static | 
 1.8.11
 1.8.11