|  | CUTLASS
    CUDA Templates for Linear Algebra Subroutines and Solvers | 
#include <default_gemm_configuration.h>
| Public Types | |
| using | ThreadblockShape = GemmShape< 128, 256, 64 > | 
| using | WarpShape = GemmShape< 64, 64, 64 > | 
| using | InstructionShape = GemmShape< 8, 8, 16 > | 
| using | EpilogueOutputOp = epilogue::thread::LinearCombinationClamp< ElementC, 128/sizeof_bits< ElementC >::value, int32_t, float > | 
| using | Operator = arch::OpMultiplyAddSaturate | 
| Static Public Attributes | |
| static int const | kAlignmentA = 128 / sizeof_bits<uint8_t>::value | 
| static int const | kAlignmentB = 128 / sizeof_bits<uint8_t>::value | 
| static int const | kStages = 2 | 
| using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::EpilogueOutputOp = epilogue::thread::LinearCombinationClamp< ElementC, 128 / sizeof_bits<ElementC>::value, int32_t, float> | 
| using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::InstructionShape = GemmShape<8, 8, 16> | 
| using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::Operator = arch::OpMultiplyAddSaturate | 
| using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::ThreadblockShape = GemmShape<128, 256, 64> | 
| using cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::WarpShape = GemmShape<64, 64, 64> | 
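| int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::kAlignmentA = 128 / sizeof_bits<uint8_t>::value | static | 
| int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::kAlignmentB = 128 / sizeof_bits<uint8_t>::value | static | 
| int const cutlass::gemm::device::DefaultGemmConfiguration< arch::OpClassTensorOp, arch::Sm75, uint8_t, uint8_t, ElementC, int32_t >::kStages = 2 | static | 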
Generated by Doxygen 1.8.11