35 #if defined(CUTLASS_ARCH_WMMA_ENABLED)    48 #if ((__CUDACC_VER_MAJOR__ > 10) || (__CUDACC_VER_MAJOR__ == 10 && __CUDACC_VER_MINOR__ >= 2))    50 #define CUTLASS_ARCH_MMA_SM75_SUPPORTED 1    52 #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 750))    53 #define CUTLASS_ARCH_MMA_SM75_ENABLED    71   gemm::GemmShape<16, 8, 8>,
   105 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   107   unsigned const *A = 
reinterpret_cast<unsigned const *
>(&a);
   108   unsigned const *B = 
reinterpret_cast<unsigned const *
>(&b);
   109   unsigned const *C = 
reinterpret_cast<unsigned const *
>(&c);
   110   unsigned *D = 
reinterpret_cast<unsigned *
>(&d);
   113     "mma.sync.aligned.m16n8k8.row.col.f16.f16.f16.f16 {%0,%1}, {%2,%3}, {%4}, {%5,%6};\n"   114       : 
"=r"(D[0]), 
"=r"(D[1])
   115       : 
"r"(A[0]), 
"r"(A[1]), 
"r"(B[0]), 
"r"(C[0]), 
"r"(C[1]));
   132   gemm::GemmShape<16, 8, 8>,
   163 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   165   unsigned const *A = 
reinterpret_cast<unsigned const *
>(&a);
   166   unsigned const *B = 
reinterpret_cast<unsigned const *
>(&b);
   167   float const *C = 
reinterpret_cast<float const *
>(&c);
   168   float *D = 
reinterpret_cast<float *
>(&d);
   170   asm volatile(
"mma.sync.aligned.m16n8k8.row.col.f32.f16.f16.f32 {%0,%1,%2,%3}, {%4,%5}, {%6}, {%7,%8,%9,%10};\n"   171       : 
"=f"(D[0]), 
"=f"(D[1]), 
"=f"(D[2]), 
"=f"(D[3])
   173         "r"(A[0]), 
"r"(A[1]), 
   175         "f"(C[0]), 
"f"(C[1]), 
"f"(C[2]), 
"f"(C[3])
   193   gemm::GemmShape<8, 8, 16>,
   228 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   230   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   231   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   233   int const *C = 
reinterpret_cast<int const *
>(&c);
   234   int *D = 
reinterpret_cast<int *
>(&d);
   236   asm volatile(
"mma.sync.aligned.m8n8k16.row.col.s32.s8.s8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   237       : 
"=r"(D[0]), 
"=r"(D[1])
   238       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   249   gemm::GemmShape<8, 8, 16>,
   284 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   286   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   287   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   289   int const *C = 
reinterpret_cast<int const *
>(&c);
   290   int *D = 
reinterpret_cast<int *
>(&d);
   292   asm volatile(
"mma.sync.aligned.m8n8k16.row.col.s32.u8.s8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   293       : 
"=r"(D[0]), 
"=r"(D[1])
   294       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   305   gemm::GemmShape<8, 8, 16>,
   340 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   342   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   343   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   345   int const *C = 
reinterpret_cast<int const *
>(&c);
   346   int *D = 
reinterpret_cast<int *
>(&d);
   348   asm volatile(
"mma.sync.aligned.m8n8k16.row.col.s32.s8.u8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   349       : 
"=r"(D[0]), 
"=r"(D[1])
   350       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   362   gemm::GemmShape<8, 8, 16>,
   397 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   399   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   400   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   402   int const *C = 
reinterpret_cast<int const *
>(&c);
   403   int *D = 
reinterpret_cast<int *
>(&d);
   405   asm volatile(
"mma.sync.aligned.m8n8k16.row.col.s32.u8.u8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   406       : 
"=r"(D[0]), 
"=r"(D[1])
   407       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   424   gemm::GemmShape<8,8,16>,
   432   OpMultiplyAddSaturate> {
   459 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   461   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   462   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   464   int const *C = 
reinterpret_cast<int const *
>(&c);
   465   int *D = 
reinterpret_cast<int *
>(&d);
   467   asm volatile(
"mma.sync.aligned.m8n8k16.row.col.satfinite.s32.s8.s8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   468       : 
"=r"(D[0]), 
"=r"(D[1])
   469       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   480   gemm::GemmShape<8,8,16>,
   488   OpMultiplyAddSaturate> {
   515 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   517   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   518   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   520   int const *C = 
reinterpret_cast<int const *
>(&c);
   521   int *D = 
reinterpret_cast<int *
>(&d);
   523   asm volatile(
"mma.sync.aligned.m8n8k16.row.col.satfinite.s32.u8.s8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   524       : 
"=r"(D[0]), 
"=r"(D[1])
   525       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   536   gemm::GemmShape<8,8,16>,
   544   OpMultiplyAddSaturate> {
   571 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   573   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   574   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   576   int const *C = 
reinterpret_cast<int const *
>(&c);
   577   int *D = 
reinterpret_cast<int *
>(&d);
   579   asm volatile(
"mma.sync.aligned.m8n8k16.row.col.satfinite.s32.s8.u8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   580       : 
"=r"(D[0]), 
"=r"(D[1])
   581       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   592   gemm::GemmShape<8,8,16>,
   600   OpMultiplyAddSaturate> {
   627 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   629   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   630   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   632   int const *C = 
reinterpret_cast<int const *
>(&c);
   633   int *D = 
reinterpret_cast<int *
>(&d);
   635   asm volatile(
"mma.sync.aligned.m8n8k16.row.col.satfinite.s32.u8.u8.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   636       : 
"=r"(D[0]), 
"=r"(D[1])
   637       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   654   gemm::GemmShape<8,8,32>,
   689 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   691   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   692   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   694   int const *C = 
reinterpret_cast<int const *
>(&c);
   695   int *D = 
reinterpret_cast<int *
>(&d);
   697   asm volatile(
"mma.sync.aligned.m8n8k32.row.col.s32.s4.s4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   698       : 
"=r"(D[0]), 
"=r"(D[1])
   699       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   710   gemm::GemmShape<8,8,32>,
   745 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   747   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   748   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   750   int const *C = 
reinterpret_cast<int const *
>(&c);
   751   int *D = 
reinterpret_cast<int *
>(&d);
   753   asm volatile(
"mma.sync.aligned.m8n8k32.row.col.s32.u4.s4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   754       : 
"=r"(D[0]), 
"=r"(D[1])
   755       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   766   gemm::GemmShape<8,8,32>,
   801 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   803   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   804   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   806   int const *C = 
reinterpret_cast<int const *
>(&c);
   807   int *D = 
reinterpret_cast<int *
>(&d);
   809   asm volatile(
"mma.sync.aligned.m8n8k32.row.col.s32.s4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   810       : 
"=r"(D[0]), 
"=r"(D[1])
   811       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   822   gemm::GemmShape<8,8,32>,
   857 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   859   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   860   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   862   int const *C = 
reinterpret_cast<int const *
>(&c);
   863   int *D = 
reinterpret_cast<int *
>(&d);
   865   asm volatile(
"mma.sync.aligned.m8n8k32.row.col.s32.u4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   866       : 
"=r"(D[0]), 
"=r"(D[1])
   867       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   884   gemm::GemmShape<8,8,32>,
   892   OpMultiplyAddSaturate> {
   919 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   921   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   922   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   924   int const *C = 
reinterpret_cast<int const *
>(&c);
   925   int *D = 
reinterpret_cast<int *
>(&d);
   927   asm volatile(
"mma.sync.aligned.m8n8k32.row.col.satfinite.s32.s4.s4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   928       : 
"=r"(D[0]), 
"=r"(D[1])
   929       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   940   gemm::GemmShape<8,8,32>,
   948   OpMultiplyAddSaturate> {
   975 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)   977   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
   978   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
   980   int const *C = 
reinterpret_cast<int const *
>(&c);
   981   int *D = 
reinterpret_cast<int *
>(&d);
   983   asm volatile(
"mma.sync.aligned.m8n8k32.row.col.satfinite.s32.u4.s4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"   984       : 
"=r"(D[0]), 
"=r"(D[1])
   985       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
   996   gemm::GemmShape<8,8,32>,
  1004   OpMultiplyAddSaturate> {
  1031 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)  1033   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
  1034   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
  1036   int const *C = 
reinterpret_cast<int const *
>(&c);
  1037   int *D = 
reinterpret_cast<int *
>(&d);
  1039   asm volatile(
"mma.sync.aligned.m8n8k32.row.col.satfinite.s32.s4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"  1040       : 
"=r"(D[0]), 
"=r"(D[1])
  1041       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
  1052   gemm::GemmShape<8,8,32>,
  1060   OpMultiplyAddSaturate> {
  1087 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)  1089   unsigned const & A = 
reinterpret_cast<unsigned const &
>(a);
  1090   unsigned const & B = 
reinterpret_cast<unsigned const &
>(b);
  1092   int const *C = 
reinterpret_cast<int const *
>(&c);
  1093   int *D = 
reinterpret_cast<int *
>(&d);
  1095   asm volatile(
"mma.sync.aligned.m8n8k32.row.col.satfinite.s32.u4.u4.s32 {%0,%1}, {%2}, {%3}, {%4,%5};\n"  1096       : 
"=r"(D[0]), 
"=r"(D[1])
  1097       : 
"r"(A), 
"r"(B), 
"r"(C[0]), 
"r"(C[1]));
  1114   gemm::GemmShape<8,8,128>,
  1149 #if defined(CUTLASS_ARCH_MMA_SM75_ENABLED)  1151 #if defined(CUTLASS_ARCH_WMMA_ENABLED)  1152   using WmmaFragmentA = nvcuda::wmma::fragment<
  1153           nvcuda::wmma::matrix_a,
  1157           nvcuda::wmma::experimental::precision::b1,
  1158           nvcuda::wmma::row_major>;
  1160   using WmmaFragmentB = nvcuda::wmma::fragment<
  1161           nvcuda::wmma::matrix_b,
  1165           nvcuda::wmma::experimental::precision::b1,
  1166           nvcuda::wmma::col_major>;
  1168   using WmmaFragmentC = nvcuda::wmma::fragment<
  1169           nvcuda::wmma::accumulator,
  1175   WmmaFragmentA 
const & A = 
reinterpret_cast<WmmaFragmentA 
const &
>(a);
  1176   WmmaFragmentB 
const & B = 
reinterpret_cast<WmmaFragmentB 
const &
>(b);
  1178   WmmaFragmentC 
const & C = 
reinterpret_cast<WmmaFragmentC 
const &
>(c);
  1179   WmmaFragmentC & D = 
reinterpret_cast<WmmaFragmentC &
>(d);
  1181   nvcuda::wmma::bmma_sync(D, A, B, C, nvcuda::wmma::experimental::bmmaBitOpXOR, 
  1182                                           nvcuda::wmma::experimental::bmmaAccumulateOpPOPC);
  1187 #endif // defined(CUTLASS_ARCH_WMMA_ENABLED) cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:794
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:217
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementA uint8_t ElementA
Definition: mma_sm75.h:492
integer_subbyte< 4, false > uint4b_t
4-bit Unsigned integer type 
Definition: integer_subbyte.h:158
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:734
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< uint4b_t, 8 > FragmentB
Definition: mma_sm75.h:1070
Statically sized array of elements that accommodates all CUTLASS-supported numeric types and is safe ...
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< int8_t, 4 > FragmentB
Definition: mma_sm75.h:211
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< uint8_t, 4 > FragmentB
Definition: mma_sm75.h:610
Definition: aligned_buffer.h:35
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:269
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:846
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:616
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:500
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:277
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:329
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:676
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< uint8_t, 4 > FragmentB
Definition: mma_sm75.h:323
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< uint8_t, 4 > FragmentB
Definition: mma_sm75.h:554
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:382
integer_subbyte< 1, false > uint1b_t
1-bit Unsigned integer type 
Definition: integer_subbyte.h:152
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:1016
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:446
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementB uint8_t ElementB
Definition: mma_sm75.h:378
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementA int8_t ElementA
Definition: mma_sm75.h:205
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementA int8_t ElementA
Definition: mma_sm75.h:548
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< int8_t, 4 > FragmentB
Definition: mma_sm75.h:267
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:968
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:1024
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< int8_t, 4 > FragmentA
Definition: mma_sm75.h:207
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< half_t, 2 > FragmentB
Definition: mma_sm75.h:150
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< int8_t, 4 > FragmentA
Definition: mma_sm75.h:319
4-bit signed integer type 
Definition: integer_subbyte.h:42
IEEE half-precision floating-point type. 
Definition: half.h:126
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:730
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:444
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:564
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:904
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes the 1-bit matrix operation: XOR of the operands followed by population-count accumulation. 
Definition: mma_sm75.h:1142
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< int4b_t, 8 > FragmentA
Definition: mma_sm75.h:780
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::ElementC int ElementC
Definition: mma_sm75.h:1134
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:842
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:964
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< uint8_t, 4 > FragmentB
Definition: mma_sm75.h:380
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:560
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementB int8_t ElementB
Definition: mma_sm75.h:496
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:912
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementB uint8_t ElementB
Definition: mma_sm75.h:608
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:1018
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< int4b_t, 8 > FragmentB
Definition: mma_sm75.h:958
Mapping function for column-major matrices. 
Definition: layout/matrix.h:142
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:612
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementB int8_t ElementB
Definition: mma_sm75.h:209
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:333
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementA uint8_t ElementA
Definition: mma_sm75.h:261
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< int8_t, 4 > FragmentB
Definition: mma_sm75.h:498
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::ElementC float ElementC
Definition: mma_sm75.h:152
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementB int8_t ElementB
Definition: mma_sm75.h:440
Templates exposing architecture support for multiply-add operations. 
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< int8_t, 4 > FragmentA
Definition: mma_sm75.h:550
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementB uint8_t ElementB
Definition: mma_sm75.h:321
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementA uint8_t ElementA
Definition: mma_sm75.h:374
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:558
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< uint4b_t, 8 > FragmentB
Definition: mma_sm75.h:1014
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< int4b_t, 8 > FragmentA
Definition: mma_sm75.h:1010
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:156
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:95
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< uint4b_t, 8 > FragmentB
Definition: mma_sm75.h:840
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:508
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:556
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:674
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:273
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:327
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:384
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::Operator OpXorPopc Operator
Definition: mma_sm75.h:1138
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< float, 4 > FragmentC
Definition: mma_sm75.h:154
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< uint8_t, 4 > FragmentA
Definition: mma_sm75.h:606
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:682
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementA int8_t ElementA
Definition: mma_sm75.h:436
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:732
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< half_t, 4 > FragmentC
Definition: mma_sm75.h:93
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:215
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:221
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementB uint8_t ElementB
Definition: mma_sm75.h:552
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< int8_t, 4 > FragmentA
Definition: mma_sm75.h:438
#define CUTLASS_HOST_DEVICE
Definition: cutlass.h:89
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:1020
Top-level include for all CUTLASS numeric types. 
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:1076
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< half_t, 4 > FragmentA
Definition: mma_sm75.h:146
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:1080
Shape of a matrix multiply-add operation. 
Definition: include/cutlass/gemm/gemm.h:57
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< int4b_t, 8 > FragmentB
Definition: mma_sm75.h:728
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementB int8_t ElementB
Definition: mma_sm75.h:265
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:502
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementA int8_t ElementA
Definition: mma_sm75.h:317
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< uint4b_t, 8 > FragmentA
Definition: mma_sm75.h:954
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:1136
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< int4b_t, 8 > FragmentA
Definition: mma_sm75.h:668
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:1072
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:908
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< uint4b_t, 8 > FragmentA
Definition: mma_sm75.h:724
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::FragmentA Array< uint1b_t, 32 > FragmentA
Definition: mma_sm75.h:1128
Mapping function for row-major matrices. 
Definition: layout/matrix.h:50
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< uint4b_t, 8 > FragmentA
Definition: mma_sm75.h:1066
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:962
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< uint8_t, 4 > FragmentA
Definition: mma_sm75.h:376
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< uint4b_t, 8 > FragmentA
Definition: mma_sm75.h:836
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:844
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:906
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:678
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:620
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:738
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, float, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:160
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementA uint8_t ElementA
Definition: mma_sm75.h:604
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:325
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:850
Defines layout functions used by TensorRef and derived classes. 
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:452
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:448
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< int8_t, 4 > FragmentB
Definition: mma_sm75.h:442
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:390
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< uint8_t, 4 > FragmentA
Definition: mma_sm75.h:263
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::operator() CUTLASS_HOST_DEVICE void operator()(FragmentC &d, FragmentA const &a, FragmentB const &b, FragmentC const &c) const 
Computes multiply-add. 
Definition: mma_sm75.h:98
Matrix multiply-add operation. 
Definition: arch/mma.h:92
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< int4b_t, 8 > FragmentA
Definition: mma_sm75.h:898
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentB Array< int4b_t, 8 > FragmentB
Definition: mma_sm75.h:902
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentA Array< uint8_t, 4 > FragmentA
Definition: mma_sm75.h:494
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:386
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 128 >, 32, uint1b_t, layout::RowMajor, uint1b_t, layout::ColumnMajor, int, layout::RowMajor, OpXorPopc >::FragmentB Array< uint1b_t, 32 > FragmentB
Definition: mma_sm75.h:1132
Templates exposing architecture support for warp matrix multiply-add (WMMA) operations. 
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< half_t, 2 > FragmentB
Definition: mma_sm75.h:89
cutlass::arch::Mma< gemm::GemmShape< 16, 8, 8 >, 32, half_t, layout::RowMajor, half_t, layout::ColumnMajor, half_t, layout::RowMajor, OpMultiplyAdd >::FragmentA Array< half_t, 4 > FragmentA
Definition: mma_sm75.h:85
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:1074
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, uint8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:614
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< uint4b_t, 8 > FragmentB
Definition: mma_sm75.h:784
integer_subbyte< 4, true > int4b_t
4-bit Integer type 
Definition: integer_subbyte.h:155
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::Operator OpMultiplyAddSaturate Operator
Definition: mma_sm75.h:504
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentB Array< int4b_t, 8 > FragmentB
Definition: mma_sm75.h:672
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, uint8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:271
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:786
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::FragmentC Array< int, 2 > FragmentC
Definition: mma_sm75.h:788
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, int4b_t, layout::RowMajor, uint4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::Operator OpMultiplyAdd Operator
Definition: mma_sm75.h:790
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 16 >, 32, int8_t, layout::RowMajor, int8_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAdd >::ElementC int ElementC
Definition: mma_sm75.h:213
cutlass::arch::Mma< gemm::GemmShape< 8, 8, 32 >, 32, uint4b_t, layout::RowMajor, int4b_t, layout::ColumnMajor, int, layout::RowMajor, OpMultiplyAddSaturate >::ElementC int ElementC
Definition: mma_sm75.h:960