Enable multiple of 16 padding for INT8 Tensor Cores (#1982)

Purfview · web-flow · commit ec0821c33af0 · 2026-01-21T21:16:02.000+01:00
diff --git a/src/cuda/utils.cc b/src/cuda/utils.cc
@@ -182,9 +182,6 @@ namespace ctranslate2 {
 
     bool gpu_supports_int8(int device) {
       const cudaDeviceProp& device_prop = get_device_properties(device);
-      // Disable INT8 for sm120: https://github.com/OpenNMT/CTranslate2/issues/1865
-      if (device_prop.major == 12 && device_prop.minor == 0)
-        return false;
       return device_prop.major > 6 || (device_prop.major == 6 && device_prop.minor == 1);
     }
 
diff --git a/src/types.cc b/src/types.cc
@@ -350,6 +350,9 @@ namespace ctranslate2 {
       if ((compute_type == ComputeType::FLOAT16 || compute_type == ComputeType::BFLOAT16)
           && cuda::gpu_has_fp16_tensor_cores(device_index))
         return 8;
+      else if ((compute_type == ComputeType::INT8_FLOAT16 || compute_type == ComputeType::INT8_BFLOAT16 || compute_type == ComputeType::INT8_FLOAT32)
+          && cuda::gpu_has_int8_tensor_cores(device_index))
+        return 16;
     }
 #else
     (void)compute_type;

Original file line number	Diff line number	Diff line change
`@@ -182,9 +182,6 @@ namespace ctranslate2 {`
`182`	`182`
`183`	`183`	`bool gpu_supports_int8(int device) {`
`184`	`184`	`const cudaDeviceProp& device_prop = get_device_properties(device);`
`185`		`- // Disable INT8 for sm120: https://github.com/OpenNMT/CTranslate2/issues/1865`
`186`		`- if (device_prop.major == 12 && device_prop.minor == 0)`
`187`		`- return false;`
`188`	`185`	`return device_prop.major > 6 || (device_prop.major == 6 && device_prop.minor == 1);`
`189`	`186`	`}`
`190`	`187`