Skip to content

Commit ec0821c

Browse files
authored
Enable multiple of 16 padding for INT8 Tensor Cores (#1982)
1 parent a1ce077 commit ec0821c

File tree

2 files changed

+3
-3
lines changed

2 files changed

+3
-3
lines changed

src/cuda/utils.cc

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -182,9 +182,6 @@ namespace ctranslate2 {
182182

183183
bool gpu_supports_int8(int device) {
184184
const cudaDeviceProp& device_prop = get_device_properties(device);
185-
// Disable INT8 for sm120: https://github.com/OpenNMT/CTranslate2/issues/1865
186-
if (device_prop.major == 12 && device_prop.minor == 0)
187-
return false;
188185
return device_prop.major > 6 || (device_prop.major == 6 && device_prop.minor == 1);
189186
}
190187

src/types.cc

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -350,6 +350,9 @@ namespace ctranslate2 {
350350
if ((compute_type == ComputeType::FLOAT16 || compute_type == ComputeType::BFLOAT16)
351351
&& cuda::gpu_has_fp16_tensor_cores(device_index))
352352
return 8;
353+
else if ((compute_type == ComputeType::INT8_FLOAT16 || compute_type == ComputeType::INT8_BFLOAT16 || compute_type == ComputeType::INT8_FLOAT32)
354+
&& cuda::gpu_has_int8_tensor_cores(device_index))
355+
return 16;
353356
}
354357
#else
355358
(void)compute_type;

0 commit comments

Comments
 (0)