Skip to content

Commit 2bc695e

Browse files
committed
compile guard
Signed-off-by: Zhongbo Zhu <zhongboz@nvidia.com>
1 parent fc42825 commit 2bc695e

File tree

1 file changed

+18
-0
lines changed

1 file changed

+18
-0
lines changed

transformer_engine/common/hadamard_transform/row_cast_col_hadamard_transform_cast_fusion.cu

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -179,6 +179,24 @@ __global__ static void row_col_rht_gemm_device(
179 179
float const* c_global_amax,
180 180
const size_t* rng_state) {
181 181
using namespace cute;
182+
183+
// Abort immediately if compilation is not supported
184+
constexpr bool is_blackwell_arch = ARCH_BLACKWELL_FAMILY;
185+
if constexpr (!is_blackwell_arch) {
186+
NVTE_DEVICE_ERROR(
187+
"row_col_rht_gemm_device is only supported on Blackwell "
188+
"with architecture-specific compilation. "
189+
"Try recompiling with sm_100a or similar.");
190+
return;
191+
}
192+
static_assert(kEnableRHTColQuant_ || kEnableRowQuant_,
193+
"row_col_rht_gemm_device must generate row-wise "
194+
"and/or column-wise output.");
195+
#if !defined(CUTLASS_ARCH_CLC_ENABLED)
196+
CUTLASS_NOT_IMPLEMENTED();
197+
return;
198+
#endif
199+
182 200
using X = Underscore;
183 201
// static constexpr bool kApplyStochasticRounding = true;
184 202
using ElementAccumulator = float;

0 commit comments

Comments
 (0)