Skip to content

Commit c1988a8

Browse files
committed
Refine migration of red.relaxed.gpu.global
Signed-off-by: chenwei.sun <[email protected]>
1 parent 7f5af18 commit c1988a8

File tree

2 files changed

+34
-54
lines changed

2 files changed

+34
-54
lines changed

clang/lib/DPCT/RulesAsm/AsmMigration.cpp

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2730,7 +2730,8 @@ class SYCLGen : public SYCLGenBase {
27302730
const auto *Type = dyn_cast<InlineAsmBuiltinType>(Inst->getType(0));
27312731
if (!Type || (Type->getKind() != InlineAsmBuiltinType::s32 &&
27322732
Type->getKind() != InlineAsmBuiltinType::b32 &&
2733-
Type->getKind() != InlineAsmBuiltinType::u32))
2733+
Type->getKind() != InlineAsmBuiltinType::u32 &&
2734+
Type->getKind() != InlineAsmBuiltinType::f32))
27342735
return SYCLGenError();
27352736

27362737
if (emitStmt(Dst))
@@ -2743,45 +2744,27 @@ class SYCLGen : public SYCLGenBase {
27432744
if (tryEmitStmt(b, Src))
27442745
return SYCLGenError();
27452746

2747+
OS() << " = ";
2748+
OS() << MapNames::getClNamespace() + "reduce_over_group(";
2749+
OS() << DpctGlobalInfo::getItem(GAS) << ".get_group(), " << b << ",";
2750+
27462751
if (Inst->hasAttr(InstAttr::add))
2747-
OS() << " += ";
2752+
OS() << MapNames::getClNamespace() + "plus<>()";
27482753
else if (Inst->hasAttr(InstAttr::op_or))
2749-
OS() << " |= ";
2754+
OS() << MapNames::getClNamespace() + "bit_or<>()";
27502755
else if (Inst->hasAttr(InstAttr::op_xor))
2751-
OS() << " ^= ";
2756+
OS() << MapNames::getClNamespace() + "bit_xor<>()";
27522757
else if (Inst->hasAttr(InstAttr::op_and))
2753-
OS() << " &= ";
2754-
else if (Inst->hasAttr(InstAttr::dec)) {
2755-
OS() << " = ";
2756-
OS() << '(';
2757-
OS() << a << " == 0 || " << a << " > " << b << ") ? " << b << " : " << a
2758-
<< " - 1";
2759-
endstmt();
2760-
return SYCLGenSuccess();
2761-
2762-
} else if (Inst->hasAttr(InstAttr::inc)) {
2763-
OS() << " = ";
2764-
OS() << '(';
2765-
OS() << a << " >= " << b << ") ? 0 : " << a << " + 1";
2766-
endstmt();
2767-
return SYCLGenSuccess();
2768-
} else if (Inst->hasAttr(InstAttr::max)) {
2769-
OS() << " = " << MapNames::getClNamespace() + "max(" << a << ", " << b
2770-
<< ")";
2771-
endstmt();
2772-
return SYCLGenSuccess();
2773-
} else if (Inst->hasAttr(InstAttr::min)) {
2774-
OS() << " = " << MapNames::getClNamespace() + "min(" << a << ", " << b
2775-
<< ")";
2776-
endstmt();
2777-
return SYCLGenSuccess();
2778-
} else
2758+
OS() << MapNames::getClNamespace() + "bit_and<>()";
2759+
else if (Inst->hasAttr(InstAttr::min))
2760+
OS() << MapNames::getClNamespace() + "minimum<>()";
2761+
else if (Inst->hasAttr(InstAttr::max))
2762+
OS() << MapNames::getClNamespace() + "maximum<>()";
2763+
else
27792764
return SYCLGenError();
27802765

2781-
if (emitStmt(Src))
2782-
return SYCLGenError();
2766+
OS() << ")";
27832767
endstmt();
2784-
27852768
return SYCLGenSuccess();
27862769
}
27872770
};

clang/test/dpct/asm/red.cu

Lines changed: 18 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -8,56 +8,53 @@
88
#include <cstdint>
99
#include <cuda_runtime.h>
1010

11-
// CHECK: void atomicAddKernel(int* lock, int val) {
12-
// CHECK-NEXT: *lock += val;
11+
// CHECK: void atomicAddKernel(int* lock, int val, const sycl::nd_item<3> &item_ct1) {
12+
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::plus<>());
1313
// CHECK-NEXT:}
1414
__global__ void atomicAddKernel(int* lock, int val) {
1515
asm volatile("red.relaxed.gpu.global.add.s32 [%0], %1;\n"
1616
::"l"(lock),"r"(val):"memory");
1717
}
1818

19-
// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val) {
20-
// CHECK-NEXT: *lock |= val;
19+
// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val,
20+
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
21+
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_or<>());
2122
// CHECK-NEXT:}
2223
__global__ void atomicOrKernel(uint32_t* lock, uint32_t val) {
2324
asm volatile("red.relaxed.gpu.global.or.b32 [%0], %1;\n"
2425
::"l"(lock),"r"(val):"memory");
2526
}
2627

27-
// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val) {
28-
// CHECK-NEXT: *lock ^= val;
28+
// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val,
29+
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
30+
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_xor<>());
2931
// CHECK-NEXT:}
3032
__global__ void atomicXorKernel(uint32_t* lock, uint32_t val) {
3133
asm volatile("red.relaxed.gpu.global.xor.b32 [%0], %1;\n"
3234
::"l"(lock),"r"(val):"memory");
3335
}
3436

35-
// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val) {
36-
// CHECK-NEXT: *lock &= val;
37-
// CHECK-NEXT:}
37+
// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val,
38+
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
39+
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_and<>());
40+
// CHECK-NEXT: }
3841
__global__ void atomicAndKernel(uint32_t* lock, uint32_t val) {
3942
asm volatile("red.relaxed.gpu.global.and.b32 [%0], %1;\n"
4043
::"l"(lock),"r"(val):"memory");
4144
}
4245

43-
// CHECK: void atomicDecKernel(uint32_t* lock, uint32_t val) {
44-
// CHECK-NEXT: *lock = (*lock == 0 || *lock > val) ? val : *lock - 1;
45-
// CHECK-NEXT: }
46-
__global__ void atomicDecKernel(uint32_t* lock, uint32_t val) {
47-
asm volatile("red.relaxed.gpu.global.dec.u32 [%0], %1;\n"
48-
::"l"(lock),"r"(val):"memory");
49-
}
50-
51-
// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val) {
52-
// CHECK-NEXT: *lock = sycl::max(*lock, val);
46+
// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val,
47+
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
48+
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::maximum<>());
5349
// CHECK-NEXT: }
5450
__global__ void atomicMaxKernel(uint32_t* lock, uint32_t val) {
5551
asm volatile("red.relaxed.gpu.global.max.u32 [%0], %1;\n"
5652
::"l"(lock),"r"(val):"memory");
5753
}
5854

59-
// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val) {
60-
// CHECK-NEXT: *lock = sycl::min(*lock, val);
55+
// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val,
56+
// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
57+
// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::minimum<>());
6158
// CHECK-NEXT: }
6259
__global__ void atomicMinKernel(uint32_t* lock, uint32_t val) {
6360
asm volatile("red.relaxed.gpu.global.min.u32 [%0], %1;\n"

0 commit comments

Comments
 (0)