#include <cstdint>
#include <cuda_runtime.h>

-// CHECK: void atomicAddKernel(int* lock, int val) {
-// CHECK-NEXT: *lock += val;
+// CHECK: void atomicAddKernel(int* lock, int val, const sycl::nd_item<3> &item_ct1) {
+// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::plus<>());
// CHECK-NEXT:}
__global__ void atomicAddKernel(int* lock, int val) {
  asm volatile("red.relaxed.gpu.global.add.s32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}

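For reference, the new CHECK lines above correspond to a migrated kernel of roughly the shape sketched below. Only the function body and the item_ct1 parameter come from the expected output; the queue setup, nd_range launch, and work-group size are illustrative assumptions, not part of this test.

#include <sycl/sycl.hpp>

// Shape implied by the CHECK lines: the PTX red.add reduction becomes a
// group reduction of val whose result is stored to *lock.
void atomicAddKernel(int *lock, int val, const sycl::nd_item<3> &item_ct1) {
  *lock = sycl::reduce_over_group(item_ct1.get_group(), val, sycl::plus<>());
}

int main() {
  // Hypothetical host-side launch, for illustration only.
  sycl::queue q;
  int *lock = sycl::malloc_shared<int>(1, q);
  *lock = 0;
  q.parallel_for(sycl::nd_range<3>(sycl::range<3>(1, 1, 32),
                                   sycl::range<3>(1, 1, 32)),
                 [=](sycl::nd_item<3> item_ct1) {
                   atomicAddKernel(lock, 1, item_ct1);
                 })
      .wait();
  // All 32 work-items compute the same group sum, so *lock ends up as 32.
  sycl::free(lock, q);
  return 0;
}
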
-// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val) {
-// CHECK-NEXT: *lock |= val;
+// CHECK: void atomicOrKernel(uint32_t* lock, uint32_t val,
+// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
+// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_or<>());
// CHECK-NEXT:}
__global__ void atomicOrKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.or.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}

-// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val) {
-// CHECK-NEXT: *lock ^= val;
+// CHECK: void atomicXorKernel(uint32_t* lock, uint32_t val,
+// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
+// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_xor<>());
// CHECK-NEXT:}
__global__ void atomicXorKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.xor.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}

-// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val) {
-// CHECK-NEXT: *lock &= val;
-// CHECK-NEXT:}
+// CHECK: void atomicAndKernel(uint32_t* lock, uint32_t val,
+// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
+// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::bit_and<>());
+// CHECK-NEXT: }
__global__ void atomicAndKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.and.b32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}

-// CHECK: void atomicDecKernel(uint32_t* lock, uint32_t val) {
-// CHECK-NEXT: *lock = (*lock == 0 || *lock > val) ? val : *lock - 1;
-// CHECK-NEXT: }
-__global__ void atomicDecKernel(uint32_t* lock, uint32_t val) {
-  asm volatile("red.relaxed.gpu.global.dec.u32 [%0], %1;\n"
-               ::"l"(lock),"r"(val):"memory");
-}
-
-// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val) {
-// CHECK-NEXT: *lock = sycl::max(*lock, val);
+// CHECK: void atomicMaxKernel(uint32_t* lock, uint32_t val,
+// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
+// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::maximum<>());
// CHECK-NEXT: }
__global__ void atomicMaxKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.max.u32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}

-// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val) {
-// CHECK-NEXT: *lock = sycl::min(*lock, val);
+// CHECK: void atomicMinKernel(uint32_t* lock, uint32_t val,
+// CHECK-NEXT: const sycl::nd_item<3> &item_ct1) {
+// CHECK-NEXT: *lock = sycl::reduce_over_group(item_ct1.get_group(), val,sycl::minimum<>());
// CHECK-NEXT: }
__global__ void atomicMinKernel(uint32_t* lock, uint32_t val) {
  asm volatile("red.relaxed.gpu.global.min.u32 [%0], %1;\n"
               ::"l"(lock),"r"(val):"memory");
}
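For completeness, a hypothetical CUDA host driver for one of the original kernels is sketched below; it is not part of the test file and only makes the PTX red.* reduction semantics concrete. The main function, grid shape, and printed value are assumptions.

#include <cstdint>
#include <cstdio>
#include <cuda_runtime.h>

__global__ void atomicMaxKernel(uint32_t *lock, uint32_t val) {
  // Each thread atomically folds val into *lock with an unsigned max;
  // red.* performs the reduction without returning the old value.
  asm volatile("red.relaxed.gpu.global.max.u32 [%0], %1;\n"
               ::"l"(lock), "r"(val) : "memory");
}

int main() {
  uint32_t *lock = nullptr;
  cudaMallocManaged(&lock, sizeof(uint32_t));
  *lock = 0;
  atomicMaxKernel<<<1, 32>>>(lock, 7u);
  cudaDeviceSynchronize();
  printf("%u\n", *lock); // expected to print 7: max(0, 7) across all threads
  cudaFree(lock);
  return 0;
}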