Skip to content

Commit 8bb39dd

Browse files
Added support for prefetch ASM migration
1 parent c0852a0 commit 8bb39dd

File tree

6 files changed

+153
-8
lines changed

6 files changed

+153
-8
lines changed

clang/lib/DPCT/RulesAsm/AsmMigration.cpp

Lines changed: 42 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -536,6 +536,7 @@ bool SYCLGenBase::emitBuiltinType(const InlineAsmBuiltinType *T) {
536536
case InlineAsmBuiltinType::u16x2: OS() << MapNames::getClNamespace() + "ushort2"; break;
537537
case InlineAsmBuiltinType::bf16: OS() << MapNames::getClNamespace() + "ext::oneapi::bfloat16"; break;
538538
case InlineAsmBuiltinType::f16x2: OS() << MapNames::getClNamespace() + "half2"; break;
539+
case InlineAsmBuiltinType::voidType: OS() << "void"; break;
539540
case InlineAsmBuiltinType::e4m3:
540541
case InlineAsmBuiltinType::e5m2:
541542
case InlineAsmBuiltinType::tf32:
@@ -588,8 +589,9 @@ bool SYCLGenBase::emitVariableDeclaration(const InlineAsmVarDecl *D) {
588589
}
589590

590591
bool SYCLGenBase::emitAddressExpr(const InlineAsmAddressExpr *Dst) {
591-
// Address expression only support ld/st instructions.
592-
if (!CurrInst || !CurrInst->is(asmtok::op_st, asmtok::op_ld, asmtok::op_atom))
592+
// Address expression only support ld/st & atom instructions.
593+
if (!CurrInst || !CurrInst->is(asmtok::op_st, asmtok::op_ld, asmtok::op_atom,
594+
asmtok::op_prefetch))
593595
return SYCLGenError();
594596
std::string Type;
595597
if (tryEmitType(Type, CurrInst->getType(0)))
@@ -617,7 +619,7 @@ bool SYCLGenBase::emitAddressExpr(const InlineAsmAddressExpr *Dst) {
617619
std::string Reg;
618620
if (tryEmitStmt(Reg, Dst->getSymbol()))
619621
return SYCLGenSuccess();
620-
if (CanSuppressCast(Dst->getSymbol()))
622+
if (CurrInst->is(asmtok::op_prefetch) || CanSuppressCast(Dst->getSymbol()))
621623
OS() << llvm::formatv("{0}", Reg);
622624
else
623625
OS() << llvm::formatv("(({0} *)(uintptr_t){1})", Type, Reg);
@@ -1281,6 +1283,43 @@ class SYCLGen : public SYCLGenBase {
12811283
return SYCLGenSuccess();
12821284
}
12831285

1286+
bool handle_prefetch(const InlineAsmInstruction *Inst) override {
1287+
if (Inst->getNumInputOperands() != 1)
1288+
return SYCLGenError();
1289+
1290+
AsmStateSpace SS = Inst->getStateSpace();
1291+
if (SS != AsmStateSpace::S_global && SS != AsmStateSpace::none)
1292+
return SYCLGenError();
1293+
1294+
if (!(Inst->hasAttr(InstAttr::L1) || Inst->hasAttr(InstAttr::L2)))
1295+
return SYCLGenError();
1296+
1297+
std::string PrefetchHint;
1298+
if (Inst->hasAttr(InstAttr::L1))
1299+
PrefetchHint = "L1";
1300+
else if (Inst->hasAttr(InstAttr::L2))
1301+
PrefetchHint = "L2";
1302+
else
1303+
return SYCLGenError();
1304+
1305+
llvm::SaveAndRestore<const InlineAsmInstruction *> Store(CurrInst);
1306+
CurrInst = Inst;
1307+
const auto *Src =
1308+
dyn_cast_or_null<InlineAsmAddressExpr>(Inst->getInputOperand(0));
1309+
if (!Src)
1310+
return false;
1311+
1312+
OS() << MapNames::getExpNamespace() << "prefetch(";
1313+
if (emitStmt(Src))
1314+
return SYCLGenError();
1315+
OS() << ", ";
1316+
OS() << MapNames::getExpNamespace() << "properties{";
1317+
OS() << MapNames::getExpNamespace() << "prefetch_hint_" << PrefetchHint;
1318+
OS() << "})";
1319+
endstmt();
1320+
return SYCLGenSuccess();
1321+
}
1322+
12841323
StringRef GetWiderTypeAsString(const InlineAsmBuiltinType *Type) const {
12851324
switch (Type->getKind()) {
12861325
case InlineAsmBuiltinType::s16:

clang/lib/DPCT/RulesAsm/Parser/AsmNodes.h

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class InlineAsmBuiltinType : public InlineAsmType {
8383
enum TypeKind {
8484
#define BUILTIN_TYPE(X, Y) X,
8585
#include "AsmTokenKinds.def"
86+
voidType,
8687
NUM_TYPES
8788
};
8889

@@ -222,9 +223,7 @@ class InlineAsmVarDecl : public InlineAsmNamedDecl {
222223
: InlineAsmNamedDecl(VariableDeclClass, Name), StateSpace(SS),
223224
Type(Type) {}
224225

225-
AsmStateSpace getStorageClass() const {
226-
return StateSpace;
227-
}
226+
AsmStateSpace getStorageClass() const { return StateSpace; }
228227

229228
void setInlineAsmOp(const Expr *Val) { InlineAsmOp = Val; }
230229
const Expr *getInlineAsmOp() const { return InlineAsmOp; }
@@ -389,6 +388,14 @@ class InlineAsmInstruction : public InlineAsmStmt {
389388
static bool classof(const InlineAsmStmt *S) {
390389
return InstructionClass <= S->getStmtClass();
391390
}
391+
AsmStateSpace getStateSpace() const {
392+
AsmStateSpace SS = AsmStateSpace::none;
393+
394+
if (StateSpace.has_value()) {
395+
return StateSpace.value();
396+
}
397+
return SS;
398+
}
392399
};
393400

394401
/// This represents a device conditional instruction, e.g. instruction @%p

clang/lib/DPCT/RulesAsm/Parser/AsmParser.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -377,6 +377,12 @@ InlineAsmStmtResult InlineAsmParser::ParseInstruction() {
377377
Ops.push_back(Out.get());
378378
Out = nullptr;
379379
}
380+
// prefetch{.state}.{level} [%0] has only one input operand and no type.
381+
if (Opcode->getTokenID() == asmtok::op_prefetch) {
382+
Ops.push_back(Out.get());
383+
Out = nullptr;
384+
Types.push_back(Context.getBuiltinType(InlineAsmBuiltinType::voidType));
385+
}
380386

381387
return ::new (Context) InlineAsmInstruction(Opcode, StateSpace, Attrs, Types,
382388
Out.get(), Pred.get(), Ops);

clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
#endif
1313

1414
#ifndef PUNCTUATOR
15-
#define PUNCTUATOR(X,Y) TOK(X)
15+
#define PUNCTUATOR(X, Y) TOK(X)
1616
#endif
1717

1818
#ifndef KEYWORD
@@ -404,6 +404,8 @@ MODIFIER(idx, ".idx")
404404
MODIFIER(bfly, ".bfly")
405405
MODIFIER(sc, ".sc")
406406
MODIFIER(gl, ".gl")
407+
MODIFIER(L1, ".L1")
408+
MODIFIER(L2, ".L2")
407409

408410
#undef LINKAGE
409411
#undef TARGET

clang/lib/DPCT/SrcAPI/APINames_ASM.inc

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ ENTRY("not", "not", true, NO_FLAG, P1, "Successful")
100100
ENTRY("or", "or", true, NO_FLAG, P1, "Successful")
101101
ENTRY("pmevent", "pmevent", false, NO_FLAG, P1, "Comment")
102102
ENTRY("popc", "popc", true, NO_FLAG, P1, "Successful")
103-
ENTRY("prefetch", "prefetch", false, NO_FLAG, P1, "Comment")
103+
ENTRY("prefetch", "prefetch", true, NO_FLAG, P1, "Partial")
104104
ENTRY("prefetchu", "prefetchu", false, NO_FLAG, P1, "Comment")
105105
ENTRY("prmt", "prmt", false, NO_FLAG, P1, "Comment")
106106
ENTRY("rcp", "rcp", true, NO_FLAG, P1, "Successful")

clang/test/dpct/asm/prefetch.cu

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2, cuda-11.0, cuda-11.1, cuda-11.2, cuda-11.3, cuda-11.4, cuda-11.5, cuda-11.6, cuda-11.7
2+
// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2, v11.0, v11.1, v11.2, v11.3, v11.4, v11.5, v11.6, v11.7
3+
// RUN: dpct --format-range=none -out-root %T/prefetch %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only
4+
// RUN: FileCheck %s --match-full-lines --input-file %T/prefetch/prefetch.dp.cpp
5+
// RUN: %if BUILD_LIT %{icpx -c -DBUILD_TEST -fsycl %T/prefetch/prefetch.dp.cpp -o %T/prefetch/prefetch.dp.o %}
6+
7+
// clang-format off
8+
#include <cuda_runtime.h>
9+
10+
/*
11+
Supported syntax:
12+
-----------------
13+
prefetch.level [a]; // prefetch to generic addr space cache
14+
prefetch.global.level [a]; // prefetch to global cache
15+
16+
Unsupported syntax:
17+
-------------------
18+
prefetch.local.level
19+
prefetch.global.level::eviction_priority [a]; // prefetch to data cache
20+
prefetchu.L1 [a]; // prefetch to uniform cache
21+
prefetch{.tensormap_space}.tensormap [a]; // prefetch the tensormap
22+
23+
.level = { .L1, .L2 };
24+
.level::eviction_priority = { .L2::evict_last, .L2::evict_normal };
25+
.tensormap_space = { .const, .param };
26+
*/
27+
28+
__global__ void prefetch(int *arr) {
29+
// CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L1});
30+
asm volatile ("prefetch.L1 [%0];" : : "l"(arr));
31+
// CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L2});
32+
asm volatile ("prefetch.L2 [%0];" : : "l"(arr));
33+
#ifndef BUILD_TEST
34+
// CHECK: /*
35+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
36+
// CHECK-NEXT: */
37+
asm volatile ("prefetch.L2::evict_last [%0];" : : "l"(arr));
38+
// CHECK: /*
39+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
40+
// CHECK-NEXT: */
41+
asm volatile ("prefetch.L2::evict_normal [%0];" : : "l"(arr));
42+
#endif // BUILD_TEST
43+
44+
// CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L1});
45+
asm volatile ("prefetch.global.L1 [%0];" : : "l"(arr));
46+
// CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L2});
47+
asm volatile ("prefetch.global.L2 [%0];" : : "l"(arr));
48+
#ifndef BUILD_TEST
49+
// CHECK: /*
50+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
51+
// CHECK-NEXT: */
52+
asm volatile ("prefetch.global.L2::evict_last [%0];" : : "l"(arr));
53+
// CHECK: /*
54+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
55+
// CHECK-NEXT: */
56+
asm volatile ("prefetch.global.L2::evict_normal [%0];" : : "l"(arr));
57+
58+
// CHECK: /*
59+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
60+
// CHECK-NEXT: */
61+
asm volatile ("prefetch.local.L1 [%0];" : : "l"(arr));
62+
// CHECK: /*
63+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
64+
// CHECK-NEXT: */
65+
asm volatile ("prefetch.local.L2 [%0];" : : "l"(arr));
66+
// CHECK: /*
67+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
68+
// CHECK-NEXT: */
69+
asm volatile ("prefetch.local.L2::evict_last [%0];" : : "l"(arr));
70+
// CHECK: /*
71+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
72+
// CHECK-NEXT: */
73+
asm volatile ("prefetch.local.L2::evict_normal [%0];" : : "l"(arr));
74+
75+
// CHECK: /*
76+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
77+
// CHECK-NEXT: */
78+
asm volatile ("prefetchu.L1 [%0];" : : "l"(arr));
79+
80+
// CHECK: /*
81+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
82+
// CHECK-NEXT: */
83+
asm volatile ("prefetch.const.tensormap [%0];" : : "l"(arr));
84+
// CHECK: /*
85+
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
86+
// CHECK-NEXT: */
87+
asm volatile ("prefetch.param.tensormap [%0];" : : "l"(arr));
88+
#endif // BUILD_TEST
89+
}
90+
91+
// clang-format on

0 commit comments

Comments
 (0)