Skip to content

Commit 3631901

Browse files
Andrew Savonichevvladimirlaz
authored andcommitted
[SYCL] Add intel::kernel_args_restrict attribute
When applied to a device function that is invoked as a device kernel, the attribute is a hint to the compiler that no pointer argument to the kernel which is defined through an accessor (not USM), will alias any other pointer kernel argument that was defined through an accessor. This effect is equivalent to annotating restrict on all kernel pointer arguments in an OpenCL or SPIR-V kernel. Signed-off-by: Andrew Savonichev <[email protected]>
1 parent faecc73 commit 3631901

File tree

10 files changed

+185
-7
lines changed

10 files changed

+185
-7
lines changed

clang/include/clang/Basic/Attr.td

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1091,6 +1091,12 @@ def SYCLDeviceIndirectlyCallable : InheritableAttr {
10911091
let LangOpts = [SYCLIsDevice];
10921092
let Documentation = [SYCLDeviceIndirectlyCallableDocs];
10931093
}
1094+
def SYCLIntelKernelArgsRestrict : InheritableAttr {
1095+
let Spellings = [ CXX11<"intel", "kernel_args_restrict"> ];
1096+
let Subjects = SubjectList<[ FunctionLike ], ErrorDiag>;
1097+
let LangOpts = [ SYCLIsDevice, SYCLIsHost ];
1098+
let Documentation = [ SYCLIntelKernelArgsRestrictDocs ];
1099+
}
10941100

10951101
def C11NoReturn : InheritableAttr {
10961102
let Spellings = [Keyword<"_Noreturn">];

clang/include/clang/Basic/AttrDocs.td

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1835,6 +1835,43 @@ loads).
18351835
}];
18361836
}
18371837

1838+
def SYCLIntelKernelArgsRestrictDocs : Documentation {
1839+
let Category = DocCatVariable;
1840+
let Heading = "kernel_args_restrict";
1841+
let Content = [{
1842+
The attribute ``intel::kernel_args_restrict`` is legal on device functions, and
1843+
can be ignored on non-device functions. When applied to a function, lambda, or
1844+
function call operator (of a functor), the attribute is a hint to the compiler
1845+
equivalent to specifying the C99 restrict attribute on all pointer arguments or
1846+
the pointer member of any accessors, which are a function argument, lambda
1847+
capture, or functor member, of the callable to which the attribute was
1848+
applied. This effect is equivalent to annotating restrict on **all** kernel
1849+
pointer arguments in an OpenCL or SPIR-V kernel.
1850+
1851+
If ``intel::kernel_args_restrict`` is applied to a function called from a device
1852+
kernel, propagation of the attribute to any caller(s), including up to a kernel
1853+
boundary, is implementation defined and not guaranteed through this
1854+
extension. The attribute forms an unchecked assertion, in that implementations
1855+
do not need to check/confirm the pre-condition in any way. If a user applies
1856+
``inte::_kernel_args_restrict`` to a kernel, but there is in fact aliasing
1857+
between kernel pointer arguments at runtime, the behavior is undefined.
1858+
1859+
The attribute-token ``intel::kernel_args_restrict`` shall appear at most once in
1860+
each attribute-list and no attribute-argument-clause shall be present. The
1861+
attribute may be applied to the function-type in a function declaration. The
1862+
first declaration of a function shall specify the
1863+
``intel::kernel_args_restrict`` attribute if any declaration of that function
1864+
specifies the ``intel::kernel_args_restrict`` attribute. If a function is
1865+
declared with the ``intel::kernel_args_restrict`` attribute in one translation
1866+
unit and the same function is declared without the
1867+
``intel::kernel_args_restrict`` attribute in another translation unit, the
1868+
program is ill-formed and no diagnostic is required.
1869+
1870+
The ``intel::kernel_args_restrict`` attribute has an effect when applied to a
1871+
function, and no effect otherwise.
1872+
}];
1873+
}
1874+
18381875
def SYCLIntelFPGAIVDepAttrDocs : Documentation {
18391876
let Category = DocCatVariable;
18401877
let Heading = "ivdep";

clang/include/clang/Basic/AttributeCommonInfo.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,13 @@ class AttributeCommonInfo {
148148
return SyntaxUsed == AS_CXX11 || isAlignasAttribute();
149149
}
150150

151+
bool isAllowedOnLambdas() const {
152+
// FIXME: Eventually we want to do a list here populated via tablegen. But
153+
// we want C++ attributes to be permissible on Lambdas, and get propagated
154+
// to the call operator declaration.
155+
return getParsedKind() == AT_SYCLIntelKernelArgsRestrict;
156+
}
157+
151158
bool isC2xAttribute() const { return SyntaxUsed == AS_C2x; }
152159

153160
bool isKeywordAttribute() const {

clang/lib/CodeGen/CGCall.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2412,7 +2412,9 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
24122412
}
24132413
}
24142414

2415-
if (Arg->getType().isRestrictQualified())
2415+
if (Arg->getType().isRestrictQualified() ||
2416+
(CurCodeDecl &&
2417+
CurCodeDecl->hasAttr<SYCLIntelKernelArgsRestrictAttr>()))
24162418
AI->addAttr(llvm::Attribute::NoAlias);
24172419

24182420
// LLVM expects swifterror parameters to be used in very restricted

clang/lib/Sema/SemaDeclAttr.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6752,6 +6752,13 @@ static void handleMSAllocatorAttr(Sema &S, Decl *D, const ParsedAttr &AL) {
67526752
// Top Level Sema Entry Points
67536753
//===----------------------------------------------------------------------===//
67546754

6755+
static bool IsDeclLambdaCallOperator(Decl *D) {
6756+
if (const auto *MD = dyn_cast<CXXMethodDecl>(D))
6757+
return MD->getParent()->isLambda() &&
6758+
MD->getOverloadedOperator() == OverloadedOperatorKind::OO_Call;
6759+
return false;
6760+
}
6761+
67556762
/// ProcessDeclAttribute - Apply the specific attribute to the specified decl if
67566763
/// the attribute applies to decls. If the attribute is a type attribute, just
67576764
/// silently ignore it if a GNU attribute.
@@ -6763,7 +6770,8 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
67636770

67646771
// Ignore C++11 attributes on declarator chunks: they appertain to the type
67656772
// instead.
6766-
if (AL.isCXX11Attribute() && !IncludeCXX11Attributes)
6773+
if (AL.isCXX11Attribute() && !IncludeCXX11Attributes &&
6774+
(!IsDeclLambdaCallOperator(D) || !AL.isAllowedOnLambdas()))
67676775
return;
67686776

67696777
// Unknown attributes are automatically warned on. Target-specific attributes
@@ -7516,6 +7524,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D,
75167524
case ParsedAttr::AT_RenderScriptKernel:
75177525
handleSimpleAttribute<RenderScriptKernelAttr>(S, D, AL);
75187526
break;
7527+
case ParsedAttr::AT_SYCLIntelKernelArgsRestrict:
7528+
handleSimpleAttribute<SYCLIntelKernelArgsRestrictAttr>(S, D, AL);
7529+
break;
75197530
// XRay attributes.
75207531
case ParsedAttr::AT_XRayInstrument:
75217532
handleSimpleAttribute<XRayInstrumentAttr>(S, D, AL);

clang/lib/Sema/SemaSYCL.cpp

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -411,12 +411,14 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
411411
// Attributes applied to SYCLKernel are also included
412412
void CollectPossibleKernelAttributes(FunctionDecl *SYCLKernel,
413413
llvm::SmallPtrSet<Attr *, 4> &Attrs) {
414+
typedef std::pair<FunctionDecl *, FunctionDecl *> ChildParentPair;
414415
llvm::SmallPtrSet<FunctionDecl *, 16> Visited;
415-
llvm::SmallVector<FunctionDecl *, 16> WorkList;
416-
WorkList.push_back(SYCLKernel);
416+
llvm::SmallVector<ChildParentPair, 16> WorkList;
417+
WorkList.push_back({SYCLKernel, nullptr});
417418

418419
while (!WorkList.empty()) {
419-
FunctionDecl *FD = WorkList.back();
420+
FunctionDecl *FD = WorkList.back().first;
421+
FunctionDecl *ParentFD = WorkList.back().second;
420422
WorkList.pop_back();
421423
if (!Visited.insert(FD).second)
422424
continue; // We've already seen this Decl
@@ -425,6 +427,18 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
425427
Attrs.insert(A);
426428
else if (auto *A = FD->getAttr<ReqdWorkGroupSizeAttr>())
427429
Attrs.insert(A);
430+
else if (auto *A = FD->getAttr<SYCLIntelKernelArgsRestrictAttr>()) {
431+
// Allow the intel::kernel_args_restrict only on the lambda (functor
432+
// object) function, that is called directly from a kernel (i.e. the one
433+
// passed to the parallel_for function). Emit a warning and ignore all
434+
// other cases.
435+
if (ParentFD == SYCLKernel) {
436+
Attrs.insert(A);
437+
} else {
438+
SemaRef.Diag(A->getLocation(), diag::warn_attribute_ignored) << A;
439+
FD->dropAttr<SYCLIntelKernelArgsRestrictAttr>();
440+
}
441+
}
428442

429443
// TODO: vec_len_hint should be handled here
430444

@@ -436,7 +450,7 @@ class MarkDeviceFunction : public RecursiveASTVisitor<MarkDeviceFunction> {
436450
if (auto *Callee = dyn_cast<FunctionDecl>(CI->getDecl())) {
437451
Callee = Callee->getCanonicalDecl();
438452
if (!Visited.count(Callee))
439-
WorkList.push_back(Callee);
453+
WorkList.push_back({Callee, FD});
440454
}
441455
}
442456
}
@@ -1296,6 +1310,10 @@ void Sema::MarkDevice(void) {
12961310
}
12971311
break;
12981312
}
1313+
case attr::Kind::SYCLIntelKernelArgsRestrict: {
1314+
SYCLKernel->addAttr(A);
1315+
break;
1316+
}
12991317
// TODO: vec_len_hint should be handled here
13001318
default:
13011319
// Seeing this means that CollectPossibleKernelAttributes was

clang/lib/Sema/SemaType.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,11 @@ namespace {
208208
return chunkIndex == declarator.getNumTypeObjects();
209209
}
210210

211+
bool isProcessingLambdaExpr() const {
212+
return declarator.isFunctionDeclarator() &&
213+
declarator.getContext() == DeclaratorContext::LambdaExprContext;
214+
}
215+
211216
unsigned getCurrentChunkIndex() const {
212217
return chunkIndex;
213218
}
@@ -7582,7 +7587,8 @@ static void processTypeAttrs(TypeProcessingState &state, QualType &type,
75827587
switch (attr.getKind()) {
75837588
default:
75847589
// A C++11 attribute on a declarator chunk must appertain to a type.
7585-
if (attr.isCXX11Attribute() && TAL == TAL_DeclChunk) {
7590+
if (attr.isCXX11Attribute() && TAL == TAL_DeclChunk &&
7591+
(!state.isProcessingLambdaExpr() || !attr.isAllowedOnLambdas())) {
75867592
state.getSema().Diag(attr.getLoc(), diag::err_attribute_not_type_attr)
75877593
<< attr;
75887594
attr.setUsedAsTypeAttr();
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
// RUN: %clang %s -S -emit-llvm --sycl -o - | FileCheck %s
2+
3+
#include "CL/sycl.hpp"
4+
5+
constexpr auto sycl_read_write = cl::sycl::access::mode::read_write;
6+
constexpr auto sycl_global_buffer = cl::sycl::access::target::global_buffer;
7+
8+
template <typename Acc1Ty, typename Acc2Ty>
9+
struct foostr {
10+
Acc1Ty A;
11+
Acc2Ty B;
12+
foostr(Acc1Ty A, Acc2Ty B): A(A), B(B) {}
13+
[[intel::kernel_args_restrict]]
14+
void operator()() {
15+
A[0] = B[0];
16+
}
17+
};
18+
19+
int foo(int X) {
20+
int A[] = { 42 };
21+
int B[] = { 0 };
22+
{
23+
cl::sycl::queue Q;
24+
cl::sycl::buffer<int, 1> BufA(A, 1);
25+
cl::sycl::buffer<int, 1> BufB(B, 1);
26+
27+
// CHECK: define {{.*}} spir_kernel {{.*}}kernel_norestrict{{.*}}(i32 addrspace(1)* %{{.*}} i32 addrspace(1)* %{{.*}}
28+
29+
Q.submit([&](cl::sycl::handler& cgh) {
30+
auto AccA = BufA.get_access<sycl_read_write, sycl_global_buffer>(cgh);
31+
auto AccB = BufB.get_access<sycl_read_write, sycl_global_buffer>(cgh);
32+
cgh.single_task<class kernel_norestrict>(
33+
[=]() {
34+
AccB[0] = AccA[0];
35+
});
36+
});
37+
38+
// CHECK: define {{.*}} spir_kernel {{.*}}kernel_restrict{{.*}}(i32 addrspace(1)* noalias %{{.*}} i32 addrspace(1)* noalias %{{.*}}
39+
Q.submit([&](cl::sycl::handler& cgh) {
40+
auto AccA = BufA.get_access<sycl_read_write, sycl_global_buffer>(cgh);
41+
auto AccB = BufB.get_access<sycl_read_write, sycl_global_buffer>(cgh);
42+
cgh.single_task<class kernel_restrict>(
43+
[=]() [[intel::kernel_args_restrict]] {
44+
AccB[0] = AccA[0];
45+
});
46+
});
47+
48+
// CHECK: define {{.*}} spir_kernel {{.*}}kernel_restrict_struct{{.*}}(i32 addrspace(1)* noalias %{{.*}} i32 addrspace(1)* noalias %{{.*}}
49+
Q.submit([&](cl::sycl::handler& cgh) {
50+
auto AccA = BufA.get_access<sycl_read_write, sycl_global_buffer>(cgh);
51+
auto AccB = BufB.get_access<sycl_read_write, sycl_global_buffer>(cgh);
52+
foostr<decltype(AccA), decltype(AccB)> f(AccA, AccB);
53+
cgh.single_task<class kernel_restrict_struct>(f);
54+
});
55+
}
56+
return B[0];
57+
}

clang/test/Misc/pragma-attribute-supported-attributes-list.test

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@
131131
// CHECK-NEXT: ReturnsTwice (SubjectMatchRule_function)
132132
// CHECK-NEXT: SYCLDevice (SubjectMatchRule_function)
133133
// CHECK-NEXT: SYCLDeviceIndirectlyCallable (SubjectMatchRule_function)
134+
// CHECK-NEXT: SYCLIntelKernelArgsRestrict (SubjectMatchRule_hasType_functionType)
134135
// CHECK-NEXT: SYCLKernel (SubjectMatchRule_function)
135136
// CHECK-NEXT: ScopedLockable (SubjectMatchRule_record)
136137
// CHECK-NEXT: Section (SubjectMatchRule_function, SubjectMatchRule_variable_is_global, SubjectMatchRule_objc_method, SubjectMatchRule_objc_property)
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
// RUN: %clang %s -fsyntax-only --sycl -DCHECKDIAG -Xclang -verify
2+
// RUN: %clang %s -fsyntax-only -Xclang -ast-dump --sycl | FileCheck %s
3+
4+
[[intel::kernel_args_restrict]] // expected-warning{{'kernel_args_restrict' attribute ignored}}
5+
void func_ignore() {}
6+
7+
8+
struct Functor {
9+
[[intel::kernel_args_restrict]]
10+
void operator()() {}
11+
};
12+
13+
template <typename name, typename Func>
14+
__attribute__((sycl_kernel)) void kernel(Func kernelFunc) {
15+
kernelFunc();
16+
}
17+
18+
int main() {
19+
// CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel1
20+
// CHECK: SYCLIntelKernelArgsRestrictAttr
21+
kernel<class test_kernel1>(
22+
Functor());
23+
24+
// CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel2
25+
// CHECK: SYCLIntelKernelArgsRestrictAttr
26+
kernel<class test_kernel2>(
27+
[]() [[intel::kernel_args_restrict]] {});
28+
29+
// CHECK-LABEL: FunctionDecl {{.*}} _ZTSZ4mainE12test_kernel3
30+
// CHECK-NOT: SYCLIntelKernelArgsRestrictAttr
31+
kernel<class test_kernel3>(
32+
[]() {func_ignore();});
33+
}

0 commit comments

Comments
 (0)