diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 691b0f3f6922b..b8c7451b51ebb 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2384,6 +2384,18 @@ def SYCLIntelFPGANofusion : StmtAttr { let Documentation = [SYCLIntelFPGANofusionAttrDocs]; } +def SYCLIntelFPGAMaxReinvocationDelay : StmtAttr { + let Spellings = [CXX11<"intel", "max_reinvocation_delay">]; + let Subjects = SubjectList<[ForStmt, CXXForRangeStmt, WhileStmt, DoStmt], + ErrorDiag, "'for', 'while', and 'do' statements">; + let Args = [ExprArgument<"NExpr">]; + let LangOpts = [SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; + let IsStmtDependent = 1; + let Documentation = [SYCLIntelFPGAMaxReinvocationDelayAttrDocs]; +} +def : MutualExclusions<[SYCLIntelFPGADisableLoopPipelining, + SYCLIntelFPGAMaxReinvocationDelay]>; + def IntelFPGALocalNonConstVar : SubsetSubjecthasLocalStorage() && S->getKind() != Decl::ImplicitParam && diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 32c873543b9dd..ff2468f9a2a3f 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3314,7 +3314,7 @@ disables pipelining of the loop or function data path, causing the loop or function to be executed serially. Cannot be used on the same loop or function, or in conjunction with ``max_interleaving``, ``speculated_iterations``, ``max_concurrency``, ``initiation_interval``, -or ``ivdep``. +``ivdep``, or ``max_reinvocation_delay``. .. code-block:: c++ @@ -3447,6 +3447,31 @@ loop should not be fused with any adjacent loop. }]; } +def SYCLIntelFPGAMaxReinvocationDelayAttrDocs : Documentation { + let Category = DocCatVariable; + let Heading = "intel::max_reinvocation_delay"; + let Content = [{ +This attribute applies to a loop. Specifies the maximum number of cycles allowed +on the delay between the launch of the last iteration of a loop invocation and +the launch of the first iteration of a new loop invocation. Parameter N is +mandatory, and is a positive integer. Cannot be used on the same loop in +conjunction with disable_loop_pipelining. + +.. code-block:: c++ + + void foo() { + int var = 0; + [[intel::max_reinvocation_delay(1)]] + for (int i = 0; sycl::log10((float)(x)) < 10; i++) var++; + } + + template + void bar() { + [[intel::max_reinvocation_delay(N)]] for(;;) { } + } + }]; +} + def SYCLIntelLoopFuseDocs : Documentation { let Category = DocCatFunction; let Heading = "loop_fuse, loop_fuse_independent"; diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 34cf4c7553551..a61ad4da77950 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -2287,6 +2287,9 @@ class Sema final { Expr *E); SYCLIntelFPGALoopCoalesceAttr * BuildSYCLIntelFPGALoopCoalesceAttr(const AttributeCommonInfo &CI, Expr *E); + SYCLIntelFPGAMaxReinvocationDelayAttr * + BuildSYCLIntelFPGAMaxReinvocationDelayAttr(const AttributeCommonInfo &CI, + Expr *E); bool CheckQualifiedFunctionForTypeId(QualType T, SourceLocation Loc); diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp index 3ce45370f75f1..ce3d048868db3 100644 --- a/clang/lib/CodeGen/CGLoopInfo.cpp +++ b/clang/lib/CodeGen/CGLoopInfo.cpp @@ -611,6 +611,15 @@ MDNode *LoopInfo::createMetadata( llvm::Type::getInt32Ty(Ctx), VC.second))}; LoopProperties.push_back(MDNode::get(Ctx, Vals)); } + + if (Attrs.SYCLMaxReinvocationDelayNCycles) { + Metadata *Vals[] = { + MDString::get(Ctx, "llvm.loop.intel.max_reinvocation_delay.count"), + ConstantAsMetadata::get( + ConstantInt::get(llvm::Type::getInt32Ty(Ctx), + *Attrs.SYCLMaxReinvocationDelayNCycles))}; + LoopProperties.push_back(MDNode::get(Ctx, Vals)); + } LoopProperties.insert(LoopProperties.end(), AdditionalLoopProperties.begin(), AdditionalLoopProperties.end()); @@ -645,6 +654,7 @@ void LoopAttributes::clear() { SYCLMaxInterleavingNInvocations.reset(); SYCLSpeculatedIterationsNIterations.reset(); SYCLIntelFPGAVariantCount.clear(); + SYCLMaxReinvocationDelayNCycles.reset(); UnrollCount = 0; UnrollAndJamCount = 0; VectorizeEnable = LoopAttributes::Unspecified; @@ -681,6 +691,7 @@ LoopInfo::LoopInfo(BasicBlock *Header, const LoopAttributes &Attrs, !Attrs.SYCLMaxInterleavingNInvocations && !Attrs.SYCLSpeculatedIterationsNIterations && Attrs.SYCLIntelFPGAVariantCount.empty() && Attrs.UnrollCount == 0 && + !Attrs.SYCLMaxReinvocationDelayNCycles && Attrs.UnrollAndJamCount == 0 && !Attrs.PipelineDisabled && Attrs.PipelineInitiationInterval == 0 && Attrs.VectorizePredicateEnable == LoopAttributes::Unspecified && @@ -1012,6 +1023,9 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, // emitted // For attribute nofusion: // 'llvm.loop.fusion.disable' metadata will be emitted + // For attribute max_reinvocation_delay: + // n - 'llvm.loop.intel.max_reinvocation_delay.count, i32 n' metadata will be + // emitted for (const auto *A : Attrs) { if (const auto *IntelFPGAIVDep = dyn_cast(A)) addSYCLIVDepInfo(Header->getContext(), IntelFPGAIVDep->getSafelenValue(), @@ -1076,6 +1090,14 @@ void LoopInfoStack::push(BasicBlock *Header, clang::ASTContext &Ctx, if (isa(A)) setSYCLNofusionEnable(); + + if (const auto *IntelFPGAMaxReinvocationDelay = + dyn_cast(A)) { + const auto *CE = cast( + IntelFPGAMaxReinvocationDelay->getNExpr()); + llvm::APSInt ArgVal = CE->getResultAsAPSInt(); + setSYCLMaxReinvocationDelayNCycles(ArgVal.getSExtValue()); + } } setMustProgress(MustProgress); diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h index f48cfb248c3cb..74929e7f7ece2 100644 --- a/clang/lib/CodeGen/CGLoopInfo.h +++ b/clang/lib/CodeGen/CGLoopInfo.h @@ -134,6 +134,9 @@ struct LoopAttributes { /// Value for llvm.loop.intel.speculated.iterations.count metadata. llvm::Optional SYCLSpeculatedIterationsNIterations; + // Value for llvm.loop.intel.max_reinvocation_delay metadata. + llvm::Optional SYCLMaxReinvocationDelayNCycles; + /// llvm.unroll. unsigned UnrollCount; @@ -410,6 +413,11 @@ class LoopInfoStack { /// Set no progress for the next loop pushed. void setMustProgress(bool P) { StagedAttrs.MustProgress = P; } + /// Set value of max reinvocation delay for the next loop pushed. + void setSYCLMaxReinvocationDelayNCycles(unsigned C) { + StagedAttrs.SYCLMaxReinvocationDelayNCycles = C; + } + private: /// Returns true if there is LoopInfo on the stack. bool hasInfo() const { return !Active.empty(); } diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 27109565b3191..95b500f31adb6 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -451,6 +451,35 @@ static Attr *handleIntelFPGANofusionAttr(Sema &S, Stmt *St, return new (S.Context) SYCLIntelFPGANofusionAttr(S.Context, A); } +SYCLIntelFPGAMaxReinvocationDelayAttr * +Sema::BuildSYCLIntelFPGAMaxReinvocationDelayAttr(const AttributeCommonInfo &CI, + Expr *E) { + if (!E->isValueDependent()) { + llvm::APSInt ArgVal; + ExprResult Res = VerifyIntegerConstantExpression(E, &ArgVal); + if (Res.isInvalid()) + return nullptr; + E = Res.get(); + + // This attribute requires a strictly positive value. + if (ArgVal <= 0) { + Diag(E->getExprLoc(), diag::err_attribute_requires_positive_integer) + << CI << /*positive*/ 0; + return nullptr; + } + } + + return new (Context) SYCLIntelFPGAMaxReinvocationDelayAttr(Context, CI, E); +} + +static Attr * handleSYCLIntelFPGAMaxReinvocationDelayAttr(Sema &S, Stmt *St, + const ParsedAttr &A) { + S.CheckDeprecatedSYCLAttributeSpelling(A); + + Expr *E = A.getArgAsExpr(0); + return S.BuildSYCLIntelFPGAMaxReinvocationDelayAttr(A, E); +} + static Attr *handleLoopHintAttr(Sema &S, Stmt *St, const ParsedAttr &A, SourceRange) { IdentifierLoc *PragmaNameLoc = A.getArgAsIdent(0); @@ -828,6 +857,8 @@ static void CheckForIncompatibleSYCLLoopAttributes( CheckForDuplicationSYCLLoopAttribute(S, Attrs, false); CheckRedundantSYCLIntelFPGAIVDepAttrs(S, Attrs); CheckForDuplicationSYCLLoopAttribute(S, Attrs); + CheckForDuplicationSYCLLoopAttribute( + S, Attrs); } void CheckForIncompatibleUnrollHintAttributes( @@ -973,6 +1004,8 @@ static Attr *ProcessStmtAttribute(Sema &S, Stmt *St, const ParsedAttr &A, return handleUnlikely(S, St, A, Range); case ParsedAttr::AT_SYCLIntelFPGANofusion: return handleIntelFPGANofusionAttr(S, St, A); + case ParsedAttr::AT_SYCLIntelFPGAMaxReinvocationDelay: + return handleSYCLIntelFPGAMaxReinvocationDelayAttr(S, St, A); default: // N.B., ClangAttrEmitter.cpp emits a diagnostic helper that ensures a // declaration attribute is not written on a statement, but this code is diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index 2a0e9421fb55a..1192112285c26 100644 --- a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -1112,6 +1112,9 @@ namespace { const SYCLIntelFPGASpeculatedIterationsAttr *SI); const SYCLIntelFPGALoopCountAttr * TransformSYCLIntelFPGALoopCountAttr(const SYCLIntelFPGALoopCountAttr *SI); + const SYCLIntelFPGAMaxReinvocationDelayAttr * + TransformSYCLIntelFPGAMaxReinvocationDelayAttr( + const SYCLIntelFPGAMaxReinvocationDelayAttr *MRD); ExprResult TransformPredefinedExpr(PredefinedExpr *E); ExprResult TransformDeclRefExpr(DeclRefExpr *E); @@ -1603,6 +1606,14 @@ const LoopUnrollHintAttr *TemplateInstantiator::TransformLoopUnrollHintAttr( return getSema().BuildLoopUnrollHintAttr(*LU, TransformedExpr); } +const SYCLIntelFPGAMaxReinvocationDelayAttr * +TemplateInstantiator::TransformSYCLIntelFPGAMaxReinvocationDelayAttr( + const SYCLIntelFPGAMaxReinvocationDelayAttr *MRD) { + Expr *TransformedExpr = getDerived().TransformExpr(MRD->getNExpr()).get(); + return getSema().BuildSYCLIntelFPGAMaxReinvocationDelayAttr(*MRD, + TransformedExpr); +} + ExprResult TemplateInstantiator::transformNonTypeTemplateParmRef( NonTypeTemplateParmDecl *parm, SourceLocation loc, diff --git a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp index 87e8f2b9e6bb3..b6522ab5b5890 100644 --- a/clang/test/CodeGenSYCL/intel-fpga-loops.cpp +++ b/clang/test/CodeGenSYCL/intel-fpga-loops.cpp @@ -20,6 +20,9 @@ // CHECK: br label %for.cond2, !llvm.loop ![[MD_LCA_1:[0-9]+]] // CHECK: br label %for.cond13, !llvm.loop ![[MD_LCA_2:[0-9]+]] // CHECK: br label %for.cond24, !llvm.loop ![[MD_LCA_3:[0-9]+]] +// CHECK: br label %for.cond, !llvm.loop ![[MD_MRD:[0-9]+]] +// CHECK: br label %for.cond2, !llvm.loop ![[MD_MRD_2:[0-9]+]] +// CHECK: br label %for.cond13, !llvm.loop ![[MD_MRD_3:[0-9]+]] void disable_loop_pipelining() { int a[10]; @@ -151,6 +154,23 @@ void loop_count_control() { a[i] = 0; } +template +void max_reinvocation_delay() { + int a[10]; + // CHECK: ![[MD_MRD]] = distinct !{![[MD_MRD]], ![[MP]], ![[MD_max_reinvocation_delay:[0-9]+]]} + // CHECK-NEXT: ![[MD_max_reinvocation_delay]] = !{!"llvm.loop.intel.max_reinvocation_delay.count", i32 3} + [[intel::max_reinvocation_delay(A)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + // CHECK: ![[MD_MRD_2]] = distinct !{![[MD_MRD_2]], ![[MP]], ![[MD_max_reinvocation_delay_2:[0-9]+]]} + // CHECK-NEXT: ![[MD_max_reinvocation_delay_2]] = !{!"llvm.loop.intel.max_reinvocation_delay.count", i32 5} + [[intel::max_reinvocation_delay(5)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + // CHECK: ![[MD_MRD_3]] = distinct !{![[MD_MRD_3]], ![[MP]], ![[MD_max_reinvocation_delay_3:[0-9]+]]} + // CHECK-NEXT: ![[MD_max_reinvocation_delay_3]] = !{!"llvm.loop.intel.max_reinvocation_delay.count", i32 1} + [[intel::max_reinvocation_delay(B)]] for (int i = 0; i != 10; ++i) + a[i] = 0; +} + template __attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { kernelFunc(); @@ -166,6 +186,7 @@ int main() { max_interleaving<3, 0>(); speculated_iterations<4, 0>(); loop_count_control<12>(); + max_reinvocation_delay<3, 1>(); }); return 0; } diff --git a/clang/test/SemaSYCL/intel-fpga-loops.cpp b/clang/test/SemaSYCL/intel-fpga-loops.cpp index 87f32d2df271a..a6f91c074e21a 100644 --- a/clang/test/SemaSYCL/intel-fpga-loops.cpp +++ b/clang/test/SemaSYCL/intel-fpga-loops.cpp @@ -26,6 +26,8 @@ void foo() { [[intel::loop_count_avg(6)]] int l[10]; // expected-error@+1{{'loop_count' attribute cannot be applied to a declaration}} [[intel::loop_count(8)]] int m[10]; + // expected-error@+1 {{'max_reinvocation_delay' attribute cannot be applied to a declaration}} + [[intel::max_reinvocation_delay(1)]] int n[10]; } // Test for deprecated spelling of Intel FPGA loop attributes @@ -122,6 +124,9 @@ void boo() { // expected-error@+1 {{'loop_count' attribute takes one argument}} [[intel::loop_count(6, 9)]] for (int i = 0; i != 10; ++i) a[i] = 0; + // expected-error@+1 {{'max_reinvocation_delay' attribute takes one argument}} + [[intel::max_reinvocation_delay(5, 2)]] for (int i = 0; i != 10; ++i) + a[i] = 0; } // Test for incorrect argument value for Intel FPGA loop attributes @@ -216,6 +221,12 @@ void goo() { // expected-error@+1 {{'loop_count' attribute requires a non-negative integral compile time constant expression}} [[intel::loop_count(-1)]] for (int i = 0; i != 10; ++i) a[i] = 0; + // expected-error@+1 {{'max_reinvocation_delay' attribute requires a positive integral compile time constant expression}} + [[intel::max_reinvocation_delay(0)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + // expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'const char[8]'}} + [[intel::max_reinvocation_delay("test123")]] for (int i = 0; i != 10; ++i) + a[i] = 0; } // Test for Intel FPGA loop attributes duplication @@ -334,6 +345,11 @@ void zoo() { // expected-error@+1{{duplicate Intel FPGA loop attribute 'loop_count'}} [[intel::loop_count(2)]] for (int i = 0; i != 10; ++i) a[i] = 0; + + [[intel::max_reinvocation_delay(1)]] + // expected-error@+1{{duplicate Intel FPGA loop attribute 'max_reinvocation_delay'}} + [[intel::max_reinvocation_delay(1)]] for (int i = 0; i != 10; ++i) + a[i] = 0; } // Test for Intel FPGA loop attributes compatibility @@ -374,6 +390,10 @@ void loop_attrs_compatibility() { a[i] = 0; [[intel::loop_count(8)]] for (int i = 0; i != 10; ++i) a[i] = 0; + // expected-error@+2 {{'disable_loop_pipelining' and 'max_reinvocation_delay' attributes are not compatible}} + // expected-note@+1 {{conflicting attribute is here}} + [[intel::max_reinvocation_delay(1)]] [[intel::disable_loop_pipelining]] for (int i = 0; i != 10; ++i) + a[i] = 0; } template @@ -534,6 +554,19 @@ void loop_count_control_dependent() { a[i] = 0; } +template +void max_reinvocation_delay_dependent() { + int a[10]; + // expected-error@+1 {{'max_reinvocation_delay' attribute requires a positive integral compile time constant expression}} + [[intel::max_reinvocation_delay(C)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + + // expected-error@+2 {{duplicate Intel FPGA loop attribute 'max_reinvocation_delay'}} + [[intel::max_reinvocation_delay(A)]] + [[intel::max_reinvocation_delay(B)]] for (int i = 0; i != 10; ++i) + a[i] = 0; +} + void check_max_concurrency_expression() { int a[10]; // Test that checks expression is not a constant expression. @@ -630,6 +663,22 @@ void check_loop_count_expression() { a[i] = 0; } +void check_max_reinvocation_delay_expression() { + int a[10]; + // Test that checks expression is not a constant expression. + // expected-note@+1{{declared here}} + int foo; + // expected-error@+2{{expression is not an integral constant expression}} + // expected-note@+1{{read of non-const variable 'foo' is not allowed in a constant expression}} + [[intel::max_reinvocation_delay(foo + 1)]] for (int i = 0; i != 10; ++i) + a[i] = 0; + + // Test that checks expression is a constant expression. + constexpr int bar = 0; + [[intel::max_reinvocation_delay(bar + 2)]] for (int i = 0; i != 10; ++i) // OK + a[i] = 0; +} + // Test that checks wrong template instantiation and ensures that the type // is checked properly when instantiating from the template definition. struct S {}; @@ -671,6 +720,12 @@ void check_loop_attr_template_instantiation() { // expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'float'}} [[intel::loop_count(Ty{})]] for (int i = 0; i != 10; ++i) a[i] = 0; + + // expected-error@+2 {{integral constant expression must have integral or unscoped enumeration type, not 'S'}} + // expected-error@+1 {{integral constant expression must have integral or unscoped enumeration type, not 'float'}} + [[intel::max_reinvocation_delay(Ty{})]] for (int i = 0; i != 10; ++i) + a[i] = 0; + } int main() { @@ -693,12 +748,14 @@ int main() { speculated_iterations_dependent<1, 8, -3, 0>(); // expected-note{{in instantiation of function template specialization 'speculated_iterations_dependent<1, 8, -3, 0>' requested here}} loop_coalesce_dependent<-1, 4, 0>(); // expected-note{{in instantiation of function template specialization 'loop_coalesce_dependent<-1, 4, 0>' requested here}} loop_count_control_dependent<3, 2, -1>(); // expected-note{{in instantiation of function template specialization 'loop_count_control_dependent<3, 2, -1>' requested here}} + max_reinvocation_delay_dependent<1, 3, 0>(); // expected-note{{in instantiation of function template specialization 'max_reinvocation_delay_dependent<1, 3, 0>' requested here}} check_max_concurrency_expression(); check_max_interleaving_expression(); check_speculated_iterations_expression(); check_loop_coalesce_expression(); check_initiation_interval_expression(); check_loop_count_expression(); + check_max_reinvocation_delay_expression(); check_loop_attr_template_instantiation(); //expected-note{{in instantiation of function template specialization 'check_loop_attr_template_instantiation' requested here}} check_loop_attr_template_instantiation(); //expected-note{{in instantiation of function template specialization 'check_loop_attr_template_instantiation' requested here}} });