diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index f163583b148d5..8a9a60eaa776c 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1213,6 +1213,7 @@ def SYCLSimd : InheritableAttr { let Subjects = SubjectList<[Function, GlobalVar]>; let Documentation = [SYCLSimdDocs]; let SupportsNonconformingLambdaSyntax = 1; + let SimpleHandler = 1; } // Available in SYCL explicit SIMD extension. Binds a file scope private @@ -1431,15 +1432,32 @@ def LoopUnrollHint : StmtAttr { } def IntelReqdSubGroupSize: InheritableAttr { - let Spellings = [GNU<"intel_reqd_sub_group_size">, - CXX11<"intel", "reqd_sub_group_size">]; + let Spellings = [ + GNU<"intel_reqd_sub_group_size">, CXX11<"intel", "reqd_sub_group_size">, + CXX11<"intel", "sub_group_size"> // SYCL2020 spelling. + ]; let Args = [ExprArgument<"Value">]; let Subjects = SubjectList<[Function], ErrorDiag>; let Documentation = [IntelReqdSubGroupSizeDocs]; let LangOpts = [OpenCL, SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; let SupportsNonconformingLambdaSyntax = 1; + let Accessors = + [Accessor<"isSYCL2020Spelling", [CXX11<"intel", "sub_group_size">]>]; } +def IntelNamedSubGroupSize : InheritableAttr { + let Spellings = [CXX11<"intel", "named_sub_group_size">]; + let Args = [EnumArgument<"Type", "SubGroupSizeType", ["automatic", "primary"], + ["Automatic", "Primary"]>]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [IntelNamedSubGroupSizeDocs]; + let LangOpts = [OpenCL, SYCLIsDevice, SilentlyIgnoreSYCLIsHost]; + let SupportsNonconformingLambdaSyntax = 1; +} + +def : + MutualExclusions<[IntelReqdSubGroupSize, IntelNamedSubGroupSize, SYCLSimd]>; + // This attribute is both a type attribute, and a declaration attribute (for // parameter variables). def OpenCLAccess : Attr { diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 0dd0a9003ccdd..73cd65eb7aa4f 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -4354,9 +4354,9 @@ This attribute can be used in both OpenCL and SYCL. OpenCL documentation: The optional attribute intel_reqd_sub_group_size can be used to indicate that -the kernel must be compiled and executed with the specified subgroup size. When +the kernel must be compiled and executed with the specified sub group size. When this attribute is present, get_max_sub_group_size() is guaranteed to return the -specified integer value. This is important for the correctness of many subgroup +specified integer value. This is important for the correctness of many sub group algorithms, and in some cases may be used by the compiler to generate more optimal code. See `cl_intel_required_subgroup_size ` @@ -4367,6 +4367,13 @@ The [[intel::reqd_sub_group_size(n)]] attribute indicates that the kernel must be compiled and executed with a sub-group of size n. The value of n must be set to a sub-group size supported by the device, or device compilation will fail. +The ``[[intel::sub_group_size(n)]]`` attribute has the same effect as the other +attribute spellings, except that it follows the SYCL 2020 Attribute Rules. See + the ``[[intel::named_sub_group_size(NAME)]]`` documentation for clarification. + +This attribute is mutually exclusive with ``[[intel::named_sub_group_size(NAME)]]`` +and ``[[intel::sycl_explicit_simd]]``. + In addition to device functions, the required sub-group size attribute may also be specified in the definition of a named functor object and lambda functions, as in the examples below: @@ -4390,6 +4397,47 @@ See Sub-groups for NDRange Parallelism proposal in sycl/doc/extensions/sub_group }]; } +def IntelNamedSubGroupSizeDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +The ``[[intel::named_sub_group_size(NAME)]]`` attribute works similar to +``[[intel::sub_group_size(N)]]`` attribute in that it defines the specific +sub group size for the kernel. The ``[[intel::named_sub_group_size(NAME)]]`` +form accepts a required parameter of either ``automatic`` or ``primary``. + +``automatic`` specifies that the implementation is free to select any of the +valid sub-group sizes associated with the device to which the kernel is +submitted. ``primary`` specifies that the implementation should select the +device's primary sub-group size as reported by +``info::device::primary_sub_group_size``. + +This attribute may not be combined with ``[[intel::sub_group_size(N)]]``, as +the two attributes have different meanings. It is also mutually exclusive with +``[[intel::sycl_explicit_simd]]``. + +In addition to the attributes, a default sub-group size strategy may be +specified by the ``-fsycl-default-sub-group-size`` command line option, which +accepts either ``automatic``, ``primary``, or a default size as an integer. +These values match and have the same behavior as the ``automatic``, ``primary``, +and ``[[intel::sub_group_size(N)]]`` values respectively. + +SYCL 2020 Attribute Rules: +SYCL 2020 specifies that kernel-type attributes should only be specified on the +kernel or a ``SYCL_EXTERNAL`` function. This implementation permits these +attributes to appear on all function declarations for the purposes of +self-documenting declarations. However, these attributes must match the kernel's +sub-group size as configured by the command line, or via an attribute +specifically. + +In addition to the SYCL 2020 Attribute Rules, this attribute and the +``[[intel::sub_group_size(N)]]`` attribute also require that any +``SYCL_EXTERNAL`` functions defined in a different translation unit must have a +matching sub-group size specification, so ``SYCL_EXTERNAL`` functions not +defined in this translation unit must also have a matching sub-group +specification to the kernel function that calls it. +}]; +} + def OpenCLAccessDocs : Documentation { let Category = DocCatStmt; let Heading = "__read_only, __write_only, __read_write (read_only, write_only, read_write)"; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 90fc9cda0faaf..bebcd308f8fab 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -11317,6 +11317,11 @@ def note_spelling_suggestion : Note< "did you mean to use %0 instead?">; def warn_attribute_requires_non_negative_integer_argument : Warning, InGroup; +def err_sycl_mismatch_group_size + : Error<"%select{kernel-called|undefined 'SYCL_EXTERNAL'}0 function must " + "have a sub group size that matches the size specified for the " + "kernel">; +def note_sycl_kernel_declared_here : Note<"kernel declared here">; // errors of expect.with.probability def err_probability_not_constant_float : Error< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index aa8f6cbf547fa..d0c80557e3fca 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -423,6 +423,14 @@ LANGOPT(RelativeCXXABIVTables, 1, 0, LANGOPT(ArmSveVectorBits, 32, 0, "SVE vector size in bits") +ENUM_LANGOPT(DefaultSubGroupSizeType, SubGroupSizeType, 2, + SubGroupSizeType::None, + "Strategy via which sub group is assigned for SYCL kernel " + "types if not overridden via attributes") + +VALUE_LANGOPT(DefaultSubGroupSize, 32, 0, + "If DefaultSubGroupSizeType is Integer contains the value") + #undef LANGOPT #undef COMPATIBLE_LANGOPT #undef BENIGN_LANGOPT diff --git a/clang/include/clang/Basic/LangOptions.h b/clang/include/clang/Basic/LangOptions.h index e05e763d11a00..c037a167a188b 100644 --- a/clang/include/clang/Basic/LangOptions.h +++ b/clang/include/clang/Basic/LangOptions.h @@ -264,6 +264,8 @@ class LangOptions : public LangOptionsBase { Single }; + enum class SubGroupSizeType { None, Auto, Primary, Integer }; + public: /// The used language standard. LangStandard::Kind LangStd; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 41328686dd521..7792c907a1a92 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -5728,6 +5728,14 @@ def sycl_std_EQ : Joined<["-"], "sycl-std=">, Group, MarshallingInfoEnum, "SYCL_None">, ShouldParseIf; +def fsycl_default_sub_group_size + : Separate<["-"], "fsycl-default-sub-group-size">, + HelpText<"Set the default sub group size for SYCL kernels">, + Flags<[CC1Option]>; +def fsycl_default_sub_group_size_EQ + : Joined<["-"], "fsycl-default-sub-group-size=">, + Alias, Flags<[CC1Option]>; + defm cuda_approx_transcendentals : BoolFOption<"cuda-approx-transcendentals", LangOpts<"CUDADeviceApproxTranscendentals">, DefaultFalse, PosFlag, NegFlag, diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 8653adee31e38..398dde010c120 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -10275,6 +10275,8 @@ class Sema final { Expr *E); IntelReqdSubGroupSizeAttr * MergeIntelReqdSubGroupSizeAttr(Decl *D, const IntelReqdSubGroupSizeAttr &A); + IntelNamedSubGroupSizeAttr * + MergeIntelNamedSubGroupSizeAttr(Decl *D, const IntelNamedSubGroupSizeAttr &A); void AddSYCLIntelNumSimdWorkItemsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E); SYCLIntelNumSimdWorkItemsAttr * diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 9d80898d27f6d..223f5a9fdb887 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -592,7 +592,7 @@ CodeGenFunction::DecodeAddrUsedInPrologue(llvm::Value *F, void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::Function *Fn) { - if (!FD->hasAttr()) + if (!FD->hasAttr() && !FD->hasAttr()) return; // TODO Module identifier is not reliable for this purpose since two modules @@ -602,7 +602,8 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::LLVMContext &Context = getLLVMContext(); - CGM.GenOpenCLArgMetadata(Fn, FD, this); + if (FD->hasAttr()) + CGM.GenOpenCLArgMetadata(Fn, FD, this); if (const VecTypeHintAttr *A = FD->getAttr()) { QualType HintQTy = A->getTypeHint(); @@ -648,15 +649,52 @@ void CodeGenFunction::EmitOpenCLKernelMetadata(const FunctionDecl *FD, llvm::MDNode::get(Context, AttrMDArgs)); } - if (const IntelReqdSubGroupSizeAttr *A = - FD->getAttr()) { - const auto *CE = dyn_cast(A->getValue()); + bool IsKernelOrDevice = + FD->hasAttr() || FD->hasAttr(); + const IntelReqdSubGroupSizeAttr *ReqSubGroup = + FD->getAttr(); + + // To support the SYCL 2020 spelling with no propagation, only emit for + // kernel-or-device when that spelling, fall-back to old behavior. + if (ReqSubGroup && (IsKernelOrDevice || !ReqSubGroup->isSYCL2020Spelling())) { + const auto *CE = dyn_cast(ReqSubGroup->getValue()); assert(CE && "Not an integer constant expression"); Optional ArgVal = CE->getResultAsAPSInt(); llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get( Builder.getInt32(ArgVal->getSExtValue()))}; Fn->setMetadata("intel_reqd_sub_group_size", llvm::MDNode::get(Context, AttrMDArgs)); + } else if (IsKernelOrDevice && + CGM.getLangOpts().getDefaultSubGroupSizeType() == + LangOptions::SubGroupSizeType::Integer) { + llvm::Metadata *AttrMDArgs[] = {llvm::ConstantAsMetadata::get( + Builder.getInt32(CGM.getLangOpts().DefaultSubGroupSize))}; + Fn->setMetadata("intel_reqd_sub_group_size", + llvm::MDNode::get(Context, AttrMDArgs)); + } + + // SCYL2020 doesn't propagate attributes, so don't put it in an intermediate + // location. + if (IsKernelOrDevice) { + if (const auto *A = FD->getAttr()) { + llvm::Metadata *AttrMDArgs[] = {llvm::MDString::get( + Context, A->getType() == IntelNamedSubGroupSizeAttr::Primary + ? "primary" + : "automatic")}; + Fn->setMetadata("intel_reqd_sub_group_size", + llvm::MDNode::get(Context, AttrMDArgs)); + } else if (CGM.getLangOpts().getDefaultSubGroupSizeType() == + LangOptions::SubGroupSizeType::Auto) { + llvm::Metadata *AttrMDArgs[] = { + llvm::MDString::get(Context, "automatic")}; + Fn->setMetadata("intel_reqd_sub_group_size", + llvm::MDNode::get(Context, AttrMDArgs)); + } else if (CGM.getLangOpts().getDefaultSubGroupSizeType() == + LangOptions::SubGroupSizeType::Primary) { + llvm::Metadata *AttrMDArgs[] = {llvm::MDString::get(Context, "primary")}; + Fn->setMetadata("intel_reqd_sub_group_size", + llvm::MDNode::get(Context, AttrMDArgs)); + } } if (FD->hasAttr()) { diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 099b096781d3e..111f088cf6abe 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -4394,6 +4394,9 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, HeaderOpt.append(Header); CmdArgs.push_back(Args.MakeArgString(HeaderOpt)); } + + // Forward -fsycl-default-sub-group-size if in SYCL mode. + Args.AddLastArg(CmdArgs, options::OPT_fsycl_default_sub_group_size); } if (IsSYCL) { diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 24238f5fe5c40..e6b74741c713b 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -3502,6 +3502,20 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, LangOptions::SignReturnAddressKeyKind::BKey) GenerateArg(Args, OPT_msign_return_address_key_EQ, "b_key", SA); + switch (Opts.getDefaultSubGroupSizeType()) { + case LangOptions::SubGroupSizeType::Auto: + GenerateArg(Args, OPT_fsycl_default_sub_group_size, "automatic", SA); + break; + case LangOptions::SubGroupSizeType::Primary: + GenerateArg(Args, OPT_fsycl_default_sub_group_size, "primary", SA); + break; + case LangOptions::SubGroupSizeType::Integer: + GenerateArg(Args, OPT_fsycl_default_sub_group_size, + Twine(Opts.DefaultSubGroupSize), SA); + break; + case LangOptions::SubGroupSizeType::None: + break; + } } bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, @@ -3592,6 +3606,28 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, } } + // Parse SYCL Default Sub group size. + if (const Arg *A = Args.getLastArg(OPT_fsycl_default_sub_group_size)) { + StringRef Value = A->getValue(); + Opts.setDefaultSubGroupSizeType( + llvm::StringSwitch(Value) + .Case("automatic", LangOptions::SubGroupSizeType::Auto) + .Case("primary", LangOptions::SubGroupSizeType::Primary) + .Default(LangOptions::SubGroupSizeType::Integer)); + + if (Opts.getDefaultSubGroupSizeType() == + LangOptions::SubGroupSizeType::Integer) { + int64_t IntResult; + if (!Value.getAsInteger(10, IntResult)) { + Opts.DefaultSubGroupSize = IntResult; + } else { + Diags.Report(diag::err_drv_invalid_value) + << A->getAsString(Args) << A->getValue(); + Opts.setDefaultSubGroupSizeType(LangOptions::SubGroupSizeType::None); + } + } + } + // These need to be parsed now. They are used to set OpenCL defaults. Opts.IncludeDefaultHeader = Args.hasArg(OPT_finclude_default_header); Opts.DeclareOpenCLBuiltins = Args.hasArg(OPT_fdeclare_opencl_builtins); diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 5a431d1dd0a56..568120ff4441a 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2623,6 +2623,8 @@ static bool mergeDeclAttribute(Sema &S, NamedDecl *D, NewAttr = S.mergeEnforceTCBLeafAttr(D, *TCBLA); else if (const auto *A = dyn_cast(Attr)) NewAttr = S.MergeIntelReqdSubGroupSizeAttr(D, *A); + else if (const auto *A = dyn_cast(Attr)) + NewAttr = S.MergeIntelNamedSubGroupSizeAttr(D, *A); else if (const auto *A = dyn_cast(Attr)) NewAttr = S.MergeSYCLIntelNumSimdWorkItemsAttr(D, *A); else if (const auto *A = dyn_cast(Attr)) diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 2e245df16a856..3a4355e2bd6c0 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -3206,6 +3206,42 @@ static void handleIntelReqdSubGroupSize(Sema &S, Decl *D, S.AddIntelReqdSubGroupSize(D, AL, E); } +IntelNamedSubGroupSizeAttr * +Sema::MergeIntelNamedSubGroupSizeAttr(Decl *D, + const IntelNamedSubGroupSizeAttr &A) { + // Check to see if there's a duplicate attribute with different values + // already applied to the declaration. + if (const auto *DeclAttr = D->getAttr()) { + if (DeclAttr->getType() != A.getType()) { + Diag(DeclAttr->getLoc(), diag::warn_duplicate_attribute) << &A; + Diag(A.getLoc(), diag::note_previous_attribute); + } + return nullptr; + } + + return IntelNamedSubGroupSizeAttr::Create(Context, A.getType(), A); +} + +static void handleIntelNamedSubGroupSize(Sema &S, Decl *D, + const ParsedAttr &AL) { + StringRef SizeStr; + SourceLocation Loc; + if (AL.isArgIdent(0)) { + IdentifierLoc *IL = AL.getArgAsIdent(0); + SizeStr = IL->Ident->getName(); + Loc = IL->Loc; + } else if (!S.checkStringLiteralArgumentAttr(AL, 0, SizeStr, &Loc)) { + return; + } + + IntelNamedSubGroupSizeAttr::SubGroupSizeType SizeType; + if (!IntelNamedSubGroupSizeAttr::ConvertStrToSubGroupSizeType(SizeStr, + SizeType)) { + S.Diag(Loc, diag::warn_attribute_type_not_supported) << AL << SizeStr; + } + D->addAttr(IntelNamedSubGroupSizeAttr::Create(S.Context, SizeType, AL)); +} + void Sema::AddSYCLIntelNumSimdWorkItemsAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E) { @@ -9214,6 +9250,9 @@ static void ProcessDeclAttribute(Sema &S, Scope *scope, Decl *D, case ParsedAttr::AT_IntelReqdSubGroupSize: handleIntelReqdSubGroupSize(S, D, AL); break; + case ParsedAttr::AT_IntelNamedSubGroupSize: + handleIntelNamedSubGroupSize(S, D, AL); + break; case ParsedAttr::AT_SYCLIntelNumSimdWorkItems: handleSYCLIntelNumSimdWorkItemsAttr(S, D, AL); break; diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index 5d09ab7035be2..c64da6ba38f7f 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -345,8 +345,9 @@ static void collectSYCLAttributes(Sema &S, FunctionDecl *FD, llvm::copy_if(FD->getAttrs(), std::back_inserter(Attrs), [](Attr *A) { // FIXME: Make this list self-adapt as new SYCL attributes are added. - return isa(A); @@ -678,6 +679,10 @@ class SingleDeviceFunctionTracker { return CollectedAttributes; } + llvm::SmallPtrSetImpl &GetDeviceFunctions() { + return DeviceFunctions; + } + ~SingleDeviceFunctionTracker() { Parent.AddSingleFunction(DeviceFunctions, RecursiveFunctions); } @@ -1932,6 +1937,13 @@ class SyclKernelDeclCreator : public SyclKernelFieldHandler { KernelDecl->setType(FuncType); KernelDecl->setParams(Params); + // Make sure that this is marked as a kernel so that the code-gen can make + // decisions based on that. We cannot add this earlier, otherwise the call + // to TransformStmt in replaceWithLocalClone can diagnose something that got + // diagnosed on the actual kernel. + KernelDecl->addAttr( + SYCLKernelAttr::CreateImplicit(SemaRef.getASTContext())); + SemaRef.addSyclDeviceDecl(KernelDecl); } @@ -2291,6 +2303,7 @@ class SyclKernelBodyCreator : public SyclKernelFieldHandler { BodyStmts.insert(BodyStmts.end(), FinalizeStmts.begin(), FinalizeStmts.end()); + return CompoundStmt::Create(SemaRef.getASTContext(), BodyStmts, {}, {}); } @@ -3558,12 +3571,136 @@ void Sema::ConstructOpenCLKernel(FunctionDecl *KernelCallerFunc, } } -static void PropagateAndDiagnoseDeviceAttr(Sema &S, Attr *A, - FunctionDecl *SYCLKernel, - FunctionDecl *KernelBody) { +// Figure out the sub-group for the this function. First we check the +// attributes, then the global settings. +static std::pair +CalcEffectiveSubGroup(ASTContext &Ctx, const LangOptions &LO, + const FunctionDecl *FD) { + if (const auto *A = FD->getAttr()) { + int64_t Val = getIntExprValue(A->getValue(), Ctx); + return {LangOptions::SubGroupSizeType::Integer, Val}; + } + + if (const auto *A = FD->getAttr()) { + if (A->getType() == IntelNamedSubGroupSizeAttr::Primary) + return {LangOptions::SubGroupSizeType::Primary, 0}; + return {LangOptions::SubGroupSizeType::Auto, 0}; + } + + // Return the global settings. + return {LO.getDefaultSubGroupSizeType(), + static_cast(LO.DefaultSubGroupSize)}; +} + +static SourceLocation GetSubGroupLoc(const FunctionDecl *FD) { + if (const auto *A = FD->getAttr()) + return A->getLocation(); + if (const auto *A = FD->getAttr()) + return A->getLocation(); + return SourceLocation{}; +} + +static void CheckSYCL2020SubGroupSizes(Sema &S, FunctionDecl *SYCLKernel, + const FunctionDecl *FD) { + // If they are the same, no error. + if (CalcEffectiveSubGroup(S.Context, S.getLangOpts(), SYCLKernel) == + CalcEffectiveSubGroup(S.Context, S.getLangOpts(), FD)) + return; + + // Else we need to figure out why they don't match. + SourceLocation FDAttrLoc = GetSubGroupLoc(FD); + SourceLocation KernelAttrLoc = GetSubGroupLoc(SYCLKernel); + + if (FDAttrLoc.isValid()) { + // This side was caused by an attribute. + S.Diag(FDAttrLoc, diag::err_sycl_mismatch_group_size) + << /*kernel called*/ 0; + + if (KernelAttrLoc.isValid()) { + S.Diag(KernelAttrLoc, diag::note_conflicting_attribute); + } else { + // Kernel is 'default'. + S.Diag(SYCLKernel->getLocation(), diag::note_sycl_kernel_declared_here); + } + return; + } + + // Else this doesn't have an attribute, which can only be caused by this being + // an undefined SYCL_EXTERNAL, and the kernel has an attribute that conflicts. + if (const auto *A = SYCLKernel->getAttr()) { + // Don't diagnose this if the kernel got its size from the 'old' attribute + // spelling. + if (!A->isSYCL2020Spelling()) + return; + } + + assert(KernelAttrLoc.isValid() && "Kernel doesn't have attribute either?"); + S.Diag(FD->getLocation(), diag::err_sycl_mismatch_group_size) + << /*undefined SYCL_EXTERNAL*/ 1; + S.Diag(KernelAttrLoc, diag::note_conflicting_attribute); +} + +// Check SYCL2020 Attributes. 2020 attributes don't propogate, they are only +// valid if they match the attribute on the kernel. Note that this is a slight +// difference from what the spec says, which says these attributes are only +// valid on SYCL Kernels and SYCL_EXTERNAL, but we felt that for +// self-documentation purposes that it would be nice to be able to repeat these +// on subsequent functions. +static void CheckSYCL2020Attributes( + Sema &S, FunctionDecl *SYCLKernel, FunctionDecl *KernelBody, + const llvm::SmallPtrSetImpl &CalledFuncs) { + + if (KernelBody) { + // Make sure the kernel itself has all the 2020 attributes, since we don't + // do propagation of these. + if (auto *A = KernelBody->getAttr()) + if (A->isSYCL2020Spelling()) + SYCLKernel->addAttr(A); + if (auto *A = KernelBody->getAttr()) + SYCLKernel->addAttr(A); + + // If the kernel has a body, we should get the attributes for the kernel + // from there instead, so that we get the functor object. + SYCLKernel = KernelBody; + } + + for (auto *FD : CalledFuncs) { + if (FD == SYCLKernel || FD == KernelBody) + continue; + for (auto *Attr : FD->attrs()) { + switch (Attr->getKind()) { + case attr::Kind::IntelReqdSubGroupSize: + // Pre SYCL2020 spellings handled during collection. + if (!cast(Attr)->isSYCL2020Spelling()) + break; + LLVM_FALLTHROUGH; + case attr::Kind::IntelNamedSubGroupSize: + CheckSYCL2020SubGroupSizes(S, SYCLKernel, FD); + break; + case attr::Kind::SYCLDevice: + // If a SYCL_EXTERNAL function is not defined in this TU, its necessary + // that it has a compatible sub-group-size. Don't diagnose if it has a + // sub-group attribute, we can count on the other checks to catch this. + if (!FD->isDefined() && !FD->hasAttr() && + !FD->hasAttr()) + CheckSYCL2020SubGroupSizes(S, SYCLKernel, FD); + break; + default: + break; + } + } + } +} + +static void PropagateAndDiagnoseDeviceAttr( + Sema &S, const SingleDeviceFunctionTracker &Tracker, Attr *A, + FunctionDecl *SYCLKernel, FunctionDecl *KernelBody) { switch (A->getKind()) { case attr::Kind::IntelReqdSubGroupSize: { auto *Attr = cast(A); + + if (Attr->isSYCL2020Spelling()) + break; const auto *KBSimdAttr = KernelBody ? KernelBody->getAttr() : nullptr; if (auto *Existing = SYCLKernel->getAttr()) { @@ -3658,6 +3795,9 @@ static void PropagateAndDiagnoseDeviceAttr(Sema &S, Attr *A, case attr::Kind::SYCLIntelFPGAInitiationInterval: SYCLKernel->addAttr(A); break; + case attr::Kind::IntelNamedSubGroupSize: + // Nothing to do here, handled in the SYCL2020 spelling. + break; // TODO: vec_len_hint should be handled here default: // Seeing this means that CollectPossibleKernelAttributes was @@ -3681,8 +3821,10 @@ void Sema::MarkDevices() { // kernel at a time. SingleDeviceFunctionTracker T{Tracker, SYCLKernel}; + CheckSYCL2020Attributes(*this, T.GetSYCLKernel(), T.GetKernelBody(), + T.GetDeviceFunctions()); for (auto *A : T.GetCollectedAttributes()) - PropagateAndDiagnoseDeviceAttr(*this, A, T.GetSYCLKernel(), + PropagateAndDiagnoseDeviceAttr(*this, T, A, T.GetSYCLKernel(), T.GetKernelBody()); } } diff --git a/clang/test/CodeGenSYCL/esimd_metadata2.cpp b/clang/test/CodeGenSYCL/esimd_metadata2.cpp index 5c8ebb09f26b0..3d3d53920a798 100644 --- a/clang/test/CodeGenSYCL/esimd_metadata2.cpp +++ b/clang/test/CodeGenSYCL/esimd_metadata2.cpp @@ -8,8 +8,8 @@ __attribute__((sycl_device)) void shared_func() { shared_func_decl(); } __attribute__((sycl_device)) __attribute__((sycl_explicit_simd)) void esimd_func() { shared_func(); } -// CHECK-ESIMD-DAG: define {{.*}}spir_kernel void @{{.*}}kernel_cm() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} {{.*}} !intel_reqd_sub_group_size ![[SGSIZE1:[0-9]+]] {{.*}}{ -// CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}esimd_funcv() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} { +// CHECK-ESIMD-DAG: define {{.*}}spir_kernel void @{{.*}}kernel_cm() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1:[0-9]+]] {{.*}}{ +// CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}esimd_funcv() #{{[0-9]+}} !sycl_explicit_simd !{{[0-9]+}} !intel_reqd_sub_group_size ![[SGSIZE1]] { // CHECK-ESIMD-DAG: define {{.*}}spir_func void @{{.*}}shared_funcv() #{{[0-9]+}} { // CHECK-ESIMD-DAG: define linkonce_odr spir_func void @_ZN12ESIMDFunctorclEv({{.*}}) #{{[0-9]+}} {{.*}} !sycl_explicit_simd !{{[0-9]+}} { // CHECK-ESIMD-DAG: declare spir_func void @{{.*}}shared_func_declv() #{{[0-9]+}} diff --git a/clang/test/CodeGenSYCL/sub-group-size.cpp b/clang/test/CodeGenSYCL/sub-group-size.cpp new file mode 100644 index 0000000000000..5df720dbf586c --- /dev/null +++ b/clang/test/CodeGenSYCL/sub-group-size.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -sycl-std=2020 -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=NONE,ALL +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=primary -sycl-std=2020 -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=PRIM_DEF,ALL +// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fsycl-default-sub-group-size=10 -sycl-std=2020 -triple spir64-unknown-unknown-sycldevice -disable-llvm-passes -emit-llvm -o - %s | FileCheck %s --check-prefixes=TEN_DEF,ALL + +// Ensure that both forms of the new sub_group_size properly emit their metadata +// on sycl-kernel and sycl-external functions. + +#include "Inputs/sycl.hpp" +using namespace cl::sycl; + +[[intel::named_sub_group_size(primary)]] SYCL_EXTERNAL void external_primary() {} +// ALL-DAG: define {{.*}}spir_func void @{{.*}}external_primary{{.*}}() #{{[0-9]+}} !intel_reqd_sub_group_size ![[PRIMARY:[0-9]+]] + +[[intel::sub_group_size(10)]] SYCL_EXTERNAL void external_10() {} +// ALL-DAG: define {{.*}}spir_func void @{{.*}}external_10{{.*}}() #{{[0-9]+}} !intel_reqd_sub_group_size ![[TEN:[0-9]+]] + +SYCL_EXTERNAL void external_default_behavior() {} +// NONE-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}} { +// PRIM_DEF-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}} !intel_reqd_sub_group_size ![[PRIMARY]] { +// TEN_DEF-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}} !intel_reqd_sub_group_size ![[TEN]] { + +void default_behavior() { + kernel_single_task([]() { + }); +} +// NONE-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}} !kernel_arg_buffer_location !{{[0-9]+}} { +// PRIM_DEF-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}} !intel_reqd_sub_group_size ![[PRIMARY]] +// TEN_DEF-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}} !intel_reqd_sub_group_size ![[TEN]] + +void primary() { + kernel_single_task([]() [[intel::named_sub_group_size(primary)]]{}); +} +// ALL-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel2() #{{[0-9]+}} !intel_reqd_sub_group_size ![[PRIMARY]] + +void ten() { + kernel_single_task([]() [[intel::sub_group_size(10)]]{}); +} +// ALL-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel3() #{{[0-9]+}} !intel_reqd_sub_group_size ![[TEN]] + +// PRIM_DEF: ![[PRIMARY]] = !{!"primary"} +// TEN_DEF: ![[TEN]] = !{i32 10} diff --git a/clang/test/Driver/sycl-sub-group-size.cpp b/clang/test/Driver/sycl-sub-group-size.cpp new file mode 100644 index 0000000000000..a5f85c6581f97 --- /dev/null +++ b/clang/test/Driver/sycl-sub-group-size.cpp @@ -0,0 +1,12 @@ +// Ensure that by default the -fsycl-default-sub-group-size doesn't get passed +// to the cc1 invocation. +// RUN: %clang -### -fsycl %s 2>&1 | FileCheck %s -check-prefix=CHECK-DEFAULT +// CHECK-DEFAULT-NOT: "fsycl-default-sub-group-size" + +// The next two tests make sure that the -fsycl-default-sub-group-size command +// line arguments get properly passed unaltered to the cc1 invocation. +// RUN: %clang -### -fsycl -fsycl-default-sub-group-size=primary %s 2>&1 | FileCheck %s -check-prefix=PRIM +// PRIM: "-fsycl-default-sub-group-size" "primary" + +// RUN: %clang -### -fsycl -fsycl-default-sub-group-size=10 %s 2>&1 | FileCheck %s -check-prefix=TEN +// TEN: "-fsycl-default-sub-group-size" "10" diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index f2cb54fc70deb..d97d7da0fae2c 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -71,6 +71,7 @@ // CHECK-NEXT: IBAction (SubjectMatchRule_objc_method_is_instance) // CHECK-NEXT: IFunc (SubjectMatchRule_function) // CHECK-NEXT: InitPriority (SubjectMatchRule_variable) +// CHECK-NEXT: IntelNamedSubGroupSize (SubjectMatchRule_function) // CHECK-NEXT: IntelReqdSubGroupSize (SubjectMatchRule_function) // CHECK-NEXT: InternalLinkage (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record) // CHECK-NEXT: LTOVisibilityPublic (SubjectMatchRule_record) diff --git a/clang/test/SemaSYCL/allow-constexpr-recursion.cpp b/clang/test/SemaSYCL/allow-constexpr-recursion.cpp index a73ff61280ce6..4dde146900849 100644 --- a/clang/test/SemaSYCL/allow-constexpr-recursion.cpp +++ b/clang/test/SemaSYCL/allow-constexpr-recursion.cpp @@ -36,7 +36,7 @@ struct ConditionallyExplicitCtor { void conditionally_noexcept() noexcept(constexpr_recurse(5)) {} -template +template void ConstexprIf1() { if constexpr (I == 1) ConstexprIf1(); @@ -44,7 +44,7 @@ void ConstexprIf1() { // Same as the above, but split up so the diagnostic is more clear. // expected-note@+2 2{{function implemented using recursion declared here}} -template +template void ConstexprIf2() { if constexpr (I == 1) // expected-error@+1{{SYCL kernel cannot call a recursive function}} diff --git a/clang/test/SemaSYCL/sub-group-size.cpp b/clang/test/SemaSYCL/sub-group-size.cpp new file mode 100644 index 0000000000000..6578e9e41ee2a --- /dev/null +++ b/clang/test/SemaSYCL/sub-group-size.cpp @@ -0,0 +1,157 @@ +// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -sycl-std=2020 -internal-isystem %S/Inputs -fsyntax-only -verify=expected,primary,integer %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -fsycl-default-sub-group-size=primary -sycl-std=2020 -internal-isystem %S/Inputs -fsyntax-only -verify=expected,integer %s +// RUN: %clang_cc1 -internal-isystem %S/Inputs -fsycl-is-device -fsycl-default-sub-group-size=10 -sycl-std=2020 -internal-isystem %S/Inputs -fsyntax-only -verify=expected,primary %s + +// Validate the semantic analysis checks for the interaction betwen the +// named_sub_group_size and sub_group_size attributes. These are not able to be +// combined, and require that they only be applied to non-sycl-kernel/ +// non-sycl-device functions if they match the kernel they are being called +// from. + +#include "Inputs/sycl.hpp" + +// expected-error@+2 {{'named_sub_group_size' and 'sub_group_size' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +[[intel::sub_group_size(1)]] [[intel::named_sub_group_size(automatic)]] void f1(); +// expected-error@+2 {{'sub_group_size' and 'named_sub_group_size' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +[[intel::named_sub_group_size(primary)]] [[intel::sub_group_size(1)]] void f2(); + +// expected-note@+1 {{conflicting attribute is here}} +[[intel::sub_group_size(1)]] void f3(); +// expected-error@+1 {{'named_sub_group_size' and 'sub_group_size' attributes are not compatible}} +[[intel::named_sub_group_size(primary)]] void f3(); + +// expected-note@+1 {{conflicting attribute is here}} +[[intel::named_sub_group_size(primary)]] void f4(); +// expected-error@+1 {{'sub_group_size' and 'named_sub_group_size' attributes are not compatible}} +[[intel::sub_group_size(1)]] void f4(); + +// expected-note@+1 {{previous attribute is here}} +[[intel::named_sub_group_size(automatic)]] void f5(); + +// expected-warning@+1 {{attribute 'named_sub_group_size' is already applied with different arguments}} +[[intel::named_sub_group_size(primary)]] void f5(); + +[[intel::named_sub_group_size(automatic)]] void f6(); + +[[intel::named_sub_group_size(automatic)]] void f6(); + +// expected-warning@+1 {{'named_sub_group_size' attribute argument not supported: invalid}} +[[intel::named_sub_group_size(invalid)]] void f7(); + +// expected-error@+2 {{'named_sub_group_size' and 'sycl_explicit_simd' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +[[intel::sycl_explicit_simd]] [[intel::named_sub_group_size(automatic)]] void f8(); +// expected-error@+2 {{'sub_group_size' and 'sycl_explicit_simd' attributes are not compatible}} +// expected-note@+1 {{conflicting attribute is here}} +[[intel::sycl_explicit_simd]] [[intel::sub_group_size(1)]] void f9(); + +// expected-note@+1 {{conflicting attribute is here}} +[[intel::named_sub_group_size(primary)]] void f10(); +// expected-error@+1 {{'sycl_explicit_simd' and 'named_sub_group_size' attributes are not compatible}} +[[intel::sycl_explicit_simd]] void f10(); + +// expected-note@+1 {{conflicting attribute is here}} +[[intel::named_sub_group_size("primary")]] void f11(); +// expected-error@+1 {{'sycl_explicit_simd' and 'named_sub_group_size' attributes are not compatible}} +[[intel::sycl_explicit_simd]] void f11(); + +// expected-note@+1 {{conflicting attribute is here}} +[[intel::named_sub_group_size("automatic")]] void f12(); +// expected-error@+1 {{'sycl_explicit_simd' and 'named_sub_group_size' attributes are not compatible}} +[[intel::sycl_explicit_simd]] void f12(); + +// expected-warning@+1 {{'named_sub_group_size' attribute argument not supported: invalid string}} +[[intel::named_sub_group_size("invalid string")]] void f13(); + +void NoAttrFunc() {} +SYCL_EXTERNAL void NoAttrExternalDefined() {} +SYCL_EXTERNAL void NoAttrExternalNotDefined(); // #NoAttrExternalNotDefined + +// If the kernel function has an attribute, only an undefined SYCL_EXTERNAL +// should diagnose. +void calls_kernel_1() { + sycl::kernel_single_task([]() [[intel::named_sub_group_size(automatic)]] { + NoAttrFunc(); + NoAttrExternalDefined(); + // expected-error@#NoAttrExternalNotDefined{{undefined 'SYCL_EXTERNAL' function must have a sub group size that matches the size specified for the kernel}} + // expected-note@-4{{conflicting attribute is here}} + NoAttrExternalNotDefined(); + }); +} + +struct Functor { + [[intel::named_sub_group_size(automatic)]] void operator()() const { + NoAttrFunc(); + // NoAttrExternalDefined(); + // expected-error@#NoAttrExternalNotDefined{{undefined 'SYCL_EXTERNAL' function must have a sub group size that matches the size specified for the kernel}} + // expected-note@-4{{conflicting attribute is here}} + NoAttrExternalNotDefined(); + } +}; + +void calls_kernel_2() { + Functor F; + sycl::kernel_single_task(F); +} + +// If the kernel doesn't have an attribute, +[[intel::named_sub_group_size(primary)]] void AttrFunc() {} // #AttrFunc +[[intel::named_sub_group_size(primary)]] SYCL_EXTERNAL void AttrExternalDefined() {} // #AttrExternalDefined +[[intel::named_sub_group_size(primary)]] SYCL_EXTERNAL void AttrExternalNotDefined(); // #AttrExternalNotDefined + +void calls_kernel_3() { + sycl::kernel_single_task([]() { // #Kernel3 + // primary-error@#AttrFunc{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // primary-note@#Kernel3{{kernel declared here}} + AttrFunc(); + // primary-error@#AttrExternalDefined{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // primary-note@#Kernel3{{kernel declared here}} + AttrExternalDefined(); + // primary-error@#AttrExternalNotDefined{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // primary-note@#Kernel3{{kernel declared here}} + AttrExternalNotDefined(); + }); +} + +[[intel::sub_group_size(10)]] void AttrFunc2() {} // #AttrFunc2 +[[intel::sub_group_size(10)]] SYCL_EXTERNAL void AttrExternalDefined2() {} // #AttrExternalDefined2 +[[intel::sub_group_size(10)]] SYCL_EXTERNAL void AttrExternalNotDefined2(); // #AttrExternalNotDefined2 + +void calls_kernel_4() { + sycl::kernel_single_task([]() { // #Kernel4 + // integer-error@#AttrFunc2{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // integer-note@#Kernel4{{kernel declared here}} + AttrFunc2(); + // integer-error@#AttrExternalDefined2{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // integer-note@#Kernel4{{kernel declared here}} + AttrExternalDefined2(); + // integer-error@#AttrExternalNotDefined2{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // integer-note@#Kernel4{{kernel declared here}} + AttrExternalNotDefined2(); + }); +} + +// Both have an attribute. +void calls_kernel_5() { + sycl::kernel_single_task([]() [[intel::named_sub_group_size(automatic)]] { // #Kernel5 + // expected-error@#AttrFunc{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // expected-note@#Kernel5{{conflicting attribute is here}} + AttrFunc(); + // expected-error@#AttrExternalDefined{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // expected-note@#Kernel5{{conflicting attribute is here}} + AttrExternalDefined(); + // expected-error@#AttrExternalNotDefined{{kernel-called function must have a sub group size that matches the size specified for the kernel}} + // expected-note@#Kernel5{{conflicting attribute is here}} + AttrExternalNotDefined(); + + }); +} + +// Don't diag with the old sub-group-size. +void calls_kernel_6() { + sycl::kernel_single_task([]() [[intel::reqd_sub_group_size(10)]] { // #Kernel6 + NoAttrExternalNotDefined(); + }); +}