From dbe18de8aa459250a67af45311ea70f2cb023597 Mon Sep 17 00:00:00 2001 From: "Podchishchaeva, Mariya" Date: Thu, 10 Apr 2025 05:53:19 -0700 Subject: [PATCH 01/16] Initial change for testing --- clang/include/clang/Basic/CodeGenOptions.def | 3 ++ clang/include/clang/Driver/Options.td | 5 ++++ clang/lib/CodeGen/ABIInfoImpl.cpp | 30 +++++++++++++++----- clang/lib/CodeGen/ABIInfoImpl.h | 2 +- clang/lib/CodeGen/CGCall.cpp | 8 ++++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 8 ++++-- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 8 ++++-- clang/lib/CodeGen/Targets/AArch64.cpp | 2 +- clang/lib/CodeGen/Targets/ARM.cpp | 2 +- clang/lib/CodeGen/Targets/SPIR.cpp | 16 +++++++++-- clang/lib/CodeGen/Targets/X86.cpp | 4 +-- 11 files changed, 67 insertions(+), 21 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 4f1c2535d1289..1229b159861e2 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -480,6 +480,9 @@ CODEGENOPT(DisableSYCLEarlyOpts, 1, 0) /// which do not contain "user" code. CODEGENOPT(OptimizeSYCLFramework, 1, 0) +/// Whether to use alloca address spate for `sret` arguments. +CODEGENOPT(UseAllocaASForSrets, 1, 0) + /// Turn on fp64 partial emulation for kernels with only fp64 conversion /// operations and no fp64 computation operations (requires Intel GPU backend /// supporting fp64 partial emulation) diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 8795fa05613c0..5daebe313efe7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8827,6 +8827,11 @@ def fsycl_is_native_cpu : Flag<["-"], "fsycl-is-native-cpu">, HelpText<"Perform device compilation for Native CPU.">, Visibility<[CC1Option]>, MarshallingInfoFlag>; +defm foffload_use_alloca_addrspace_for_srets : BoolFOption<"foffload-use-alloca-addrspace-for-srets", + CodeGenOpts<"UseAllocaASForSrets">, + DefaultFalse, + PosFlag, + NegFlag>; } // let Visibility = [CC1Option] diff --git a/clang/lib/CodeGen/ABIInfoImpl.cpp b/clang/lib/CodeGen/ABIInfoImpl.cpp index 0a612d3461dc2..468bd09ca8d9a 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.cpp +++ b/clang/lib/CodeGen/ABIInfoImpl.cpp @@ -21,10 +21,16 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { // Records with non-trivial destructors/copy-constructors should not be // passed by value. if (CGCXXABI::RecordArgABI RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(), + return getNaturalAlignIndirect(Ty, + getCodeGenOpts().UseAllocaASForSrets + ? getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(Ty), RAA == CGCXXABI::RAA_DirectInMemory); - return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace()); + return getNaturalAlignIndirect(Ty, + getCodeGenOpts().UseAllocaASForSrets + ? getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(Ty)); } // Treat an enum type as its underlying type. @@ -37,7 +43,10 @@ ABIArgInfo DefaultABIInfo::classifyArgumentType(QualType Ty) const { Context.getTypeSize(Context.getTargetInfo().hasInt128Type() ? Context.Int128Ty : Context.LongLongTy)) - return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace()); + return getNaturalAlignIndirect(Ty, + getCodeGenOpts().UseAllocaASForSrets + ? getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(Ty)); return (isPromotableIntegerTypeForABI(Ty) ? ABIArgInfo::getExtend(Ty, CGT.ConvertType(Ty)) @@ -49,7 +58,10 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { return ABIArgInfo::getIgnore(); if (isAggregateTypeForABI(RetTy)) - return getNaturalAlignIndirect(RetTy, getDataLayout().getAllocaAddrSpace()); + return getNaturalAlignIndirect(RetTy, + getCodeGenOpts().UseAllocaASForSrets + ? getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(RetTy)); // Treat an enum type as its underlying type. if (const EnumType *EnumTy = RetTy->getAs()) @@ -61,7 +73,9 @@ ABIArgInfo DefaultABIInfo::classifyReturnType(QualType RetTy) const { ? getContext().Int128Ty : getContext().LongLongTy)) return getNaturalAlignIndirect(RetTy, - getDataLayout().getAllocaAddrSpace()); + getCodeGenOpts().UseAllocaASForSrets + ? getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(RetTy)); return (isPromotableIntegerTypeForABI(RetTy) ? ABIArgInfo::getExtend(RetTy) : ABIArgInfo::getDirect()); @@ -122,14 +136,16 @@ CGCXXABI::RecordArgABI CodeGen::getRecordArgABI(QualType T, CGCXXABI &CXXABI) { } bool CodeGen::classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, - const ABIInfo &Info) { + const ABIInfo &Info, CodeGenTypes &CGT) { QualType Ty = FI.getReturnType(); if (const auto *RT = Ty->getAs()) if (!isa(RT->getDecl()) && !RT->getDecl()->canPassInRegisters()) { FI.getReturnInfo() = Info.getNaturalAlignIndirect( - Ty, Info.getDataLayout().getAllocaAddrSpace()); + Ty, Info.getCodeGenOpts().UseAllocaASForSrets + ? Info.getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(Ty)); return true; } diff --git a/clang/lib/CodeGen/ABIInfoImpl.h b/clang/lib/CodeGen/ABIInfoImpl.h index d9d79c6a55ddb..87113c9d9f33c 100644 --- a/clang/lib/CodeGen/ABIInfoImpl.h +++ b/clang/lib/CodeGen/ABIInfoImpl.h @@ -46,7 +46,7 @@ CGCXXABI::RecordArgABI getRecordArgABI(const RecordType *RT, CGCXXABI &CXXABI); CGCXXABI::RecordArgABI getRecordArgABI(QualType T, CGCXXABI &CXXABI); bool classifyReturnType(const CGCXXABI &CXXABI, CGFunctionInfo &FI, - const ABIInfo &Info); + const ABIInfo &Info, CodeGenTypes &CGT); /// Pass transparent unions as if they were the type of the first element. Sema /// should ensure that all elements of the union have the same "machine type". diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index c9dcc7ab6d69f..95107d781b3e4 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -1719,8 +1719,12 @@ CodeGenTypes::GetFunctionType(const CGFunctionInfo &FI) { // Add type for sret argument. if (IRFunctionArgs.hasSRetArg()) { - ArgTypes[IRFunctionArgs.getSRetArgNo()] = llvm::PointerType::get( - getLLVMContext(), FI.getReturnInfo().getIndirectAddrSpace()); + QualType Ret = FI.getReturnType(); + unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets + ? FI.getReturnInfo().getIndirectAddrSpace() + : CGM.getTypes().getTargetAddressSpace(Ret); + ArgTypes[IRFunctionArgs.getSRetArgNo()] = + llvm::PointerType::get(getLLVMContext(), AddressSpace); } // Add type for inalloca argument. diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index 0356f33725abf..bbafd8ee995f8 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1349,9 +1349,13 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { // If C++ prohibits us from making a copy, return by address. if (!RD->canPassInRegisters()) { - auto Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); + QualType Ret = FI.getReturnType(); + auto Align = CGM.getContext().getTypeAlignInChars(Ret); + unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets + ? FI.getReturnInfo().getIndirectAddrSpace() + : CGM.getTypes().getTargetAddressSpace(Ret); FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(), + Align, /*AddrSpace=*/AddressSpace, /*ByVal=*/false); return true; } diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 0393fed7c8ead..bd4fbf11b5656 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1172,9 +1172,13 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { bool isIndirectReturn = !isTrivialForABI || FI.isInstanceMethod(); if (isIndirectReturn) { - CharUnits Align = CGM.getContext().getTypeAlignInChars(FI.getReturnType()); + QualType Ret = FI.getReturnType(); + CharUnits Align = CGM.getContext().getTypeAlignInChars(Ret); + unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets + ? FI.getReturnInfo().getIndirectAddrSpace() + : CGM.getTypes().getTargetAddressSpace(Ret); FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/CGM.getDataLayout().getAllocaAddrSpace(), + Align, /*AddrSpace=*/AddressSpace, /*ByVal=*/false); // MSVC always passes `this` before the `sret` parameter. diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 073ca3cc82690..9e364735798ab 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -60,7 +60,7 @@ class AArch64ABIInfo : public ABIInfo { SmallVectorImpl &Flattened) const; void computeInfo(CGFunctionInfo &FI) const override { - if (!::classifyReturnType(getCXXABI(), FI, *this)) + if (!::classifyReturnType(getCXXABI(), FI, *this, CGT)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic()); diff --git a/clang/lib/CodeGen/Targets/ARM.cpp b/clang/lib/CodeGen/Targets/ARM.cpp index a6d9a5549355c..beece62621abb 100644 --- a/clang/lib/CodeGen/Targets/ARM.cpp +++ b/clang/lib/CodeGen/Targets/ARM.cpp @@ -232,7 +232,7 @@ void WindowsARMTargetCodeGenInfo::setTargetAttributes( } void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { - if (!::classifyReturnType(getCXXABI(), FI, *this)) + if (!::classifyReturnType(getCXXABI(), FI, *this, CGT)) FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), FI.isVariadic(), FI.getCallingConvention()); diff --git a/clang/lib/CodeGen/Targets/SPIR.cpp b/clang/lib/CodeGen/Targets/SPIR.cpp index a110124ff9cb6..9068b8b8b609a 100644 --- a/clang/lib/CodeGen/Targets/SPIR.cpp +++ b/clang/lib/CodeGen/Targets/SPIR.cpp @@ -51,7 +51,10 @@ ABIArgInfo CommonSPIRABIInfo::classifyKernelArgumentType(QualType Ty) const { } // Pass all aggregate types allowed by Sema by value. if (isAggregateTypeForABI(Ty)) - return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace()); + return getNaturalAlignIndirect(Ty, + getCodeGenOpts().UseAllocaASForSrets + ? getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(Ty)); } return DefaultABIInfo::classifyArgumentType(Ty); @@ -129,7 +132,11 @@ ABIArgInfo CommonSPIRABIInfo::classifyRegcallArgumentType(QualType Ty) const { // Records with non-trivial destructors/copy-constructors should not be // passed by value. if (auto RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, RAA == CGCXXABI::RAA_DirectInMemory); + return getNaturalAlignIndirect(Ty, + getCodeGenOpts().UseAllocaASForSrets + ? getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(Ty), + RAA == CGCXXABI::RAA_DirectInMemory); // Ignore empty structs/unions. if (isEmptyRecord(getContext(), Ty, true)) @@ -321,7 +328,10 @@ ABIArgInfo SPIRVABIInfo::classifyArgumentType(QualType Ty) const { // Records with non-trivial destructors/copy-constructors should not be // passed by value. if (auto RAA = getRecordArgABI(Ty, getCXXABI())) - return getNaturalAlignIndirect(Ty, getDataLayout().getAllocaAddrSpace(), + return getNaturalAlignIndirect(Ty, + getCodeGenOpts().UseAllocaASForSrets + ? getDataLayout().getAllocaAddrSpace() + : CGT.getTargetAddressSpace(Ty), RAA == CGCXXABI::RAA_DirectInMemory); if (const RecordType *RT = Ty->getAs()) { diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index fd81266e8188c..684b13cc02d56 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -970,7 +970,7 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const { } else State.FreeRegs = DefaultNumRegisterParameters; - if (!::classifyReturnType(getCXXABI(), FI, *this)) { + if (!::classifyReturnType(getCXXABI(), FI, *this, CGT)) { FI.getReturnInfo() = classifyReturnType(FI.getReturnType(), State); } else if (FI.getReturnInfo().isIndirect()) { // The C++ ABI is not aware of register usage, so we have to check if the @@ -2980,7 +2980,7 @@ void X86_64ABIInfo::computeInfo(CGFunctionInfo &FI) const { unsigned FreeSSERegs = IsRegCall ? 16 : 8; unsigned NeededInt = 0, NeededSSE = 0, MaxVectorWidth = 0; - if (!::classifyReturnType(getCXXABI(), FI, *this)) { + if (!::classifyReturnType(getCXXABI(), FI, *this, CGT)) { if (IsRegCall && FI.getReturnType()->getTypePtr()->isRecordType() && !FI.getReturnType()->getTypePtr()->isUnionType()) { FI.getReturnInfo() = classifyRegCallStructType( From 53d4d29dbf8845788d88acef115049d4e14b5022 Mon Sep 17 00:00:00 2001 From: "Podchishchaeva, Mariya" Date: Fri, 11 Apr 2025 06:21:03 -0700 Subject: [PATCH 02/16] Fix crashes related to lifetimes emission --- clang/lib/CodeGen/CGCall.cpp | 26 ++++++++++++++++++++------ 1 file changed, 20 insertions(+), 6 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 95107d781b3e4..599f67b7ac634 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5313,6 +5313,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // If the call returns a temporary with struct return, create a temporary // alloca to hold the result, unless one is given to us. Address SRetPtr = Address::invalid(); + RawAddress SRetAlloca = RawAddress::invalid(); llvm::Value *UnusedReturnSizePtr = nullptr; if (RetAI.isIndirect() || RetAI.isInAlloca() || RetAI.isCoerceAndExpand()) { // For virtual function pointer thunks and musttail calls, we must always @@ -5326,11 +5327,17 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, } else if (!ReturnValue.isNull()) { SRetPtr = ReturnValue.getAddress(); } else { - SRetPtr = CreateMemTempWithoutCast(RetTy, "tmp"); + SRetPtr = CGM.getCodeGenOpts().UseAllocaASForSrets + ? CreateMemTempWithoutCast(RetTy, "tmp") + : CreateMemTemp(RetTy, "tmp", &SRetAlloca); if (HaveInsertPoint() && ReturnValue.isUnused()) { llvm::TypeSize size = CGM.getDataLayout().getTypeAllocSize(ConvertTypeForMem(RetTy)); - UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getBasePointer()); + if (CGM.getCodeGenOpts().UseAllocaASForSrets) + UnusedReturnSizePtr = + EmitLifetimeStart(size, SRetPtr.getBasePointer()); + else + UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer()); } } if (IRFunctionArgs.hasSRetArg()) { @@ -5338,7 +5345,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // chosen IndirectAS can happen e.g. when passing the this pointer through // a chain involving stores to / loads from the DefaultAS; we address this // here, symmetrically with the handling we have for normal pointer args. - if (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace()) { + if (CGM.getCodeGenOpts().UseAllocaASForSrets && + (SRetPtr.getAddressSpace() != RetAI.getIndirectAddrSpace())) { llvm::Value *V = SRetPtr.getBasePointer(); LangAS SAS = getLangASFromTargetAS(SRetPtr.getAddressSpace()); LangAS DAS = getLangASFromTargetAS(RetAI.getIndirectAddrSpace()); @@ -5920,9 +5928,15 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, // can't depend on being inside of an ExprWithCleanups, so we need to manually // pop this cleanup later on. Being eager about this is OK, since this // temporary is 'invisible' outside of the callee. - if (UnusedReturnSizePtr) - pushFullExprCleanup(NormalEHLifetimeMarker, SRetPtr, - UnusedReturnSizePtr); + if (UnusedReturnSizePtr) { + if (CGM.getCodeGenOpts().UseAllocaASForSrets) + pushFullExprCleanup(NormalEHLifetimeMarker, SRetPtr, + UnusedReturnSizePtr); + else + pushFullExprCleanup(NormalEHLifetimeMarker, SRetAlloca, + UnusedReturnSizePtr); + } + llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest(); From 5e569a7b01d66ec9a06810d37663bace5e7fd28f Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Sat, 19 Apr 2025 19:53:18 -0700 Subject: [PATCH 03/16] Additional changes to make the option work in non-SYCL modes --- clang/include/clang/Basic/CodeGenOptions.def | 2 +- clang/include/clang/Driver/Options.td | 4 ++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 2 +- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 2 +- clang/lib/Driver/ToolChains/Clang.cpp | 11 +++++++++++ clang/test/CodeGenSYCL/regcall-cc-test.cpp | 2 +- clang/test/Driver/sycl-device.cpp | 12 ++++++++++++ clang/test/Driver/sycl-int-footer-old-model.cpp | 6 +++--- clang/test/Driver/sycl-int-header-footer.cpp | 6 +++--- 9 files changed, 35 insertions(+), 12 deletions(-) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index 1229b159861e2..b127df69295b3 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -480,7 +480,7 @@ CODEGENOPT(DisableSYCLEarlyOpts, 1, 0) /// which do not contain "user" code. CODEGENOPT(OptimizeSYCLFramework, 1, 0) -/// Whether to use alloca address spate for `sret` arguments. +/// Whether to use alloca address space for `sret` arguments. CODEGENOPT(UseAllocaASForSrets, 1, 0) /// Turn on fp64 partial emulation for kernels with only fp64 conversion diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 5daebe313efe7..4101962349406 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8827,9 +8827,9 @@ def fsycl_is_native_cpu : Flag<["-"], "fsycl-is-native-cpu">, HelpText<"Perform device compilation for Native CPU.">, Visibility<[CC1Option]>, MarshallingInfoFlag>; -defm foffload_use_alloca_addrspace_for_srets : BoolFOption<"foffload-use-alloca-addrspace-for-srets", +defm offload_use_alloca_addrspace_for_srets : BoolFOption<"offload-use-alloca-addrspace-for-srets", CodeGenOpts<"UseAllocaASForSrets">, - DefaultFalse, + DefaultTrue, PosFlag, NegFlag>; diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index bbafd8ee995f8..e10273220dafd 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1352,7 +1352,7 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { QualType Ret = FI.getReturnType(); auto Align = CGM.getContext().getTypeAlignInChars(Ret); unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets - ? FI.getReturnInfo().getIndirectAddrSpace() + ? CGM.getDataLayout().getAllocaAddrSpace() : CGM.getTypes().getTargetAddressSpace(Ret); FI.getReturnInfo() = ABIArgInfo::getIndirect( Align, /*AddrSpace=*/AddressSpace, diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index bd4fbf11b5656..93e38f99c10db 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1175,7 +1175,7 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { QualType Ret = FI.getReturnType(); CharUnits Align = CGM.getContext().getTypeAlignInChars(Ret); unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets - ? FI.getReturnInfo().getIndirectAddrSpace() + ? CGM.getDataLayout().getAllocaAddrSpace() : CGM.getTypes().getTargetAddressSpace(Ret); FI.getReturnInfo() = ABIArgInfo::getIndirect( Align, /*AddrSpace=*/AddressSpace, diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 43fbf18006ea5..7a904543cd22b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5811,6 +5811,13 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-optimize-non-user-code"); } + + // Disable this option for SYCL by default. + // TODO: This needs to be re-enabled once we have a real fix. + if (!Args.hasArg(options::OPT_foffload_use_alloca_addrspace_for_srets) && + !Args.hasArg(options::OPT_fno_offload_use_alloca_addrspace_for_srets)) + CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets"); + // Add any predefined macros associated with intel_gpu* type targets // passed in with -fsycl-targets // TODO: Macros are populated during device compilations and saved for @@ -6364,6 +6371,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, Args.addOptOutFlag(CmdArgs, options::OPT_foptimize_sibling_calls, options::OPT_fno_optimize_sibling_calls); + Args.addOptOutFlag(CmdArgs, + options::OPT_foffload_use_alloca_addrspace_for_srets, + options::OPT_fno_offload_use_alloca_addrspace_for_srets); + RenderFloatingPointOptions(TC, D, isOptimizationLevelFast(Args), Args, CmdArgs, JA, NoOffloadFP32PrecDiv, NoOffloadFP32PrecSqrt); diff --git a/clang/test/CodeGenSYCL/regcall-cc-test.cpp b/clang/test/CodeGenSYCL/regcall-cc-test.cpp index 8f5dd318b8ea2..1c3a4ef0f0ab6 100644 --- a/clang/test/CodeGenSYCL/regcall-cc-test.cpp +++ b/clang/test/CodeGenSYCL/regcall-cc-test.cpp @@ -333,7 +333,7 @@ struct NonCopyable { // CHECK-DAG: %struct.NonCopyable = type { i32 } SYCL_DEVICE int __regcall bar(NonCopyable x) { -// CHECK-DAG: define dso_local x86_regcallcc noundef i32 @_Z15__regcall3__bar11NonCopyable(ptr noundef byval(%struct.NonCopyable) align 4 %x) +// CHECK-DAG: define dso_local x86_regcallcc noundef i32 @_Z15__regcall3__bar11NonCopyable(ptr noundef %x) return x.a; } diff --git a/clang/test/Driver/sycl-device.cpp b/clang/test/Driver/sycl-device.cpp index ba14a409bf795..8013b64d7386f 100644 --- a/clang/test/Driver/sycl-device.cpp +++ b/clang/test/Driver/sycl-device.cpp @@ -58,3 +58,15 @@ // PHASES-PREPROC-DEPS: 0: input, {{.*}}, c++, (device-sycl) // PHASES-PROPROC-DEPS: 1: preprocessor, {0}, dependencies, (device-sycl) // PHASES-PREPROC-DEPS: 2: offload, "device-sycl (spir64-unknown-unknown)" {1}, none + +/// Check that "-fno-offload-use-alloca-addrspace-for-srets" is not set by +/// default on the command-line in a non-sycl compilation. +// RUN: %clang -### %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-ALLOCA-ADDRSPACE %s +// CHECK-ALLOCA-ADDRSPACE-NOT: clang{{.*}} "-fno-offload-use-alloca-addrspace-for-srets" + +/// Check that "-fno-offload-use-alloca-addrspace-for-srets" is set if it is +/// not specified on the command-line by the user with -fsycl +// RUN: %clang -### -fsycl %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHECK-NO-ALLOCA-ADDRSPACE %s +// CHECK-NO-ALLOCA-ADDRSPACE: clang{{.*}} "-fno-offload-use-alloca-addrspace-for-srets" diff --git a/clang/test/Driver/sycl-int-footer-old-model.cpp b/clang/test/Driver/sycl-int-footer-old-model.cpp index f317d936bef9c..ba3e7d4330410 100644 --- a/clang/test/Driver/sycl-int-footer-old-model.cpp +++ b/clang/test/Driver/sycl-int-footer-old-model.cpp @@ -1,7 +1,7 @@ /// Check compilation tool steps when using the integration footer // RUN: %clangxx -fsycl --no-offload-new-driver -I cmdline/dir -include dummy.h %/s -### 2>&1 \ // RUN: | FileCheck -check-prefix FOOTER %s -DSRCDIR=%/S -DCMDDIR=cmdline/dir -// FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]" "-sycl-std={{.*}}"{{.*}} "-include" "dummy.h" +// FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]"{{.*}} "-sycl-std={{.*}}"{{.*}} "-include" "dummy.h" // FOOTER: clang{{.*}} "-fsycl-is-host" // FOOTER-SAME: "-include-internal-header" "[[INTHEADER]]" // FOOTER-SAME: "-dependency-filter" "[[INTHEADER]]" @@ -10,7 +10,7 @@ /// Preprocessed file creation with integration footer // RUN: %clangxx -fsycl --no-offload-new-driver -E %/s -### 2>&1 \ // RUN: | FileCheck -check-prefix FOOTER_PREPROC_GEN %s -// FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]" "-sycl-std={{.*}}" "-o" "[[PREPROC_DEVICE:.+\.ii]]" +// FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]"{{.*}} "-sycl-std={{.*}}" "-o" "[[PREPROC_DEVICE:.+\.ii]]" // FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-host" // FOOTER_PREPROC_GEN-SAME: "-include-internal-header" "[[INTHEADER]]" // FOOTER_PREPROC_GEN-SAME: "-dependency-filter" "[[INTHEADER]]" @@ -29,7 +29,7 @@ /// Check that integration footer can be disabled // RUN: %clangxx -fsycl --no-offload-new-driver -fno-sycl-use-footer %s -### 2>&1 \ // RUN: | FileCheck -check-prefix NO-FOOTER --implicit-check-not "-fsycl-int-footer" %s -// NO-FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-sycl-std={{.*}}" +// NO-FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]"{{.*}} "-sycl-std={{.*}}" // NO-FOOTER-NOT: append-file // NO-FOOTER: clang{{.*}} "-fsycl-is-host"{{.*}} "-include-internal-header" "[[INTHEADER]]" diff --git a/clang/test/Driver/sycl-int-header-footer.cpp b/clang/test/Driver/sycl-int-header-footer.cpp index 8692f29c765e8..7a67f8753bfc3 100644 --- a/clang/test/Driver/sycl-int-header-footer.cpp +++ b/clang/test/Driver/sycl-int-header-footer.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx -fsycl --offload-new-driver -I cmdline/dir -include dummy.h %/s -### 2>&1 \ // RUN: | FileCheck -check-prefix FOOTER %s -DSRCDIR=%/S -DCMDDIR=cmdline/dir -// FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]" "-sycl-std={{.*}}"{{.*}} "-include" "dummy.h" +// FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]"{{.*}} "-sycl-std={{.*}}"{{.*}} "-include" "dummy.h" // FOOTER: clang{{.*}} "-fsycl-is-host" // FOOTER-SAME: "-include-internal-header" "[[INTHEADER]]" // FOOTER-SAME: "-include" "dummy.h"{{.*}} "-I" "cmdline/dir" @@ -11,7 +11,7 @@ /// Preprocessed file creation with integration footer // RUN: %clangxx -fsycl --offload-new-driver -E %/s -### 2>&1 \ // RUN: | FileCheck -check-prefix FOOTER_PREPROC_GEN %s -// FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]" "-sycl-std={{.*}}" "-o" "[[PREPROC_DEVICE:.+\.ii]]" +// FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]"{{.*}} "-sycl-std={{.*}}" "-o" "[[PREPROC_DEVICE:.+\.ii]]" // FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-host" // FOOTER_PREPROC_GEN-SAME: "-include-internal-header" "[[INTHEADER]]" // FOOTER_PREPROC_GEN-SAME: "-dependency-filter" "[[INTHEADER]]" @@ -28,7 +28,7 @@ /// Check that integration footer can be disabled // RUN: %clangxx -fsycl --offload-new-driver -fno-sycl-use-footer %s -### 2>&1 \ // RUN: | FileCheck -check-prefix NO-FOOTER --implicit-check-not "-fsycl-int-footer" %s -// NO-FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-sycl-std={{.*}}" +// NO-FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]"{{.*}} "-sycl-std={{.*}}" // NO-FOOTER: clang{{.*}} "-fsycl-is-host"{{.*}} "-include-internal-header" "[[INTHEADER]]" /// Check phases without integration footer From 9d98cde53db29eaf1be13883fed3997d327399c3 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Sat, 19 Apr 2025 20:10:45 -0700 Subject: [PATCH 04/16] Fix clang-format issues --- clang/lib/CodeGen/CGCall.cpp | 4 ++-- clang/lib/CodeGen/ItaniumCXXABI.cpp | 6 +++--- clang/lib/CodeGen/MicrosoftCXXABI.cpp | 6 +++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index 599f67b7ac634..89611247124fc 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -5337,7 +5337,8 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, UnusedReturnSizePtr = EmitLifetimeStart(size, SRetPtr.getBasePointer()); else - UnusedReturnSizePtr = EmitLifetimeStart(size, SRetAlloca.getPointer()); + UnusedReturnSizePtr = + EmitLifetimeStart(size, SRetAlloca.getPointer()); } } if (IRFunctionArgs.hasSRetArg()) { @@ -5937,7 +5938,6 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo, UnusedReturnSizePtr); } - llvm::BasicBlock *InvokeDest = CannotThrow ? nullptr : getInvokeDest(); SmallVector BundleList = diff --git a/clang/lib/CodeGen/ItaniumCXXABI.cpp b/clang/lib/CodeGen/ItaniumCXXABI.cpp index e10273220dafd..b9e33b170b4a1 100644 --- a/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -1354,9 +1354,9 @@ bool ItaniumCXXABI::classifyReturnType(CGFunctionInfo &FI) const { unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets ? CGM.getDataLayout().getAllocaAddrSpace() : CGM.getTypes().getTargetAddressSpace(Ret); - FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/AddressSpace, - /*ByVal=*/false); + FI.getReturnInfo() = + ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AddressSpace, + /*ByVal=*/false); return true; } return false; diff --git a/clang/lib/CodeGen/MicrosoftCXXABI.cpp b/clang/lib/CodeGen/MicrosoftCXXABI.cpp index 93e38f99c10db..3dc7c33fab301 100644 --- a/clang/lib/CodeGen/MicrosoftCXXABI.cpp +++ b/clang/lib/CodeGen/MicrosoftCXXABI.cpp @@ -1177,9 +1177,9 @@ bool MicrosoftCXXABI::classifyReturnType(CGFunctionInfo &FI) const { unsigned AddressSpace = CGM.getCodeGenOpts().UseAllocaASForSrets ? CGM.getDataLayout().getAllocaAddrSpace() : CGM.getTypes().getTargetAddressSpace(Ret); - FI.getReturnInfo() = ABIArgInfo::getIndirect( - Align, /*AddrSpace=*/AddressSpace, - /*ByVal=*/false); + FI.getReturnInfo() = + ABIArgInfo::getIndirect(Align, /*AddrSpace=*/AddressSpace, + /*ByVal=*/false); // MSVC always passes `this` before the `sret` parameter. FI.getReturnInfo().setSRetAfterThis(FI.isInstanceMethod()); From 3c3ad2703c6d5b84c115c5260ae8b3c0bd2b05db Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Sun, 20 Apr 2025 19:15:38 -0700 Subject: [PATCH 05/16] Revert "Merge commit '3a353b1faa83' into llvmspirv_pulldown" This reverts commit afe4c53da40461d4f909845d34120a544389a752, reversing changes made to bd2d590c7320ceee5af2572a22cc4f8e1e09397b. --- clang/test/CodeGenSYCL/sub-group-size.cpp | 4 +-- llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp | 11 ++----- .../sycl-function-metadata.ll | 30 ------------------- sycl/test/check_device_code/atomic_ref.cpp | 2 +- .../bf16_vector_conversion.cpp | 24 +++++++-------- .../check_device_code/device_has_func.cpp | 17 +++++++++++ .../check_device_code/device_has_kernel.cpp | 17 +++++++++++ .../check_device_code/vector/as_preview.cpp | 2 +- 8 files changed, 53 insertions(+), 54 deletions(-) delete mode 100644 llvm/test/SYCLLowerIR/CleanupSYCLCompilerInternalMetadata/sycl-function-metadata.ll diff --git a/clang/test/CodeGenSYCL/sub-group-size.cpp b/clang/test/CodeGenSYCL/sub-group-size.cpp index 3f4cf91818f82..528c89b918c88 100644 --- a/clang/test/CodeGenSYCL/sub-group-size.cpp +++ b/clang/test/CodeGenSYCL/sub-group-size.cpp @@ -15,7 +15,7 @@ using namespace sycl; // ALL-DAG: define {{.*}}spir_func void @{{.*}}external_10{{.*}}() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[TEN:[0-9]+]] SYCL_EXTERNAL void external_default_behavior() {} -// NONE-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}} !sycl_fixed_targets !{{[0-9]+}} { +// NONE-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}} !srcloc !{{[0-9]+}} !sycl_fixed_targets !{{[0-9]+}} { // PRIM_DEF-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[PRIMARY]] // TEN_DEF-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[TEN]] @@ -23,7 +23,7 @@ void default_behavior() { kernel_single_task([]() { }); } -// NONE-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}} !kernel_arg_buffer_location !{{[0-9]+}} !sycl_fixed_targets !{{[0-9]+}} { +// NONE-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}} !srcloc !{{[0-9]+}} !kernel_arg_buffer_location !{{[0-9]+}} !sycl_fixed_targets !{{[0-9]+}} { // PRIM_DEF-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[PRIMARY]] // TEN_DEF-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[TEN]] diff --git a/llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp b/llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp index c70d4eeb1b7fb..74c894cb6e63e 100644 --- a/llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp +++ b/llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp @@ -24,7 +24,8 @@ using namespace llvm; namespace { -void cleanupSYCLCompilerModuleMetadata(const Module &M, llvm::StringRef MD) { + +void cleanupSYCLCompilerMetadata(const Module &M, llvm::StringRef MD) { NamedMDNode *Node = M.getNamedMetadata(MD); if (!Node) return; @@ -64,13 +65,7 @@ PreservedAnalyses CleanupSYCLMetadataPass::run(Module &M, SmallVector ModuleMDToRemove = {"sycl_aspects", "sycl_types_that_use_aspects"}; for (const auto &MD : ModuleMDToRemove) - cleanupSYCLCompilerModuleMetadata(M, MD); - - // Cleanup no longer needed function metadata. - for (auto &F : M) { - if (F.getMetadata("srcloc")) - F.setMetadata("srcloc", nullptr); - } + cleanupSYCLCompilerMetadata(M, MD); return PreservedAnalyses::all(); } diff --git a/llvm/test/SYCLLowerIR/CleanupSYCLCompilerInternalMetadata/sycl-function-metadata.ll b/llvm/test/SYCLLowerIR/CleanupSYCLCompilerInternalMetadata/sycl-function-metadata.ll deleted file mode 100644 index db8986976e94e..0000000000000 --- a/llvm/test/SYCLLowerIR/CleanupSYCLCompilerInternalMetadata/sycl-function-metadata.ll +++ /dev/null @@ -1,30 +0,0 @@ -; RUN: opt -passes=cleanup-sycl-metadata -S < %s | FileCheck %s -; -; Test checks that the pass is able to cleanup srcloc metadata -; function metadata - -; CHECK-NOT: srcloc - -; ModuleID = 'test.cpp' -source_filename = "test.cpp" -target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" -target triple = "spir64-unknown-unknown" - -$_ZNK13KernelFunctorclEv = comdat any - -define dso_local spir_func void @_Z6func10v() !sycl_declared_aspects !1 !srcloc !2 { -entry: - ret void -} - - -define linkonce_odr spir_func void @_ZNK13KernelFunctorclEv() !sycl_declared_aspects !3 !srcloc !4 { -entry: - call spir_func void @_Z6func10v() - ret void -} - -!1 = !{i32 5} -!2 = !{i32 2457} -!3 = !{i32 1} -!4 = !{i32 2547} diff --git a/sycl/test/check_device_code/atomic_ref.cpp b/sycl/test/check_device_code/atomic_ref.cpp index 899d547497e5e..648ae01ddb192 100644 --- a/sycl/test/check_device_code/atomic_ref.cpp +++ b/sycl/test/check_device_code/atomic_ref.cpp @@ -4,7 +4,7 @@ #include // CHECK-LABEL: define dso_local spir_func noundef i32 @_Z17atomic_ref_globalRi( -// CHECK-SAME: ptr addrspace(4) noundef align 4 dereferenceable(4) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) noundef align 4 dereferenceable(4) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(4) [[I]] to ptr addrspace(1) // CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(ptr addrspace(1) noundef [[TMP]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]] diff --git a/sycl/test/check_device_code/bf16_vector_conversion.cpp b/sycl/test/check_device_code/bf16_vector_conversion.cpp index c322ba8fea1e5..e423150d7bf88 100644 --- a/sycl/test/check_device_code/bf16_vector_conversion.cpp +++ b/sycl/test/check_device_code/bf16_vector_conversion.cpp @@ -10,7 +10,7 @@ using namespace sycl; using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF1PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec1(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void @@ -20,7 +20,7 @@ SYCL_EXTERNAL auto TestBFtoF1(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF1PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META7:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec1(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -30,7 +30,7 @@ SYCL_EXTERNAL auto TestFtoBF1(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF2PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META8:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec2(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -40,7 +40,7 @@ SYCL_EXTERNAL auto TestBFtoF2(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF2PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META9:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec2(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -50,7 +50,7 @@ SYCL_EXTERNAL auto TestFtoBF2(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF3PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META10:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -60,7 +60,7 @@ SYCL_EXTERNAL auto TestBFtoF3(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF3PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META11:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -70,7 +70,7 @@ SYCL_EXTERNAL auto TestFtoBF3(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF4PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META12:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec4(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -80,7 +80,7 @@ SYCL_EXTERNAL auto TestBFtoF4(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF4PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META13:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec4(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestFtoBF4(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF8PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META14:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec8(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -100,7 +100,7 @@ SYCL_EXTERNAL auto TestBFtoF8(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF8PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META15:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec8(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -110,7 +110,7 @@ SYCL_EXTERNAL auto TestFtoBF8(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z11TestBFtoF16PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META16:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -120,7 +120,7 @@ SYCL_EXTERNAL auto TestBFtoF16(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z11TestFtoBF16PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META17:![0-9]+]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec16(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void diff --git a/sycl/test/check_device_code/device_has_func.cpp b/sycl/test/check_device_code/device_has_func.cpp index 702f6bde5bfca..00f7ad6d042bb 100644 --- a/sycl/test/check_device_code/device_has_func.cpp +++ b/sycl/test/check_device_code/device_has_func.cpp @@ -1,5 +1,6 @@ // RUN: %clangxx -fsycl -Xclang -fsycl-is-device -fsycl-device-only -Xclang -fno-sycl-early-optimizations -S -emit-llvm %s -o %t.ll // RUN: FileCheck %s --input-file %t.ll --check-prefix=CHECK-ASPECTS +// RUN: FileCheck %s --input-file %t.ll --check-prefix=CHECK-SRCLOC // Tests for IR of device_has(aspect, ...) attribute and // !sycl_used_aspects metadata. @@ -10,30 +11,37 @@ using namespace sycl; // CHECK-ASPECTS: define dso_local spir_func void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] {{.*}} +// CHECK-SRCLOC: define dso_local spir_func void @{{.*}}kernel_name_1{{.*}} !srcloc ![[SRCLOC1:[0-9]+]] {{.*}} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func1{{.*}} !srcloc ![[SRCLOC2:[0-9]+]] [[sycl::device_has(sycl::aspect::cpu)]] void func1() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS2]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func2{{.*}} !srcloc ![[SRCLOC3:[0-9]+]] [[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func3{{.*}} !srcloc ![[SRCLOC4:[0-9]+]] [[sycl::device_has()]] void func3() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS3]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func4{{.*}} !srcloc ![[SRCLOC5:[0-9]+]] template [[sycl::device_has(Aspect)]] void func4() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func5{{.*}} !srcloc ![[SRCLOC6:[0-9]+]] [[sycl::device_has(sycl::aspect::cpu)]] void func5(); void func5() {} constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; } // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func6{{.*}} !srcloc ![[SRCLOC7:[0-9]+]] [[sycl::device_has(getAspect())]] void func6() {} SYCL_EXTERNAL [[sycl::device_has(sycl::aspect::cpu)]] void kernel_name_1() { @@ -46,14 +54,23 @@ SYCL_EXTERNAL [[sycl::device_has(sycl::aspect::cpu)]] void kernel_name_1() { } // CHECK-ASPECTS: define dso_local spir_func void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] +// CHECK-SRCLOC: define dso_local spir_func void @{{.*}}kernel_name_2{{.*}} !srcloc ![[SRCLOC8:[0-9]+]] {{.*}} SYCL_EXTERNAL [[sycl::device_has(sycl::aspect::gpu)]] void kernel_name_2() {} // CHECK-ASPECTS-DAG: [[ASPECTS1]] = !{![[ASPECTCPU:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTCPU]] = !{!"cpu", i32 1} +// CHECK-SRCLOC-DAG: [[SRCLOC1]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[EMPTYASPECTS]] = !{} +// CHECK-SRCLOC-DAG: [[SRCLOC2]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS2]] = !{![[ASPECTFP16:[0-9]+]], ![[ASPECTGPU:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTFP16]] = !{!"fp16", i32 5} // CHECK-ASPECTS-DAG: [[ASPECTGPU]] = !{!"gpu", i32 2} +// CHECK-SRCLOC-DAG: [[SRCLOC3]] = !{i32 {{[0-9]+}}} +// CHECK-SRCLOC-DAG: [[SRCLOC4]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS3]] = !{![[ASPECTHOST:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTHOST]] = !{!"host", i32 0} +// CHECK-SRCLOC-DAG: [[SRCLOC5]] = !{i32 {{[0-9]+}}} +// CHECK-SRCLOC-DAG: [[SRCLOC6]] = !{i32 {{[0-9]+}}} +// CHECK-SRCLOC-DAG: [[SRCLOC7]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS4]] = !{![[ASPECTGPU]]} +// CHECK-SRCLOC-DAG: [[SRCLOC8]] = !{i32 {{[0-9]+}}} diff --git a/sycl/test/check_device_code/device_has_kernel.cpp b/sycl/test/check_device_code/device_has_kernel.cpp index 191dfe49d2030..92dbcb795a4b4 100644 --- a/sycl/test/check_device_code/device_has_kernel.cpp +++ b/sycl/test/check_device_code/device_has_kernel.cpp @@ -1,5 +1,6 @@ // RUN: %clangxx -fsycl -Xclang -fsycl-is-device -fsycl-device-only -Xclang -fno-sycl-early-optimizations -S -emit-llvm %s -o %t.ll // RUN: FileCheck %s --input-file %t.ll --check-prefix=CHECK-ASPECTS +// RUN: FileCheck %s --input-file %t.ll --check-prefix=CHECK-SRCLOC // Tests for IR of device_has(aspect, ...) attribute and // !sycl_used_aspects metadata. @@ -11,30 +12,37 @@ using namespace sycl; queue q; // CHECK-ASPECTS: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] {{.*}} +// CHECK-SRCLOC: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !srcloc ![[SRCLOC1:[0-9]+]] {{.*}} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func1{{.*}} !srcloc ![[SRCLOC2:[0-9]+]] [[sycl::device_has(sycl::aspect::cpu)]] void func1() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS2]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func2{{.*}} !srcloc ![[SRCLOC3:[0-9]+]] [[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func3{{.*}} !srcloc ![[SRCLOC4:[0-9]+]] [[sycl::device_has()]] void func3() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS3]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func4{{.*}} !srcloc ![[SRCLOC5:[0-9]+]] template [[sycl::device_has(Aspect)]] void func4() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func5{{.*}} !srcloc ![[SRCLOC6:[0-9]+]] [[sycl::device_has(sycl::aspect::cpu)]] void func5(); void func5() {} constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; } // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] +// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func6{{.*}} !srcloc ![[SRCLOC7:[0-9]+]] [[sycl::device_has(getAspect())]] void func6() {} class KernelFunctor { @@ -54,6 +62,7 @@ void foo() { KernelFunctor f1; h.single_task(f1); // CHECK-ASPECTS: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] + // CHECK-SRCLOC: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !srcloc ![[SRCLOC8:[0-9]+]] {{.*}} h.single_task( []() [[sycl::device_has(sycl::aspect::gpu)]] {}); }); @@ -61,10 +70,18 @@ void foo() { // CHECK-ASPECTS-DAG: [[ASPECTS1]] = !{![[ASPECTCPU:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTCPU]] = !{!"cpu", i32 1} +// CHECK-SRCLOC-DAG: [[SRCLOC1]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[EMPTYASPECTS]] = !{} +// CHECK-SRCLOC-DAG: [[SRCLOC2]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS2]] = !{![[ASPECTFP16:[0-9]+]], ![[ASPECTGPU:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTFP16]] = !{!"fp16", i32 5} // CHECK-ASPECTS-DAG: [[ASPECTGPU]] = !{!"gpu", i32 2} +// CHECK-SRCLOC-DAG: [[SRCLOC3]] = !{i32 {{[0-9]+}}} +// CHECK-SRCLOC-DAG: [[SRCLOC4]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS3]] = !{![[ASPECTHOST:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTHOST]] = !{!"host", i32 0} +// CHECK-SRCLOC-DAG: [[SRCLOC5]] = !{i32 {{[0-9]+}}} +// CHECK-SRCLOC-DAG: [[SRCLOC6]] = !{i32 {{[0-9]+}}} +// CHECK-SRCLOC-DAG: [[SRCLOC7]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS4]] = !{![[ASPECTGPU]]} +// CHECK-SRCLOC-DAG: [[SRCLOC8]] = !{i32 {{[0-9]+}}} diff --git a/sycl/test/check_device_code/vector/as_preview.cpp b/sycl/test/check_device_code/vector/as_preview.cpp index 16aadcb3a837d..63b64293ef873 100644 --- a/sycl/test/check_device_code/vector/as_preview.cpp +++ b/sycl/test/check_device_code/vector/as_preview.cpp @@ -13,7 +13,7 @@ template SYCL_EXTERNAL sycl::vec sycl::vec::as>() const; // CHECK-LABEL: define weak_odr dso_local spir_func void @_ZNK4sycl3_V13vecIfLi4EE2asINS1_IiLi4EEEEET_v( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !sycl_fixed_targets [[META5:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 16 dereferenceable(16) [[AGG_RESULT]], ptr addrspace(4) noundef align 16 dereferenceable(16) [[THIS]], i64 16, i1 false) // CHECK-NEXT: ret void From 4dee04deaa7f3ac0a3c3e6d1888adfa36ede65d7 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Sun, 20 Apr 2025 19:33:09 -0700 Subject: [PATCH 06/16] Revert "Revert "Merge commit '3a353b1faa83' into llvmspirv_pulldown"" This reverts commit 3c3ad2703c6d5b84c115c5260ae8b3c0bd2b05db. --- clang/test/CodeGenSYCL/sub-group-size.cpp | 4 +-- llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp | 11 +++++-- .../sycl-function-metadata.ll | 30 +++++++++++++++++++ sycl/test/check_device_code/atomic_ref.cpp | 2 +- .../bf16_vector_conversion.cpp | 24 +++++++-------- .../check_device_code/device_has_func.cpp | 17 ----------- .../check_device_code/device_has_kernel.cpp | 17 ----------- .../check_device_code/vector/as_preview.cpp | 2 +- 8 files changed, 54 insertions(+), 53 deletions(-) create mode 100644 llvm/test/SYCLLowerIR/CleanupSYCLCompilerInternalMetadata/sycl-function-metadata.ll diff --git a/clang/test/CodeGenSYCL/sub-group-size.cpp b/clang/test/CodeGenSYCL/sub-group-size.cpp index 528c89b918c88..3f4cf91818f82 100644 --- a/clang/test/CodeGenSYCL/sub-group-size.cpp +++ b/clang/test/CodeGenSYCL/sub-group-size.cpp @@ -15,7 +15,7 @@ using namespace sycl; // ALL-DAG: define {{.*}}spir_func void @{{.*}}external_10{{.*}}() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[TEN:[0-9]+]] SYCL_EXTERNAL void external_default_behavior() {} -// NONE-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}} !srcloc !{{[0-9]+}} !sycl_fixed_targets !{{[0-9]+}} { +// NONE-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}} !sycl_fixed_targets !{{[0-9]+}} { // PRIM_DEF-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[PRIMARY]] // TEN_DEF-DAG: define {{.*}}spir_func void @{{.*}}external_default_behavior{{.*}}() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[TEN]] @@ -23,7 +23,7 @@ void default_behavior() { kernel_single_task([]() { }); } -// NONE-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}} !srcloc !{{[0-9]+}} !kernel_arg_buffer_location !{{[0-9]+}} !sycl_fixed_targets !{{[0-9]+}} { +// NONE-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}} !kernel_arg_buffer_location !{{[0-9]+}} !sycl_fixed_targets !{{[0-9]+}} { // PRIM_DEF-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[PRIMARY]] // TEN_DEF-DAG: define {{.*}}spir_kernel void @{{.*}}Kernel1() #{{[0-9]+}}{{.*}} !intel_reqd_sub_group_size ![[TEN]] diff --git a/llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp b/llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp index 74c894cb6e63e..c70d4eeb1b7fb 100644 --- a/llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp +++ b/llvm/lib/SYCLLowerIR/CleanupSYCLMetadata.cpp @@ -24,8 +24,7 @@ using namespace llvm; namespace { - -void cleanupSYCLCompilerMetadata(const Module &M, llvm::StringRef MD) { +void cleanupSYCLCompilerModuleMetadata(const Module &M, llvm::StringRef MD) { NamedMDNode *Node = M.getNamedMetadata(MD); if (!Node) return; @@ -65,7 +64,13 @@ PreservedAnalyses CleanupSYCLMetadataPass::run(Module &M, SmallVector ModuleMDToRemove = {"sycl_aspects", "sycl_types_that_use_aspects"}; for (const auto &MD : ModuleMDToRemove) - cleanupSYCLCompilerMetadata(M, MD); + cleanupSYCLCompilerModuleMetadata(M, MD); + + // Cleanup no longer needed function metadata. + for (auto &F : M) { + if (F.getMetadata("srcloc")) + F.setMetadata("srcloc", nullptr); + } return PreservedAnalyses::all(); } diff --git a/llvm/test/SYCLLowerIR/CleanupSYCLCompilerInternalMetadata/sycl-function-metadata.ll b/llvm/test/SYCLLowerIR/CleanupSYCLCompilerInternalMetadata/sycl-function-metadata.ll new file mode 100644 index 0000000000000..db8986976e94e --- /dev/null +++ b/llvm/test/SYCLLowerIR/CleanupSYCLCompilerInternalMetadata/sycl-function-metadata.ll @@ -0,0 +1,30 @@ +; RUN: opt -passes=cleanup-sycl-metadata -S < %s | FileCheck %s +; +; Test checks that the pass is able to cleanup srcloc metadata +; function metadata + +; CHECK-NOT: srcloc + +; ModuleID = 'test.cpp' +source_filename = "test.cpp" +target datalayout = "e-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-n8:16:32:64-G1" +target triple = "spir64-unknown-unknown" + +$_ZNK13KernelFunctorclEv = comdat any + +define dso_local spir_func void @_Z6func10v() !sycl_declared_aspects !1 !srcloc !2 { +entry: + ret void +} + + +define linkonce_odr spir_func void @_ZNK13KernelFunctorclEv() !sycl_declared_aspects !3 !srcloc !4 { +entry: + call spir_func void @_Z6func10v() + ret void +} + +!1 = !{i32 5} +!2 = !{i32 2457} +!3 = !{i32 1} +!4 = !{i32 2547} diff --git a/sycl/test/check_device_code/atomic_ref.cpp b/sycl/test/check_device_code/atomic_ref.cpp index 648ae01ddb192..899d547497e5e 100644 --- a/sycl/test/check_device_code/atomic_ref.cpp +++ b/sycl/test/check_device_code/atomic_ref.cpp @@ -4,7 +4,7 @@ #include // CHECK-LABEL: define dso_local spir_func noundef i32 @_Z17atomic_ref_globalRi( -// CHECK-SAME: ptr addrspace(4) noundef align 4 dereferenceable(4) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META6:![0-9]+]] !sycl_fixed_targets [[META7:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) noundef align 4 dereferenceable(4) [[I:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP:%.*]] = addrspacecast ptr addrspace(4) [[I]] to ptr addrspace(1) // CHECK-NEXT: [[CALL3_I_I:%.*]] = tail call spir_func noundef i32 @_Z18__spirv_AtomicLoadPU3AS1KiN5__spv5Scope4FlagENS1_19MemorySemanticsMask4FlagE(ptr addrspace(1) noundef [[TMP]], i32 noundef 1, i32 noundef 898) #[[ATTR4:[0-9]+]] diff --git a/sycl/test/check_device_code/bf16_vector_conversion.cpp b/sycl/test/check_device_code/bf16_vector_conversion.cpp index e423150d7bf88..c322ba8fea1e5 100644 --- a/sycl/test/check_device_code/bf16_vector_conversion.cpp +++ b/sycl/test/check_device_code/bf16_vector_conversion.cpp @@ -10,7 +10,7 @@ using namespace sycl; using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF1PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec1(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2:[0-9]+]] // CHECK-NEXT: ret void @@ -20,7 +20,7 @@ SYCL_EXTERNAL auto TestBFtoF1(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF1PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META7:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec1(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -30,7 +30,7 @@ SYCL_EXTERNAL auto TestFtoBF1(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF2PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META8:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec2(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -40,7 +40,7 @@ SYCL_EXTERNAL auto TestBFtoF2(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF2PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META9:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec2(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -50,7 +50,7 @@ SYCL_EXTERNAL auto TestFtoBF2(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF3PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META10:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec3(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -60,7 +60,7 @@ SYCL_EXTERNAL auto TestBFtoF3(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF3PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META11:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec3(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -70,7 +70,7 @@ SYCL_EXTERNAL auto TestFtoBF3(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF4PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META12:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec4(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -80,7 +80,7 @@ SYCL_EXTERNAL auto TestBFtoF4(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF4PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META13:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec4(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -90,7 +90,7 @@ SYCL_EXTERNAL auto TestFtoBF4(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestBFtoF8PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META14:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec8(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -100,7 +100,7 @@ SYCL_EXTERNAL auto TestBFtoF8(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z10TestFtoBF8PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META15:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec8(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -110,7 +110,7 @@ SYCL_EXTERNAL auto TestFtoBF8(float *a, bfloat16 *b, int size) { } // CHECK-LABEL: define dso_local spir_func void @_Z11TestBFtoF16PN4sycl3_V13ext6oneapi8bfloat16EPf( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META16:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertBF16ToFINTELVec16(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void @@ -120,7 +120,7 @@ SYCL_EXTERNAL auto TestBFtoF16(bfloat16 *a, float *b) { } // CHECK-LABEL: define dso_local spir_func void @_Z11TestFtoBF16PfPN4sycl3_V13ext6oneapi8bfloat16Ei( -// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !srcloc [[META17:![0-9]+]] !sycl_fixed_targets [[META6]] { +// CHECK-SAME: ptr addrspace(4) noundef [[A:%.*]], ptr addrspace(4) noundef [[B:%.*]], i32 noundef [[SIZE:%.*]]) local_unnamed_addr #[[ATTR0]] !sycl_fixed_targets [[META6]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call spir_func void @__devicelib_ConvertFToBF16INTELVec16(ptr addrspace(4) noundef [[A]], ptr addrspace(4) noundef [[B]]) #[[ATTR2]] // CHECK-NEXT: ret void diff --git a/sycl/test/check_device_code/device_has_func.cpp b/sycl/test/check_device_code/device_has_func.cpp index 00f7ad6d042bb..702f6bde5bfca 100644 --- a/sycl/test/check_device_code/device_has_func.cpp +++ b/sycl/test/check_device_code/device_has_func.cpp @@ -1,6 +1,5 @@ // RUN: %clangxx -fsycl -Xclang -fsycl-is-device -fsycl-device-only -Xclang -fno-sycl-early-optimizations -S -emit-llvm %s -o %t.ll // RUN: FileCheck %s --input-file %t.ll --check-prefix=CHECK-ASPECTS -// RUN: FileCheck %s --input-file %t.ll --check-prefix=CHECK-SRCLOC // Tests for IR of device_has(aspect, ...) attribute and // !sycl_used_aspects metadata. @@ -11,37 +10,30 @@ using namespace sycl; // CHECK-ASPECTS: define dso_local spir_func void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] {{.*}} -// CHECK-SRCLOC: define dso_local spir_func void @{{.*}}kernel_name_1{{.*}} !srcloc ![[SRCLOC1:[0-9]+]] {{.*}} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func1{{.*}} !srcloc ![[SRCLOC2:[0-9]+]] [[sycl::device_has(sycl::aspect::cpu)]] void func1() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS2]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func2{{.*}} !srcloc ![[SRCLOC3:[0-9]+]] [[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func3{{.*}} !srcloc ![[SRCLOC4:[0-9]+]] [[sycl::device_has()]] void func3() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS3]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func4{{.*}} !srcloc ![[SRCLOC5:[0-9]+]] template [[sycl::device_has(Aspect)]] void func4() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func5{{.*}} !srcloc ![[SRCLOC6:[0-9]+]] [[sycl::device_has(sycl::aspect::cpu)]] void func5(); void func5() {} constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; } // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func6{{.*}} !srcloc ![[SRCLOC7:[0-9]+]] [[sycl::device_has(getAspect())]] void func6() {} SYCL_EXTERNAL [[sycl::device_has(sycl::aspect::cpu)]] void kernel_name_1() { @@ -54,23 +46,14 @@ SYCL_EXTERNAL [[sycl::device_has(sycl::aspect::cpu)]] void kernel_name_1() { } // CHECK-ASPECTS: define dso_local spir_func void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] -// CHECK-SRCLOC: define dso_local spir_func void @{{.*}}kernel_name_2{{.*}} !srcloc ![[SRCLOC8:[0-9]+]] {{.*}} SYCL_EXTERNAL [[sycl::device_has(sycl::aspect::gpu)]] void kernel_name_2() {} // CHECK-ASPECTS-DAG: [[ASPECTS1]] = !{![[ASPECTCPU:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTCPU]] = !{!"cpu", i32 1} -// CHECK-SRCLOC-DAG: [[SRCLOC1]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[EMPTYASPECTS]] = !{} -// CHECK-SRCLOC-DAG: [[SRCLOC2]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS2]] = !{![[ASPECTFP16:[0-9]+]], ![[ASPECTGPU:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTFP16]] = !{!"fp16", i32 5} // CHECK-ASPECTS-DAG: [[ASPECTGPU]] = !{!"gpu", i32 2} -// CHECK-SRCLOC-DAG: [[SRCLOC3]] = !{i32 {{[0-9]+}}} -// CHECK-SRCLOC-DAG: [[SRCLOC4]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS3]] = !{![[ASPECTHOST:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTHOST]] = !{!"host", i32 0} -// CHECK-SRCLOC-DAG: [[SRCLOC5]] = !{i32 {{[0-9]+}}} -// CHECK-SRCLOC-DAG: [[SRCLOC6]] = !{i32 {{[0-9]+}}} -// CHECK-SRCLOC-DAG: [[SRCLOC7]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS4]] = !{![[ASPECTGPU]]} -// CHECK-SRCLOC-DAG: [[SRCLOC8]] = !{i32 {{[0-9]+}}} diff --git a/sycl/test/check_device_code/device_has_kernel.cpp b/sycl/test/check_device_code/device_has_kernel.cpp index 92dbcb795a4b4..191dfe49d2030 100644 --- a/sycl/test/check_device_code/device_has_kernel.cpp +++ b/sycl/test/check_device_code/device_has_kernel.cpp @@ -1,6 +1,5 @@ // RUN: %clangxx -fsycl -Xclang -fsycl-is-device -fsycl-device-only -Xclang -fno-sycl-early-optimizations -S -emit-llvm %s -o %t.ll // RUN: FileCheck %s --input-file %t.ll --check-prefix=CHECK-ASPECTS -// RUN: FileCheck %s --input-file %t.ll --check-prefix=CHECK-SRCLOC // Tests for IR of device_has(aspect, ...) attribute and // !sycl_used_aspects metadata. @@ -12,37 +11,30 @@ using namespace sycl; queue q; // CHECK-ASPECTS: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !sycl_declared_aspects ![[ASPECTS1:[0-9]+]] {{.*}} -// CHECK-SRCLOC: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_1{{.*}} !srcloc ![[SRCLOC1:[0-9]+]] {{.*}} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func1{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func1{{.*}} !srcloc ![[SRCLOC2:[0-9]+]] [[sycl::device_has(sycl::aspect::cpu)]] void func1() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func2{{.*}} !sycl_declared_aspects ![[ASPECTS2:[0-9]+]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS2]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func2{{.*}} !srcloc ![[SRCLOC3:[0-9]+]] [[sycl::device_has(sycl::aspect::fp16, sycl::aspect::gpu)]] void func2() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func3{{.*}} !sycl_declared_aspects ![[EMPTYASPECTS:[0-9]+]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func3{{.*}} !srcloc ![[SRCLOC4:[0-9]+]] [[sycl::device_has()]] void func3() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func4{{.*}} !sycl_declared_aspects ![[ASPECTS3:[0-9]+]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS3]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func4{{.*}} !srcloc ![[SRCLOC5:[0-9]+]] template [[sycl::device_has(Aspect)]] void func4() {} // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func5{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func5{{.*}} !srcloc ![[SRCLOC6:[0-9]+]] [[sycl::device_has(sycl::aspect::cpu)]] void func5(); void func5() {} constexpr sycl::aspect getAspect() { return sycl::aspect::cpu; } // CHECK-ASPECTS: define {{.*}}spir_func void @{{.*}}func6{{.*}} !sycl_declared_aspects ![[ASPECTS1]] // CHECK-ASPECTS-SAME: !sycl_used_aspects ![[ASPECTS1]] -// CHECK-SRCLOC: define {{.*}}spir_func void @{{.*}}func6{{.*}} !srcloc ![[SRCLOC7:[0-9]+]] [[sycl::device_has(getAspect())]] void func6() {} class KernelFunctor { @@ -62,7 +54,6 @@ void foo() { KernelFunctor f1; h.single_task(f1); // CHECK-ASPECTS: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !sycl_declared_aspects ![[ASPECTS4:[0-9]+]] - // CHECK-SRCLOC: define weak_odr dso_local spir_kernel void @{{.*}}kernel_name_2{{.*}} !srcloc ![[SRCLOC8:[0-9]+]] {{.*}} h.single_task( []() [[sycl::device_has(sycl::aspect::gpu)]] {}); }); @@ -70,18 +61,10 @@ void foo() { // CHECK-ASPECTS-DAG: [[ASPECTS1]] = !{![[ASPECTCPU:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTCPU]] = !{!"cpu", i32 1} -// CHECK-SRCLOC-DAG: [[SRCLOC1]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[EMPTYASPECTS]] = !{} -// CHECK-SRCLOC-DAG: [[SRCLOC2]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS2]] = !{![[ASPECTFP16:[0-9]+]], ![[ASPECTGPU:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTFP16]] = !{!"fp16", i32 5} // CHECK-ASPECTS-DAG: [[ASPECTGPU]] = !{!"gpu", i32 2} -// CHECK-SRCLOC-DAG: [[SRCLOC3]] = !{i32 {{[0-9]+}}} -// CHECK-SRCLOC-DAG: [[SRCLOC4]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS3]] = !{![[ASPECTHOST:[0-9]+]]} // CHECK-ASPECTS-DAG: [[ASPECTHOST]] = !{!"host", i32 0} -// CHECK-SRCLOC-DAG: [[SRCLOC5]] = !{i32 {{[0-9]+}}} -// CHECK-SRCLOC-DAG: [[SRCLOC6]] = !{i32 {{[0-9]+}}} -// CHECK-SRCLOC-DAG: [[SRCLOC7]] = !{i32 {{[0-9]+}}} // CHECK-ASPECTS-DAG: [[ASPECTS4]] = !{![[ASPECTGPU]]} -// CHECK-SRCLOC-DAG: [[SRCLOC8]] = !{i32 {{[0-9]+}}} diff --git a/sycl/test/check_device_code/vector/as_preview.cpp b/sycl/test/check_device_code/vector/as_preview.cpp index 63b64293ef873..16aadcb3a837d 100644 --- a/sycl/test/check_device_code/vector/as_preview.cpp +++ b/sycl/test/check_device_code/vector/as_preview.cpp @@ -13,7 +13,7 @@ template SYCL_EXTERNAL sycl::vec sycl::vec::as>() const; // CHECK-LABEL: define weak_odr dso_local spir_func void @_ZNK4sycl3_V13vecIfLi4EE2asINS1_IiLi4EEEEET_v( -// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !srcloc [[META5:![0-9]+]] !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !sycl_fixed_targets [[META5:![0-9]+]] { // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 16 dereferenceable(16) [[AGG_RESULT]], ptr addrspace(4) noundef align 16 dereferenceable(16) [[THIS]], i64 16, i1 false) // CHECK-NEXT: ret void From 408135b64bb42a690353339e7ba91857e2d3e194 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Sun, 20 Apr 2025 20:35:40 -0700 Subject: [PATCH 07/16] Fix some tests that were changed earlier --- .../extensions/address_cast.cpp | 40 +++++----- sycl/test/check_device_code/vector/as.cpp | 4 +- .../vector/bf16_builtins.cpp | 24 +++--- .../vector/bf16_builtins_preview.cpp | 24 +++--- .../vector/convert_bfloat.cpp | 36 ++++----- .../vector/convert_bfloat_preview.cpp | 36 ++++----- .../check_device_code/vector/math_ops.cpp | 76 +++++++++---------- .../vector/math_ops_preview.cpp | 76 +++++++++---------- 8 files changed, 158 insertions(+), 158 deletions(-) diff --git a/sycl/test/check_device_code/extensions/address_cast.cpp b/sycl/test/check_device_code/extensions/address_cast.cpp index a0b03f242984a..7f7ffbe227921 100644 --- a/sycl/test/check_device_code/extensions/address_cast.cpp +++ b/sycl/test/check_device_code/extensions/address_cast.cpp @@ -13,41 +13,41 @@ using namespace sycl::ext::oneapi::experimental; namespace static_as_cast { // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(1) -// CHECK-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA13:![0-9]+]], !alias.scope [[META15:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(1) [[TMP2]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA13:![0-9]+]], !alias.scope [[META15:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { return static_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast23to_global_not_decoratedEPi( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(1) -// CHECK-NEXT: store ptr addrspace(1) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA21:![0-9]+]], !alias.scope [[META23:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(1) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA21:![0-9]+]], !alias.scope [[META23:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_global_not_decorated(int *p) { return static_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META27:![0-9]+]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META27:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { return static_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast24to_generic_not_decoratedEPi( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store ptr addrspace(4) [[P]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA31:![0-9]+]], !alias.scope [[META33:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(4) [[P]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA31:![0-9]+]], !alias.scope [[META33:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { @@ -55,10 +55,10 @@ SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast16to_global_deviceEPi( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.3") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.3") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(5) -// CHECK-NEXT: store ptr addrspace(5) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA37:![0-9]+]], !alias.scope [[META39:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA37:![0-9]+]], !alias.scope [[META39:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_global_device(int *p) { @@ -66,10 +66,10 @@ SYCL_EXTERNAL auto to_global_device(int *p) { } // CHECK-LABEL: define dso_local spir_func void @_ZN14static_as_cast14to_global_hostEPi( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.4") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.4") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(6) -// CHECK-NEXT: store ptr addrspace(6) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA43:![0-9]+]], !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(6) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA43:![0-9]+]], !alias.scope [[META45:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_global_host(int *p) { @@ -79,41 +79,41 @@ SYCL_EXTERNAL auto to_global_host(int *p) { namespace dynamic_as_cast { // CHECK-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] // CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) // CHECK-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[TMP1]], i32 noundef 5) #[[ATTR5:[0-9]+]] -// CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA13]], !alias.scope [[META49:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA13]], !alias.scope [[META49:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { return dynamic_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast23to_global_not_decoratedEPi( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[CALL_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[P]], i32 noundef 5) #[[ATTR5]] -// CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA21]], !alias.scope [[META55:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA21]], !alias.scope [[META55:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_global_not_decorated(int *p) { return dynamic_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META59:![0-9]+]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META59:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { return dynamic_address_cast(p); } // CHECK-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast24to_generic_not_decoratedEPi( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: store ptr addrspace(4) [[P]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA31]], !alias.scope [[META63:![0-9]+]] +// CHECK-NEXT: store ptr addrspace(4) [[P]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA31]], !alias.scope [[META63:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { diff --git a/sycl/test/check_device_code/vector/as.cpp b/sycl/test/check_device_code/vector/as.cpp index 641897f51d3fb..96a4c7a951e56 100644 --- a/sycl/test/check_device_code/vector/as.cpp +++ b/sycl/test/check_device_code/vector/as.cpp @@ -14,9 +14,9 @@ template SYCL_EXTERNAL sycl::vec sycl::vec::as>() const; // CHECK-LABEL: define weak_odr dso_local spir_func void @_ZNK4sycl3_V13vecIfLi4EE2asINS1_IiLi4EEEEET_v( -// CHECK-SAME: ptr dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !sycl_fixed_targets [[META6:![0-9]+]] { +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable sret(%"class.sycl::_V1::vec") align 16 [[AGG_RESULT:%.*]], ptr addrspace(4) noundef align 16 dereferenceable_or_null(16) [[THIS:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] comdat align 2 !sycl_fixed_targets [[META6:![0-9]+]] { // CHECK-NEXT: entry: -// CHECK-NEXT: tail call void @llvm.memcpy.p0.p4.i64(ptr noundef nonnull align 16 dereferenceable(16) [[AGG_RESULT]], ptr addrspace(4) noundef align 16 dereferenceable(16) [[THIS]], i64 16, i1 false) +// CHECK-NEXT: tail call void @llvm.memcpy.p4.p4.i64(ptr addrspace(4) noundef align 16 dereferenceable(16) [[AGG_RESULT]], ptr addrspace(4) noundef align 16 dereferenceable(16) [[THIS]], i64 16, i1 false) // CHECK-NEXT: ret void // //. diff --git a/sycl/test/check_device_code/vector/bf16_builtins.cpp b/sycl/test/check_device_code/vector/bf16_builtins.cpp index c392ea874ea3e..5466988adb78b 100644 --- a/sycl/test/check_device_code/vector/bf16_builtins.cpp +++ b/sycl/test/check_device_code/vector/bf16_builtins.cpp @@ -21,7 +21,7 @@ using namespace sycl::ext::oneapi; using namespace sycl::ext::oneapi::experimental; // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMinN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi2EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I8_I:%.*]] = alloca <2 x float>, align 8 // CHECK-NEXT: [[DST_I_I_I_I9_I:%.*]] = alloca [2 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -61,7 +61,7 @@ using namespace sycl::ext::oneapi::experimental; // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DST_I_I_I_I9_I]], align 2, !tbaa [[TBAA14]], !noalias [[META23]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I8_I]]), !noalias [[META23]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[DST_I_I_I_I9_I]]), !noalias [[META23]] -// CHECK-NEXT: store i32 [[TMP4]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META23]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META23]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMin(vec a, vec b) { @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -115,7 +115,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]] // CHECK-NEXT: [[EXTRACTVEC_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META37]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMax(vec a, vec b) { @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -141,7 +141,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef <4 x i8> @_Z13__spirv_IsNanDv4_f(<4 x float> noundef [[TMP1]]) #[[ATTR6]] // CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef <4 x i32> @_Z22__spirv_SConvert_Rint4Dv4_a(<4 x i8> noundef [[CALL_I_I_I_I]]) #[[ATTR6]] // CHECK-NEXT: [[CALL_I_I_I2_I:%.*]] = call spir_func noundef <4 x i16> @_Z24__spirv_SConvert_Rshort4Dv4_i(<4 x i32> noundef [[CALL_I_I_I_I_I_I]]) #[[ATTR6]] -// CHECK-NEXT: store <4 x i16> [[CALL_I_I_I2_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: store <4 x i16> [[CALL_I_I_I2_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestIsNan(vec a) { @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -177,7 +177,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA14]], !noalias [[META58]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META58]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META58]] -// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META58]] +// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META58]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFabs(vec a) { @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -213,7 +213,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA14]], !noalias [[META69]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META69]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META69]] -// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META69]] +// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META69]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestCeil(vec a) { @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -273,7 +273,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr [[DST_I_I_I_I15_I]], align 2, !tbaa [[TBAA14]], !noalias [[META86]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[VEC_ADDR_I_I_I_I14_I]]), !noalias [[META86]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I15_I]]), !noalias [[META86]] -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !alias.scope [[META86]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr addrspace(4) [[AGG_RESULT]], align 32, !alias.scope [[META86]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMA(vec a, vec b, diff --git a/sycl/test/check_device_code/vector/bf16_builtins_preview.cpp b/sycl/test/check_device_code/vector/bf16_builtins_preview.cpp index 944c4c566bfa1..bc1a44c56686d 100644 --- a/sycl/test/check_device_code/vector/bf16_builtins_preview.cpp +++ b/sycl/test/check_device_code/vector/bf16_builtins_preview.cpp @@ -21,7 +21,7 @@ using namespace sycl::ext::oneapi; using namespace sycl::ext::oneapi::experimental; // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMinN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi2EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I8_I:%.*]] = alloca <2 x float>, align 8 // CHECK-NEXT: [[DST_I_I_I_I9_I:%.*]] = alloca [2 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -61,7 +61,7 @@ using namespace sycl::ext::oneapi::experimental; // CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[DST_I_I_I_I9_I]], align 2, !tbaa [[TBAA14]], !noalias [[META23]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I8_I]]), !noalias [[META23]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr nonnull [[DST_I_I_I_I9_I]]), !noalias [[META23]] -// CHECK-NEXT: store i32 [[TMP4]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META23]] +// CHECK-NEXT: store i32 [[TMP4]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META23]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMin(vec a, vec b) { @@ -69,7 +69,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFMaxN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.70") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I12_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I13_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -115,7 +115,7 @@ SYCL_EXTERNAL auto TestFMin(vec a, vec b) { // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I12_I]]), !noalias [[META37]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I13_I]]), !noalias [[META37]] // CHECK-NEXT: [[EXTRACTVEC_I19_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I18_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META37]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I19_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META37]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMax(vec a, vec b) { @@ -123,7 +123,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z9TestIsNanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.184") align 8 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I_I:%.*]] = alloca <4 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -141,7 +141,7 @@ SYCL_EXTERNAL auto TestFMax(vec a, vec b) { // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = call spir_func noundef <4 x i8> @_Z13__spirv_IsNanDv4_f(<4 x float> noundef [[TMP1]]) #[[ATTR6]] // CHECK-NEXT: [[CALL_I_I_I_I_I_I:%.*]] = call spir_func noundef <4 x i32> @_Z22__spirv_SConvert_Rint4Dv4_a(<4 x i8> noundef [[CALL_I_I_I_I]]) #[[ATTR6]] // CHECK-NEXT: [[CALL_I_I_I2_I:%.*]] = call spir_func noundef <4 x i16> @_Z24__spirv_SConvert_Rshort4Dv4_i(<4 x i32> noundef [[CALL_I_I_I_I_I_I]]) #[[ATTR6]] -// CHECK-NEXT: store <4 x i16> [[CALL_I_I_I2_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: store <4 x i16> [[CALL_I_I_I2_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestIsNan(vec a) { @@ -149,7 +149,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestFabsN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -177,7 +177,7 @@ SYCL_EXTERNAL auto TestIsNan(vec a) { // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA14]], !noalias [[META58]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META58]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META58]] -// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META58]] +// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META58]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFabs(vec a) { @@ -185,7 +185,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z8TestCeilN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.336") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.336") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I2_I:%.*]] = alloca <8 x float>, align 32 // CHECK-NEXT: [[DST_I_I_I_I3_I:%.*]] = alloca [8 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -213,7 +213,7 @@ SYCL_EXTERNAL auto TestFabs(vec a) { // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, ptr [[DST_I_I_I_I3_I]], align 2, !tbaa [[TBAA14]], !noalias [[META69]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[VEC_ADDR_I_I_I_I2_I]]), !noalias [[META69]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I3_I]]), !noalias [[META69]] -// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META69]] +// CHECK-NEXT: store <8 x i16> [[TMP1]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META69]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestCeil(vec a) { @@ -221,7 +221,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestFMAN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEES5_S5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.412") align 32 captures(none) initializes((0, 32)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[B:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.412") align 32 captures(none) [[C:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I14_I:%.*]] = alloca <16 x float>, align 64 // CHECK-NEXT: [[DST_I_I_I_I15_I:%.*]] = alloca [16 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -273,7 +273,7 @@ SYCL_EXTERNAL auto TestCeil(vec a) { // CHECK-NEXT: [[TMP3:%.*]] = load <16 x i16>, ptr [[DST_I_I_I_I15_I]], align 2, !tbaa [[TBAA14]], !noalias [[META86]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr nonnull [[VEC_ADDR_I_I_I_I14_I]]), !noalias [[META86]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[DST_I_I_I_I15_I]]), !noalias [[META86]] -// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr [[AGG_RESULT]], align 32, !alias.scope [[META86]] +// CHECK-NEXT: store <16 x i16> [[TMP3]], ptr addrspace(4) [[AGG_RESULT]], align 32, !alias.scope [[META86]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFMA(vec a, vec b, diff --git a/sycl/test/check_device_code/vector/convert_bfloat.cpp b/sycl/test/check_device_code/vector/convert_bfloat.cpp index 5af5a42f790b7..976083fb2c9d6 100644 --- a/sycl/test/check_device_code/vector/convert_bfloat.cpp +++ b/sycl/test/check_device_code/vector/convert_bfloat.cpp @@ -13,7 +13,7 @@ using namespace sycl; using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-LABEL: define dso_local spir_func void @_Z18TestBFtoFDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -30,7 +30,7 @@ using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META8]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META8]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { @@ -38,7 +38,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z17TestBFtoFDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -55,7 +55,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META15]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META15]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] @@ -82,7 +82,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EE7convertIiLNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META19]] +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META19]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { @@ -90,12 +90,12 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rn(i16 noundef zeroext [[TMP0]]) #[[ATTR4]], !noalias [[META25]] -// CHECK-NEXT: store i32 [[CALL_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META25]] +// CHECK-NEXT: store i32 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META25]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { @@ -103,7 +103,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z18TestFtoBFDeviceRNERN4sycl3_V13vecIfLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -120,7 +120,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META29]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META29]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { @@ -128,7 +128,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z17TestFtoBFDeviceRZRN4sycl3_V13vecIfLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] @@ -147,7 +147,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIfLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META33]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META33]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { @@ -155,7 +155,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestInttoBFDeviceRZRN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] @@ -174,7 +174,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIiLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META38]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META38]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { @@ -182,12 +182,12 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_ll2bfloat16_ru(i64 noundef [[TMP0]]) #[[ATTR4]], !noalias [[META43]] -// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr [[AGG_RESULT]], align 2, !alias.scope [[META43]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META43]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] @@ -212,7 +212,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIsLi2EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE4EEENS1_IT_Li2EEEv.exit: -// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META49]] +// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META49]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestShorttoBFDeviceRTN(vec &inp) { diff --git a/sycl/test/check_device_code/vector/convert_bfloat_preview.cpp b/sycl/test/check_device_code/vector/convert_bfloat_preview.cpp index ff4f016449a61..1ccdf759d66f3 100644 --- a/sycl/test/check_device_code/vector/convert_bfloat_preview.cpp +++ b/sycl/test/check_device_code/vector/convert_bfloat_preview.cpp @@ -13,7 +13,7 @@ using namespace sycl; using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-LABEL: define dso_local spir_func void @_Z18TestBFtoFDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -30,7 +30,7 @@ using bfloat16 = sycl::ext::oneapi::bfloat16; // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META8]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META8]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META8]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { @@ -38,7 +38,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z17TestBFtoFDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x i16>, align 8 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x float], align 4 @@ -55,7 +55,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRNE(vec &inp) { // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META15]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x float> [[LOADVECN_I_I_I_I_I]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META15]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META15]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { @@ -63,7 +63,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestBFtointDeviceRZRN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.70") align 16 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i16>, ptr addrspace(4) [[INP]], align 8, !noalias [[META19]] @@ -82,7 +82,7 @@ SYCL_EXTERNAL auto TestBFtoFDeviceRZ(vec &inp) { // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EE7convertIiLNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i32> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META19]] +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META19]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { @@ -90,12 +90,12 @@ SYCL_EXTERNAL auto TestBFtointDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z20TestBFtointDeviceRNERN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.108") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 2 captures(none) dereferenceable(2) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr addrspace(4) [[INP]], align 2, !tbaa [[TBAA11]], !noalias [[META25]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef i32 @__imf_bfloat162int_rn(i16 noundef zeroext [[TMP0]]) #[[ATTR4]], !noalias [[META25]] -// CHECK-NEXT: store i32 [[CALL_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META25]] +// CHECK-NEXT: store i32 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META25]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { @@ -103,7 +103,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z18TestFtoBFDeviceRNERN4sycl3_V13vecIfLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[VEC_ADDR_I_I_I_I:%.*]] = alloca <3 x float>, align 16 // CHECK-NEXT: [[DST_I_I_I_I:%.*]] = alloca [4 x %"class.sycl::_V1::ext::oneapi::bfloat16"], align 2 @@ -120,7 +120,7 @@ SYCL_EXTERNAL auto TestBFtointDeviceRNE(vec &inp) { // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 16, ptr nonnull [[VEC_ADDR_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[DST_I_I_I_I]]), !noalias [[META29]] // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <4 x i16> [[LOADVECN_I_I_I_I_I]], <4 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META29]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META29]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { @@ -128,7 +128,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z17TestFtoBFDeviceRZRN4sycl3_V13vecIfLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x float>, ptr addrspace(4) [[INP]], align 16, !noalias [[META33]] @@ -147,7 +147,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRNE(vec &inp) { // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIfLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META33]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META33]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { @@ -155,7 +155,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestInttoBFDeviceRZRN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.32") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 16 captures(none) dereferenceable(16) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: [[LOADVECN_I_I:%.*]] = load <4 x i32>, ptr addrspace(4) [[INP]], align 16, !noalias [[META38]] @@ -174,7 +174,7 @@ SYCL_EXTERNAL auto TestFtoBFDeviceRZ(vec &inp) { // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIiLi3EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE2EEENS1_IT_Li3EEEv.exit: // CHECK-NEXT: [[EXTRACTVEC_I:%.*]] = shufflevector <3 x i16> [[RETVAL1_SROA_0_0_I_I_I]], <3 x i16> poison, <4 x i32> -// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META38]] +// CHECK-NEXT: store <4 x i16> [[EXTRACTVEC_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META38]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { @@ -182,12 +182,12 @@ SYCL_EXTERNAL auto TestInttoBFDeviceRZ(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z19TestLLtoBFDeviceRTPRN4sycl3_V13vecIxLi1EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.146") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 8 captures(none) dereferenceable(8) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr addrspace(4) [[INP]], align 8, !tbaa [[TBAA46:![0-9]+]], !noalias [[META43]] // CHECK-NEXT: [[CALL_I_I_I_I:%.*]] = tail call spir_func noundef zeroext i16 @__imf_ll2bfloat16_ru(i64 noundef [[TMP0]]) #[[ATTR4]], !noalias [[META43]] -// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr [[AGG_RESULT]], align 2, !alias.scope [[META43]] +// CHECK-NEXT: store i16 [[CALL_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META43]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { @@ -195,7 +195,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { } // CHECK-LABEL: define dso_local spir_func void @_Z22TestShorttoBFDeviceRTNRN4sycl3_V13vecIsLi2EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.224") align 4 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef readonly align 4 captures(none) dereferenceable(4) [[INP:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i16>, ptr addrspace(4) [[INP]], align 4, !tbaa [[TBAA11]], !noalias [[META49]] @@ -212,7 +212,7 @@ SYCL_EXTERNAL auto TestLLtoBFDeviceRTP(vec &inp) { // CHECK-NEXT: [[INC_I_I_I]] = add nuw nsw i32 [[I_0_I_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I_I]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK: _ZNK4sycl3_V13vecIsLi2EE7convertINS0_3ext6oneapi8bfloat16ELNS_13rounding_modeE4EEENS1_IT_Li2EEEv.exit: -// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META49]] +// CHECK-NEXT: store <2 x i16> [[RETVAL1_0_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META49]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestShorttoBFDeviceRTN(vec &inp) { diff --git a/sycl/test/check_device_code/vector/math_ops.cpp b/sycl/test/check_device_code/vector/math_ops.cpp index 3ad3dcbde6ae9..4c6378632cac6 100644 --- a/sycl/test/check_device_code/vector/math_ops.cpp +++ b/sycl/test/check_device_code/vector/math_ops.cpp @@ -19,20 +19,20 @@ using namespace sycl; /*************** Binary Arithmetic Ops ******************/ // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIiLi2EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA14:![0-9]+]], !noalias [[META17:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META17]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META18:![0-9]+]] +// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META18:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.33") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.33") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) @@ -40,34 +40,34 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] // CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META28]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META28]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.73") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.73") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META36:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META36]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META36]] +// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META36]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.113") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.113") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA14]], !noalias [[META44:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META44]] // CHECK-NEXT: [[XOR_I_I_I_I_I:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestXor(vec a, vec b) { @@ -75,7 +75,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.123") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.123") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) @@ -84,13 +84,13 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP1]] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne <4 x i8> [[TMP0]], [[ADD_I_I_I_I_I]] // CHECK-NEXT: [[SEXT_NEG_I_I:%.*]] = zext <4 x i1> [[CMP_I_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_NEG_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META56:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[SEXT_NEG_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META56:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.163") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.163") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) @@ -98,13 +98,13 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] // CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> -// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] +// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.203") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.203") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.203", align 8 @@ -136,7 +136,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt4plusIvET_EENS0_3vecIS5_Li3EEEE4typeERKSB_SF_.exit: // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META79]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META79]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META79]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META73]] // CHECK-NEXT: ret void // @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.241") align 64 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.241") align 64 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) @@ -156,7 +156,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA14]], !noalias [[META99]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <16 x i32> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <16 x i1> [[CMP_I_I_I_I]] to <16 x i32> -// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 64, !alias.scope [[META99]] +// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, !alias.scope [[META99]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -178,7 +178,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.290") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.290") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) @@ -186,7 +186,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA14]], !noalias [[META108]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <2 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i8> -// CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 2, !alias.scope [[META109:![0-9]+]] +// CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META109:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -194,7 +194,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.370") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.370") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) @@ -202,7 +202,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META119]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp ogt <8 x half> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <8 x i1> [[CMP_I_I_I_I]] to <8 x i16> -// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META119]] +// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META119]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -210,7 +210,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.450") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.450") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.450", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) @@ -237,7 +237,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP128:![0-9]+]] // CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt7greaterIvET_EENS0_3vecIsLi4EEEE4typeERKNSA_IS5_Li4EEESG_.exit: // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META127]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META127]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META127]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META121]] // CHECK-NEXT: ret void // @@ -249,7 +249,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.526") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.526") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.526") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.526") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) @@ -258,76 +258,76 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META136]] +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META136]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.565") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.565") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.565") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.565") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META144:![0-9]+]] // CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] -// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META144]] +// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META144]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.604") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.604") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.604") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.604") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META152:![0-9]+]] // CHECK-NEXT: [[NOT_I_I_I_I:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) -// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META152]] +// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META152]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.613") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.613") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META160:![0-9]+]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <4 x i1> [[CMP_I_I_I_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META161:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META161:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.652") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.690") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.652") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.690") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META171:![0-9]+]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i16> -// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META172:![0-9]+]] +// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META172:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.408") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.408") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META182:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <8 x half> [[TMP0]] -// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META182]] +// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META182]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.730") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.730") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.730", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) @@ -351,14 +351,14 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP191:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META190]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META190]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META190]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META184]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.768") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.768") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.768") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.768") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.768", align 32 @@ -384,7 +384,7 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 32 [[AGG_RESULT]], ptr align 32 [[RES_I_I]], i64 32, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 32 [[AGG_RESULT]], ptr align 32 [[RES_I_I]], i64 32, i1 false) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193]] // CHECK-NEXT: ret void // diff --git a/sycl/test/check_device_code/vector/math_ops_preview.cpp b/sycl/test/check_device_code/vector/math_ops_preview.cpp index b78018f3bfaab..2f21e47161024 100644 --- a/sycl/test/check_device_code/vector/math_ops_preview.cpp +++ b/sycl/test/check_device_code/vector/math_ops_preview.cpp @@ -19,20 +19,20 @@ using namespace sycl; /*************** Binary Arithmetic Ops ******************/ // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIiLi2EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[A]], align 8, !tbaa [[TBAA14:![0-9]+]], !noalias [[META17:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META17]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META18:![0-9]+]] +// CHECK-NEXT: store <2 x i32> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META18:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIfLi3EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.33") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.33") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.33") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) @@ -40,34 +40,34 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x float>, ptr [[B]], align 16, !noalias [[META28]] // CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x float> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x float> [[TMP0]], <4 x float> poison, <4 x i32> -// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META28]] +// CHECK-NEXT: store <4 x float> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META28]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIcLi16EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.73") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.73") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.73") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META36:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META36]] // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = add <16 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META36]] +// CHECK-NEXT: store <16 x i8> [[ADD_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META36]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // std::byte does not support '+'. Therefore, using bitwise XOR as a substitute. // CHECK-LABEL: define dso_local spir_func void @_Z7TestXorN4sycl3_V13vecISt4byteLi8EEES3_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.113") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.113") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.113") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[A]], align 8, !tbaa [[TBAA14]], !noalias [[META44:![0-9]+]] // CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[B]], align 8, !tbaa [[TBAA14]], !noalias [[META44]] // CHECK-NEXT: [[XOR_I_I_I_I_I:%.*]] = xor <8 x i8> [[TMP0]], [[TMP1]] -// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] +// CHECK-NEXT: store <8 x i8> [[XOR_I_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META45:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestXor(vec a, vec b) { @@ -75,7 +75,7 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecIbLi4EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.123") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.123") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) @@ -84,13 +84,13 @@ SYCL_EXTERNAL auto TestXor(vec a, vec b) { // CHECK-NEXT: [[ADD_I_I_I_I_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP1]] // CHECK-NEXT: [[CMP_I_I:%.*]] = icmp ne <4 x i8> [[TMP0]], [[ADD_I_I_I_I_I]] // CHECK-NEXT: [[SEXT_NEG_I_I:%.*]] = zext <4 x i1> [[CMP_I_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_NEG_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META56:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[SEXT_NEG_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META56:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_6detail9half_impl4halfELi3EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.163") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.163") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.163") align 8 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) @@ -98,13 +98,13 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: [[LOADVECN_I6_I_I:%.*]] = load <4 x half>, ptr [[B]], align 8, !noalias [[META68]] // CHECK-NEXT: [[TMP0:%.*]] = fadd <4 x half> [[LOADVECN_I_I_I]], [[LOADVECN_I6_I_I]] // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <4 x half> [[TMP0]], <4 x half> poison, <4 x i32> -// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] +// CHECK-NEXT: store <4 x half> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META69:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-LABEL: define dso_local spir_func void @_Z7TestAddN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.203") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.203") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.203", align 8 @@ -136,7 +136,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, vec b) { return a + b; } // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK: _ZN4sycl3_V16detailplINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt4plusIvET_EENS0_3vecIS5_Li3EEEE4typeERKSB_SF_.exit: // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META79]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META79]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META79]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META73]] // CHECK-NEXT: ret void // @@ -148,7 +148,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, /***************** Binary Logical Ops *******************/ // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIiLi16EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.241") align 64 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.241") align 64 captures(none) initializes((0, 64)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.241") align 64 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) @@ -156,7 +156,7 @@ SYCL_EXTERNAL auto TestAdd(vec a, // CHECK-NEXT: [[TMP1:%.*]] = load <16 x i32>, ptr [[B]], align 64, !tbaa [[TBAA14]], !noalias [[META99]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <16 x i32> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <16 x i1> [[CMP_I_I_I_I]] to <16 x i32> -// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 64, !alias.scope [[META99]] +// CHECK-NEXT: store <16 x i32> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 64, !alias.scope [[META99]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -178,7 +178,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecIbLi2EEES2_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.290") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.290") align 2 captures(none) initializes((0, 2)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.328") align 2 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) @@ -186,7 +186,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: [[TMP1:%.*]] = load <2 x i8>, ptr [[B]], align 2, !tbaa [[TBAA14]], !noalias [[META108]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp sgt <2 x i8> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i8> -// CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 2, !alias.scope [[META109:![0-9]+]] +// CHECK-NEXT: store <2 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 2, !alias.scope [[META109:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -194,7 +194,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.370") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.370") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[B:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) @@ -202,7 +202,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: [[TMP1:%.*]] = load <8 x half>, ptr [[B]], align 16, !tbaa [[TBAA14]], !noalias [[META119]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp ogt <8 x half> [[TMP0]], [[TMP1]] // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <8 x i1> [[CMP_I_I_I_I]] to <8 x i16> -// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META119]] +// CHECK-NEXT: store <8 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META119]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { @@ -210,7 +210,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { } // CHECK-LABEL: define dso_local spir_func void @_Z15TestGreaterThanN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi4EEES5_( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.450") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.450") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[A:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.488") align 8 [[B:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.450", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) @@ -237,7 +237,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, vec b) { // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP128:![0-9]+]] // CHECK: _ZN4sycl3_V16detailgtINS0_3ext6oneapi8bfloat16EEENSt9enable_ifIX24is_op_available_for_typeISt7greaterIvET_EENS0_3vecIsLi4EEEE4typeERKNSA_IS5_Li4EEESG_.exit: // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META127]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META127]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META127]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META121]] // CHECK-NEXT: ret void // @@ -249,7 +249,7 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, /********************** Unary Ops **********************/ // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIiLi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.526") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.526") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.526") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.526") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META133:![0-9]+]]) @@ -258,76 +258,76 @@ SYCL_EXTERNAL auto TestGreaterThan(vec a, // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <3 x i32> [[EXTRACTVEC_I_I_I]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <3 x i1> [[CMP_I_I_I_I]] to <3 x i32> // CHECK-NEXT: [[EXTRACTVEC_I_I:%.*]] = shufflevector <3 x i32> [[SEXT_I_I_I_I]], <3 x i32> poison, <4 x i32> -// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META136]] +// CHECK-NEXT: store <4 x i32> [[EXTRACTVEC_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META136]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecIiLi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.565") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.565") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.565") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.565") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META138:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META141:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i32>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META144:![0-9]+]] // CHECK-NEXT: [[SUB_I_I_I_I:%.*]] = sub <4 x i32> zeroinitializer, [[TMP0]] -// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META144]] +// CHECK-NEXT: store <4 x i32> [[SUB_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META144]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // Negation is not valid for std::byte. Therefore, using bitwise negation. // CHECK-LABEL: define dso_local spir_func void @_Z19TestBitwiseNegationN4sycl3_V13vecISt4byteLi16EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.604") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.604") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.604") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.604") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META146:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META149:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META152:![0-9]+]] // CHECK-NEXT: [[NOT_I_I_I_I:%.*]] = xor <16 x i8> [[TMP0]], splat (i8 -1) -// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META152]] +// CHECK-NEXT: store <16 x i8> [[NOT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META152]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestBitwiseNegation(vec a) { return ~a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecIbLi4EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.613") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.613") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.123") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META154:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META157:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META160:![0-9]+]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = icmp eq <4 x i8> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <4 x i1> [[CMP_I_I_I_I]] to <4 x i8> -// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META161:![0-9]+]] +// CHECK-NEXT: store <4 x i8> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META161:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_6detail9half_impl4halfELi2EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.652") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.690") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.652") align 4 captures(none) initializes((0, 4)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.690") align 4 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META165:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META168:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <2 x half>, ptr [[A]], align 4, !tbaa [[TBAA14]], !noalias [[META171:![0-9]+]] // CHECK-NEXT: [[CMP_I_I_I_I:%.*]] = fcmp oeq <2 x half> [[TMP0]], zeroinitializer // CHECK-NEXT: [[SEXT_I_I_I_I:%.*]] = sext <2 x i1> [[CMP_I_I_I_I]] to <2 x i16> -// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META172:![0-9]+]] +// CHECK-NEXT: store <2 x i16> [[SEXT_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 4, !alias.scope [[META172:![0-9]+]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_6detail9half_impl4halfELi8EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.408") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.408") align 16 captures(none) initializes((0, 16)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::vec.408") align 16 captures(none) [[A:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META176:![0-9]+]]) // CHECK-NEXT: tail call void @llvm.experimental.noalias.scope.decl(metadata [[META179:![0-9]+]]) // CHECK-NEXT: [[TMP0:%.*]] = load <8 x half>, ptr [[A]], align 16, !tbaa [[TBAA14]], !noalias [[META182:![0-9]+]] // CHECK-NEXT: [[FNEG_I_I_I_I:%.*]] = fneg <8 x half> [[TMP0]] -// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr [[AGG_RESULT]], align 16, !alias.scope [[META182]] +// CHECK-NEXT: store <8 x half> [[FNEG_I_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 16, !alias.scope [[META182]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-LABEL: define dso_local spir_func void @_Z12TestNegationN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi3EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.730") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.730") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.203") align 8 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.730", align 8 // CHECK-NEXT: [[A_ASCAST:%.*]] = addrspacecast ptr [[A]] to ptr addrspace(4) @@ -351,14 +351,14 @@ SYCL_EXTERNAL auto TestMinus(vec a) { return -a; } // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP191:![0-9]+]] // CHECK: _ZN4sycl3_V16detailntERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi3EEE.exit: // CHECK-NEXT: [[TMP0:%.*]] = load i64, ptr [[RES_I_I]], align 8, !noalias [[META190]] -// CHECK-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !alias.scope [[META190]] +// CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !alias.scope [[META190]] // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 8, ptr nonnull [[RES_I_I]]), !noalias [[META184]] // CHECK-NEXT: ret void // SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-LABEL: define dso_local spir_func void @_Z9TestMinusN4sycl3_V13vecINS0_3ext6oneapi8bfloat16ELi16EEE( -// CHECK-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.768") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.768") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK-SAME: ptr addrspace(4) dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::vec.768") align 32 captures(none) [[AGG_RESULT:%.*]], ptr noundef byval(%"class.sycl::_V1::vec.768") align 32 [[A:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ // CHECK-NEXT: entry: // CHECK-NEXT: [[REF_TMP_I_I_I_I:%.*]] = alloca float, align 4 // CHECK-NEXT: [[RES_I_I:%.*]] = alloca %"class.sycl::_V1::vec.768", align 32 @@ -384,7 +384,7 @@ SYCL_EXTERNAL auto TestNegation(vec a) { return !a; } // CHECK-NEXT: [[INC_I_I]] = add nuw nsw i64 [[I_0_I_I]], 1 // CHECK-NEXT: br label [[FOR_COND_I_I]], !llvm.loop [[LOOP205:![0-9]+]] // CHECK: _ZN4sycl3_V16detailngERKNS0_3vecINS0_3ext6oneapi8bfloat16ELi16EEE.exit: -// CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 32 [[AGG_RESULT]], ptr align 32 [[RES_I_I]], i64 32, i1 false) +// CHECK-NEXT: call void @llvm.memcpy.p4.p0.i64(ptr addrspace(4) align 32 [[AGG_RESULT]], ptr align 32 [[RES_I_I]], i64 32, i1 false) // CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 32, ptr nonnull [[RES_I_I]]), !noalias [[META193]] // CHECK-NEXT: ret void // From c790e5206e05ab79a995105e44f6197648844024 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Sun, 20 Apr 2025 20:49:49 -0700 Subject: [PATCH 08/16] One more test --- sycl/test/esimd/ctor_codegen.cpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/sycl/test/esimd/ctor_codegen.cpp b/sycl/test/esimd/ctor_codegen.cpp index 2aec16fe34d84..07aaf47681f22 100644 --- a/sycl/test/esimd/ctor_codegen.cpp +++ b/sycl/test/esimd/ctor_codegen.cpp @@ -19,7 +19,7 @@ SYCL_EXTERNAL auto foo(double i) SYCL_ESIMD_FUNCTION { return val; // CHECK: %[[V0:[a-zA-Z0-9_\.]+]] = insertelement <2 x double> poison, double %[[I]], i64 0 // CHECK-NEXT: %[[V1:[a-zA-Z0-9_\.]+]] = shufflevector <2 x double> %[[V0]], <2 x double> poison, <2 x i32> zeroinitializer -// CHECK-NEXT: store <2 x double> %[[V1]], ptr %[[RES]] +// CHECK-NEXT: store <2 x double> %[[V1]], ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -28,7 +28,7 @@ SYCL_EXTERNAL auto foo(double i) SYCL_ESIMD_FUNCTION { SYCL_EXTERNAL auto double_base_step_const() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z22double_base_step_constv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { return simd{1.0, 3.0}; - // CHECK: store <64 x double> , ptr %[[RES]] + // CHECK: store <64 x double> , ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void } @@ -41,7 +41,7 @@ SYCL_EXTERNAL auto double_base_step_var(double base, double step) SYCL_ESIMD_FUN // CHECK: %[[STEP_VEC_TMP:[a-zA-Z0-9_\.]+]] = insertelement <32 x double> poison, double %[[STEP]], i64 0 // CHECK: %[[STEP_VEC:[a-zA-Z0-9_\.]+]] = shufflevector <32 x double> %[[STEP_VEC_TMP]], <32 x double> poison, <32 x i32> zeroinitializer // CHECK: %[[FMA_VEC:[a-zA-Z0-9_\.]+]] = tail call noundef <32 x double> @llvm.fmuladd.v32f64(<32 x double> %[[STEP_VEC]], <32 x double> , <32 x double> %[[BASE_VEC]]) - // CHECK: store <32 x double> %[[FMA_VEC]], ptr %[[RES]] + // CHECK: store <32 x double> %[[FMA_VEC]], ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void } @@ -50,7 +50,7 @@ SYCL_EXTERNAL auto int_base_step_const() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z19int_base_step_constv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd val(17, 3); return val; - // CHECK: store <16 x i32> , ptr %[[RES]] + // CHECK: store <16 x i32> , ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -65,7 +65,7 @@ SYCL_EXTERNAL auto int_base_step_var(int base, int step) SYCL_ESIMD_FUNCTION { // CHECK: %[[STEP_VEC:[a-zA-Z0-9_\.]+]] = shufflevector <32 x i32> %[[STEP_VEC_TMP]], <32 x i32> poison, <32 x i32> zeroinitializer // CHECK: %[[MUL_VEC:[a-zA-Z0-9_\.]+]] = mul <32 x i32> %[[STEP_VEC]], // CHECK: %[[ADD_VEC:[a-zA-Z0-9_\.]+]] = add <32 x i32> %[[BASE_VEC]], %[[MUL_VEC]] - // CHECK: store <32 x i32> %[[ADD_VEC]], ptr %[[RES]] + // CHECK: store <32 x i32> %[[ADD_VEC]], ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void } @@ -76,7 +76,7 @@ SYCL_EXTERNAL auto int_base_step_var_n2(int base, int step) SYCL_ESIMD_FUNCTION // CHECK: %[[BASE_VEC_TMP1:[a-zA-Z0-9_\.]+]] = insertelement <2 x i32> poison, i32 %[[BASE]], i64 0 // CHECK: %[[BASE_INC:[a-zA-Z0-9_\.]+]] = add nsw i32 %[[BASE]], %[[STEP]] // CHECK: %[[RESULT_VEC:[a-zA-Z0-9_\.]+]] = insertelement <2 x i32> %[[BASE_VEC_TMP1]], i32 %[[BASE_INC]], i64 1 - // CHECK: store <2 x i32> %[[RESULT_VEC]], ptr %[[RES]] + // CHECK: store <2 x i32> %[[RESULT_VEC]], ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void } @@ -85,7 +85,7 @@ SYCL_EXTERNAL auto gee() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z3geev({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd val(-7); return val; -// CHECK: store <2 x float> splat (float -7.000000e+00), ptr %[[RES]] +// CHECK: store <2 x float> splat (float -7.000000e+00), ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -95,7 +95,7 @@ SYCL_EXTERNAL auto foomask() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z7foomaskv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd_mask<2> val({ 1, 0 }); return val; -// CHECK: store <2 x i16> , ptr %[[RES]] +// CHECK: store <2 x i16> , ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -105,7 +105,7 @@ SYCL_EXTERNAL auto geemask() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z7geemaskv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd_mask<2> val(1); return val; -// CHECK: store <2 x i16> splat (i16 1), ptr %[[RES]] +// CHECK: store <2 x i16> splat (i16 1), ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } @@ -130,7 +130,7 @@ SYCL_EXTERNAL auto geehalf() SYCL_ESIMD_FUNCTION { // CHECK: define dso_local spir_func void @_Z7geehalfv({{.*}} %[[RES:[a-zA-Z0-9_\.]+]]){{.*}} { simd val(-7); return val; -// CHECK: store <2 x half> splat (half 0xHC700), ptr %[[RES]] +// CHECK: store <2 x half> splat (half 0xHC700), ptr addrspace(4) %[[RES]] // CHECK-NEXT: ret void // CHECK-NEXT: } } From 78901db4f3f365f7c9612afe65596d9652c9a5ed Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Mon, 21 Apr 2025 08:29:51 -0700 Subject: [PATCH 09/16] Remove false check --- clang/lib/Driver/ToolChains/Clang.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7a904543cd22b..813157629bf3b 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5814,9 +5814,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // Disable this option for SYCL by default. // TODO: This needs to be re-enabled once we have a real fix. - if (!Args.hasArg(options::OPT_foffload_use_alloca_addrspace_for_srets) && - !Args.hasArg(options::OPT_fno_offload_use_alloca_addrspace_for_srets)) - CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets"); + CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets"); // Add any predefined macros associated with intel_gpu* type targets // passed in with -fsycl-targets From ee42c3736d945cc3ed895283fa2151bd0263d264 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Mon, 21 Apr 2025 12:04:48 -0700 Subject: [PATCH 10/16] Apply FE review comments --- clang/include/clang/Basic/CodeGenOptions.def | 2 ++ clang/include/clang/Driver/Options.td | 2 ++ 2 files changed, 4 insertions(+) diff --git a/clang/include/clang/Basic/CodeGenOptions.def b/clang/include/clang/Basic/CodeGenOptions.def index b127df69295b3..098eb2a2104c6 100644 --- a/clang/include/clang/Basic/CodeGenOptions.def +++ b/clang/include/clang/Basic/CodeGenOptions.def @@ -481,6 +481,8 @@ CODEGENOPT(DisableSYCLEarlyOpts, 1, 0) CODEGENOPT(OptimizeSYCLFramework, 1, 0) /// Whether to use alloca address space for `sret` arguments. +/// TODO: This option can be removed once a fix goes in that can +/// work with the community changes for using the alloca address space. CODEGENOPT(UseAllocaASForSrets, 1, 0) /// Turn on fp64 partial emulation for kernels with only fp64 conversion diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 4101962349406..12dacd5bffcdf 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -8827,6 +8827,8 @@ def fsycl_is_native_cpu : Flag<["-"], "fsycl-is-native-cpu">, HelpText<"Perform device compilation for Native CPU.">, Visibility<[CC1Option]>, MarshallingInfoFlag>; +// TODO: This option can be removed once a fix goes in that can +// work with the community changes for using the alloca address space. defm offload_use_alloca_addrspace_for_srets : BoolFOption<"offload-use-alloca-addrspace-for-srets", CodeGenOpts<"UseAllocaASForSrets">, DefaultTrue, From ad66fb34a4dd4acbb2cd90a584c2dc7eb9c900c0 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Mon, 21 Apr 2025 15:04:12 -0700 Subject: [PATCH 11/16] Move new option to minimize modifying tests --- clang/lib/Driver/ToolChains/Clang.cpp | 8 ++++---- clang/test/Driver/sycl-int-footer-old-model.cpp | 6 +++--- clang/test/Driver/sycl-int-header-footer.cpp | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 813157629bf3b..af7f7219a422a 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5685,6 +5685,10 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-is-device"); CmdArgs.push_back("-fdeclare-spirv-builtins"); + // Disable this option for SYCL by default. + // TODO: This needs to be re-enabled once we have a real fix. + CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets"); + // Set O2 optimization level by default if (!Args.getLastArg(options::OPT_O_Group)) CmdArgs.push_back("-O2"); @@ -5812,10 +5816,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-optimize-non-user-code"); } - // Disable this option for SYCL by default. - // TODO: This needs to be re-enabled once we have a real fix. - CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets"); - // Add any predefined macros associated with intel_gpu* type targets // passed in with -fsycl-targets // TODO: Macros are populated during device compilations and saved for diff --git a/clang/test/Driver/sycl-int-footer-old-model.cpp b/clang/test/Driver/sycl-int-footer-old-model.cpp index ba3e7d4330410..f317d936bef9c 100644 --- a/clang/test/Driver/sycl-int-footer-old-model.cpp +++ b/clang/test/Driver/sycl-int-footer-old-model.cpp @@ -1,7 +1,7 @@ /// Check compilation tool steps when using the integration footer // RUN: %clangxx -fsycl --no-offload-new-driver -I cmdline/dir -include dummy.h %/s -### 2>&1 \ // RUN: | FileCheck -check-prefix FOOTER %s -DSRCDIR=%/S -DCMDDIR=cmdline/dir -// FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]"{{.*}} "-sycl-std={{.*}}"{{.*}} "-include" "dummy.h" +// FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]" "-sycl-std={{.*}}"{{.*}} "-include" "dummy.h" // FOOTER: clang{{.*}} "-fsycl-is-host" // FOOTER-SAME: "-include-internal-header" "[[INTHEADER]]" // FOOTER-SAME: "-dependency-filter" "[[INTHEADER]]" @@ -10,7 +10,7 @@ /// Preprocessed file creation with integration footer // RUN: %clangxx -fsycl --no-offload-new-driver -E %/s -### 2>&1 \ // RUN: | FileCheck -check-prefix FOOTER_PREPROC_GEN %s -// FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]"{{.*}} "-sycl-std={{.*}}" "-o" "[[PREPROC_DEVICE:.+\.ii]]" +// FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]" "-sycl-std={{.*}}" "-o" "[[PREPROC_DEVICE:.+\.ii]]" // FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-host" // FOOTER_PREPROC_GEN-SAME: "-include-internal-header" "[[INTHEADER]]" // FOOTER_PREPROC_GEN-SAME: "-dependency-filter" "[[INTHEADER]]" @@ -29,7 +29,7 @@ /// Check that integration footer can be disabled // RUN: %clangxx -fsycl --no-offload-new-driver -fno-sycl-use-footer %s -### 2>&1 \ // RUN: | FileCheck -check-prefix NO-FOOTER --implicit-check-not "-fsycl-int-footer" %s -// NO-FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]"{{.*}} "-sycl-std={{.*}}" +// NO-FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-sycl-std={{.*}}" // NO-FOOTER-NOT: append-file // NO-FOOTER: clang{{.*}} "-fsycl-is-host"{{.*}} "-include-internal-header" "[[INTHEADER]]" diff --git a/clang/test/Driver/sycl-int-header-footer.cpp b/clang/test/Driver/sycl-int-header-footer.cpp index 7a67f8753bfc3..8692f29c765e8 100644 --- a/clang/test/Driver/sycl-int-header-footer.cpp +++ b/clang/test/Driver/sycl-int-header-footer.cpp @@ -2,7 +2,7 @@ // RUN: %clangxx -fsycl --offload-new-driver -I cmdline/dir -include dummy.h %/s -### 2>&1 \ // RUN: | FileCheck -check-prefix FOOTER %s -DSRCDIR=%/S -DCMDDIR=cmdline/dir -// FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]"{{.*}} "-sycl-std={{.*}}"{{.*}} "-include" "dummy.h" +// FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]" "-sycl-std={{.*}}"{{.*}} "-include" "dummy.h" // FOOTER: clang{{.*}} "-fsycl-is-host" // FOOTER-SAME: "-include-internal-header" "[[INTHEADER]]" // FOOTER-SAME: "-include" "dummy.h"{{.*}} "-I" "cmdline/dir" @@ -11,7 +11,7 @@ /// Preprocessed file creation with integration footer // RUN: %clangxx -fsycl --offload-new-driver -E %/s -### 2>&1 \ // RUN: | FileCheck -check-prefix FOOTER_PREPROC_GEN %s -// FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]"{{.*}} "-sycl-std={{.*}}" "-o" "[[PREPROC_DEVICE:.+\.ii]]" +// FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-fsycl-int-footer=[[INTFOOTER:.+\h]]" "-sycl-std={{.*}}" "-o" "[[PREPROC_DEVICE:.+\.ii]]" // FOOTER_PREPROC_GEN: clang{{.*}} "-fsycl-is-host" // FOOTER_PREPROC_GEN-SAME: "-include-internal-header" "[[INTHEADER]]" // FOOTER_PREPROC_GEN-SAME: "-dependency-filter" "[[INTHEADER]]" @@ -28,7 +28,7 @@ /// Check that integration footer can be disabled // RUN: %clangxx -fsycl --offload-new-driver -fno-sycl-use-footer %s -### 2>&1 \ // RUN: | FileCheck -check-prefix NO-FOOTER --implicit-check-not "-fsycl-int-footer" %s -// NO-FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]"{{.*}} "-sycl-std={{.*}}" +// NO-FOOTER: clang{{.*}} "-fsycl-is-device"{{.*}} "-fsycl-int-header=[[INTHEADER:.+\.h]]" "-sycl-std={{.*}}" // NO-FOOTER: clang{{.*}} "-fsycl-is-host"{{.*}} "-include-internal-header" "[[INTHEADER]]" /// Check phases without integration footer From 54f722a3aecd0d5b3e7a9e0312fd9ea34e226e87 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Mon, 21 Apr 2025 15:11:36 -0700 Subject: [PATCH 12/16] clang-format; inadvertent spaces --- clang/lib/Driver/ToolChains/Clang.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index af7f7219a422a..11fa88bc9b6e7 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5684,7 +5684,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // We want to compile sycl kernels. CmdArgs.push_back("-fsycl-is-device"); CmdArgs.push_back("-fdeclare-spirv-builtins"); - + // Disable this option for SYCL by default. // TODO: This needs to be re-enabled once we have a real fix. CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets"); From 04b377d4abc57c61ab0a861d9ac4aa19eb0ea9be Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Mon, 21 Apr 2025 19:10:24 -0700 Subject: [PATCH 13/16] Remove extra line --- clang/lib/Driver/ToolChains/Clang.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 11fa88bc9b6e7..19665480cc02f 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5815,7 +5815,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-optimize-non-user-code"); } - // Add any predefined macros associated with intel_gpu* type targets // passed in with -fsycl-targets // TODO: Macros are populated during device compilations and saved for From 0c4b4895ae5c7ab46fdf2a4105bc72163262ee21 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Tue, 22 Apr 2025 08:48:52 -0700 Subject: [PATCH 14/16] Disable the option when target is SPIR --- clang/lib/Driver/ToolChains/Clang.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 19665480cc02f..784f220f903a2 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5685,10 +5685,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("-fsycl-is-device"); CmdArgs.push_back("-fdeclare-spirv-builtins"); - // Disable this option for SYCL by default. - // TODO: This needs to be re-enabled once we have a real fix. - CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets"); - // Set O2 optimization level by default if (!Args.getLastArg(options::OPT_O_Group)) CmdArgs.push_back("-O2"); @@ -6056,10 +6052,14 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // are provided. TC.addClangWarningOptions(CmdArgs); - // FIXME: Subclass ToolChain for SPIR/SPIR-V and move this to - // addClangWarningOptions. - if (Triple.isSPIROrSPIRV()) + if (Triple.isSPIROrSPIRV()) { + // FIXME: Subclass ToolChain for SPIR/SPIR-V and move this to + // addClangWarningOptions. CmdArgs.push_back("-Wspir-compat"); + // Disable this option for SPIR targets. + // TODO: This needs to be re-enabled once we have a real fix. + CmdArgs.push_back("-fno-offload-use-alloca-addrspace-for-srets"); + } // Select the appropriate action. RewriteKind rewriteKind = RK_None; From 6760adb9c2a242428e19ad20e86e4f87c9362585 Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Tue, 22 Apr 2025 11:29:54 -0700 Subject: [PATCH 15/16] Remove test exclusion; modify test to check other mode --- sycl/cts_exclude_filter/compfails | 2 - .../extensions/address_cast.cpp | 73 +++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/sycl/cts_exclude_filter/compfails b/sycl/cts_exclude_filter/compfails index 6b640940f40e6..44d3870b88048 100644 --- a/sycl/cts_exclude_filter/compfails +++ b/sycl/cts_exclude_filter/compfails @@ -1,4 +1,2 @@ # Please use "#" to add comments here. # Do not delete the file even if it's empty. -# CMPLRLLVM-66370 -hierarchical diff --git a/sycl/test/check_device_code/extensions/address_cast.cpp b/sycl/test/check_device_code/extensions/address_cast.cpp index 7f7ffbe227921..0cf56ed5e6a6f 100644 --- a/sycl/test/check_device_code/extensions/address_cast.cpp +++ b/sycl/test/check_device_code/extensions/address_cast.cpp @@ -1,5 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals none --version 5 // RUN: %clangxx -D__ENABLE_USM_ADDR_SPACE__ -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s +// RUN: %clangxx -D__ENABLE_USM_ADDR_SPACE__ -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -foffload-use-alloca-addrspace-for-srets -o - %s | FileCheck %s --check-prefix=ALLOCA_AS // Linux/Windows have minor differences in the generated IR (e.g. TBAA // metadata). Having linux-only checks eases the maintenance without sacrifising @@ -21,6 +22,15 @@ namespace static_as_cast { // CHECK-NEXT: store ptr addrspace(1) [[TMP2]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA13:![0-9]+]], !alias.scope [[META15:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8:![0-9]+]] +// ALLOCA_AS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// ALLOCA_AS-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(1) +// ALLOCA_AS-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA13:![0-9]+]], !alias.scope [[META15:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { return static_address_cast(p); } @@ -31,6 +41,13 @@ SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { // CHECK-NEXT: store ptr addrspace(1) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA21:![0-9]+]], !alias.scope [[META23:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast23to_global_not_decoratedEPi( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(1) +// ALLOCA_AS-NEXT: store ptr addrspace(1) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA21:![0-9]+]], !alias.scope [[META23:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_global_not_decorated(int *p) { return static_address_cast(p); } @@ -41,6 +58,13 @@ SYCL_EXTERNAL auto to_global_not_decorated(int *p) { // CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META27:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] +// ALLOCA_AS-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META27:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { return static_address_cast(p); } @@ -50,6 +74,12 @@ SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { // CHECK-NEXT: store ptr addrspace(4) [[P]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA31:![0-9]+]], !alias.scope [[META33:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast24to_generic_not_decoratedEPi( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: store ptr addrspace(4) [[P]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA31:![0-9]+]], !alias.scope [[META33:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { return static_address_cast(p); } @@ -61,6 +91,13 @@ SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { // CHECK-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA37:![0-9]+]], !alias.scope [[META39:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast16to_global_deviceEPi( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.3") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(5) +// ALLOCA_AS-NEXT: store ptr addrspace(5) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA37:![0-9]+]], !alias.scope [[META39:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_global_device(int *p) { return static_address_cast(p); } @@ -72,6 +109,13 @@ SYCL_EXTERNAL auto to_global_device(int *p) { // CHECK-NEXT: store ptr addrspace(6) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA43:![0-9]+]], !alias.scope [[META45:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast14to_global_hostEPi( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.4") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(6) +// ALLOCA_AS-NEXT: store ptr addrspace(6) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA43:![0-9]+]], !alias.scope [[META45:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_global_host(int *p) { return static_address_cast(p); } @@ -87,6 +131,15 @@ namespace dynamic_as_cast { // CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA13]], !alias.scope [[META49:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] +// ALLOCA_AS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// ALLOCA_AS-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[TMP1]], i32 noundef 5) #[[ATTR5:[0-9]+]] +// ALLOCA_AS-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA13]], !alias.scope [[META49:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { return dynamic_address_cast(p); } @@ -97,6 +150,13 @@ SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { // CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA21]], !alias.scope [[META55:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast23to_global_not_decoratedEPi( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: [[CALL_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[P]], i32 noundef 5) #[[ATTR5]] +// ALLOCA_AS-NEXT: store ptr addrspace(1) [[CALL_I_I]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA21]], !alias.scope [[META55:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_global_not_decorated(int *p) { return dynamic_address_cast(p); } @@ -107,6 +167,13 @@ SYCL_EXTERNAL auto to_global_not_decorated(int *p) { // CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META59:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] +// ALLOCA_AS-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META59:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { return dynamic_address_cast(p); } @@ -116,6 +183,12 @@ SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { // CHECK-NEXT: store ptr addrspace(4) [[P]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA31]], !alias.scope [[META63:![0-9]+]] // CHECK-NEXT: ret void // +// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast24to_generic_not_decoratedEPi( +// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// ALLOCA_AS-NEXT: store ptr addrspace(4) [[P]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA31]], !alias.scope [[META63:![0-9]+]] +// ALLOCA_AS-NEXT: ret void +// SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { return dynamic_address_cast(p); } From 8edcba2d0ebb0a8498b64c8db930707aadab18da Mon Sep 17 00:00:00 2001 From: Premanand M Rao Date: Tue, 22 Apr 2025 13:41:48 -0700 Subject: [PATCH 16/16] Rename FileCheck string to please clang-format --- .../extensions/address_cast.cpp | 126 +++++++++--------- 1 file changed, 63 insertions(+), 63 deletions(-) diff --git a/sycl/test/check_device_code/extensions/address_cast.cpp b/sycl/test/check_device_code/extensions/address_cast.cpp index 0cf56ed5e6a6f..3f54371d34c1a 100644 --- a/sycl/test/check_device_code/extensions/address_cast.cpp +++ b/sycl/test/check_device_code/extensions/address_cast.cpp @@ -1,6 +1,6 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals none --version 5 // RUN: %clangxx -D__ENABLE_USM_ADDR_SPACE__ -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -o - %s | FileCheck %s -// RUN: %clangxx -D__ENABLE_USM_ADDR_SPACE__ -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -foffload-use-alloca-addrspace-for-srets -o - %s | FileCheck %s --check-prefix=ALLOCA_AS +// RUN: %clangxx -D__ENABLE_USM_ADDR_SPACE__ -O3 -fsycl -fsycl-device-only -fno-discard-value-names -S -emit-llvm -fno-sycl-instrument-device-code -Xclang -foffload-use-alloca-addrspace-for-srets -o - %s | FileCheck %s --check-prefix=CHECK_ALLOCA_AS // Linux/Windows have minor differences in the generated IR (e.g. TBAA // metadata). Having linux-only checks eases the maintenance without sacrifising @@ -22,14 +22,14 @@ namespace static_as_cast { // CHECK-NEXT: store ptr addrspace(1) [[TMP2]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA13:![0-9]+]], !alias.scope [[META15:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8:![0-9]+]] -// ALLOCA_AS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// ALLOCA_AS-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(1) -// ALLOCA_AS-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA13:![0-9]+]], !alias.scope [[META15:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// CHECK_ALLOCA_AS-NEXT: [[TMP2:%.*]] = addrspacecast ptr addrspace(4) [[TMP1]] to ptr addrspace(1) +// CHECK_ALLOCA_AS-NEXT: store ptr addrspace(1) [[TMP2]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA13:![0-9]+]], !alias.scope [[META15:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { return static_address_cast(p); @@ -41,12 +41,12 @@ SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { // CHECK-NEXT: store ptr addrspace(1) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA21:![0-9]+]], !alias.scope [[META23:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast23to_global_not_decoratedEPi( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(1) -// ALLOCA_AS-NEXT: store ptr addrspace(1) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA21:![0-9]+]], !alias.scope [[META23:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast23to_global_not_decoratedEPi( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1:[0-9]+]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(1) +// CHECK_ALLOCA_AS-NEXT: store ptr addrspace(1) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA21:![0-9]+]], !alias.scope [[META23:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_global_not_decorated(int *p) { return static_address_cast(p); @@ -58,12 +58,12 @@ SYCL_EXTERNAL auto to_global_not_decorated(int *p) { // CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META27:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] -// ALLOCA_AS-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META27:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] +// CHECK_ALLOCA_AS-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META27:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { return static_address_cast(p); @@ -74,11 +74,11 @@ SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { // CHECK-NEXT: store ptr addrspace(4) [[P]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA31:![0-9]+]], !alias.scope [[META33:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast24to_generic_not_decoratedEPi( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: store ptr addrspace(4) [[P]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA31:![0-9]+]], !alias.scope [[META33:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast24to_generic_not_decoratedEPi( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: store ptr addrspace(4) [[P]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA31:![0-9]+]], !alias.scope [[META33:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { return static_address_cast(p); @@ -91,12 +91,12 @@ SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { // CHECK-NEXT: store ptr addrspace(5) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA37:![0-9]+]], !alias.scope [[META39:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast16to_global_deviceEPi( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.3") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(5) -// ALLOCA_AS-NEXT: store ptr addrspace(5) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA37:![0-9]+]], !alias.scope [[META39:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast16to_global_deviceEPi( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.3") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(5) +// CHECK_ALLOCA_AS-NEXT: store ptr addrspace(5) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA37:![0-9]+]], !alias.scope [[META39:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_global_device(int *p) { return static_address_cast(p); @@ -109,12 +109,12 @@ SYCL_EXTERNAL auto to_global_device(int *p) { // CHECK-NEXT: store ptr addrspace(6) [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA43:![0-9]+]], !alias.scope [[META45:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast14to_global_hostEPi( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.4") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(6) -// ALLOCA_AS-NEXT: store ptr addrspace(6) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA43:![0-9]+]], !alias.scope [[META45:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN14static_as_cast14to_global_hostEPi( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.4") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: [[TMP0:%.*]] = addrspacecast ptr addrspace(4) [[P]] to ptr addrspace(6) +// CHECK_ALLOCA_AS-NEXT: store ptr addrspace(6) [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA43:![0-9]+]], !alias.scope [[META45:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_global_host(int *p) { return static_address_cast(p); @@ -131,14 +131,14 @@ namespace dynamic_as_cast { // CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA13]], !alias.scope [[META49:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] -// ALLOCA_AS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) -// ALLOCA_AS-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[TMP1]], i32 noundef 5) #[[ATTR5:[0-9]+]] -// ALLOCA_AS-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA13]], !alias.scope [[META49:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast19to_global_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr") align 8 captures(none) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR2:[0-9]+]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] +// CHECK_ALLOCA_AS-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to ptr addrspace(4) +// CHECK_ALLOCA_AS-NEXT: [[CALL_I_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[TMP1]], i32 noundef 5) #[[ATTR5:[0-9]+]] +// CHECK_ALLOCA_AS-NEXT: store ptr addrspace(1) [[CALL_I_I_I]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA13]], !alias.scope [[META49:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { return dynamic_address_cast(p); @@ -150,12 +150,12 @@ SYCL_EXTERNAL auto to_global_decorated(decorated_generic_ptr p) { // CHECK-NEXT: store ptr addrspace(1) [[CALL_I_I]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA21]], !alias.scope [[META55:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast23to_global_not_decoratedEPi( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: [[CALL_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[P]], i32 noundef 5) #[[ATTR5]] -// ALLOCA_AS-NEXT: store ptr addrspace(1) [[CALL_I_I]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA21]], !alias.scope [[META55:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast23to_global_not_decoratedEPi( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.1") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR4:[0-9]+]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: [[CALL_I_I:%.*]] = tail call spir_func noundef ptr addrspace(1) @_Z41__spirv_GenericCastToPtrExplicit_ToGlobalPvi(ptr addrspace(4) noundef [[P]], i32 noundef 5) #[[ATTR5]] +// CHECK_ALLOCA_AS-NEXT: store ptr addrspace(1) [[CALL_I_I]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA21]], !alias.scope [[META55:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_global_not_decorated(int *p) { return dynamic_address_cast(p); @@ -167,12 +167,12 @@ SYCL_EXTERNAL auto to_global_not_decorated(int *p) { // CHECK-NEXT: store i64 [[TMP0]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META59:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] -// ALLOCA_AS-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META59:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast20to_generic_decoratedEN4sycl3_V19multi_ptrIiLNS1_6access13address_spaceE6ELNS3_9decoratedE1EEE( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) initializes((0, 8)) [[AGG_RESULT:%.*]], ptr noundef readonly byval(%"class.sycl::_V1::multi_ptr.0") align 8 captures(none) [[P:%.*]]) local_unnamed_addr #[[ATTR0]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: [[TMP0:%.*]] = load i64, ptr [[P]], align 8, !tbaa [[TBAA8]] +// CHECK_ALLOCA_AS-NEXT: store i64 [[TMP0]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA8]], !alias.scope [[META59:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { return dynamic_address_cast(p); @@ -183,11 +183,11 @@ SYCL_EXTERNAL auto to_generic_decorated(decorated_generic_ptr p) { // CHECK-NEXT: store ptr addrspace(4) [[P]], ptr addrspace(4) [[AGG_RESULT]], align 8, !tbaa [[TBAA31]], !alias.scope [[META63:![0-9]+]] // CHECK-NEXT: ret void // -// ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast24to_generic_not_decoratedEPi( -// ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ -// ALLOCA_AS-NEXT: [[ENTRY:.*:]] -// ALLOCA_AS-NEXT: store ptr addrspace(4) [[P]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA31]], !alias.scope [[META63:![0-9]+]] -// ALLOCA_AS-NEXT: ret void +// CHECK_ALLOCA_AS-LABEL: define dso_local spir_func void @_ZN15dynamic_as_cast24to_generic_not_decoratedEPi( +// CHECK_ALLOCA_AS-SAME: ptr dead_on_unwind noalias writable writeonly sret(%"class.sycl::_V1::multi_ptr.2") align 8 captures(none) [[AGG_RESULT:%.*]], ptr addrspace(4) noundef [[P:%.*]]) local_unnamed_addr #[[ATTR1]] {{.*}}{ +// CHECK_ALLOCA_AS-NEXT: [[ENTRY:.*:]] +// CHECK_ALLOCA_AS-NEXT: store ptr addrspace(4) [[P]], ptr [[AGG_RESULT]], align 8, !tbaa [[TBAA31]], !alias.scope [[META63:![0-9]+]] +// CHECK_ALLOCA_AS-NEXT: ret void // SYCL_EXTERNAL auto to_generic_not_decorated(int *p) { return dynamic_address_cast(p);