diff --git a/sycl/source/detail/device_image_impl.cpp b/sycl/source/detail/device_image_impl.cpp index 423df27bd489d..ea0935935dbaa 100644 --- a/sycl/source/detail/device_image_impl.cpp +++ b/sycl/source/detail/device_image_impl.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include +#include #include namespace sycl { @@ -47,10 +48,14 @@ std::shared_ptr device_image_impl::tryGetExtensionKernel( &UrKernel); // Kernel created by urKernelCreate is implicitly retained. + const KernelArgMask *ArgMask = nullptr; + if (auto ArgMaskIt = MEliminatedKernelArgMasks.find(AdjustedName); + ArgMaskIt != MEliminatedKernelArgMasks.end()) + ArgMask = &ArgMaskIt->second; + return std::make_shared( UrKernel, *detail::getSyclObjImpl(Context), shared_from_this(), - OwnerBundle, - /*ArgMask=*/nullptr, UrProgram, /*CacheMutex=*/nullptr); + OwnerBundle, ArgMask, UrProgram, /*CacheMutex=*/nullptr); } } // namespace detail diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index f0050e7aa0918..c47f2cbf71fdc 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -152,6 +152,7 @@ class ManagedDeviceBinaries { using MangledKernelNameMapT = std::map>; using KernelNameSetT = std::set>; +using KernelNameToArgMaskMap = std::unordered_map; // Information unique to images compiled at runtime through the // ext_oneapi_kernel_compiler extension. @@ -260,12 +261,23 @@ class device_image_impl MKernelIDs(std::move(KernelIDs)), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(Origins) { updateSpecConstSymMap(); - // SYCLBIN files have the kernel names embedded in the binaries, so we - // collect them. - if (BinImage && (MOrigins & ImageOriginSYCLBIN)) + if (BinImage && (MOrigins & ImageOriginSYCLBIN)) { + // SYCLBIN files have the kernel names embedded in the binaries, so we + // collect them. for (const sycl_device_binary_property &KNProp : BinImage->getKernelNames()) MKernelNames.insert(KNProp->Name); + + KernelArgMask ArgMask; + if (BinImage->getKernelParamOptInfo().isAvailable()) { + // Extract argument mask from the image. + const RTDeviceBinaryImage::PropertyRange &KPOIRange = + BinImage->getKernelParamOptInfo(); + for (const auto &Info : KPOIRange) + MEliminatedKernelArgMasks[Info->Name] = + createKernelArgMask(DeviceBinaryProperty(Info).asByteArray()); + } + } } device_image_impl( @@ -276,10 +288,12 @@ class device_image_impl const std::vector &SpecConstsBlob, uint8_t Origins, std::optional &&RTCInfo, KernelNameSetT &&KernelNames, + KernelNameToArgMaskMap &&EliminatedKernelArgMasks, std::unique_ptr &&MergedImageStorage, private_tag) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::move(KernelIDs)), MKernelNames{std::move(KernelNames)}, + MEliminatedKernelArgMasks{std::move(EliminatedKernelArgMasks)}, MSpecConstsBlob(SpecConstsBlob), MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MSpecConstSymMap(SpecConstMap), MOrigins(Origins), @@ -289,11 +303,14 @@ class device_image_impl device_image_impl(const RTDeviceBinaryImage *BinImage, const context &Context, const std::vector &Devices, bundle_state State, ur_program_handle_t Program, syclex::source_language Lang, - KernelNameSetT &&KernelNames, private_tag) + KernelNameSetT &&KernelNames, + KernelNameToArgMaskMap &&EliminatedKernelArgMasks, + private_tag) : MBinImage(BinImage), MContext(std::move(Context)), MDevices(std::move(Devices)), MState(State), MProgram(Program), MKernelIDs(std::make_shared>()), MKernelNames{std::move(KernelNames)}, + MEliminatedKernelArgMasks{std::move(EliminatedKernelArgMasks)}, MSpecConstsDefValBlob(getSpecConstsDefValBlob()), MOrigins(ImageOriginKernelCompiler), MRTCBinInfo(KernelCompilerBinaryInfo{Lang}) { @@ -674,6 +691,10 @@ class device_image_impl const KernelNameSetT &getKernelNames() const noexcept { return MKernelNames; } + const KernelNameToArgMaskMap &getEliminatedKernelArgMasks() const noexcept { + return MEliminatedKernelArgMasks; + } + bool isNonSYCLSourceBased() const noexcept { return (getOriginMask() & ImageOriginKernelCompiler) && !isFromSourceLanguage(syclex::source_language::sycl); @@ -1266,6 +1287,10 @@ class device_image_impl // List of known kernel names. KernelNameSetT MKernelNames; + // Map for storing kernel argument masks for kernels. This is currently only + // used for images created from SYCLBIN. + KernelNameToArgMaskMap MEliminatedKernelArgMasks; + // A mutex for sycnhronizing access to spec constants blob. Mutable because // needs to be locked in the const method for getting spec constant value. mutable std::mutex MSpecConstAccessMtx; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 6b55e6cde8344..6bd9348473b69 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2868,6 +2868,8 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, setSpecializationConstants(InputImpl, Prog, Adapter); KernelNameSetT KernelNames = InputImpl.getKernelNames(); + std::unordered_map EliminatedKernelArgMasks = + InputImpl.getEliminatedKernelArgMasks(); std::optional RTCInfo = InputImpl.getRTCInfo(); @@ -2878,7 +2880,7 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, InputImpl.get_spec_const_data_ref(), InputImpl.get_spec_const_blob_ref(), InputImpl.getOriginMask(), std::move(RTCInfo), std::move(KernelNames), - /*MergedImageStorage = */ nullptr); + std::move(EliminatedKernelArgMasks), nullptr); std::string CompileOptions; applyCompileOptionsFromEnvironment(CompileOptions); @@ -3063,12 +3065,16 @@ ProgramManager::link(const std::vector &Imgs, RTCInfoPtrs; RTCInfoPtrs.reserve(Imgs.size()); KernelNameSetT MergedKernelNames; + std::unordered_map MergedEliminatedKernelArgMasks; for (const device_image_plain &DevImg : Imgs) { const DeviceImageImplPtr &DevImgImpl = getSyclObjImpl(DevImg); CombinedOrigins |= DevImgImpl->getOriginMask(); RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); MergedKernelNames.insert(DevImgImpl->getKernelNames().begin(), DevImgImpl->getKernelNames().end()); + MergedEliminatedKernelArgMasks.insert( + DevImgImpl->getEliminatedKernelArgMasks().begin(), + DevImgImpl->getEliminatedKernelArgMasks().end()); } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); @@ -3076,7 +3082,8 @@ ProgramManager::link(const std::vector &Imgs, NewBinImg, Context, std::vector{Devs}, bundle_state::executable, std::move(KernelIDs), LinkedProg, std::move(NewSpecConstMap), std::move(NewSpecConstBlob), CombinedOrigins, std::move(MergedRTCInfo), - std::move(MergedKernelNames), std::move(MergedImageStorage)); + std::move(MergedKernelNames), std::move(MergedEliminatedKernelArgMasks), + std::move(MergedImageStorage)); // TODO: Make multiple sets of device images organized by devices they are // compiled for. @@ -3144,11 +3151,15 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, RTCInfoPtrs; RTCInfoPtrs.reserve(DevImgWithDeps.size()); KernelNameSetT MergedKernelNames; + std::unordered_map MergedEliminatedKernelArgMasks; for (const device_image_plain &DevImg : DevImgWithDeps) { const auto &DevImgImpl = getSyclObjImpl(DevImg); RTCInfoPtrs.emplace_back(&(DevImgImpl->getRTCInfo())); MergedKernelNames.insert(DevImgImpl->getKernelNames().begin(), DevImgImpl->getKernelNames().end()); + MergedEliminatedKernelArgMasks.insert( + DevImgImpl->getEliminatedKernelArgMasks().begin(), + DevImgImpl->getEliminatedKernelArgMasks().end()); } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); @@ -3157,7 +3168,7 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, bundle_state::executable, std::move(KernelIDs), ResProgram, std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, std::move(MergedRTCInfo), std::move(MergedKernelNames), - std::move(MergedImageStorage)); + std::move(MergedEliminatedKernelArgMasks), std::move(MergedImageStorage)); return createSyclObjFromImpl(std::move(ExecImpl)); } diff --git a/sycl/test-e2e/SYCLBIN/Inputs/dae.hpp b/sycl/test-e2e/SYCLBIN/Inputs/dae.hpp new file mode 100644 index 0000000000000..cead009c1957f --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/dae.hpp @@ -0,0 +1,49 @@ +#include "common.hpp" + +#include + +static constexpr size_t NUM = 1024; +static constexpr size_t WGSIZE = 16; +static constexpr float EPS = 0.001; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe = sycl::build(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe = sycl::link(KBObj); +#else // defined(SYCLBIN_EXECUTABLE_STATE) + auto KBExe = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); +#endif + + assert(KBExe.ext_oneapi_has_kernel("iota")); + sycl::kernel IotaKern = KBExe.ext_oneapi_get_kernel("iota"); + + float *Ptr = sycl::malloc_shared(NUM, Q); + Q.submit([&](sycl::handler &CGH) { + // First arugment is unused, but should still be passed, even if eliminated + // by DAE. + CGH.set_args(3.14f, Ptr); + CGH.parallel_for(sycl::nd_range{{NUM}, {WGSIZE}}, IotaKern); + }).wait_and_throw(); + + for (int I = 0; I < NUM; I++) { + const float Truth = static_cast(I); + if (std::abs(Ptr[I] - Truth) > EPS) { + std::cout << "Result: " << Ptr[I] << " expected " << I << "\n"; + ++Failed; + } + } + sycl::free(Ptr, Q); + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/dae_kernel.cpp b/sycl/test-e2e/SYCLBIN/Inputs/dae_kernel.cpp new file mode 100644 index 0000000000000..12aec888a44aa --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/dae_kernel.cpp @@ -0,0 +1,10 @@ +#include + +namespace syclexp = sycl::ext::oneapi::experimental; +namespace syclext = sycl::ext::oneapi; + +extern "C" SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclexp::nd_range_kernel<1>)) void iota(float, float *ptr) { + size_t id = syclext::this_work_item::get_nd_item<1>().get_global_linear_id(); + ptr[id] = static_cast(id); +} diff --git a/sycl/test-e2e/SYCLBIN/dae_executable.cpp b/sycl/test-e2e/SYCLBIN/dae_executable.cpp new file mode 100644 index 0000000000000..e413b05e5ad5f --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dae_executable.cpp @@ -0,0 +1,23 @@ +//==----------- dae_executable.cpp --- SYCLBIN extension tests -------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using a kernel from a SYCLBIN with a dead argument. + +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_EXECUTABLE_STATE + +#include "Inputs/dae.hpp" diff --git a/sycl/test-e2e/SYCLBIN/dae_input.cpp b/sycl/test-e2e/SYCLBIN/dae_input.cpp new file mode 100644 index 0000000000000..0a2e0d1a20df9 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dae_input.cpp @@ -0,0 +1,23 @@ +//==----------- dae_input.cpp --- SYCLBIN extension tests ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using a kernel from a SYCLBIN with a dead argument. + +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/dae.hpp" diff --git a/sycl/test-e2e/SYCLBIN/dae_object.cpp b/sycl/test-e2e/SYCLBIN/dae_object.cpp new file mode 100644 index 0000000000000..820628dc01981 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dae_object.cpp @@ -0,0 +1,23 @@ +//==----------- dae_object.cpp --- SYCLBIN extension tests -----------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using a kernel from a SYCLBIN with a dead argument. + +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dae_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/dae.hpp" diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index de2b939756ea0..5b97ae7a0cac9 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 25 +// CHECK-NUM-MATCHES: 26 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see