From 6ff1202d5a8445ea57b6c576c396dc31b2ce7eb4 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 21 Apr 2021 16:56:29 +0300 Subject: [PATCH 01/10] [SYCL] Add DPC++ RT support for non-native SYCL 2020 spec constants This patch adds support of non-native SYCL 2020 specialization constants to DPC++ runtime. Non-native specialization constants emulate the usage of native specialization constants for AOT compilation and CUDA --- sycl/source/detail/device_image_impl.hpp | 21 ++++++++ sycl/source/detail/kernel_bundle_impl.hpp | 43 ++++++++++++++- .../program_manager/program_manager.cpp | 3 +- sycl/source/detail/scheduler/commands.cpp | 39 +++++++++----- sycl/source/detail/scheduler/commands.hpp | 7 +-- sycl/source/handler.cpp | 10 ++-- .../kernel_lambda_with_kernel_handler_arg.cpp | 4 -- .../non_native/Inputs/common.cpp | 54 +++++++++++++++++++ .../non_native/accelerator.cpp | 7 +++ .../aot_w_kernel_handler_wo_spec_consts.cpp | 34 ++++++++++++ .../non_native/cpu.cpp | 7 +++ .../non_native/cuda.cpp | 7 +++ .../non_native/gpu.cpp | 9 ++++ 13 files changed, 221 insertions(+), 24 deletions(-) create mode 100644 sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp create mode 100644 sycl/test/on-device/basic_tests/specialization_constants/non_native/accelerator.cpp create mode 100644 sycl/test/on-device/basic_tests/specialization_constants/non_native/aot_w_kernel_handler_wo_spec_consts.cpp create mode 100644 sycl/test/on-device/basic_tests/specialization_constants/non_native/cpu.cpp create mode 100644 sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp create mode 100644 sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 9fa83db6aa150..3aa3bf504361b 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -161,12 +161,29 @@ class device_image_impl { const context &get_context() const noexcept { return MContext; } + void set_kernel_ids(std::vector KernelIDs) noexcept { + MKernelIDs = std::move(KernelIDs); + } + std::vector &get_kernel_ids_ref() noexcept { return MKernelIDs; } std::vector &get_spec_const_blob_ref() noexcept { return MSpecConstsBlob; } + RT::PiMem &get_spec_const_buffer_ref() noexcept { + std::lock_guard Lock{MSpecConstAccessMtx}; + if (nullptr == MSpecConstsBuffer) { + const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin(); + Plugin.call( + detail::getSyclObjImpl(MContext)->getHandleRef(), + PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_USE, + MSpecConstsBlob.size(), MSpecConstsBlob.data(), + &MSpecConstsBuffer, nullptr); + } + return MSpecConstsBuffer; + } + const std::map> & get_spec_const_data_ref() const noexcept { return MSpecConstSymMap; @@ -244,6 +261,10 @@ class device_image_impl { // Binary blob which can have values of all specialization constants in the // image std::vector MSpecConstsBlob; + // Buffer containing binary blob which can have values of all specialization + // constants in the image, it is using for storing non-native specialization + // constants + RT::PiMem MSpecConstsBuffer = nullptr; // Contains map of spec const names to their descriptions + offsets in // the MSpecConstsBlob std::map> MSpecConstSymMap; diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 1cdec6113a31b..3354d023ba6d9 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -281,6 +281,43 @@ class kernel_bundle_impl { kernel get_kernel(const kernel_id &KernelID, const std::shared_ptr &Self) const { + // TODO: remove this workaround after AOT binaries contain kernel IDs by + // default + bool HasKernelIdProp = false; + for (const auto &DeviceImage : MDeviceImages) { + if (!getSyclObjImpl(DeviceImage)->get_kernel_ids().empty()) { + HasKernelIdProp = true; + } + } + if (!HasKernelIdProp) { + for (const auto &DeviceImage : MDeviceImages) { + size_t Size; + const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin(); + if (nullptr == getSyclObjImpl(DeviceImage)->get_program_ref()) { + continue; + } + Plugin.call( + getSyclObjImpl(DeviceImage)->get_program_ref(), + PI_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size); + std::string RawResult(Size, ' '); + Plugin.call( + getSyclObjImpl(DeviceImage)->get_program_ref(), + PI_PROGRAM_INFO_KERNEL_NAMES, RawResult.size(), &RawResult[0], + nullptr); + // Get rid of the null terminator + RawResult.pop_back(); + std::vector KernelNames(split_string(RawResult, ';')); + std::vector KernelIDs; + for (const auto &KernelName : KernelNames) { + KernelIDs.push_back(detail::createSyclObjFromImpl( + std::make_shared(KernelName))); + } + + std::sort(KernelIDs.begin(), KernelIDs.end(), detail::LessByNameComp{}); + + getSyclObjImpl(DeviceImage)->set_kernel_ids(KernelIDs); + } + } auto It = std::find_if(MDeviceImages.begin(), MDeviceImages.end(), [&KernelID](const device_image_plain &DeviceImage) { @@ -373,7 +410,11 @@ class kernel_bundle_impl { }); } - const device_image_plain *begin() const { return &MDeviceImages.front(); } + const device_image_plain *begin() const { + assert(!MDeviceImages.empty() && "MDeviceImages can't be empty"); + // UB in case MDeviceImages is empty + return &MDeviceImages.front(); + } const device_image_plain *end() const { return &MDeviceImages.back() + 1; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index c497c6a64905d..e0973cc3b1f16 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1309,7 +1309,8 @@ void ProgramManager::bringSYCLDeviceImagesToState( break; } case bundle_state::executable: - // Device image is already in the desired state. + DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(), + /*PropList=*/{}); break; } break; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index af1ce7df439fe..3f4aaedbe783e 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1636,8 +1636,9 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) { } pi_result ExecCGCommand::SetKernelParamsAndLaunch( - CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc, - std::vector &RawEvents, RT::PiEvent &Event, + CGExecKernel *ExecKernel, + std::shared_ptr DeviceImageImpl, RT::PiKernel Kernel, + NDRDescT &NDRDesc, std::vector &RawEvents, RT::PiEvent &Event, ProgramManager::KernelArgMask EliminatedArgMask) { vector_class &Args = ExecKernel->MArgs; // TODO this is not necessary as long as we can guarantee that the arguments @@ -1692,9 +1693,21 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { - throw cl::sycl::feature_not_supported( - "SYCL2020 specialization constants are not yet fully supported", - PI_INVALID_OPERATION); + if (MQueue->is_host()) { + throw cl::sycl::feature_not_supported( + "SYCL2020 specialization constants are not yet supported on host " + "device", + PI_INVALID_OPERATION); + } + if (DeviceImageImpl != nullptr) { + RT::PiMem SpecConstsBuffer = + DeviceImageImpl->get_spec_const_buffer_ref(); + Plugin.call( + Kernel, NextTrueIndex, sizeof(RT::PiMem), &SpecConstsBuffer); + } else { + Plugin.call(Kernel, NextTrueIndex, + sizeof(RT::PiMem), nullptr); + } break; } } @@ -1917,6 +1930,8 @@ cl_int ExecCGCommand::enqueueImp() { bool KnownProgram = true; std::shared_ptr SyclKernelImpl; + std::shared_ptr DeviceImageImpl; + // Use kernel_bundle is available if (KernelBundleImplPtr) { @@ -1930,9 +1945,7 @@ cl_int ExecCGCommand::enqueueImp() { SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); Kernel = SyclKernelImpl->getHandleRef(); - - std::shared_ptr DeviceImageImpl = - SyclKernelImpl->getDeviceImage(); + DeviceImageImpl = SyclKernelImpl->getDeviceImage(); Program = DeviceImageImpl->get_program_ref(); @@ -1980,11 +1993,13 @@ cl_int ExecCGCommand::enqueueImp() { if (KernelMutex != nullptr) { // For cacheable kernels, we use per-kernel mutex std::lock_guard Lock(*KernelMutex); - Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents, - Event, EliminatedArgMask); + Error = + SetKernelParamsAndLaunch(ExecKernel, DeviceImageImpl, Kernel, NDRDesc, + RawEvents, Event, EliminatedArgMask); } else { - Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents, - Event, EliminatedArgMask); + Error = + SetKernelParamsAndLaunch(ExecKernel, DeviceImageImpl, Kernel, NDRDesc, + RawEvents, Event, EliminatedArgMask); } if (PI_SUCCESS != Error) { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8713096be9127..677cdc0cfcceb 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -519,9 +519,10 @@ class ExecCGCommand : public Command { AllocaCommandBase *getAllocaForReq(Requirement *Req); pi_result SetKernelParamsAndLaunch( - CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc, - std::vector &RawEvents, RT::PiEvent &Event, - ProgramManager::KernelArgMask EliminatedArgMask); + CGExecKernel *ExecKernel, + std::shared_ptr DeviceImageImpl, RT::PiKernel Kernel, + NDRDescT &NDRDesc, std::vector &RawEvents, + RT::PiEvent &Event, ProgramManager::KernelArgMask EliminatedArgMask); std::unique_ptr MCommandGroup; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 9ba4887ca935f..fd8f255e82743 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -57,6 +57,10 @@ handler::getOrInsertHandlerKernelBundle(bool Insert) const { if (!KernelBundleImpPtr && Insert) { KernelBundleImpPtr = detail::getSyclObjImpl( get_kernel_bundle(MQueue->get_context())); + if (KernelBundleImpPtr->empty()) { + KernelBundleImpPtr = detail::getSyclObjImpl( + get_kernel_bundle(MQueue->get_context())); + } detail::ExtendedMemberT EMember = { detail::ExtendedMembersType::HANDLER_KERNEL_BUNDLE, KernelBundleImpPtr}; @@ -336,9 +340,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { - throw cl::sycl::feature_not_supported( - "SYCL2020 specialization constants are not yet fully supported", - PI_INVALID_OPERATION); + MArgs.emplace_back( + kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size, + Index + IndexShift); break; } } diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp index 398561d795208..d249d4396a00e 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp @@ -5,10 +5,6 @@ // and parallel_for_work_group to verify that this code compiles and runs // correctly with user's lambda with and without sycl::kernel_handler argument -// TODO: enable cuda support when non-native spec constants started to be -// supported -// UNSUPPORTED: cuda - #include int main() { diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp new file mode 100644 index 0000000000000..692e3c776fd4c --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp @@ -0,0 +1,54 @@ +#include + +#include + +class Kernel1Name; +class Kernel2Name; + +struct TestStruct { + int a; + int b; +}; + +const static sycl::specialization_id SpecConst1{42}; +const static sycl::specialization_id SpecConst2{42}; +const static sycl::specialization_id SpecConst3{TestStruct{42, 42}}; +const static sycl::specialization_id SpecConst4{42}; + +int main() { + sycl::queue Q; + + // No support for host device so far + if (Q.is_host()) + return 0; + + { + sycl::buffer Buf{sycl::range{1}}; + Q.submit([&](sycl::handler &CGH) { + CGH.set_specialization_constant(1); + auto Acc = Buf.get_access(CGH); + CGH.single_task([=](sycl::kernel_handler KH) { + Acc[0] = KH.get_specialization_constant(); + }); + }); + auto Acc = Buf.get_access(); + assert(Acc[0] == 1); + } + + { + sycl::buffer Buf{sycl::range{1}}; + Q.submit([&](sycl::handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.set_specialization_constant(TestStruct{1, 2}); + const auto SC = CGH.get_specialization_constant(); + assert(SC == 42); + CGH.single_task([=](sycl::kernel_handler KH) { + Acc[0] = KH.get_specialization_constant(); + }); + }); + auto Acc = Buf.get_access(); + assert(Acc[0].a == 1 && Acc[0].b == 2); + } + + return 0; +} diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/accelerator.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/accelerator.cpp new file mode 100644 index 0000000000000..c11958d10d07a --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/accelerator.cpp @@ -0,0 +1,7 @@ +// REQUIRES: aoc, accelerator + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test checks correctness of SYCL2020 non-native specialization constants +// on accelerator device diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/aot_w_kernel_handler_wo_spec_consts.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/aot_w_kernel_handler_wo_spec_consts.cpp new file mode 100644 index 0000000000000..4a52afbcbca16 --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/aot_w_kernel_handler_wo_spec_consts.cpp @@ -0,0 +1,34 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +// This test checks correctness of compiling and running of application with +// kernel lambdas containing kernel_handler arguments and w/o usage of +// specialization constants in AOT mode + +#include + +int main() { + sycl::queue q; + + q.submit([&](sycl::handler &cgh) { + cgh.single_task( + [=](sycl::kernel_handler kh) {}); + }); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(4, 4, 4), sycl::range<3>(2, 2, 2)), + [=](sycl::nd_item<3> item, sycl::kernel_handler kh) {}); + }); + + // parallel_for_work_group with kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for_work_group< + class KernelParallelForWorkGroupWithoutKernelHandler>( + sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2), + [=](sycl::group<3> myGroup, sycl::kernel_handler kh) { + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + }); + }); +} diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/cpu.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cpu.cpp new file mode 100644 index 0000000000000..a2c91f1b58de3 --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cpu.cpp @@ -0,0 +1,7 @@ +// REQUIRES: opencl-aot, cpu + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +// This test checks correctness of SYCL2020 non-native specialization constants +// on CPU device diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp new file mode 100644 index 0000000000000..1ba4d2a8454cf --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp @@ -0,0 +1,7 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out +// RUN: env SYCL_DEVICE_FILTER=cuda %t.out + +// This test checks correctness of SYCL2020 non-native specialization constants +// on CUDA device diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp new file mode 100644 index 0000000000000..1be0704cbe1c3 --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp @@ -0,0 +1,9 @@ +// REQUIRES: ocloc, gpu +// UNSUPPORTED: cuda +// CUDA is not compatible with SPIR. + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// This test checks correctness of SYCL2020 non-native specialization constants +// on GPU device From bb81670ac5c13882efecc99b2637cf51fcc08af5 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 21 Apr 2021 17:07:30 +0300 Subject: [PATCH 02/10] Fix clang-format --- sycl/source/detail/device_image_impl.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 3aa3bf504361b..47aecf1df4185 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -178,8 +178,8 @@ class device_image_impl { Plugin.call( detail::getSyclObjImpl(MContext)->getHandleRef(), PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_USE, - MSpecConstsBlob.size(), MSpecConstsBlob.data(), - &MSpecConstsBuffer, nullptr); + MSpecConstsBlob.size(), MSpecConstsBlob.data(), &MSpecConstsBuffer, + nullptr); } return MSpecConstsBuffer; } From 43669f668c89efcc1d0fc8c5afa9999d61544f17 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Mon, 26 Apr 2021 14:35:47 +0300 Subject: [PATCH 03/10] Address PR comments --- sycl/include/CL/sycl/detail/pi.h | 1 + sycl/source/detail/device_image_impl.hpp | 2 +- sycl/source/detail/kernel_bundle_impl.hpp | 11 +++++------ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index bb010482d64dd..4a382acdcedb7 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -504,6 +504,7 @@ typedef enum { using pi_mem_flags = pi_bitfield; // Access constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW = CL_MEM_READ_WRITE; +constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RO = CL_MEM_READ_ONLY; // Host pointer constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = CL_MEM_USE_HOST_PTR; constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY = CL_MEM_COPY_HOST_PTR; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 47aecf1df4185..def8928513322 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -177,7 +177,7 @@ class device_image_impl { const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin(); Plugin.call( detail::getSyclObjImpl(MContext)->getHandleRef(), - PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_USE, + PI_MEM_FLAGS_ACCESS_RO | PI_MEM_FLAGS_HOST_PTR_USE, MSpecConstsBlob.size(), MSpecConstsBlob.data(), &MSpecConstsBuffer, nullptr); } diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 3354d023ba6d9..dbe9c542ebdd6 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -283,12 +283,11 @@ class kernel_bundle_impl { const std::shared_ptr &Self) const { // TODO: remove this workaround after AOT binaries contain kernel IDs by // default - bool HasKernelIdProp = false; - for (const auto &DeviceImage : MDeviceImages) { - if (!getSyclObjImpl(DeviceImage)->get_kernel_ids().empty()) { - HasKernelIdProp = true; - } - } + bool HasKernelIdProp = + std::any_of(MDeviceImages.begin(), MDeviceImages.end(), + [](const auto &DeviceImage) { + return !getSyclObjImpl(DeviceImage)->get_kernel_ids().empty(); + }); if (!HasKernelIdProp) { for (const auto &DeviceImage : MDeviceImages) { size_t Size; From 06a15c2623eda064d49d24ddfb2941fea58ed414 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Mon, 26 Apr 2021 14:52:35 +0300 Subject: [PATCH 04/10] Update GPU AOT test --- .../basic_tests/specialization_constants/non_native/gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp index 1be0704cbe1c3..df9f0a7937dc3 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp @@ -2,7 +2,7 @@ // UNSUPPORTED: cuda // CUDA is not compatible with SPIR. -// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device *" %S/Inputs/common.cpp -o %t.out // RUN: %GPU_RUN_PLACEHOLDER %t.out // This test checks correctness of SYCL2020 non-native specialization constants From b5eb9b4f724d1e4db7ab6700629ab45e16db5c56 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Mon, 26 Apr 2021 14:57:38 +0300 Subject: [PATCH 05/10] Fix clang-format --- sycl/source/detail/kernel_bundle_impl.hpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index dbe9c542ebdd6..250310794bc8f 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -283,11 +283,11 @@ class kernel_bundle_impl { const std::shared_ptr &Self) const { // TODO: remove this workaround after AOT binaries contain kernel IDs by // default - bool HasKernelIdProp = - std::any_of(MDeviceImages.begin(), MDeviceImages.end(), - [](const auto &DeviceImage) { - return !getSyclObjImpl(DeviceImage)->get_kernel_ids().empty(); - }); + bool HasKernelIdProp = std::any_of( + MDeviceImages.begin(), MDeviceImages.end(), + [](const auto &DeviceImage) { + return !getSyclObjImpl(DeviceImage)->get_kernel_ids().empty(); + }); if (!HasKernelIdProp) { for (const auto &DeviceImage : MDeviceImages) { size_t Size; From 8e944f572df3829dbe53111cdea36d2af752566b Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 30 Apr 2021 13:28:54 +0300 Subject: [PATCH 06/10] Small update --- sycl/source/detail/device_image_impl.hpp | 4 --- sycl/source/detail/kernel_bundle_impl.hpp | 36 ------------------- .../non_native/Inputs/common.cpp | 4 +-- .../non_native/cuda.cpp | 3 ++ 4 files changed, 5 insertions(+), 42 deletions(-) diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index def8928513322..dd85644c942ce 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -161,10 +161,6 @@ class device_image_impl { const context &get_context() const noexcept { return MContext; } - void set_kernel_ids(std::vector KernelIDs) noexcept { - MKernelIDs = std::move(KernelIDs); - } - std::vector &get_kernel_ids_ref() noexcept { return MKernelIDs; } std::vector &get_spec_const_blob_ref() noexcept { diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 250310794bc8f..3381dc1c4ac9e 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -281,42 +281,6 @@ class kernel_bundle_impl { kernel get_kernel(const kernel_id &KernelID, const std::shared_ptr &Self) const { - // TODO: remove this workaround after AOT binaries contain kernel IDs by - // default - bool HasKernelIdProp = std::any_of( - MDeviceImages.begin(), MDeviceImages.end(), - [](const auto &DeviceImage) { - return !getSyclObjImpl(DeviceImage)->get_kernel_ids().empty(); - }); - if (!HasKernelIdProp) { - for (const auto &DeviceImage : MDeviceImages) { - size_t Size; - const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin(); - if (nullptr == getSyclObjImpl(DeviceImage)->get_program_ref()) { - continue; - } - Plugin.call( - getSyclObjImpl(DeviceImage)->get_program_ref(), - PI_PROGRAM_INFO_KERNEL_NAMES, 0, nullptr, &Size); - std::string RawResult(Size, ' '); - Plugin.call( - getSyclObjImpl(DeviceImage)->get_program_ref(), - PI_PROGRAM_INFO_KERNEL_NAMES, RawResult.size(), &RawResult[0], - nullptr); - // Get rid of the null terminator - RawResult.pop_back(); - std::vector KernelNames(split_string(RawResult, ';')); - std::vector KernelIDs; - for (const auto &KernelName : KernelNames) { - KernelIDs.push_back(detail::createSyclObjFromImpl( - std::make_shared(KernelName))); - } - - std::sort(KernelIDs.begin(), KernelIDs.end(), detail::LessByNameComp{}); - - getSyclObjImpl(DeviceImage)->set_kernel_ids(KernelIDs); - } - } auto It = std::find_if(MDeviceImages.begin(), MDeviceImages.end(), [&KernelID](const device_image_plain &DeviceImage) { diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp index 692e3c776fd4c..94bf445f1119d 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp @@ -27,7 +27,7 @@ int main() { Q.submit([&](sycl::handler &CGH) { CGH.set_specialization_constant(1); auto Acc = Buf.get_access(CGH); - CGH.single_task([=](sycl::kernel_handler KH) { + CGH.single_task([=](sycl::kernel_handler KH) { Acc[0] = KH.get_specialization_constant(); }); }); @@ -42,7 +42,7 @@ int main() { CGH.set_specialization_constant(TestStruct{1, 2}); const auto SC = CGH.get_specialization_constant(); assert(SC == 42); - CGH.single_task([=](sycl::kernel_handler KH) { + CGH.single_task([=](sycl::kernel_handler KH) { Acc[0] = KH.get_specialization_constant(); }); }); diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp index 1ba4d2a8454cf..01d0d37e04f26 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp @@ -3,5 +3,8 @@ // RUN: %clangxx -fsycl -fsycl-targets=nvptx64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out // RUN: env SYCL_DEVICE_FILTER=cuda %t.out +// TODO: enable this test then compile-time error in sycl-post-link is fixed +// UNSUPPORTED: cuda + // This test checks correctness of SYCL2020 non-native specialization constants // on CUDA device From 2d85e6251014135468a087f6cffadcc1aee21e6b Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 30 Apr 2021 20:37:55 +0300 Subject: [PATCH 07/10] Explicitly mark that non-native/gpu.cpp requires OpenCL as on Windows builbot it runs with L0 --- .../basic_tests/specialization_constants/non_native/gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp index df9f0a7937dc3..5715a86b32689 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp @@ -1,4 +1,4 @@ -// REQUIRES: ocloc, gpu +// REQUIRES: opencl, ocloc, gpu // UNSUPPORTED: cuda // CUDA is not compatible with SPIR. From 7ae2181fd1a7570d6a0022e2fbaa083344f977bb Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 30 Apr 2021 23:23:32 +0300 Subject: [PATCH 08/10] Remove "REQUIRES: opencl" from non-native/gpu.cpp test --- .../basic_tests/specialization_constants/non_native/gpu.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp index 5715a86b32689..df9f0a7937dc3 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp @@ -1,4 +1,4 @@ -// REQUIRES: opencl, ocloc, gpu +// REQUIRES: ocloc, gpu // UNSUPPORTED: cuda // CUDA is not compatible with SPIR. From 05010084cbcc24570dbef3ef74a4254d48c50311 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Sat, 1 May 2021 00:08:23 +0300 Subject: [PATCH 09/10] Make the buffer RW as Level Zero supports only RW buffers --- sycl/include/CL/sycl/detail/pi.h | 1 - sycl/source/detail/device_image_impl.hpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/detail/pi.h b/sycl/include/CL/sycl/detail/pi.h index 20628c08087d2..a6cdadf664310 100644 --- a/sycl/include/CL/sycl/detail/pi.h +++ b/sycl/include/CL/sycl/detail/pi.h @@ -515,7 +515,6 @@ typedef enum { using pi_mem_flags = pi_bitfield; // Access constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RW = CL_MEM_READ_WRITE; -constexpr pi_mem_flags PI_MEM_FLAGS_ACCESS_RO = CL_MEM_READ_ONLY; // Host pointer constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_USE = CL_MEM_USE_HOST_PTR; constexpr pi_mem_flags PI_MEM_FLAGS_HOST_PTR_COPY = CL_MEM_COPY_HOST_PTR; diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 9d330fd8b5127..4be9ec4dfdd34 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -175,7 +175,7 @@ class device_image_impl { const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin(); Plugin.call( detail::getSyclObjImpl(MContext)->getHandleRef(), - PI_MEM_FLAGS_ACCESS_RO | PI_MEM_FLAGS_HOST_PTR_USE, + PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_USE, MSpecConstsBlob.size(), MSpecConstsBlob.data(), &MSpecConstsBuffer, nullptr); } From 022503ae2530d8d83169c24e8073a49fa41bf227 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Sat, 1 May 2021 01:13:46 +0300 Subject: [PATCH 10/10] Replace piKernelSetArg with piextKernelSetArgMemObj to support Level Zero specific --- sycl/source/detail/scheduler/commands.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 74ef1c386bfca..5535e397b2006 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1702,11 +1702,11 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( if (DeviceImageImpl != nullptr) { RT::PiMem SpecConstsBuffer = DeviceImageImpl->get_spec_const_buffer_ref(); - Plugin.call( - Kernel, NextTrueIndex, sizeof(RT::PiMem), &SpecConstsBuffer); + Plugin.call(Kernel, NextTrueIndex, + &SpecConstsBuffer); } else { - Plugin.call(Kernel, NextTrueIndex, - sizeof(RT::PiMem), nullptr); + Plugin.call(Kernel, NextTrueIndex, + nullptr); } break; }