diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 697404d27a5db..4be9ec4dfdd34 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -169,6 +169,19 @@ class device_image_impl { return MSpecConstsBlob; } + RT::PiMem &get_spec_const_buffer_ref() noexcept { + std::lock_guard Lock{MSpecConstAccessMtx}; + if (nullptr == MSpecConstsBuffer) { + const detail::plugin &Plugin = getSyclObjImpl(MContext)->getPlugin(); + Plugin.call( + detail::getSyclObjImpl(MContext)->getHandleRef(), + PI_MEM_FLAGS_ACCESS_RW | PI_MEM_FLAGS_HOST_PTR_USE, + MSpecConstsBlob.size(), MSpecConstsBlob.data(), &MSpecConstsBuffer, + nullptr); + } + return MSpecConstsBuffer; + } + const std::map> & get_spec_const_data_ref() const noexcept { return MSpecConstSymMap; @@ -262,6 +275,10 @@ class device_image_impl { // Binary blob which can have values of all specialization constants in the // image std::vector MSpecConstsBlob; + // Buffer containing binary blob which can have values of all specialization + // constants in the image, it is using for storing non-native specialization + // constants + RT::PiMem MSpecConstsBuffer = nullptr; // Contains map of spec const names to their descriptions + offsets in // the MSpecConstsBlob std::map> MSpecConstSymMap; diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 4cf92c3e85b2a..6abc889b10b65 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -442,7 +442,11 @@ class kernel_bundle_impl { return SetInDevImg || MSpecConstValues.count(std::string{SpecName}) != 0; } - const device_image_plain *begin() const { return &MDeviceImages.front(); } + const device_image_plain *begin() const { + assert(!MDeviceImages.empty() && "MDeviceImages can't be empty"); + // UB in case MDeviceImages is empty + return &MDeviceImages.front(); + } const device_image_plain *end() const { return &MDeviceImages.back() + 1; } diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index c497c6a64905d..e0973cc3b1f16 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -1309,7 +1309,8 @@ void ProgramManager::bringSYCLDeviceImagesToState( break; } case bundle_state::executable: - // Device image is already in the desired state. + DevImage = build(DevImage, getSyclObjImpl(DevImage)->get_devices(), + /*PropList=*/{}); break; } break; diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 471c11e19e6b1..5535e397b2006 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1636,8 +1636,9 @@ static void ReverseRangeDimensionsForKernel(NDRDescT &NDR) { } pi_result ExecCGCommand::SetKernelParamsAndLaunch( - CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc, - std::vector &RawEvents, RT::PiEvent &Event, + CGExecKernel *ExecKernel, + std::shared_ptr DeviceImageImpl, RT::PiKernel Kernel, + NDRDescT &NDRDesc, std::vector &RawEvents, RT::PiEvent &Event, ProgramManager::KernelArgMask EliminatedArgMask) { vector_class &Args = ExecKernel->MArgs; // TODO this is not necessary as long as we can guarantee that the arguments @@ -1692,9 +1693,21 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { - throw cl::sycl::feature_not_supported( - "SYCL2020 specialization constants are not yet fully supported", - PI_INVALID_OPERATION); + if (MQueue->is_host()) { + throw cl::sycl::feature_not_supported( + "SYCL2020 specialization constants are not yet supported on host " + "device", + PI_INVALID_OPERATION); + } + if (DeviceImageImpl != nullptr) { + RT::PiMem SpecConstsBuffer = + DeviceImageImpl->get_spec_const_buffer_ref(); + Plugin.call(Kernel, NextTrueIndex, + &SpecConstsBuffer); + } else { + Plugin.call(Kernel, NextTrueIndex, + nullptr); + } break; } } @@ -1916,6 +1929,8 @@ cl_int ExecCGCommand::enqueueImp() { bool KnownProgram = true; std::shared_ptr SyclKernelImpl; + std::shared_ptr DeviceImageImpl; + // Use kernel_bundle is available if (KernelBundleImplPtr) { @@ -1929,9 +1944,7 @@ cl_int ExecCGCommand::enqueueImp() { SyclKernelImpl = detail::getSyclObjImpl(SyclKernel); Kernel = SyclKernelImpl->getHandleRef(); - - std::shared_ptr DeviceImageImpl = - SyclKernelImpl->getDeviceImage(); + DeviceImageImpl = SyclKernelImpl->getDeviceImage(); Program = DeviceImageImpl->get_program_ref(); @@ -1979,11 +1992,13 @@ cl_int ExecCGCommand::enqueueImp() { if (KernelMutex != nullptr) { // For cacheable kernels, we use per-kernel mutex std::lock_guard Lock(*KernelMutex); - Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents, - Event, EliminatedArgMask); + Error = + SetKernelParamsAndLaunch(ExecKernel, DeviceImageImpl, Kernel, NDRDesc, + RawEvents, Event, EliminatedArgMask); } else { - Error = SetKernelParamsAndLaunch(ExecKernel, Kernel, NDRDesc, RawEvents, - Event, EliminatedArgMask); + Error = + SetKernelParamsAndLaunch(ExecKernel, DeviceImageImpl, Kernel, NDRDesc, + RawEvents, Event, EliminatedArgMask); } if (PI_SUCCESS != Error) { diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 8713096be9127..677cdc0cfcceb 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -519,9 +519,10 @@ class ExecCGCommand : public Command { AllocaCommandBase *getAllocaForReq(Requirement *Req); pi_result SetKernelParamsAndLaunch( - CGExecKernel *ExecKernel, RT::PiKernel Kernel, NDRDescT &NDRDesc, - std::vector &RawEvents, RT::PiEvent &Event, - ProgramManager::KernelArgMask EliminatedArgMask); + CGExecKernel *ExecKernel, + std::shared_ptr DeviceImageImpl, RT::PiKernel Kernel, + NDRDescT &NDRDesc, std::vector &RawEvents, + RT::PiEvent &Event, ProgramManager::KernelArgMask EliminatedArgMask); std::unique_ptr MCommandGroup; diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 81c1bc6846fe2..c7c1f6c06ab2c 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -57,6 +57,10 @@ handler::getOrInsertHandlerKernelBundle(bool Insert) const { if (!KernelBundleImpPtr && Insert) { KernelBundleImpPtr = detail::getSyclObjImpl( get_kernel_bundle(MQueue->get_context())); + if (KernelBundleImpPtr->empty()) { + KernelBundleImpPtr = detail::getSyclObjImpl( + get_kernel_bundle(MQueue->get_context())); + } detail::ExtendedMemberT EMember = { detail::ExtendedMembersType::HANDLER_KERNEL_BUNDLE, KernelBundleImpPtr}; @@ -340,9 +344,9 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, break; } case kernel_param_kind_t::kind_specialization_constants_buffer: { - throw cl::sycl::feature_not_supported( - "SYCL2020 specialization constants are not yet fully supported", - PI_INVALID_OPERATION); + MArgs.emplace_back( + kernel_param_kind_t::kind_specialization_constants_buffer, Ptr, Size, + Index + IndexShift); break; } } diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp index 398561d795208..d249d4396a00e 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp @@ -5,10 +5,6 @@ // and parallel_for_work_group to verify that this code compiles and runs // correctly with user's lambda with and without sycl::kernel_handler argument -// TODO: enable cuda support when non-native spec constants started to be -// supported -// UNSUPPORTED: cuda - #include int main() { diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp new file mode 100644 index 0000000000000..94bf445f1119d --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/Inputs/common.cpp @@ -0,0 +1,54 @@ +#include + +#include + +class Kernel1Name; +class Kernel2Name; + +struct TestStruct { + int a; + int b; +}; + +const static sycl::specialization_id SpecConst1{42}; +const static sycl::specialization_id SpecConst2{42}; +const static sycl::specialization_id SpecConst3{TestStruct{42, 42}}; +const static sycl::specialization_id SpecConst4{42}; + +int main() { + sycl::queue Q; + + // No support for host device so far + if (Q.is_host()) + return 0; + + { + sycl::buffer Buf{sycl::range{1}}; + Q.submit([&](sycl::handler &CGH) { + CGH.set_specialization_constant(1); + auto Acc = Buf.get_access(CGH); + CGH.single_task([=](sycl::kernel_handler KH) { + Acc[0] = KH.get_specialization_constant(); + }); + }); + auto Acc = Buf.get_access(); + assert(Acc[0] == 1); + } + + { + sycl::buffer Buf{sycl::range{1}}; + Q.submit([&](sycl::handler &CGH) { + auto Acc = Buf.get_access(CGH); + CGH.set_specialization_constant(TestStruct{1, 2}); + const auto SC = CGH.get_specialization_constant(); + assert(SC == 42); + CGH.single_task([=](sycl::kernel_handler KH) { + Acc[0] = KH.get_specialization_constant(); + }); + }); + auto Acc = Buf.get_access(); + assert(Acc[0].a == 1 && Acc[0].b == 2); + } + + return 0; +} diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/accelerator.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/accelerator.cpp new file mode 100644 index 0000000000000..c11958d10d07a --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/accelerator.cpp @@ -0,0 +1,7 @@ +// REQUIRES: aoc, accelerator + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out +// RUN: %ACC_RUN_PLACEHOLDER %t.out + +// This test checks correctness of SYCL2020 non-native specialization constants +// on accelerator device diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/aot_w_kernel_handler_wo_spec_consts.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/aot_w_kernel_handler_wo_spec_consts.cpp new file mode 100644 index 0000000000000..4a52afbcbca16 --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/aot_w_kernel_handler_wo_spec_consts.cpp @@ -0,0 +1,34 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +// This test checks correctness of compiling and running of application with +// kernel lambdas containing kernel_handler arguments and w/o usage of +// specialization constants in AOT mode + +#include + +int main() { + sycl::queue q; + + q.submit([&](sycl::handler &cgh) { + cgh.single_task( + [=](sycl::kernel_handler kh) {}); + }); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(4, 4, 4), sycl::range<3>(2, 2, 2)), + [=](sycl::nd_item<3> item, sycl::kernel_handler kh) {}); + }); + + // parallel_for_work_group with kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for_work_group< + class KernelParallelForWorkGroupWithoutKernelHandler>( + sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2), + [=](sycl::group<3> myGroup, sycl::kernel_handler kh) { + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + }); + }); +} diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/cpu.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cpu.cpp new file mode 100644 index 0000000000000..a2c91f1b58de3 --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cpu.cpp @@ -0,0 +1,7 @@ +// REQUIRES: opencl-aot, cpu + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out +// RUN: %CPU_RUN_PLACEHOLDER %t.out + +// This test checks correctness of SYCL2020 non-native specialization constants +// on CPU device diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp new file mode 100644 index 0000000000000..01d0d37e04f26 --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/cuda.cpp @@ -0,0 +1,10 @@ +// REQUIRES: cuda + +// RUN: %clangxx -fsycl -fsycl-targets=nvptx64-unknown-unknown-sycldevice %S/Inputs/common.cpp -o %t.out +// RUN: env SYCL_DEVICE_FILTER=cuda %t.out + +// TODO: enable this test then compile-time error in sycl-post-link is fixed +// UNSUPPORTED: cuda + +// This test checks correctness of SYCL2020 non-native specialization constants +// on CUDA device diff --git a/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp new file mode 100644 index 0000000000000..df9f0a7937dc3 --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/non_native/gpu.cpp @@ -0,0 +1,9 @@ +// REQUIRES: ocloc, gpu +// UNSUPPORTED: cuda +// CUDA is not compatible with SPIR. + +// RUN: %clangxx -fsycl -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device *" %S/Inputs/common.cpp -o %t.out +// RUN: %GPU_RUN_PLACEHOLDER %t.out + +// This test checks correctness of SYCL2020 non-native specialization constants +// on GPU device