From bff864de12713a0b953cf0d2fdd68dd2d8e3693e Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 19 Mar 2021 20:49:06 +0300 Subject: [PATCH 01/14] [SYCL] Add DPC++ RT support for SYCL 2020 spec constants (part 1) This patch adds partial implementation of specialization constants in DPC++ RT: 1. Implementation of `specialization_id` class 2. Implementation of `kernel_handler` class 3. Support for user's device lambdas which take `kernel_handler` argument --- sycl/include/CL/sycl.hpp | 2 + sycl/include/CL/sycl/detail/cg_types.hpp | 74 +++++++- sycl/include/CL/sycl/handler.hpp | 159 ++++++++++++++++-- sycl/include/CL/sycl/kernel_handler.hpp | 72 ++++++++ sycl/include/CL/sycl/specialization_id.hpp | 35 ++++ .../kernel_handler.cpp | 27 +++ 6 files changed, 350 insertions(+), 19 deletions(-) create mode 100644 sycl/include/CL/sycl/kernel_handler.hpp create mode 100644 sycl/include/CL/sycl/specialization_id.hpp create mode 100644 sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp diff --git a/sycl/include/CL/sycl.hpp b/sycl/include/CL/sycl.hpp index 954686895a57d..e96952a9d9ac5 100644 --- a/sycl/include/CL/sycl.hpp +++ b/sycl/include/CL/sycl.hpp @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -47,6 +48,7 @@ #include #include #include +#include #include #include #include diff --git a/sycl/include/CL/sycl/detail/cg_types.hpp b/sycl/include/CL/sycl/detail/cg_types.hpp index f8acc96550fda..c306b4d758bd6 100644 --- a/sycl/include/CL/sycl/detail/cg_types.hpp +++ b/sycl/include/CL/sycl/detail/cg_types.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -122,6 +123,32 @@ class NDRDescT { size_t Dims; }; +template +struct KernelArgsInfo : KernelArgsInfo {}; + +template +struct KernelArgsInfo { + constexpr static size_t argsCount() { return sizeof...(Args); }; + constexpr static bool hasArgs() { return sizeof...(Args) > 0; }; + template struct ArgType { + typedef typename std::tuple_element>::type type; + }; +}; + +template +std::enable_if_t::hasArgs(), + bool> constexpr kernelHandlerIsLastElementTypeOfKernel() { + return std::is_same::template ArgType< + KernelArgsInfo::argsCount() - 1>::type, + kernel_handler>::value; +} + +template +std::enable_if_t::hasArgs(), + bool> constexpr kernelHandlerIsLastElementTypeOfKernel() { + return false; +} + // The pure virtual class aimed to store lambda/functors of any type. class HostKernelBase { public: @@ -197,7 +224,7 @@ class HostKernel : public HostKernelBase { template typename detail::enable_if_t::value> runOnHost(const NDRDescT &) { - MKernel(); + runKernelWithoutArg(); } template @@ -228,7 +255,8 @@ class HostKernel : public HostKernelBase { store_id(&ID); store_item(&Item); } - MKernel(ID); + runKernelWithArg &, + decltype(MKernel)>(ID); }); } @@ -253,7 +281,8 @@ class HostKernel : public HostKernelBase { store_id(&ID); store_item(&ItemWithOffset); } - MKernel(Item); + runKernelWithArg, decltype(MKernel)>( + Item); }); } @@ -286,7 +315,9 @@ class HostKernel : public HostKernelBase { store_id(&ID); store_item(&Item); } - MKernel(Item); + runKernelWithArg< + sycl::item, + decltype(MKernel)>(Item); }); } @@ -336,7 +367,7 @@ class HostKernel : public HostKernelBase { auto g = NDItem.get_group(); store_group(&g); } - MKernel(NDItem); + runKernelWithArg, decltype(MKernel)>(NDItem); }); }); } @@ -364,11 +395,42 @@ class HostKernel : public HostKernelBase { detail::NDLoop::iterate(NGroups, [&](const id &GroupID) { sycl::group Group = IDBuilder::createGroup(GlobalSize, LocalSize, NGroups, GroupID); - MKernel(Group); + runKernelWithArg, decltype(MKernel)>(Group); }); } ~HostKernel() = default; + +private: + template + std::enable_if_t(), + void> + runKernelWithoutArg() { + kernel_handler KH; + MKernel(KH); + } + + template + std::enable_if_t(), + void> + runKernelWithoutArg() { + MKernel(); + } + + template + std::enable_if_t(), + void> + runKernelWithArg(ArgType Arg) { + kernel_handler KH; + MKernel(Arg, KH); + } + + template + std::enable_if_t(), + void> + runKernelWithArg(ArgType Arg) { + MKernel(Arg); + } }; } // namespace detail diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 7c370fb8c2816..6bad0a3cba74f 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -514,6 +515,12 @@ class __SYCL_EXPORT handler { template void StoreLambda(KernelType KernelFunc) { + if (detail::kernelHandlerIsLastElementTypeOfKernel() && + MIsHost) { + throw cl::sycl::feature_not_supported( + "kernel_handler is not supported by host device.", + PI_INVALID_OPERATION); + } MHostKernel.reset( new detail::HostKernel( KernelFunc)); @@ -810,7 +817,13 @@ class __SYCL_EXPORT handler { if (Arg[0] >= NumWorkItems[0]) return; Arg.set_allowed_range(NumWorkItems); - KernelFunc(Arg); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + KernelFunc(Arg, KH); + } else { + KernelFunc(Arg); + } }; range AdjustedRange = NumWorkItems; @@ -830,7 +843,13 @@ class __SYCL_EXPORT handler { { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkItems; - kernel_parallel_for(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for(KernelFunc, KH); + } else { + kernel_parallel_for(KernelFunc); + } #else detail::checkValueRange(NumWorkItems); MNDRDesc.set(std::move(NumWorkItems)); @@ -874,6 +893,18 @@ class __SYCL_EXPORT handler { KernelFunc(); } + // NOTE: the name of this function - "kernel_single_task" - is used by the + // Front End to determine kernel invocation kind. + template + __attribute__((sycl_kernel)) void +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_single_task(KernelType KernelFunc, kernel_handler KH) { +#else + kernel_single_task(const KernelType &KernelFunc, kernel_handler KH) { +#endif + KernelFunc(KH); + } + // NOTE: the name of these functions - "kernel_parallel_for" - are used by the // Front End to determine kernel invocation kind. template @@ -886,6 +917,18 @@ class __SYCL_EXPORT handler { KernelFunc(detail::Builder::getElement(detail::declptr())); } + // NOTE: the name of these functions - "kernel_parallel_for" - are used by the + // Front End to determine kernel invocation kind. + template + __attribute__((sycl_kernel)) void +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_parallel_for(KernelType KernelFunc, kernel_handler KH) { +#else + kernel_parallel_for(const KernelType &KernelFunc, kernel_handler KH) { +#endif + KernelFunc(detail::Builder::getElement(detail::declptr()), KH); + } + // NOTE: the name of this function - "kernel_parallel_for_work_group" - is // used by the Front End to determine kernel invocation kind. template @@ -898,6 +941,18 @@ class __SYCL_EXPORT handler { KernelFunc(detail::Builder::getElement(detail::declptr())); } + // NOTE: the name of this function - "kernel_parallel_for_work_group" - is + // used by the Front End to determine kernel invocation kind. + template + __attribute__((sycl_kernel)) void +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_parallel_for_work_group(KernelType KernelFunc, kernel_handler KH) { +#else + kernel_parallel_for_work_group(const KernelType &KernelFunc, + kernel_handler KH) { +#endif + KernelFunc(detail::Builder::getElement(detail::declptr()), KH); + } #endif public: @@ -1006,7 +1061,13 @@ class __SYCL_EXPORT handler { using NameT = typename detail::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - kernel_single_task(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_single_task(KernelFunc, KH); + } else { + kernel_single_task(KernelFunc); + } #else // No need to check if range is out of INT_MAX limits as it's compile-time // known constant. @@ -1120,7 +1181,13 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkItems; (void)WorkItemOffset; - kernel_parallel_for(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for(KernelFunc, KH); + } else { + kernel_parallel_for(KernelFunc); + } #else detail::checkValueRange(NumWorkItems, WorkItemOffset); MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); @@ -1152,7 +1219,13 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; #ifdef __SYCL_DEVICE_ONLY__ (void)ExecutionRange; - kernel_parallel_for(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for(KernelFunc, KH); + } else { + kernel_parallel_for(KernelFunc); + } #else detail::checkValueRange(ExecutionRange); MNDRDesc.set(std::move(ExecutionRange)); @@ -1373,7 +1446,13 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkGroups; - kernel_parallel_for_work_group(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for_work_group(KernelFunc, KH); + } else { + kernel_parallel_for_work_group(KernelFunc); + } #else detail::checkValueRange(NumWorkGroups); MNDRDesc.setNumWorkGroups(NumWorkGroups); @@ -1407,7 +1486,13 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkGroups; (void)WorkGroupSize; - kernel_parallel_for_work_group(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for_work_group(KernelFunc, KH); + } else { + kernel_parallel_for_work_group(KernelFunc); + } #else nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); @@ -1501,8 +1586,20 @@ class __SYCL_EXPORT handler { typename detail::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; - kernel_single_task(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_single_task(KernelFunc, KH); + } else { + kernel_single_task(KernelFunc); + } #else + // if (detail::kernelHandlerIsLastElementTypeOfKernel() && + // MQueue->get_device().is_host()) { + // throw cl::sycl::feature_not_supported( + // "kernel_handler is not supported by host device.", + // PI_INVALID_OPERATION); + // } // No need to check if range is out of INT_MAX limits as it's compile-time // known constant MNDRDesc.set(range<1>{1}); @@ -1543,7 +1640,13 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NumWorkItems; - kernel_parallel_for(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for(KernelFunc, KH); + } else { + kernel_parallel_for(KernelFunc); + } #else detail::checkValueRange(NumWorkItems); MNDRDesc.set(std::move(NumWorkItems)); @@ -1579,7 +1682,13 @@ class __SYCL_EXPORT handler { (void)Kernel; (void)NumWorkItems; (void)WorkItemOffset; - kernel_parallel_for(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for(KernelFunc, KH); + } else { + kernel_parallel_for(KernelFunc); + } #else detail::checkValueRange(NumWorkItems, WorkItemOffset); MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); @@ -1615,8 +1724,20 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NDRange; - kernel_parallel_for(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for(KernelFunc, KH); + } else { + kernel_parallel_for(KernelFunc); + } #else + // if (detail::kernelHandlerIsLastElementTypeOfKernel() && + // MQueue->get_device().is_host()) { + // throw cl::sycl::feature_not_supported( + // "kernel_handler is not supported by host device.", + // PI_INVALID_OPERATION); + // } detail::checkValueRange(NDRange); MNDRDesc.set(std::move(NDRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); @@ -1655,7 +1776,13 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NumWorkGroups; - kernel_parallel_for_work_group(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for_work_group(KernelFunc, KH); + } else { + kernel_parallel_for_work_group(KernelFunc); + } #else detail::checkValueRange(NumWorkGroups); MNDRDesc.setNumWorkGroups(NumWorkGroups); @@ -1694,7 +1821,13 @@ class __SYCL_EXPORT handler { (void)Kernel; (void)NumWorkGroups; (void)WorkGroupSize; - kernel_parallel_for_work_group(KernelFunc); + if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< + KernelType>()) { + kernel_handler KH; + kernel_parallel_for_work_group(KernelFunc, KH); + } else { + kernel_parallel_for_work_group(KernelFunc); + } #else nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); diff --git a/sycl/include/CL/sycl/kernel_handler.hpp b/sycl/include/CL/sycl/kernel_handler.hpp new file mode 100644 index 0000000000000..2de0986c13459 --- /dev/null +++ b/sycl/include/CL/sycl/kernel_handler.hpp @@ -0,0 +1,72 @@ +//==------ kernel_handler.hpp -- SYCL standard header file -----*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { +namespace detail { + +// This guard is needed because the libsycl.so can compiled with C++ <=14 +// while the code requires C++17. This code is not supposed to be used by the +// libsycl.so so it should not be a problem. +#if __cplusplus > 201402L +template struct specialization_id_name_generator {}; +#endif + +} // namespace detail + +/// Reading the value of a specialization constant +/// +/// \ingroup sycl_api +class kernel_handler { +public: +#if __cplusplus > 201402L + template + typename std::remove_reference_t get_specialization_constant() { +#ifdef __SYCL_DEVICE_ONLY__ + return getSpecializationConstantOnDevice(); +#else + // throw cl::sycl::feature_not_supported( + // "kernel_handler::get_specialization_constant() is not supported by " + // "host device.", + // PI_INVALID_OPERATION); +#endif // __SYCL_DEVICE_ONLY__ + } +#endif // __cplusplus > 201402L + + void __init_specialization_constants_buffer( + char *_SpecializationConstantsBuffer = nullptr) { + SpecializationConstantsBuffer = _SpecializationConstantsBuffer; + } + +private: +#ifdef __SYCL_DEVICE_ONLY__ + template , + std::enable_if_t>> + T getSpecializationConstantOnDevice() { + const char *SymbolicID = __builtin_unique_stable_name( + detail::specialization_id_name_generator); + return __sycl_getScalar2020SpecConstantValue( + SymbolicID, &S, SpecializationConstantsBuffer); + } + template , + std::enable_if_t>> + T getSpecializationConstantOnDevice() { + const char *SymbolicID = __builtin_unique_stable_name( + detail::specialization_id_name_generator); + return __sycl_getComposite2020SpecConstantValue( + SymbolicID, &S, SpecializationConstantsBuffer); + } +#endif // __SYCL_DEVICE_ONLY__ + + char *SpecializationConstantsBuffer = nullptr; +}; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) \ No newline at end of file diff --git a/sycl/include/CL/sycl/specialization_id.hpp b/sycl/include/CL/sycl/specialization_id.hpp new file mode 100644 index 0000000000000..52722af436d47 --- /dev/null +++ b/sycl/include/CL/sycl/specialization_id.hpp @@ -0,0 +1,35 @@ +//==---- specialization_id.hpp -- SYCL standard header file ----*- C++ -*---==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#pragma once + +__SYCL_INLINE_NAMESPACE(cl) { +namespace sycl { + +/// Declaring a specialization constant +/// +/// \ingroup sycl_api +template class specialization_id { +public: + using value_type = T; + + template + explicit constexpr specialization_id(Args &&... args) + : SpecializationConstantValue(args...) {} + + specialization_id(const specialization_id &rhs) = delete; + specialization_id(specialization_id &&rhs) = delete; + specialization_id &operator=(const specialization_id &rhs) = delete; + specialization_id &operator=(specialization_id &&rhs) = delete; + +private: + T SpecializationConstantValue; +}; + +} // namespace sycl +} // __SYCL_INLINE_NAMESPACE(cl) \ No newline at end of file diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp new file mode 100644 index 0000000000000..59de7e2a160c3 --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp @@ -0,0 +1,27 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %t.out + +#include + +int main() { + sycl::queue q; + + q.submit([&](sycl::handler &cgh) { + cgh.single_task([=](sycl::kernel_handler kh) {}); + }); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<3>(3, 3, 3), + [](sycl::item<3> it, sycl::kernel_handler kh) {}); + }); + + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for_work_group( + sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2), + [=](sycl::group<3> myGroup, sycl::kernel_handler kh) { + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + }); + }); +} \ No newline at end of file From 9fd54741b2e6196601d9d4954f45a6c309333f72 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 19 Mar 2021 21:04:23 +0300 Subject: [PATCH 02/14] Code style fixes --- sycl/include/CL/sycl/handler.hpp | 12 ------------ sycl/include/CL/sycl/kernel_handler.hpp | 10 +++++----- sycl/include/CL/sycl/specialization_id.hpp | 2 +- .../specialization_constants/kernel_handler.cpp | 2 +- 4 files changed, 7 insertions(+), 19 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 6bad0a3cba74f..484388aa84885 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -1594,12 +1594,6 @@ class __SYCL_EXPORT handler { kernel_single_task(KernelFunc); } #else - // if (detail::kernelHandlerIsLastElementTypeOfKernel() && - // MQueue->get_device().is_host()) { - // throw cl::sycl::feature_not_supported( - // "kernel_handler is not supported by host device.", - // PI_INVALID_OPERATION); - // } // No need to check if range is out of INT_MAX limits as it's compile-time // known constant MNDRDesc.set(range<1>{1}); @@ -1732,12 +1726,6 @@ class __SYCL_EXPORT handler { kernel_parallel_for(KernelFunc); } #else - // if (detail::kernelHandlerIsLastElementTypeOfKernel() && - // MQueue->get_device().is_host()) { - // throw cl::sycl::feature_not_supported( - // "kernel_handler is not supported by host device.", - // PI_INVALID_OPERATION); - // } detail::checkValueRange(NDRange); MNDRDesc.set(std::move(NDRange)); MKernel = detail::getSyclObjImpl(std::move(Kernel)); diff --git a/sycl/include/CL/sycl/kernel_handler.hpp b/sycl/include/CL/sycl/kernel_handler.hpp index 2de0986c13459..16119492dd6f6 100644 --- a/sycl/include/CL/sycl/kernel_handler.hpp +++ b/sycl/include/CL/sycl/kernel_handler.hpp @@ -32,10 +32,10 @@ class kernel_handler { #ifdef __SYCL_DEVICE_ONLY__ return getSpecializationConstantOnDevice(); #else - // throw cl::sycl::feature_not_supported( - // "kernel_handler::get_specialization_constant() is not supported by " - // "host device.", - // PI_INVALID_OPERATION); + throw cl::sycl::feature_not_supported( + "kernel_handler::get_specialization_constant() is not supported by " + "host device.", + PI_INVALID_OPERATION); #endif // __SYCL_DEVICE_ONLY__ } #endif // __cplusplus > 201402L @@ -69,4 +69,4 @@ class kernel_handler { }; } // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) \ No newline at end of file +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/specialization_id.hpp b/sycl/include/CL/sycl/specialization_id.hpp index 52722af436d47..62cb00736ffff 100644 --- a/sycl/include/CL/sycl/specialization_id.hpp +++ b/sycl/include/CL/sycl/specialization_id.hpp @@ -32,4 +32,4 @@ template class specialization_id { }; } // namespace sycl -} // __SYCL_INLINE_NAMESPACE(cl) \ No newline at end of file +} // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp index 59de7e2a160c3..998dd8b013a2e 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp @@ -24,4 +24,4 @@ int main() { myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); }); }); -} \ No newline at end of file +} From da5af9e553981865443e7683675c5e5c800052f4 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 24 Mar 2021 02:47:58 +0300 Subject: [PATCH 03/14] Fix pre-commit --- sycl/include/CL/sycl/detail/cg_types.hpp | 56 +++---- sycl/include/CL/sycl/handler.hpp | 153 +++++++++++------- sycl/include/CL/sycl/id.hpp | 10 +- sycl/include/CL/sycl/item.hpp | 11 +- sycl/include/CL/sycl/kernel_handler.hpp | 12 +- sycl/include/CL/sycl/specialization_id.hpp | 4 +- .../function-pointers/fp-as-kernel-arg.cpp | 2 +- .../pass-fp-through-buffer.cpp | 2 +- .../kernel_handler.cpp | 4 + ...clude-gnu14.cpp => sycl-include-gnu17.cpp} | 2 +- 10 files changed, 155 insertions(+), 101 deletions(-) rename sycl/test/regression/{sycl-include-gnu14.cpp => sycl-include-gnu17.cpp} (74%) diff --git a/sycl/include/CL/sycl/detail/cg_types.hpp b/sycl/include/CL/sycl/detail/cg_types.hpp index c306b4d758bd6..2a51efe0dc831 100644 --- a/sycl/include/CL/sycl/detail/cg_types.hpp +++ b/sycl/include/CL/sycl/detail/cg_types.hpp @@ -123,30 +123,30 @@ class NDRDescT { size_t Dims; }; -template -struct KernelArgsInfo : KernelArgsInfo {}; - -template -struct KernelArgsInfo { - constexpr static size_t argsCount() { return sizeof...(Args); }; - constexpr static bool hasArgs() { return sizeof...(Args) > 0; }; - template struct ArgType { - typedef typename std::tuple_element>::type type; - }; +template struct check_fn_signature { + static_assert(std::integral_constant::value, + "Second template parameter is required to be of function type"); }; -template -std::enable_if_t::hasArgs(), - bool> constexpr kernelHandlerIsLastElementTypeOfKernel() { - return std::is_same::template ArgType< - KernelArgsInfo::argsCount() - 1>::type, - kernel_handler>::value; -} +template +struct check_fn_signature { +private: + template + static constexpr auto check(T *) -> typename std::is_same< + decltype(std::declval().operator()(std::declval()...)), + RetT>::type; + + template static constexpr std::false_type check(...); + + using type = decltype(check(0)); + +public: + static constexpr bool value = type::value; +}; -template -std::enable_if_t::hasArgs(), - bool> constexpr kernelHandlerIsLastElementTypeOfKernel() { - return false; +template +static constexpr bool check_kernel_arg_types() { + return check_fn_signature, void(Args...)>::value; } // The pure virtual class aimed to store lambda/functors of any type. @@ -402,8 +402,10 @@ class HostKernel : public HostKernelBase { ~HostKernel() = default; private: + // TODO: replace run* funcs below with "constexpr if" when DPC++ RT switched + // to C++17 template - std::enable_if_t(), + std::enable_if_t(), void> runKernelWithoutArg() { kernel_handler KH; @@ -411,23 +413,21 @@ class HostKernel : public HostKernelBase { } template - std::enable_if_t(), - void> + std::enable_if_t(), void> runKernelWithoutArg() { MKernel(); } template - std::enable_if_t(), - void> + std::enable_if_t< + detail::check_kernel_arg_types(), void> runKernelWithArg(ArgType Arg) { kernel_handler KH; MKernel(Arg, KH); } template - std::enable_if_t(), - void> + std::enable_if_t(), void> runKernelWithArg(ArgType Arg) { MKernel(Arg); } diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 484388aa84885..0ffdf48ad8d64 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -116,27 +116,6 @@ template struct get_kernel_wrapper_name_t { typename get_kernel_name_t::name>; }; -template struct check_fn_signature { - static_assert(std::integral_constant::value, - "Second template parameter is required to be of function type"); -}; - -template -struct check_fn_signature { -private: - template - static constexpr auto check(T *) -> typename std::is_same< - decltype(std::declval().operator()(std::declval()...)), - RetT>::type; - - template static constexpr std::false_type check(...); - - using type = decltype(check(0)); - -public: - static constexpr bool value = type::value; -}; - __SYCL_EXPORT device getDeviceFromHandler(handler &); #if __SYCL_ID_QUERIES_FIT_IN_INT__ @@ -167,6 +146,66 @@ checkValueRangeImpl(ValT V) { } #endif +template +std::enable_if_t::value, bool> +checkKernelArgTypesIgnoreFirstVoid() { + return detail::check_kernel_arg_types(); +} + +template +std::enable_if_t::value, bool> +checkKernelArgTypesIgnoreFirstVoid() { + return detail::check_kernel_arg_types(); +} + +template +class RangeRoundedLambda; + +template +class RangeRoundedLambda< + TransformedArgType, KernelType, Dims, + typename std::enable_if_t()>> { +public: + RangeRoundedLambda(const KernelType &KernelFunc, range NumWorkItems) + : MKernelFunc(KernelFunc), MNumWorkItems(NumWorkItems) {} + + void operator()(TransformedArgType Arg) const { + if (Arg[0] >= MNumWorkItems[0]) + return; + Arg.set_allowed_range(MNumWorkItems); + kernel_handler KH; + MKernelFunc(Arg, KH); + } + +private: + const KernelType &MKernelFunc; + range MNumWorkItems; +}; + +template +class RangeRoundedLambda< + TransformedArgType, KernelType, Dims, + typename std::enable_if_t< + detail::check_kernel_arg_types()>> { +public: + RangeRoundedLambda(const KernelType &KernelFunc, range NumWorkItems) + : MKernelFunc(KernelFunc), MNumWorkItems(NumWorkItems) {} + + void operator()(TransformedArgType Arg) const { + if (Arg[0] >= MNumWorkItems[0]) + return; + Arg.set_allowed_range(MNumWorkItems); + MKernelFunc(Arg); + } + +private: + KernelType MKernelFunc; + range MNumWorkItems; +}; + template typename detail::enable_if_t>::value || std::is_same>::value> @@ -515,8 +554,11 @@ class __SYCL_EXPORT handler { template void StoreLambda(KernelType KernelFunc) { - if (detail::kernelHandlerIsLastElementTypeOfKernel() && - MIsHost) { + // TODO: replace detail::checkKernelArgTypesIgnoreFirstVoid with + // "constexpr if" when DPC++ RT switched to C++17 + auto ContainsKernelHandler = + detail::checkKernelArgTypesIgnoreFirstVoid(); + if (ContainsKernelHandler && MIsHost) { throw cl::sycl::feature_not_supported( "kernel_handler is not supported by host device.", PI_INVALID_OPERATION); @@ -813,18 +855,11 @@ class __SYCL_EXPORT handler { if (getenv("SYCL_PARALLEL_FOR_RANGE_ROUNDING_TRACE") != nullptr) std::cout << "parallel_for range adjusted from " << NumWorkItems[0] << " to " << NewValX << std::endl; - auto Wrapper = [=](TransformedArgType Arg) { - if (Arg[0] >= NumWorkItems[0]) - return; - Arg.set_allowed_range(NumWorkItems); - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { - kernel_handler KH; - KernelFunc(Arg, KH); - } else { - KernelFunc(Arg); - } - }; + + // TODO: replace detail::RangeRoundedLambda with + // "constexpr if" when DPC++ RT switched to C++17 + detail::RangeRoundedLambda Wrapper{ + KernelFunc, NumWorkItems}; range AdjustedRange = NumWorkItems; AdjustedRange.set_range_dim0(NewValX); @@ -843,8 +878,8 @@ class __SYCL_EXPORT handler { { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkItems; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types< + KernelType, TransformedArgType, kernel_handler>()) { kernel_handler KH; kernel_parallel_for(KernelFunc, KH); } else { @@ -1061,8 +1096,8 @@ class __SYCL_EXPORT handler { using NameT = typename detail::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_single_task(KernelFunc, KH); } else { @@ -1181,8 +1216,8 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkItems; (void)WorkItemOffset; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for(KernelFunc, KH); } else { @@ -1219,8 +1254,8 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; #ifdef __SYCL_DEVICE_ONLY__ (void)ExecutionRange; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for(KernelFunc, KH); } else { @@ -1446,8 +1481,8 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkGroups; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for_work_group(KernelFunc, KH); } else { @@ -1486,8 +1521,8 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkGroups; (void)WorkGroupSize; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for_work_group(KernelFunc, KH); } else { @@ -1586,8 +1621,8 @@ class __SYCL_EXPORT handler { typename detail::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_single_task(KernelFunc, KH); } else { @@ -1634,8 +1669,8 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NumWorkItems; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for(KernelFunc, KH); } else { @@ -1676,8 +1711,8 @@ class __SYCL_EXPORT handler { (void)Kernel; (void)NumWorkItems; (void)WorkItemOffset; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for(KernelFunc, KH); } else { @@ -1718,8 +1753,8 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NDRange; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for(KernelFunc, KH); } else { @@ -1764,8 +1799,8 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NumWorkGroups; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for_work_group(KernelFunc, KH); } else { @@ -1809,8 +1844,8 @@ class __SYCL_EXPORT handler { (void)Kernel; (void)NumWorkGroups; (void)WorkGroupSize; - if constexpr (detail::kernelHandlerIsLastElementTypeOfKernel< - KernelType>()) { + if constexpr (detail::check_kernel_arg_types()) { kernel_handler KH; kernel_parallel_for_work_group(KernelFunc, KH); } else { diff --git a/sycl/include/CL/sycl/id.hpp b/sycl/include/CL/sycl/id.hpp index 151657aa661e8..8ea62a368c7c1 100644 --- a/sycl/include/CL/sycl/id.hpp +++ b/sycl/include/CL/sycl/id.hpp @@ -19,6 +19,12 @@ namespace sycl { template class range; template class item; +namespace detail { +template +class RangeRoundedLambda; +} // namespace detail + /// A unique identifier of an item in an index space. /// /// \ingroup sycl_api @@ -241,7 +247,9 @@ template class id : public detail::array { #undef __SYCL_GEN_OPT private: - friend class handler; + template + friend class detail::RangeRoundedLambda; void set_allowed_range(range rnwi) { (void)rnwi[0]; } }; diff --git a/sycl/include/CL/sycl/item.hpp b/sycl/include/CL/sycl/item.hpp index a8aa9c8ef09f5..3e58f3ec9cd3b 100644 --- a/sycl/include/CL/sycl/item.hpp +++ b/sycl/include/CL/sycl/item.hpp @@ -21,7 +21,11 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { class Builder; -} + +template +class RangeRoundedLambda; +} // namespace detail template class id; template class range; @@ -118,7 +122,10 @@ template class item { friend class detail::Builder; private: - friend class handler; + template + friend class detail::RangeRoundedLambda; + void set_allowed_range(const range rnwi) { MImpl.MExtent = rnwi; } detail::ItemBase MImpl; diff --git a/sycl/include/CL/sycl/kernel_handler.hpp b/sycl/include/CL/sycl/kernel_handler.hpp index 16119492dd6f6..80f46ae5a8445 100644 --- a/sycl/include/CL/sycl/kernel_handler.hpp +++ b/sycl/include/CL/sycl/kernel_handler.hpp @@ -40,12 +40,12 @@ class kernel_handler { } #endif // __cplusplus > 201402L +private: void __init_specialization_constants_buffer( - char *_SpecializationConstantsBuffer = nullptr) { - SpecializationConstantsBuffer = _SpecializationConstantsBuffer; + char *SpecializationConstantsBuffer = nullptr) { + MSpecializationConstantsBuffer = SpecializationConstantsBuffer; } -private: #ifdef __SYCL_DEVICE_ONLY__ template , std::enable_if_t>> @@ -53,7 +53,7 @@ class kernel_handler { const char *SymbolicID = __builtin_unique_stable_name( detail::specialization_id_name_generator); return __sycl_getScalar2020SpecConstantValue( - SymbolicID, &S, SpecializationConstantsBuffer); + SymbolicID, &S, MSpecializationConstantsBuffer); } template , std::enable_if_t>> @@ -61,11 +61,11 @@ class kernel_handler { const char *SymbolicID = __builtin_unique_stable_name( detail::specialization_id_name_generator); return __sycl_getComposite2020SpecConstantValue( - SymbolicID, &S, SpecializationConstantsBuffer); + SymbolicID, &S, MSpecializationConstantsBuffer); } #endif // __SYCL_DEVICE_ONLY__ - char *SpecializationConstantsBuffer = nullptr; + char *MSpecializationConstantsBuffer = nullptr; }; } // namespace sycl diff --git a/sycl/include/CL/sycl/specialization_id.hpp b/sycl/include/CL/sycl/specialization_id.hpp index 62cb00736ffff..b59865c2b94da 100644 --- a/sycl/include/CL/sycl/specialization_id.hpp +++ b/sycl/include/CL/sycl/specialization_id.hpp @@ -20,7 +20,7 @@ template class specialization_id { template explicit constexpr specialization_id(Args &&... args) - : SpecializationConstantValue(args...) {} + : MSpecializationConstantValue(args...) {} specialization_id(const specialization_id &rhs) = delete; specialization_id(specialization_id &&rhs) = delete; @@ -28,7 +28,7 @@ template class specialization_id { specialization_id &operator=(specialization_id &&rhs) = delete; private: - T SpecializationConstantValue; + T MSpecializationConstantValue; }; } // namespace sycl diff --git a/sycl/test/function-pointers/fp-as-kernel-arg.cpp b/sycl/test/function-pointers/fp-as-kernel-arg.cpp index 5973c2b419bc8..65b6d0c6be988 100644 --- a/sycl/test/function-pointers/fp-as-kernel-arg.cpp +++ b/sycl/test/function-pointers/fp-as-kernel-arg.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -std=c++14 -fsycl %s -o %t.out +// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -fsycl %s -o %t.out // RUN: %RUN_ON_HOST %t.out // FIXME: This test should use runtime early exit once correct check for // corresponding extension is implemented diff --git a/sycl/test/function-pointers/pass-fp-through-buffer.cpp b/sycl/test/function-pointers/pass-fp-through-buffer.cpp index 60ca4cd5f4c71..aef94716ecd85 100644 --- a/sycl/test/function-pointers/pass-fp-through-buffer.cpp +++ b/sycl/test/function-pointers/pass-fp-through-buffer.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -std=c++14 -fsycl %s -o %t.out +// RUN: %clangxx -Xclang -fsycl-allow-func-ptr -fsycl %s -o %t.out // RUN: %RUN_ON_HOST %t.out // FIXME: This test should use runtime early exit once correct check for // corresponding extension is implemented diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp index 998dd8b013a2e..7576f630d6f88 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp @@ -16,6 +16,10 @@ int main() { [](sycl::item<3> it, sycl::kernel_handler kh) {}); }); + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for(sycl::range<3>(3, 3, 3), [](auto it) {}); + }); + q.submit([&](sycl::handler &cgh) { cgh.parallel_for_work_group( sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2), diff --git a/sycl/test/regression/sycl-include-gnu14.cpp b/sycl/test/regression/sycl-include-gnu17.cpp similarity index 74% rename from sycl/test/regression/sycl-include-gnu14.cpp rename to sycl/test/regression/sycl-include-gnu17.cpp index 0147acf143de5..f56d262c1f0bd 100644 --- a/sycl/test/regression/sycl-include-gnu14.cpp +++ b/sycl/test/regression/sycl-include-gnu17.cpp @@ -1,4 +1,4 @@ -// RUN: %clangxx -std=gnu++14 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %clangxx -std=gnu++17 -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %RUN_ON_HOST %t.out // UNSUPPORTED: system-windows From 4f2a03c541c530a5ef8dd664de345531c582d5ac Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 24 Mar 2021 03:19:11 +0300 Subject: [PATCH 04/14] Minor changes --- sycl/include/CL/sycl/handler.hpp | 6 +++--- .../basic_tests/specialization_constants/kernel_handler.cpp | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 0ffdf48ad8d64..1adba21df282a 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -169,7 +169,7 @@ class RangeRoundedLambda< typename std::enable_if_t()>> { public: - RangeRoundedLambda(const KernelType &KernelFunc, range NumWorkItems) + RangeRoundedLambda(KernelType KernelFunc, range NumWorkItems) : MKernelFunc(KernelFunc), MNumWorkItems(NumWorkItems) {} void operator()(TransformedArgType Arg) const { @@ -181,7 +181,7 @@ class RangeRoundedLambda< } private: - const KernelType &MKernelFunc; + KernelType MKernelFunc; range MNumWorkItems; }; @@ -191,7 +191,7 @@ class RangeRoundedLambda< typename std::enable_if_t< detail::check_kernel_arg_types()>> { public: - RangeRoundedLambda(const KernelType &KernelFunc, range NumWorkItems) + RangeRoundedLambda(KernelType KernelFunc, range NumWorkItems) : MKernelFunc(KernelFunc), MNumWorkItems(NumWorkItems) {} void operator()(TransformedArgType Arg) const { diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp index 7576f630d6f88..86bb69a86706d 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp @@ -17,11 +17,11 @@ int main() { }); q.submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<3>(3, 3, 3), [](auto it) {}); + cgh.parallel_for(sycl::range<3>(3, 3, 3), [](auto it) {}); }); q.submit([&](sycl::handler &cgh) { - cgh.parallel_for_work_group( + cgh.parallel_for_work_group( sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2), [=](sycl::group<3> myGroup, sycl::kernel_handler kh) { myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); From 90dbbe330b0a3f77d516d6cdcae138dc5d4033d5 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Wed, 24 Mar 2021 16:28:09 +0300 Subject: [PATCH 05/14] Fix build failure on Windows --- sycl/include/CL/sycl/detail/cg_types.hpp | 125 +++++++++++------------ 1 file changed, 62 insertions(+), 63 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg_types.hpp b/sycl/include/CL/sycl/detail/cg_types.hpp index 2a51efe0dc831..931b07b49fac4 100644 --- a/sycl/include/CL/sycl/detail/cg_types.hpp +++ b/sycl/include/CL/sycl/detail/cg_types.hpp @@ -149,6 +149,37 @@ static constexpr bool check_kernel_arg_types() { return check_fn_signature, void(Args...)>::value; } +// TODO: replace run* funcs below with "constexpr if" when DPC++ RT switched +// to C++17 +template ()> * = nullptr> +static constexpr void runKernelWithoutArg(KernelType KernelName) { + kernel_handler KH; + KernelName(KH); +} + +template < + typename KernelType, + typename std::enable_if_t()> * = nullptr> +static constexpr void runKernelWithoutArg(KernelType KernelName) { + KernelName(); +} + +template ()> * = nullptr> +static constexpr void runKernelWithArg(KernelType KernelName, ArgType Arg) { + kernel_handler KH; + KernelName(Arg, KH); +} + +template ()> * = nullptr> +static constexpr void runKernelWithArg(KernelType KernelName, ArgType Arg) { + KernelName(Arg); +} + // The pure virtual class aimed to store lambda/functors of any type. class HostKernelBase { public: @@ -224,7 +255,7 @@ class HostKernel : public HostKernelBase { template typename detail::enable_if_t::value> runOnHost(const NDRDescT &) { - runKernelWithoutArg(); + runKernelWithoutArg(MKernel); } template @@ -245,19 +276,18 @@ class HostKernel : public HostKernelBase { UpperBound[I] = Range[I] + Offset[I]; } - detail::NDLoop::iterate(/*LowerBound=*/Offset, Stride, UpperBound, - [&](const sycl::id &ID) { - sycl::item Item = - IDBuilder::createItem( - Range, ID, Offset); - - if (StoreLocation) { - store_id(&ID); - store_item(&Item); - } - runKernelWithArg &, - decltype(MKernel)>(ID); - }); + detail::NDLoop::iterate( + /*LowerBound=*/Offset, Stride, UpperBound, + [&](const sycl::id &ID) { + sycl::item Item = + IDBuilder::createItem(Range, ID, Offset); + + if (StoreLocation) { + store_id(&ID); + store_item(&Item); + } + runKernelWithArg &>(MKernel, ID); + }); } template @@ -281,8 +311,8 @@ class HostKernel : public HostKernelBase { store_id(&ID); store_item(&ItemWithOffset); } - runKernelWithArg, decltype(MKernel)>( - Item); + runKernelWithArg>(MKernel, + Item); }); } @@ -305,20 +335,19 @@ class HostKernel : public HostKernelBase { UpperBound[I] = Range[I] + Offset[I]; } - detail::NDLoop::iterate(/*LowerBound=*/Offset, Stride, UpperBound, - [&](const sycl::id &ID) { - sycl::item Item = - IDBuilder::createItem( - Range, ID, Offset); - - if (StoreLocation) { - store_id(&ID); - store_item(&Item); - } - runKernelWithArg< - sycl::item, - decltype(MKernel)>(Item); - }); + detail::NDLoop::iterate( + /*LowerBound=*/Offset, Stride, UpperBound, + [&](const sycl::id &ID) { + sycl::item Item = + IDBuilder::createItem(Range, ID, Offset); + + if (StoreLocation) { + store_id(&ID); + store_item(&Item); + } + runKernelWithArg>( + MKernel, Item); + }); } template @@ -367,7 +396,8 @@ class HostKernel : public HostKernelBase { auto g = NDItem.get_group(); store_group(&g); } - runKernelWithArg, decltype(MKernel)>(NDItem); + runKernelWithArg>(MKernel, + NDItem); }); }); } @@ -395,42 +425,11 @@ class HostKernel : public HostKernelBase { detail::NDLoop::iterate(NGroups, [&](const id &GroupID) { sycl::group Group = IDBuilder::createGroup(GlobalSize, LocalSize, NGroups, GroupID); - runKernelWithArg, decltype(MKernel)>(Group); + runKernelWithArg>(MKernel, Group); }); } ~HostKernel() = default; - -private: - // TODO: replace run* funcs below with "constexpr if" when DPC++ RT switched - // to C++17 - template - std::enable_if_t(), - void> - runKernelWithoutArg() { - kernel_handler KH; - MKernel(KH); - } - - template - std::enable_if_t(), void> - runKernelWithoutArg() { - MKernel(); - } - - template - std::enable_if_t< - detail::check_kernel_arg_types(), void> - runKernelWithArg(ArgType Arg) { - kernel_handler KH; - MKernel(Arg, KH); - } - - template - std::enable_if_t(), void> - runKernelWithArg(ArgType Arg) { - MKernel(Arg); - } }; } // namespace detail From fb62c8b92a89d2115d523bd83220bd35ff06355e Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Thu, 25 Mar 2021 18:54:26 +0300 Subject: [PATCH 06/14] Fix CR commnets --- sycl/include/CL/sycl/detail/cg_types.hpp | 69 +++-- sycl/include/CL/sycl/handler.hpp | 285 ++++++++---------- sycl/include/CL/sycl/id.hpp | 10 +- sycl/include/CL/sycl/item.hpp | 8 +- sycl/include/CL/sycl/kernel_handler.hpp | 4 +- .../kernel_handler.cpp | 31 -- .../kernel_lambda_with_kernel_handler_arg.cpp | 104 +++++++ 7 files changed, 281 insertions(+), 230 deletions(-) delete mode 100644 sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp create mode 100644 sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp diff --git a/sycl/include/CL/sycl/detail/cg_types.hpp b/sycl/include/CL/sycl/detail/cg_types.hpp index 931b07b49fac4..e28db0906c837 100644 --- a/sycl/include/CL/sycl/detail/cg_types.hpp +++ b/sycl/include/CL/sycl/detail/cg_types.hpp @@ -145,38 +145,56 @@ struct check_fn_signature { }; template -static constexpr bool check_kernel_arg_types() { +static constexpr bool check_kernel_lambda_takes_args() { return check_fn_signature, void(Args...)>::value; } -// TODO: replace run* funcs below with "constexpr if" when DPC++ RT switched -// to C++17 -template ()> * = nullptr> -static constexpr void runKernelWithoutArg(KernelType KernelName) { +// Type traits to find out if kernal lambda has kernel_handler argument + +template ::value> + * = nullptr> +constexpr bool isKernelLambdaCallableWithKernelHandler() { + return check_kernel_lambda_takes_args(); +} + +template ::value> + * = nullptr> +constexpr bool isKernelLambdaCallableWithKernelHandler() { + return check_kernel_lambda_takes_args(); +} + +// Helpers for running kernel lambda on the host device + +template ()> * = nullptr> +constexpr void runKernelWithoutArg(KernelType KernelName) { kernel_handler KH; KernelName(KH); } -template < - typename KernelType, - typename std::enable_if_t()> * = nullptr> -static constexpr void runKernelWithoutArg(KernelType KernelName) { +template ()> * = nullptr> +constexpr void runKernelWithoutArg(KernelType KernelName) { KernelName(); } -template ()> * = nullptr> -static constexpr void runKernelWithArg(KernelType KernelName, ArgType Arg) { +template ()> * = nullptr> +constexpr void runKernelWithArg(KernelType KernelName, ArgType Arg) { kernel_handler KH; KernelName(Arg, KH); } -template ()> * = nullptr> -static constexpr void runKernelWithArg(KernelType KernelName, ArgType Arg) { +template ()> * = nullptr> +constexpr void runKernelWithArg(KernelType KernelName, ArgType Arg) { KernelName(Arg); } @@ -255,7 +273,7 @@ class HostKernel : public HostKernelBase { template typename detail::enable_if_t::value> runOnHost(const NDRDescT &) { - runKernelWithoutArg(MKernel); + runKernelWithoutArg(MKernel); } template @@ -286,7 +304,7 @@ class HostKernel : public HostKernelBase { store_id(&ID); store_item(&Item); } - runKernelWithArg &>(MKernel, ID); + runKernelWithArg &>(MKernel, ID); }); } @@ -311,8 +329,7 @@ class HostKernel : public HostKernelBase { store_id(&ID); store_item(&ItemWithOffset); } - runKernelWithArg>(MKernel, - Item); + runKernelWithArg>(MKernel, Item); }); } @@ -345,8 +362,7 @@ class HostKernel : public HostKernelBase { store_id(&ID); store_item(&Item); } - runKernelWithArg>( - MKernel, Item); + runKernelWithArg>(MKernel, Item); }); } @@ -396,8 +412,7 @@ class HostKernel : public HostKernelBase { auto g = NDItem.get_group(); store_group(&g); } - runKernelWithArg>(MKernel, - NDItem); + runKernelWithArg>(MKernel, NDItem); }); }); } @@ -425,7 +440,7 @@ class HostKernel : public HostKernelBase { detail::NDLoop::iterate(NGroups, [&](const id &GroupID) { sycl::group Group = IDBuilder::createGroup(GlobalSize, LocalSize, NGroups, GroupID); - runKernelWithArg>(MKernel, Group); + runKernelWithArg>(MKernel, Group); }); } diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 1adba21df282a..7c3b47585d303 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -146,66 +146,6 @@ checkValueRangeImpl(ValT V) { } #endif -template -std::enable_if_t::value, bool> -checkKernelArgTypesIgnoreFirstVoid() { - return detail::check_kernel_arg_types(); -} - -template -std::enable_if_t::value, bool> -checkKernelArgTypesIgnoreFirstVoid() { - return detail::check_kernel_arg_types(); -} - -template -class RangeRoundedLambda; - -template -class RangeRoundedLambda< - TransformedArgType, KernelType, Dims, - typename std::enable_if_t()>> { -public: - RangeRoundedLambda(KernelType KernelFunc, range NumWorkItems) - : MKernelFunc(KernelFunc), MNumWorkItems(NumWorkItems) {} - - void operator()(TransformedArgType Arg) const { - if (Arg[0] >= MNumWorkItems[0]) - return; - Arg.set_allowed_range(MNumWorkItems); - kernel_handler KH; - MKernelFunc(Arg, KH); - } - -private: - KernelType MKernelFunc; - range MNumWorkItems; -}; - -template -class RangeRoundedLambda< - TransformedArgType, KernelType, Dims, - typename std::enable_if_t< - detail::check_kernel_arg_types()>> { -public: - RangeRoundedLambda(KernelType KernelFunc, range NumWorkItems) - : MKernelFunc(KernelFunc), MNumWorkItems(NumWorkItems) {} - - void operator()(TransformedArgType Arg) const { - if (Arg[0] >= MNumWorkItems[0]) - return; - Arg.set_allowed_range(MNumWorkItems); - MKernelFunc(Arg); - } - -private: - KernelType MKernelFunc; - range MNumWorkItems; -}; - template typename detail::enable_if_t>::value || std::is_same>::value> @@ -554,11 +494,9 @@ class __SYCL_EXPORT handler { template void StoreLambda(KernelType KernelFunc) { - // TODO: replace detail::checkKernelArgTypesIgnoreFirstVoid with - // "constexpr if" when DPC++ RT switched to C++17 - auto ContainsKernelHandler = - detail::checkKernelArgTypesIgnoreFirstVoid(); - if (ContainsKernelHandler && MIsHost) { + if (detail::isKernelLambdaCallableWithKernelHandler() && + MIsHost) { throw cl::sycl::feature_not_supported( "kernel_handler is not supported by host device.", PI_INVALID_OPERATION); @@ -856,15 +794,13 @@ class __SYCL_EXPORT handler { std::cout << "parallel_for range adjusted from " << NumWorkItems[0] << " to " << NewValX << std::endl; - // TODO: replace detail::RangeRoundedLambda with - // "constexpr if" when DPC++ RT switched to C++17 - detail::RangeRoundedLambda Wrapper{ - KernelFunc, NumWorkItems}; + auto Wrapper = getRangeRoundedKernelLambda( + KernelFunc, NumWorkItems); range AdjustedRange = NumWorkItems; AdjustedRange.set_range_dim0(NewValX); #ifdef __SYCL_DEVICE_ONLY__ - kernel_parallel_for(Wrapper); + kernel_parallel_for_wrapper(Wrapper); #else detail::checkValueRange(AdjustedRange); MNDRDesc.set(std::move(AdjustedRange)); @@ -878,13 +814,7 @@ class __SYCL_EXPORT handler { { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkItems; - if constexpr (detail::check_kernel_arg_types< - KernelType, TransformedArgType, kernel_handler>()) { - kernel_handler KH; - kernel_parallel_for(KernelFunc, KH); - } else { - kernel_parallel_for(KernelFunc); - } + kernel_parallel_for_wrapper(KernelFunc); #else detail::checkValueRange(NumWorkItems); MNDRDesc.set(std::move(NumWorkItems)); @@ -988,6 +918,91 @@ class __SYCL_EXPORT handler { #endif KernelFunc(detail::Builder::getElement(detail::declptr()), KH); } + + // Wrappers for kernel_*** functions above with and without support of + // additional kernel_handler argument. + + // NOTE: to support kernel_handler argument in kernel lambdas, only + // kernel_***_wrapper functions must be called in this code + + // Wrappers for kernel_single_task(...) + + template + std::enable_if_t< + detail::isKernelLambdaCallableWithKernelHandler(), void> +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_single_task_wrapper(KernelType KernelFunc) { +#else + kernel_single_task_wrapper(const KernelType &KernelFunc) { +#endif + kernel_handler KH; + kernel_single_task(KernelFunc, KH); + } + + template + std::enable_if_t< + !detail::isKernelLambdaCallableWithKernelHandler(), void> +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_single_task_wrapper(KernelType KernelFunc) { +#else + kernel_single_task_wrapper(const KernelType &KernelFunc) { +#endif + kernel_single_task(KernelFunc); + } + + // Wrappers for kernel_parallel_for(...) + + template + std::enable_if_t(), + void> +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_parallel_for_wrapper(KernelType KernelFunc) { +#else + kernel_parallel_for_wrapper(const KernelType &KernelFunc) { +#endif + kernel_handler KH; + kernel_parallel_for(KernelFunc, KH); + } + + template + std::enable_if_t(), + void> +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_parallel_for_wrapper(KernelType KernelFunc) { +#else + kernel_parallel_for_wrapper(const KernelType &KernelFunc) { +#endif + kernel_parallel_for(KernelFunc); + } + + // Wrappers for kernel_parallel_for_work_group(...) + + template + std::enable_if_t(), + void> +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_parallel_for_work_group_wrapper(KernelType KernelFunc) { +#else + kernel_parallel_for_work_group_wrapper(const KernelType &KernelFunc) { +#endif + kernel_handler KH; + kernel_parallel_for_work_group(KernelFunc, KH); + } + + template + std::enable_if_t(), + void> +#ifdef __SYCL_NONCONST_FUNCTOR__ + kernel_parallel_for_work_group_wrapper(KernelType KernelFunc) { +#else + kernel_parallel_for_work_group_wrapper(const KernelType &KernelFunc) { +#endif + kernel_parallel_for_work_group(KernelFunc); + } #endif public: @@ -1096,13 +1111,7 @@ class __SYCL_EXPORT handler { using NameT = typename detail::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_single_task(KernelFunc, KH); - } else { - kernel_single_task(KernelFunc); - } + kernel_single_task_wrapper(KernelFunc); #else // No need to check if range is out of INT_MAX limits as it's compile-time // known constant. @@ -1216,13 +1225,7 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkItems; (void)WorkItemOffset; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for(KernelFunc, KH); - } else { - kernel_parallel_for(KernelFunc); - } + kernel_parallel_for_wrapper(KernelFunc); #else detail::checkValueRange(NumWorkItems, WorkItemOffset); MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); @@ -1254,13 +1257,7 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; #ifdef __SYCL_DEVICE_ONLY__ (void)ExecutionRange; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for(KernelFunc, KH); - } else { - kernel_parallel_for(KernelFunc); - } + kernel_parallel_for_wrapper(KernelFunc); #else detail::checkValueRange(ExecutionRange); MNDRDesc.set(std::move(ExecutionRange)); @@ -1481,13 +1478,7 @@ class __SYCL_EXPORT handler { sycl::detail::lambda_arg_type>; #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkGroups; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for_work_group(KernelFunc, KH); - } else { - kernel_parallel_for_work_group(KernelFunc); - } + kernel_parallel_for_work_group_wrapper(KernelFunc); #else detail::checkValueRange(NumWorkGroups); MNDRDesc.setNumWorkGroups(NumWorkGroups); @@ -1521,13 +1512,7 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)NumWorkGroups; (void)WorkGroupSize; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for_work_group(KernelFunc, KH); - } else { - kernel_parallel_for_work_group(KernelFunc); - } + kernel_parallel_for_work_group_wrapper(KernelFunc); #else nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); @@ -1621,13 +1606,7 @@ class __SYCL_EXPORT handler { typename detail::get_kernel_name_t::name; #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_single_task(KernelFunc, KH); - } else { - kernel_single_task(KernelFunc); - } + kernel_single_task(KernelFunc); #else // No need to check if range is out of INT_MAX limits as it's compile-time // known constant @@ -1669,13 +1648,7 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NumWorkItems; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for(KernelFunc, KH); - } else { - kernel_parallel_for(KernelFunc); - } + kernel_parallel_for_wrapper(KernelFunc); #else detail::checkValueRange(NumWorkItems); MNDRDesc.set(std::move(NumWorkItems)); @@ -1711,13 +1684,7 @@ class __SYCL_EXPORT handler { (void)Kernel; (void)NumWorkItems; (void)WorkItemOffset; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for(KernelFunc, KH); - } else { - kernel_parallel_for(KernelFunc); - } + kernel_parallel_for_wrapper(KernelFunc); #else detail::checkValueRange(NumWorkItems, WorkItemOffset); MNDRDesc.set(std::move(NumWorkItems), std::move(WorkItemOffset)); @@ -1753,13 +1720,7 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NDRange; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for(KernelFunc, KH); - } else { - kernel_parallel_for(KernelFunc); - } + kernel_parallel_for_wrapper(KernelFunc); #else detail::checkValueRange(NDRange); MNDRDesc.set(std::move(NDRange)); @@ -1799,13 +1760,7 @@ class __SYCL_EXPORT handler { #ifdef __SYCL_DEVICE_ONLY__ (void)Kernel; (void)NumWorkGroups; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for_work_group(KernelFunc, KH); - } else { - kernel_parallel_for_work_group(KernelFunc); - } + kernel_parallel_for_work_group_wrapper(KernelFunc); #else detail::checkValueRange(NumWorkGroups); MNDRDesc.setNumWorkGroups(NumWorkGroups); @@ -1844,13 +1799,7 @@ class __SYCL_EXPORT handler { (void)Kernel; (void)NumWorkGroups; (void)WorkGroupSize; - if constexpr (detail::check_kernel_arg_types()) { - kernel_handler KH; - kernel_parallel_for_work_group(KernelFunc, KH); - } else { - kernel_parallel_for_work_group(KernelFunc); - } + kernel_parallel_for_work_group_wrapper(KernelFunc); #else nd_range ExecRange = nd_range(NumWorkGroups * WorkGroupSize, WorkGroupSize); @@ -2251,6 +2200,34 @@ class __SYCL_EXPORT handler { access::target); friend class ::MockHandler; + + template < + typename TransformedArgType, int Dims, typename KernelType, + typename std::enable_if_t()> * = nullptr> + auto getRangeRoundedKernelLambda(KernelType KernelFunc, + range NumWorkItems) { + return [=](TransformedArgType Arg, kernel_handler KH) { + if (Arg[0] >= NumWorkItems[0]) + return; + Arg.set_allowed_range(NumWorkItems); + KernelFunc(Arg, KH); + }; + } + + template ()> * = nullptr> + auto getRangeRoundedKernelLambda(KernelType KernelFunc, + range NumWorkItems) { + return [=](TransformedArgType Arg) { + if (Arg[0] >= NumWorkItems[0]) + return; + Arg.set_allowed_range(NumWorkItems); + KernelFunc(Arg); + }; + } }; } // namespace sycl } // __SYCL_INLINE_NAMESPACE(cl) diff --git a/sycl/include/CL/sycl/id.hpp b/sycl/include/CL/sycl/id.hpp index 8ea62a368c7c1..151657aa661e8 100644 --- a/sycl/include/CL/sycl/id.hpp +++ b/sycl/include/CL/sycl/id.hpp @@ -19,12 +19,6 @@ namespace sycl { template class range; template class item; -namespace detail { -template -class RangeRoundedLambda; -} // namespace detail - /// A unique identifier of an item in an index space. /// /// \ingroup sycl_api @@ -247,9 +241,7 @@ template class id : public detail::array { #undef __SYCL_GEN_OPT private: - template - friend class detail::RangeRoundedLambda; + friend class handler; void set_allowed_range(range rnwi) { (void)rnwi[0]; } }; diff --git a/sycl/include/CL/sycl/item.hpp b/sycl/include/CL/sycl/item.hpp index 3e58f3ec9cd3b..7df259f14ebac 100644 --- a/sycl/include/CL/sycl/item.hpp +++ b/sycl/include/CL/sycl/item.hpp @@ -21,10 +21,6 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { class Builder; - -template -class RangeRoundedLambda; } // namespace detail template class id; template class range; @@ -122,9 +118,7 @@ template class item { friend class detail::Builder; private: - template - friend class detail::RangeRoundedLambda; + friend class handler; void set_allowed_range(const range rnwi) { MImpl.MExtent = rnwi; } diff --git a/sycl/include/CL/sycl/kernel_handler.hpp b/sycl/include/CL/sycl/kernel_handler.hpp index 80f46ae5a8445..02a1c3b0f01ef 100644 --- a/sycl/include/CL/sycl/kernel_handler.hpp +++ b/sycl/include/CL/sycl/kernel_handler.hpp @@ -48,7 +48,7 @@ class kernel_handler { #ifdef __SYCL_DEVICE_ONLY__ template , - std::enable_if_t>> + std::enable_if_t> * = nullptr> T getSpecializationConstantOnDevice() { const char *SymbolicID = __builtin_unique_stable_name( detail::specialization_id_name_generator); @@ -56,7 +56,7 @@ class kernel_handler { SymbolicID, &S, MSpecializationConstantsBuffer); } template , - std::enable_if_t>> + std::enable_if_t> * = nullptr> T getSpecializationConstantOnDevice() { const char *SymbolicID = __builtin_unique_stable_name( detail::specialization_id_name_generator); diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp deleted file mode 100644 index 86bb69a86706d..0000000000000 --- a/sycl/test/on-device/basic_tests/specialization_constants/kernel_handler.cpp +++ /dev/null @@ -1,31 +0,0 @@ -// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out -// RUN: %t.out - -#include - -int main() { - sycl::queue q; - - q.submit([&](sycl::handler &cgh) { - cgh.single_task([=](sycl::kernel_handler kh) {}); - }); - - q.submit([&](sycl::handler &cgh) { - cgh.parallel_for( - sycl::range<3>(3, 3, 3), - [](sycl::item<3> it, sycl::kernel_handler kh) {}); - }); - - q.submit([&](sycl::handler &cgh) { - cgh.parallel_for(sycl::range<3>(3, 3, 3), [](auto it) {}); - }); - - q.submit([&](sycl::handler &cgh) { - cgh.parallel_for_work_group( - sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2), - [=](sycl::group<3> myGroup, sycl::kernel_handler kh) { - myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); - myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); - }); - }); -} diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp new file mode 100644 index 0000000000000..5eb6bc355695a --- /dev/null +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp @@ -0,0 +1,104 @@ +// RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out +// RUN: %t.out + +#include + +int main() { + sycl::queue q; + + // single_task w/o kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.single_task([=]() {}); + }); + + // single_task with kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.single_task( + [=](sycl::kernel_handler kh) {}); + }); + + // parallel_for with id and w/o kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<1>(1), [](sycl::id<1> i) {}); + }); + + // parallel_for with id and kernel_handler args + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<1>(1), [](sycl::id<1> i, sycl::kernel_handler kh) {}); + }); + + // parallel_for with item and w/o kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<3>(3, 3, 3), [](sycl::item<3> it) {}); + }); + + // parallel_for with item and kernel_handler args + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<3>(3, 3, 3), + [](sycl::item<3> it, sycl::kernel_handler kh) {}); + }); + + // parallel_for with nd_item and w/o kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(4, 4, 4), sycl::range<3>(2, 2, 2)), + [=](sycl::nd_item<3> item) {}); + }); + + // parallel_for with nd_item and kernel_handler args + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::nd_range<3>(sycl::range<3>(4, 4, 4), sycl::range<3>(2, 2, 2)), + [=](sycl::nd_item<3> item, sycl::kernel_handler kh) {}); + }); + + // parallel_for with generic lambda w/o kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<3>(3, 3, 3), [](auto it) {}); + }); + + // parallel_for with generic lambda with kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<3>(3, 3, 3), [](auto it, sycl::kernel_handler kh) {}); + }); + + // parallel_for with integral type arg and w/o kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<1>(1), [](int index) {}); + }); + + // parallel_for with integral type and kernel_handler args + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for( + sycl::range<1>(1), [](int index, sycl::kernel_handler kh) {}); + }); + + // parallel_for_work_group without kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for_work_group< + class KernelParallelForWorkGroupWithKernelHandler>( + sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2), + [=](sycl::group<3> myGroup) { + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + }); + }); + + // parallel_for_work_group with kernel_handler arg + q.submit([&](sycl::handler &cgh) { + cgh.parallel_for_work_group< + class KernelParallelForWorkGroupWithoutKernelHandler>( + sycl::range<3>(2, 2, 2), sycl::range<3>(2, 2, 2), + [=](sycl::group<3> myGroup, sycl::kernel_handler kh) { + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + myGroup.parallel_for_work_item([&](sycl::h_item<3> myItem) {}); + }); + }); +} From c6cf04f5b2675ed44649b84ba003d46a9b854a8d Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Thu, 25 Mar 2021 19:01:10 +0300 Subject: [PATCH 07/14] Remove unnecessary changes --- sycl/include/CL/sycl/item.hpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/include/CL/sycl/item.hpp b/sycl/include/CL/sycl/item.hpp index 7df259f14ebac..a8aa9c8ef09f5 100644 --- a/sycl/include/CL/sycl/item.hpp +++ b/sycl/include/CL/sycl/item.hpp @@ -21,7 +21,7 @@ __SYCL_INLINE_NAMESPACE(cl) { namespace sycl { namespace detail { class Builder; -} // namespace detail +} template class id; template class range; @@ -119,7 +119,6 @@ template class item { private: friend class handler; - void set_allowed_range(const range rnwi) { MImpl.MExtent = rnwi; } detail::ItemBase MImpl; From f92ccfe2fc13ea419621a248e303e703bdaa1b34 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 26 Mar 2021 00:02:08 +0300 Subject: [PATCH 08/14] Fix compilation error with msvc --- sycl/include/CL/sycl/detail/cg_types.hpp | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/sycl/include/CL/sycl/detail/cg_types.hpp b/sycl/include/CL/sycl/detail/cg_types.hpp index e28db0906c837..5faa502102ba3 100644 --- a/sycl/include/CL/sycl/detail/cg_types.hpp +++ b/sycl/include/CL/sycl/detail/cg_types.hpp @@ -149,23 +149,39 @@ static constexpr bool check_kernel_lambda_takes_args() { return check_fn_signature, void(Args...)>::value; } -// Type traits to find out if kernal lambda has kernel_handler argument +// isKernelLambdaCallableWithKernelHandlerImpl checks if LambdaArgType is void +// (e.g., in single_task), and based on that, calls +// check_kernel_lambda_takes_args with proper set of arguments. Also this type +// trait workarounds compilation error which happens only with msvc. -template ::value> * = nullptr> -constexpr bool isKernelLambdaCallableWithKernelHandler() { +constexpr bool isKernelLambdaCallableWithKernelHandlerImpl() { return check_kernel_lambda_takes_args(); } template ::value> * = nullptr> -constexpr bool isKernelLambdaCallableWithKernelHandler() { +constexpr bool isKernelLambdaCallableWithKernelHandlerImpl() { return check_kernel_lambda_takes_args(); } +// Type traits to find out if kernal lambda has kernel_handler argument + +template +constexpr bool isKernelLambdaCallableWithKernelHandler() { + return check_kernel_lambda_takes_args(); +} + +template +constexpr bool isKernelLambdaCallableWithKernelHandler() { + return isKernelLambdaCallableWithKernelHandlerImpl(); +} + // Helpers for running kernel lambda on the host device template Date: Fri, 26 Mar 2021 12:57:20 +0300 Subject: [PATCH 09/14] Apply CR comments --- sycl/include/CL/sycl/handler.hpp | 2 +- sycl/include/CL/sycl/kernel_handler.hpp | 2 +- sycl/include/CL/sycl/specialization_id.hpp | 2 +- .../kernel_lambda_with_kernel_handler_arg.cpp | 4 ++++ 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/handler.hpp b/sycl/include/CL/sycl/handler.hpp index 7c3b47585d303..12f05869d29a0 100644 --- a/sycl/include/CL/sycl/handler.hpp +++ b/sycl/include/CL/sycl/handler.hpp @@ -498,7 +498,7 @@ class __SYCL_EXPORT handler { LambdaArgType>() && MIsHost) { throw cl::sycl::feature_not_supported( - "kernel_handler is not supported by host device.", + "kernel_handler is not yet supported by host device.", PI_INVALID_OPERATION); } MHostKernel.reset( diff --git a/sycl/include/CL/sycl/kernel_handler.hpp b/sycl/include/CL/sycl/kernel_handler.hpp index 02a1c3b0f01ef..598c880df5c94 100644 --- a/sycl/include/CL/sycl/kernel_handler.hpp +++ b/sycl/include/CL/sycl/kernel_handler.hpp @@ -33,7 +33,7 @@ class kernel_handler { return getSpecializationConstantOnDevice(); #else throw cl::sycl::feature_not_supported( - "kernel_handler::get_specialization_constant() is not supported by " + "kernel_handler::get_specialization_constant() is not yet supported by " "host device.", PI_INVALID_OPERATION); #endif // __SYCL_DEVICE_ONLY__ diff --git a/sycl/include/CL/sycl/specialization_id.hpp b/sycl/include/CL/sycl/specialization_id.hpp index b59865c2b94da..8cb61695c8444 100644 --- a/sycl/include/CL/sycl/specialization_id.hpp +++ b/sycl/include/CL/sycl/specialization_id.hpp @@ -20,7 +20,7 @@ template class specialization_id { template explicit constexpr specialization_id(Args &&... args) - : MSpecializationConstantValue(args...) {} + : MSpecializationConstantValue(std::forward(args)...) {} specialization_id(const specialization_id &rhs) = delete; specialization_id(specialization_id &&rhs) = delete; diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp index 5eb6bc355695a..4b3703927a549 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp @@ -1,6 +1,10 @@ // RUN: %clangxx -fsycl -fsycl-targets=%sycl_triple %s -o %t.out // RUN: %t.out +// This test checks all possible scenarios of running single_task, parallel_for +// and parallel_for_work_group to verify that this code compiles and runs +// correctly with user's lambda with and without sycl::kernel_handler argument + #include int main() { From 558f24baa78c5d3823fc025a82e4c25de37b32be Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 26 Mar 2021 14:49:40 +0300 Subject: [PATCH 10/14] Add TODO for support spec consts on host device --- sycl/include/CL/sycl/kernel_handler.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/include/CL/sycl/kernel_handler.hpp b/sycl/include/CL/sycl/kernel_handler.hpp index 598c880df5c94..0a3f77381b364 100644 --- a/sycl/include/CL/sycl/kernel_handler.hpp +++ b/sycl/include/CL/sycl/kernel_handler.hpp @@ -32,6 +32,7 @@ class kernel_handler { #ifdef __SYCL_DEVICE_ONLY__ return getSpecializationConstantOnDevice(); #else + // TODO: add support of host device throw cl::sycl::feature_not_supported( "kernel_handler::get_specialization_constant() is not yet supported by " "host device.", From 39c4df8fcb1578480f021b39fd623ab7ee885ca6 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 26 Mar 2021 16:56:15 +0300 Subject: [PATCH 11/14] Add kind_specialization_constants_buffer to kernel_desc.hpp --- sycl/include/CL/sycl/detail/kernel_desc.hpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp index 535c7069c2974..7ddbee517a2c2 100644 --- a/sycl/include/CL/sycl/detail/kernel_desc.hpp +++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp @@ -26,7 +26,8 @@ enum class kernel_param_kind_t { kind_accessor = 0, kind_std_layout = 1, // standard layout object parameters kind_sampler = 2, - kind_pointer = 3 + kind_pointer = 3, + kind_specialization_constants_buffer = 4 }; // describes a kernel parameter From 9b167abdf552fab5d8d12dd55cdb260fd9397c34 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 26 Mar 2021 18:18:20 +0300 Subject: [PATCH 12/14] Fix subsequent build failure --- sycl/include/CL/sycl/detail/kernel_desc.hpp | 2 +- sycl/include/CL/sycl/specialization_id.hpp | 4 ++-- sycl/source/detail/scheduler/commands.cpp | 6 ++++++ sycl/source/handler.cpp | 6 ++++++ 4 files changed, 15 insertions(+), 3 deletions(-) diff --git a/sycl/include/CL/sycl/detail/kernel_desc.hpp b/sycl/include/CL/sycl/detail/kernel_desc.hpp index 7ddbee517a2c2..7fbe0d1dffe76 100644 --- a/sycl/include/CL/sycl/detail/kernel_desc.hpp +++ b/sycl/include/CL/sycl/detail/kernel_desc.hpp @@ -27,7 +27,7 @@ enum class kernel_param_kind_t { kind_std_layout = 1, // standard layout object parameters kind_sampler = 2, kind_pointer = 3, - kind_specialization_constants_buffer = 4 + kind_specialization_constants_buffer = 4, }; // describes a kernel parameter diff --git a/sycl/include/CL/sycl/specialization_id.hpp b/sycl/include/CL/sycl/specialization_id.hpp index 8cb61695c8444..cc4f25d8ebad6 100644 --- a/sycl/include/CL/sycl/specialization_id.hpp +++ b/sycl/include/CL/sycl/specialization_id.hpp @@ -20,7 +20,7 @@ template class specialization_id { template explicit constexpr specialization_id(Args &&... args) - : MSpecializationConstantValue(std::forward(args)...) {} + : MDefaultValue(std::forward(args)...) {} specialization_id(const specialization_id &rhs) = delete; specialization_id(specialization_id &&rhs) = delete; @@ -28,7 +28,7 @@ template class specialization_id { specialization_id &operator=(specialization_id &&rhs) = delete; private: - T MSpecializationConstantValue; + T MDefaultValue; }; } // namespace sycl diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 2e0e28a3538f7..59b658e76e6ff 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1695,6 +1695,12 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( Arg.MSize, Arg.MPtr); break; } + case kernel_param_kind_t::kind_specialization_constants_buffer: { + throw cl::sycl::feature_not_supported( + "Specialization constants are not yet fully supported", + PI_INVALID_OPERATION); + break; + } } ++NextTrueIndex; } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 5e63addf8e446..4980222d79190 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -236,6 +236,12 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, Index + IndexShift); break; } + case kernel_param_kind_t::kind_specialization_constants_buffer: { + throw cl::sycl::feature_not_supported( + "Specialization constants are not yet fully supported", + PI_INVALID_OPERATION); + break; + } } } From dd4d2600804907599b74c5fb5b2ab06d9d975a30 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Fri, 26 Mar 2021 21:21:49 +0300 Subject: [PATCH 13/14] Apply CR comments --- sycl/source/detail/scheduler/commands.cpp | 4 ++-- sycl/source/handler.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 59b658e76e6ff..b29e25eef3650 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1697,8 +1697,8 @@ pi_result ExecCGCommand::SetKernelParamsAndLaunch( } case kernel_param_kind_t::kind_specialization_constants_buffer: { throw cl::sycl::feature_not_supported( - "Specialization constants are not yet fully supported", - PI_INVALID_OPERATION); + "SYCL2020 specialization constants are not yet fully supported", + PI_INVALID_OPERATION); break; } } diff --git a/sycl/source/handler.cpp b/sycl/source/handler.cpp index 4980222d79190..a2ac1f1276a8d 100644 --- a/sycl/source/handler.cpp +++ b/sycl/source/handler.cpp @@ -238,7 +238,7 @@ void handler::processArg(void *Ptr, const detail::kernel_param_kind_t &Kind, } case kernel_param_kind_t::kind_specialization_constants_buffer: { throw cl::sycl::feature_not_supported( - "Specialization constants are not yet fully supported", + "SYCL2020 specialization constants are not yet fully supported", PI_INVALID_OPERATION); break; } From 1a0b20012d3442d717e369c140cd9f8037d5ca22 Mon Sep 17 00:00:00 2001 From: Dmitry Vodopyanov Date: Mon, 29 Mar 2021 18:23:18 +0300 Subject: [PATCH 14/14] Temporarily disabling spec const test for CUDA --- .../kernel_lambda_with_kernel_handler_arg.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp index 4b3703927a549..7adf90fe48b7a 100644 --- a/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp +++ b/sycl/test/on-device/basic_tests/specialization_constants/kernel_lambda_with_kernel_handler_arg.cpp @@ -5,6 +5,10 @@ // and parallel_for_work_group to verify that this code compiles and runs // correctly with user's lambda with and without sycl::kernel_handler argument +// TODO: enable cuda support when non-native spec constants started to be +// supported +// UNSUPPORTED: cuda + #include int main() {