From 4b241e6d56a491d6f9900480cb37b0e9e86d1068 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 28 Jul 2023 15:53:20 -0700 Subject: [PATCH 01/13] [SYCL] Improve handling of large reqd_sub_group_size values --- .../sycl-post-link/SYCLDeviceRequirements.cpp | 41 +++-- .../program_manager/program_manager.cpp | 152 ++++++++++-------- .../program_manager/program_manager.hpp | 3 + .../large-reqd-work-group-size.cpp | 65 ++++++++ 4 files changed, 183 insertions(+), 78 deletions(-) create mode 100644 sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp index a9c791877a079..b48cc95a60059 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp @@ -15,19 +15,19 @@ #include #include +#include using namespace llvm; +auto ExtractIntegerFromMDNodeOperand(const MDOperand &operand) { + Constant *C = + cast(operand.get())->getValue(); + return C->getUniqueInteger().getSExtValue(); +} + void llvm::getSYCLDeviceRequirements( const module_split::ModuleDesc &MD, std::map &Requirements) { - auto ExtractIntegerFromMDNodeOperand = [=](const MDNode *N, - unsigned OpNo) -> int32_t { - Constant *C = - cast(N->getOperand(OpNo).get())->getValue(); - return static_cast(C->getUniqueInteger().getSExtValue()); - }; - // { LLVM-IR metadata name , [SYCL/Device requirements] property name }, see: // https://github.com/intel/llvm/blob/sycl/sycl/doc/design/OptionalDeviceFeatures.md#create-the-sycldevice-requirements-property-set // Scan the module and if the metadata is present fill the corresponing @@ -38,13 +38,13 @@ void llvm::getSYCLDeviceRequirements( {"reqd_work_group_size", "reqd_work_group_size"}}; for (const auto &[MDName, MappedName] : ReqdMDs) { - std::set Values; + std::set Values; for (const Function &F : MD.getModule()) { if (const MDNode *MDN = F.getMetadata(MDName)) { - for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { + for (const auto &operand : MDN->operands()) { // Don't put internal aspects (with negative integer value) into the // requirements, they are used only for device image splitting. - auto Val = ExtractIntegerFromMDNodeOperand(MDN, I); + auto Val = ExtractIntegerFromMDNodeOperand(operand); if (Val >= 0) Values.insert(Val); } @@ -58,6 +58,25 @@ void llvm::getSYCLDeviceRequirements( std::vector(Values.begin(), Values.end()); } + std::optional> ReqdWorkGroupSize; + for (const Function &F: MD.getModule()) { + if (const MDNode *MDN = F.getMetadata("reqd_work_group_size")) { + llvm::SmallVector NewReqdWorkGroupSize; + for (const auto &operand : MDN->operands()) + NewReqdWorkGroupSize.push_back(ExtractIntegerFromMDNodeOperand(operand)); + if (!ReqdWorkGroupSize) + ReqdWorkGroupSize = NewReqdWorkGroupSize; + else if (!std::equal(ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end(), + NewReqdWorkGroupSize.begin())) { + // two functions in the module have different required work group sizes + } + } + } + + if (ReqdWorkGroupSize) + Requirements["reqd_work_group_size"] = + std::vector(ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end()); + // There should only be at most one function with // intel_reqd_sub_group_size metadata when considering the entry // points of a module, but not necessarily when considering all the @@ -69,7 +88,7 @@ void llvm::getSYCLDeviceRequirements( for (const Function *F : MD.entries()) { if (auto *MDN = F->getMetadata("intel_reqd_sub_group_size")) { assert(MDN->getNumOperands() == 1); - auto MDValue = ExtractIntegerFromMDNodeOperand(MDN, 0); + auto MDValue = ExtractIntegerFromMDNodeOperand(MDN->getOperand(0)); assert(MDValue >= 0); if (!SubGroupSize) SubGroupSize = MDValue; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index a45a43e6bad94..4738092972bee 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -633,58 +633,8 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( const RTDeviceBinaryImage::PropertyRange &ARange = Img.getDeviceRequirements(); -#define __SYCL_ASPECT(ASPECT, ID) \ - case aspect::ASPECT: \ - return #ASPECT; -#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID) -// We don't need "case aspect::usm_allocator" here because it will duplicate -// "case aspect::usm_system_allocations", therefore leave this macro empty -#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE) - auto getAspectNameStr = [](aspect AspectNum) -> std::string { - switch (AspectNum) { -#include -#include - } - throw sycl::exception(errc::kernel_not_supported, - "Unknown aspect " + - std::to_string(static_cast(AspectNum))); - }; -#undef __SYCL_ASPECT_DEPRECATED_ALIAS -#undef __SYCL_ASPECT_DEPRECATED -#undef __SYCL_ASPECT - - for (RTDeviceBinaryImage::PropertyRange::ConstIterator It : ARange) { - using namespace std::literals; - if ((*It)->Name == "aspects"sv) { - ByteArray Aspects = DeviceBinaryProperty(*It).asByteArray(); - // 8 because we need to skip 64-bits of size of the byte array - Aspects.dropBytes(8); - while (!Aspects.empty()) { - auto Aspect = static_cast(Aspects.consume()); - if (!Dev->has(Aspect)) - throw sycl::exception(errc::kernel_not_supported, - "Required aspect " + getAspectNameStr(Aspect) + - " is not supported on the device"); - } - } else if ((*It)->Name == "reqd_sub_group_size"sv) { - auto ReqdSubGroupSize = DeviceBinaryProperty(*It).asUint32(); - auto SupportedSubGroupSizes = - Device.get_info(); - - // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD, - // as ESIMD images have a reqd-sub-group-size of 1, but currently - // no backend currently includes 1 as a valid sub-group size. - // This can be removed if backends add 1 as a valid sub-group size. - if (!getUint32PropAsBool(Img, "isEsimdImage") && - std::none_of(SupportedSubGroupSizes.cbegin(), - SupportedSubGroupSizes.cend(), - [=](auto s) { return s == ReqdSubGroupSize; })) - throw sycl::exception(errc::kernel_not_supported, - "Sub-group size " + - std::to_string(ReqdSubGroupSize) + - " is not supported on the device"); - } - } + if (auto exception = checkDevSupportDeviceRequirements(Device, Img)) + throw *exception; auto BuildF = [this, &Img, &Context, &ContextImpl, &Device, Prg, &CompileOpts, &LinkOpts, SpecConsts] { @@ -2456,6 +2406,44 @@ ProgramManager::getOrCreateKernel(const context &Context, bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &Img) { + return !checkDevSupportDeviceRequirements(Dev, Img).has_value(); +} + +static std::string getAspectNameStr(sycl::aspect AspectNum) { +#define __SYCL_ASPECT(ASPECT, ID) \ + case aspect::ASPECT: \ + return #ASPECT; +#define __SYCL_ASPECT_DEPRECATED(ASPECT, ID, MESSAGE) __SYCL_ASPECT(ASPECT, ID) +// We don't need "case aspect::usm_allocator" here because it will duplicate +// "case aspect::usm_system_allocations", therefore leave this macro empty +#define __SYCL_ASPECT_DEPRECATED_ALIAS(ASPECT, ID, MESSAGE) + switch (AspectNum) { +#include +#include + } + throw sycl::exception(errc::kernel_not_supported, + "Unknown aspect " + + std::to_string(static_cast(AspectNum))); +#undef __SYCL_ASPECT_DEPRECATED_ALIAS +#undef __SYCL_ASPECT_DEPRECATED +#undef __SYCL_ASPECT +} + +// Check if the multiplication over unsigned integers overflows +template +static std::enable_if_t, std::optional> +multiply_with_overflow_check(T x, T y) { + if (y == 0) + return 0; + if (x > std::numeric_limits::max() / y) + return {}; + else + return x * y; +} + +std::optional +checkDevSupportDeviceRequirements(const device &Dev, + const RTDeviceBinaryImage &Img) { auto getPropIt = [&Img](const std::string &PropName) { const RTDeviceBinaryImage::PropertyRange &PropRange = Img.getDeviceRequirements(); @@ -2483,7 +2471,9 @@ bool doesDevSupportDeviceRequirements(const device &Dev, while (!Aspects.empty()) { aspect Aspect = Aspects.consume(); if (!Dev.has(Aspect)) - return false; + return sycl::exception(errc::kernel_not_supported, + "Required aspect " + getAspectNameStr(Aspect) + + " is not supported on the device"); } } @@ -2493,18 +2483,30 @@ bool doesDevSupportDeviceRequirements(const device &Dev, DeviceBinaryProperty(*(ReqdWGSizePropIt.value())).asByteArray(); // Drop 8 bytes describing the size of the byte array. ReqdWGSize.dropBytes(8); - int ReqdWGSizeAllDimsTotal = 1; - std::vector ReqdWGSizeVec; + uint64_t ReqdWGSizeAllDimsTotal = 1; + std::vector ReqdWGSizeVec; int Dims = 0; while (!ReqdWGSize.empty()) { - int SingleDimSize = ReqdWGSize.consume(); - ReqdWGSizeAllDimsTotal *= SingleDimSize; + // The reqd_work_group_size data is stored as uint32_t's, + // but we'll widen the result to uint64_t. + uint64_t SingleDimSize = ReqdWGSize.consume(); + if (auto res = multiply_with_overflow_check(ReqdWGSizeAllDimsTotal, + SingleDimSize)) + ReqdWGSizeAllDimsTotal = *res; + else + return sycl::exception(sycl::errc::kernel_not_supported, + "Required work-group size is not supported" + " (too large)"); ReqdWGSizeVec.push_back(SingleDimSize); Dims++; } - if (static_cast(ReqdWGSizeAllDimsTotal) > + + if (ReqdWGSizeAllDimsTotal > Dev.get_info()) - return false; + return sycl::exception(sycl::errc::kernel_not_supported, + "Required work-group size " + + std::to_string(ReqdWGSizeAllDimsTotal) + + " is not supported on the device"); // Creating std::variant to call max_work_item_sizes one time to avoid // performance drop std::variant, id<2>, id<3>> MaxWorkItemSizesVariant; @@ -2522,17 +2524,26 @@ bool doesDevSupportDeviceRequirements(const device &Dev, // issues after that if (Dims == 1) { // ReqdWGSizeVec is in reverse order compared to MaxWorkItemSizes - if (static_cast(ReqdWGSizeVec[i]) > + if (ReqdWGSizeVec[i] > std::get>(MaxWorkItemSizesVariant)[Dims - i - 1]) - return false; + return sycl::exception(sycl::errc::kernel_not_supported, + "Required work-group size " + + std::to_string(ReqdWGSizeVec[i]) + + " is not supported"); } else if (Dims == 2) { - if (static_cast(ReqdWGSizeVec[i]) > + if (ReqdWGSizeVec[i] > std::get>(MaxWorkItemSizesVariant)[Dims - i - 1]) - return false; + return sycl::exception(sycl::errc::kernel_not_supported, + "Required work-group size " + + std::to_string(ReqdWGSizeVec[i]) + + " is not supported"); } else // (Dims == 3) - if (static_cast(ReqdWGSizeVec[i]) > + if (ReqdWGSizeVec[i] > std::get>(MaxWorkItemSizesVariant)[Dims - i - 1]) - return false; + return sycl::exception(sycl::errc::kernel_not_supported, + "Required work-group size " + + std::to_string(ReqdWGSizeVec[i]) + + " is not supported"); } } @@ -2541,14 +2552,21 @@ bool doesDevSupportDeviceRequirements(const device &Dev, auto ReqdSubGroupSize = DeviceBinaryProperty(*(ReqdSubGroupSizePropIt.value())).asUint32(); auto SupportedSubGroupSizes = Dev.get_info(); + // !getUint32PropAsBool(Img, "isEsimdImage") is a WA for ESIMD, + // as ESIMD images have a reqd-sub-group-size of 1, but currently + // no backend currently includes 1 as a valid sub-group size. + // This can be removed if backends add 1 as a valid sub-group size. if (!getUint32PropAsBool(Img, "isEsimdImage") && std::none_of(SupportedSubGroupSizes.cbegin(), SupportedSubGroupSizes.cend(), [=](auto s) { return s == ReqdSubGroupSize; })) - return false; + return sycl::exception(sycl::errc::kernel_not_supported, + "Sub-group size " + + std::to_string(ReqdSubGroupSize) + + " is not supported on the device"); } - return true; + return {}; } } // namespace detail diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index b2640cfc065de..39692f2fa7f9d 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -51,6 +51,9 @@ namespace detail { bool doesDevSupportDeviceRequirements(const device &Dev, const RTDeviceBinaryImage &BinImages); +std::optional +checkDevSupportDeviceRequirements(const device &Dev, + const RTDeviceBinaryImage &BinImages); // This value must be the same as in libdevice/device_itt.h. // See sycl/doc/design/ITTAnnotations.md for more info. diff --git a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp new file mode 100644 index 0000000000000..f5389e67a57fe --- /dev/null +++ b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp @@ -0,0 +1,65 @@ +// RUN: %{build} -o %t.out -fno-sycl-id-queries-fit-in-int -DNO_RANGE_GREATER_THAN_UINT32_MAX +// RUN: %{run} %t.out + +#include + +using namespace sycl; +queue q; +int n_fail = 0; + +template +void throws_kernel_not_supported(const char *test_name, FunctorT f) { + try { + f(); + } catch (const sycl::exception &e) { + if (e.code() != errc::kernel_not_supported) { + std::cout << "fail: " << test_name << "\n" + << "Caught wrong exception with error code " + << e.code() << "\n" + << e.what() << "\n"; + ++n_fail; + return; + } else { + std::cout << "pass: " << test_name << "\n" + << "Caught right exception:\n" + << e.what() << "\n"; + return; + } + } + std::cout << "fail: " << test_name << "\n" + << "No exception thrown\n"; + ++n_fail; + return; +} + +int main(int argc, char *argv[]) { + throws_kernel_not_supported("nd_range<1>", []{ + constexpr uint32_t N = 4294967295; + q.parallel_for(nd_range<1>(N, N), + [=](auto) [[sycl::reqd_work_group_size(N)]] {}); + }); + + throws_kernel_not_supported("nd_range<2>", []{ + constexpr uint32_t N = 4294967295; + q.parallel_for(nd_range<2>({N, N}, {N, N}), + [=](auto) [[sycl::reqd_work_group_size(N, N)]] {}); + }); + + throws_kernel_not_supported("nd_range<3>", []{ + constexpr uint32_t N = 4294967295; + q.parallel_for(nd_range<3>({N, N, N}, {N, N, N}), + [=](auto) [[sycl::reqd_work_group_size(N, N, N)]] {}); + }); + + // TODO: Due to truncation issues, this test cannot pass yet. + // Enable this test once fixed. +#ifndef NO_RANGE_GREATER_THAN_UINT32_MAX + throws_kernel_not_supported("uint32_max+2", []{ + constexpr uint64_t N = 4294967297; + q.parallel_for(nd_range<1>(N, N), + [=](auto) [[sycl::reqd_work_group_size(N)]] {}); + }); +#endif + + return n_fail; +} From 0bde9ff15f4f305f05035c177400ff1b6fb2bf9f Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 28 Jul 2023 22:56:58 +0000 Subject: [PATCH 02/13] clang-format --- .../sycl-post-link/SYCLDeviceRequirements.cpp | 16 +++++------ .../large-reqd-work-group-size.cpp | 28 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp index b48cc95a60059..88d4a839cffa9 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp @@ -13,16 +13,15 @@ #include "llvm/IR/Module.h" #include "llvm/Support/PropertySetIO.h" +#include #include #include -#include using namespace llvm; auto ExtractIntegerFromMDNodeOperand(const MDOperand &operand) { - Constant *C = - cast(operand.get())->getValue(); - return C->getUniqueInteger().getSExtValue(); + Constant *C = cast(operand.get())->getValue(); + return C->getUniqueInteger().getSExtValue(); } void llvm::getSYCLDeviceRequirements( @@ -59,11 +58,12 @@ void llvm::getSYCLDeviceRequirements( } std::optional> ReqdWorkGroupSize; - for (const Function &F: MD.getModule()) { + for (const Function &F : MD.getModule()) { if (const MDNode *MDN = F.getMetadata("reqd_work_group_size")) { llvm::SmallVector NewReqdWorkGroupSize; for (const auto &operand : MDN->operands()) - NewReqdWorkGroupSize.push_back(ExtractIntegerFromMDNodeOperand(operand)); + NewReqdWorkGroupSize.push_back( + ExtractIntegerFromMDNodeOperand(operand)); if (!ReqdWorkGroupSize) ReqdWorkGroupSize = NewReqdWorkGroupSize; else if (!std::equal(ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end(), @@ -74,8 +74,8 @@ void llvm::getSYCLDeviceRequirements( } if (ReqdWorkGroupSize) - Requirements["reqd_work_group_size"] = - std::vector(ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end()); + Requirements["reqd_work_group_size"] = std::vector( + ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end()); // There should only be at most one function with // intel_reqd_sub_group_size metadata when considering the entry diff --git a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp index f5389e67a57fe..077feb6d53cd2 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -fno-sycl-id-queries-fit-in-int -DNO_RANGE_GREATER_THAN_UINT32_MAX -// RUN: %{run} %t.out +// RUN: %{run} %t.out #include @@ -14,8 +14,7 @@ void throws_kernel_not_supported(const char *test_name, FunctorT f) { } catch (const sycl::exception &e) { if (e.code() != errc::kernel_not_supported) { std::cout << "fail: " << test_name << "\n" - << "Caught wrong exception with error code " - << e.code() << "\n" + << "Caught wrong exception with error code " << e.code() << "\n" << e.what() << "\n"; ++n_fail; return; @@ -33,33 +32,34 @@ void throws_kernel_not_supported(const char *test_name, FunctorT f) { } int main(int argc, char *argv[]) { - throws_kernel_not_supported("nd_range<1>", []{ + throws_kernel_not_supported("nd_range<1>", [] { constexpr uint32_t N = 4294967295; q.parallel_for(nd_range<1>(N, N), [=](auto) [[sycl::reqd_work_group_size(N)]] {}); }); - - throws_kernel_not_supported("nd_range<2>", []{ + + throws_kernel_not_supported("nd_range<2>", [] { constexpr uint32_t N = 4294967295; q.parallel_for(nd_range<2>({N, N}, {N, N}), [=](auto) [[sycl::reqd_work_group_size(N, N)]] {}); }); - throws_kernel_not_supported("nd_range<3>", []{ + throws_kernel_not_supported("nd_range<3>", [] { constexpr uint32_t N = 4294967295; q.parallel_for(nd_range<3>({N, N, N}, {N, N, N}), - [=](auto) [[sycl::reqd_work_group_size(N, N, N)]] {}); + [=](auto) + [[sycl::reqd_work_group_size(N, N, N)]] {}); }); // TODO: Due to truncation issues, this test cannot pass yet. // Enable this test once fixed. #ifndef NO_RANGE_GREATER_THAN_UINT32_MAX - throws_kernel_not_supported("uint32_max+2", []{ - constexpr uint64_t N = 4294967297; - q.parallel_for(nd_range<1>(N, N), - [=](auto) [[sycl::reqd_work_group_size(N)]] {}); - }); + throws_kernel_not_supported("uint32_max+2", [] { + constexpr uint64_t N = 4294967297; + q.parallel_for(nd_range<1>(N, N), + [=](auto) [[sycl::reqd_work_group_size(N)]] {}); + }); #endif - + return n_fail; } From 504002b06b5d0685d6f15d10243aec7890cd6a1a Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 31 Jul 2023 09:27:08 -0700 Subject: [PATCH 03/13] Remove iostream include --- llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp index 88d4a839cffa9..9b447e8c2b8a8 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp @@ -13,7 +13,6 @@ #include "llvm/IR/Module.h" #include "llvm/Support/PropertySetIO.h" -#include #include #include From 43bd4e80037e568bc85b457fa47c2c18dd104c3d Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 31 Jul 2023 09:28:43 -0700 Subject: [PATCH 04/13] Remove unused variable --- sycl/source/detail/program_manager/program_manager.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 4738092972bee..70c9bc0402996 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -630,9 +630,6 @@ sycl::detail::pi::PiProgram ProgramManager::getBuiltPIProgram( getDeviceImage(KSId, Context, Device, JITCompilationIsRequired); // Check that device supports all aspects used by the kernel - const RTDeviceBinaryImage::PropertyRange &ARange = - Img.getDeviceRequirements(); - if (auto exception = checkDevSupportDeviceRequirements(Device, Img)) throw *exception; From 644bbca4a11b507e9f5667ff134e7fba1b8bff51 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Thu, 3 Aug 2023 21:32:46 +0000 Subject: [PATCH 05/13] Address some review comments --- .../sycl-post-link/SYCLDeviceRequirements.cpp | 20 ++++++------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp index 8f6d658b5ecd1..e0c359b3da34c 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp @@ -18,11 +18,6 @@ using namespace llvm; -auto ExtractIntegerFromMDNodeOperand(const MDOperand &operand) { - Constant *C = cast(operand.get())->getValue(); - return C->getUniqueInteger().getSExtValue(); -} - void llvm::getSYCLDeviceRequirements( const module_split::ModuleDesc &MD, std::map &Requirements) { @@ -45,9 +40,7 @@ void llvm::getSYCLDeviceRequirements( // Scan the module and if the metadata is present fill the corresponing // property with metadata's aspects constexpr std::pair ReqdMDs[] = { - {"sycl_used_aspects", "aspects"}, - {"sycl_fixed_targets", "fixed_target"}, - {"reqd_work_group_size", "reqd_work_group_size"}}; + {"sycl_used_aspects", "aspects"}, {"sycl_fixed_targets", "fixed_target"}}; for (const auto &[MDName, MappedName] : ReqdMDs) { std::set Values; @@ -78,15 +71,14 @@ void llvm::getSYCLDeviceRequirements( for (const Function &F : MD.getModule()) { if (const MDNode *MDN = F.getMetadata("reqd_work_group_size")) { llvm::SmallVector NewReqdWorkGroupSize; - for (const auto &operand : MDN->operands()) + for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) NewReqdWorkGroupSize.push_back( - ExtractIntegerFromMDNodeOperand(operand)); + ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); if (!ReqdWorkGroupSize) ReqdWorkGroupSize = NewReqdWorkGroupSize; - else if (!std::equal(ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end(), - NewReqdWorkGroupSize.begin())) { - // two functions in the module have different required work group sizes - } + else + assert(std::equal(ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end(), + NewReqdWorkGroupSize.begin())); } } From 1137476a0f5e9c83f1a971ae160906ef703493a7 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Thu, 3 Aug 2023 22:28:28 +0000 Subject: [PATCH 06/13] Add reqd_work_group_size_size_t --- .../sycl-post-link/SYCLDeviceRequirements.cpp | 6 +++--- .../detail/program_manager/program_manager.cpp | 18 +++++++++++------- .../large-reqd-work-group-size.cpp | 2 +- 3 files changed, 15 insertions(+), 11 deletions(-) diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp index e0c359b3da34c..e68cc2ffc51d4 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp @@ -67,10 +67,10 @@ void llvm::getSYCLDeviceRequirements( std::vector(Values.begin(), Values.end()); } - std::optional> ReqdWorkGroupSize; + std::optional> ReqdWorkGroupSize; for (const Function &F : MD.getModule()) { if (const MDNode *MDN = F.getMetadata("reqd_work_group_size")) { - llvm::SmallVector NewReqdWorkGroupSize; + llvm::SmallVector NewReqdWorkGroupSize; for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) NewReqdWorkGroupSize.push_back( ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); @@ -83,7 +83,7 @@ void llvm::getSYCLDeviceRequirements( } if (ReqdWorkGroupSize) - Requirements["reqd_work_group_size"] = std::vector( + Requirements["reqd_work_group_size_size_t"] = std::vector( ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end()); // There should only be at most one function with diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 70c9bc0402996..2bac4fa04892b 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2456,7 +2456,8 @@ checkDevSupportDeviceRequirements(const device &Dev, }; auto AspectsPropIt = getPropIt("aspects"); - auto ReqdWGSizePropIt = getPropIt("reqd_work_group_size"); + auto ReqdWGSizeUint32TPropIt = getPropIt("reqd_work_group_size"); + auto ReqdWGSizeSizeTPropIt = getPropIt("reqd_work_group_size_size_t"); auto ReqdSubGroupSizePropIt = getPropIt("reqd_sub_group_size"); // Checking if device supports defined aspects @@ -2475,18 +2476,21 @@ checkDevSupportDeviceRequirements(const device &Dev, } // Checking if device supports defined required work group size - if (ReqdWGSizePropIt) { - ByteArray ReqdWGSize = - DeviceBinaryProperty(*(ReqdWGSizePropIt.value())).asByteArray(); + if (ReqdWGSizeUint32TPropIt || ReqdWGSizeSizeTPropIt) { + bool usingSizeT = ReqdWGSizeSizeTPropIt.has_value(); + auto it = usingSizeT ? ReqdWGSizeSizeTPropIt : ReqdWGSizeUint32TPropIt; + + ByteArray ReqdWGSize = DeviceBinaryProperty(*(it.value())).asByteArray(); // Drop 8 bytes describing the size of the byte array. ReqdWGSize.dropBytes(8); - uint64_t ReqdWGSizeAllDimsTotal = 1; - std::vector ReqdWGSizeVec; + size_t ReqdWGSizeAllDimsTotal = 1; + std::vector ReqdWGSizeVec; int Dims = 0; while (!ReqdWGSize.empty()) { // The reqd_work_group_size data is stored as uint32_t's, // but we'll widen the result to uint64_t. - uint64_t SingleDimSize = ReqdWGSize.consume(); + size_t SingleDimSize = usingSizeT ? ReqdWGSize.consume() + : ReqdWGSize.consume(); if (auto res = multiply_with_overflow_check(ReqdWGSizeAllDimsTotal, SingleDimSize)) ReqdWGSizeAllDimsTotal = *res; diff --git a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp index 077feb6d53cd2..c8969822e5f29 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp @@ -1,4 +1,4 @@ -// RUN: %{build} -o %t.out -fno-sycl-id-queries-fit-in-int -DNO_RANGE_GREATER_THAN_UINT32_MAX +// RUN: %{build} -o %t.out -fno-sycl-id-queries-fit-in-int // RUN: %{run} %t.out #include From 5d0f4252d089389d690eda653c5ab88d669eabb0 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 4 Aug 2023 14:12:19 -0700 Subject: [PATCH 07/13] Remove ifndef --- .../OptionalKernelFeatures/large-reqd-work-group-size.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp index c8969822e5f29..059ee8f0de44e 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp @@ -51,15 +51,11 @@ int main(int argc, char *argv[]) { [[sycl::reqd_work_group_size(N, N, N)]] {}); }); - // TODO: Due to truncation issues, this test cannot pass yet. - // Enable this test once fixed. -#ifndef NO_RANGE_GREATER_THAN_UINT32_MAX throws_kernel_not_supported("uint32_max+2", [] { constexpr uint64_t N = 4294967297; q.parallel_for(nd_range<1>(N, N), [=](auto) [[sycl::reqd_work_group_size(N)]] {}); }); -#endif return n_fail; } From acac6e6547603b6d8fa45dd743523f1b675a3f77 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 4 Aug 2023 14:13:14 -0700 Subject: [PATCH 08/13] Allow for PropertyValue to be constructed from other containers --- llvm/include/llvm/Support/PropertySetIO.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index d878065809692..cb257ba7e5c77 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -81,8 +81,8 @@ class PropertyValue { PropertyValue(uint32_t Val) : Ty(UINT32), Val({Val}) {} PropertyValue(const byte *Data, SizeTy DataBitSize); - template - PropertyValue(const std::vector &Data) + template + PropertyValue(const C &Data) : PropertyValue(reinterpret_cast(Data.data()), Data.size() * sizeof(T) * /* bits in one byte */ 8) {} PropertyValue(const llvm::StringRef &Str) From 3a2d67660d682dc0ee902aa39a3430c272d3b7aa Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Fri, 4 Aug 2023 14:14:31 -0700 Subject: [PATCH 09/13] Use fixed sized integers --- .../reqd-work-group-size.ll | 10 +++++-- .../sycl-post-link/SYCLDeviceRequirements.cpp | 16 +++++----- .../program_manager/program_manager.cpp | 30 +++++++++++-------- 3 files changed, 35 insertions(+), 21 deletions(-) diff --git a/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll b/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll index defd6939691c2..4a440332a3634 100644 --- a/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll +++ b/llvm/test/tools/sycl-post-link/device-requirements/reqd-work-group-size.ll @@ -25,13 +25,19 @@ ; RUN: FileCheck %s -input-file=%t_0.prop --check-prefix CHECK-PROP-AUTO-SPLIT-0 ; RUN: FileCheck %s -input-file=%t_1.prop --check-prefix CHECK-PROP-AUTO-SPLIT-1 +; TODO: Before intel/llvm#10620, the reqd_work_group_size attribute +; stores its values as uint32_t, but this needed to be expanded to +; uint64_t. However, this change did not happen in ABI-breaking +; window, so we attach the required work-group size as the +; reqd_work_group_size_uint64_t attribute. At the next ABI-breaking +; window, this can be changed back to reqd_work_group_size. ; CHECK-PROP-AUTO-SPLIT-0: [SYCL/device requirements] ; CHECK-PROP-AUTO-SPLIT-0-NEXT: aspects=2|AAAAAAAAAAA -; CHECK-PROP-AUTO-SPLIT-0-NEXT: reqd_work_group_size=2|gAAAAAAAAAAQAAAA +; CHECK-PROP-AUTO-SPLIT-0-NEXT: reqd_work_group_size_uint64_t=2|ABAAAAAAAAAQAAAAAAAAAA ; CHECK-PROP-AUTO-SPLIT-1: [SYCL/device requirements] ; CHECK-PROP-AUTO-SPLIT-1-NEXT: aspects=2|AAAAAAAAAAA -; CHECK-PROP-AUTO-SPLIT-1-NEXT: reqd_work_group_size=2|gAAAAAAAAAAIAAAA +; CHECK-PROP-AUTO-SPLIT-1-NEXT: reqd_work_group_size_uint64_t=2|ABAAAAAAAAAIAAAAAAAAAA ; ModuleID = '/tmp/source-5f7d0d.bc' source_filename = "llvm-link" diff --git a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp index e68cc2ffc51d4..69ebd6220be74 100644 --- a/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp +++ b/llvm/tools/sycl-post-link/SYCLDeviceRequirements.cpp @@ -43,7 +43,7 @@ void llvm::getSYCLDeviceRequirements( {"sycl_used_aspects", "aspects"}, {"sycl_fixed_targets", "fixed_target"}}; for (const auto &[MDName, MappedName] : ReqdMDs) { - std::set Values; + std::set Values; for (const Function &F : MD.getModule()) { if (const MDNode *MDN = F.getMetadata(MDName)) { for (size_t I = 0, E = MDN->getNumOperands(); I < E; ++I) { @@ -67,7 +67,7 @@ void llvm::getSYCLDeviceRequirements( std::vector(Values.begin(), Values.end()); } - std::optional> ReqdWorkGroupSize; + std::optional> ReqdWorkGroupSize; for (const Function &F : MD.getModule()) { if (const MDNode *MDN = F.getMetadata("reqd_work_group_size")) { llvm::SmallVector NewReqdWorkGroupSize; @@ -76,15 +76,17 @@ void llvm::getSYCLDeviceRequirements( ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); if (!ReqdWorkGroupSize) ReqdWorkGroupSize = NewReqdWorkGroupSize; - else - assert(std::equal(ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end(), - NewReqdWorkGroupSize.begin())); } } + // TODO: Before intel/llvm#10620, the reqd_work_group_size attribute + // stores its values as uint32_t, but this needed to be expanded to + // uint64_t. However, this change did not happen in ABI-breaking + // window, so we attach the required work-group size as the + // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking + // window, this can be changed back to reqd_work_group_size. if (ReqdWorkGroupSize) - Requirements["reqd_work_group_size_size_t"] = std::vector( - ReqdWorkGroupSize->begin(), ReqdWorkGroupSize->end()); + Requirements["reqd_work_group_size_uint64_t"] = *ReqdWorkGroupSize; // There should only be at most one function with // intel_reqd_sub_group_size metadata when considering the entry diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 2bac4fa04892b..fb15d7077edc4 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2457,7 +2457,7 @@ checkDevSupportDeviceRequirements(const device &Dev, auto AspectsPropIt = getPropIt("aspects"); auto ReqdWGSizeUint32TPropIt = getPropIt("reqd_work_group_size"); - auto ReqdWGSizeSizeTPropIt = getPropIt("reqd_work_group_size_size_t"); + auto ReqdWGSizeUint64TTPropIt = getPropIt("reqd_work_group_size_uint64_t"); auto ReqdSubGroupSizePropIt = getPropIt("reqd_sub_group_size"); // Checking if device supports defined aspects @@ -2476,34 +2476,40 @@ checkDevSupportDeviceRequirements(const device &Dev, } // Checking if device supports defined required work group size - if (ReqdWGSizeUint32TPropIt || ReqdWGSizeSizeTPropIt) { - bool usingSizeT = ReqdWGSizeSizeTPropIt.has_value(); - auto it = usingSizeT ? ReqdWGSizeSizeTPropIt : ReqdWGSizeUint32TPropIt; + if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TTPropIt) { + bool usingUint64_t = ReqdWGSizeUint64TTPropIt.has_value(); + auto it = + usingUint64_t ? ReqdWGSizeUint64TTPropIt : ReqdWGSizeUint32TPropIt; ByteArray ReqdWGSize = DeviceBinaryProperty(*(it.value())).asByteArray(); // Drop 8 bytes describing the size of the byte array. ReqdWGSize.dropBytes(8); - size_t ReqdWGSizeAllDimsTotal = 1; - std::vector ReqdWGSizeVec; + uint64_t ReqdWGSizeAllDimsTotal = 1; + std::vector ReqdWGSizeVec; int Dims = 0; while (!ReqdWGSize.empty()) { // The reqd_work_group_size data is stored as uint32_t's, // but we'll widen the result to uint64_t. - size_t SingleDimSize = usingSizeT ? ReqdWGSize.consume() - : ReqdWGSize.consume(); + uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.consume() + : ReqdWGSize.consume(); if (auto res = multiply_with_overflow_check(ReqdWGSizeAllDimsTotal, SingleDimSize)) ReqdWGSizeAllDimsTotal = *res; else - return sycl::exception(sycl::errc::kernel_not_supported, - "Required work-group size is not supported" - " (too large)"); + return sycl::exception( + sycl::errc::kernel_not_supported, + "Required work-group size is not supported" + " (total number of work-items requested can't fit into size_t)"); ReqdWGSizeVec.push_back(SingleDimSize); Dims++; } + // The SingleDimSize was computed in an uint64_t; size_t does not + // necessarily have to be the same uint64_t (but should fit in an + // uint64_t). if (ReqdWGSizeAllDimsTotal > - Dev.get_info()) + Dev.get_info() || + ReqdWGSizeAllDimsTotal > std::numeric_limits::max()) return sycl::exception(sycl::errc::kernel_not_supported, "Required work-group size " + std::to_string(ReqdWGSizeAllDimsTotal) + From 0ed23fd72cf21399a3d98b521b836e3e7bc03715 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 8 Aug 2023 07:26:50 -0700 Subject: [PATCH 10/13] Disable for hip --- .../OptionalKernelFeatures/large-reqd-work-group-size.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp index 059ee8f0de44e..73e50d34a2a68 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp @@ -1,3 +1,4 @@ +// UNSUPPORTED: hip // RUN: %{build} -o %t.out -fno-sycl-id-queries-fit-in-int // RUN: %{run} %t.out From 5d4620aff028fd929f2331e12ea4d6dda0b35150 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 8 Aug 2023 07:37:24 -0700 Subject: [PATCH 11/13] Update comment and change variable name --- .../detail/program_manager/program_manager.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index fb15d7077edc4..51780bc7a64ce 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2457,7 +2457,7 @@ checkDevSupportDeviceRequirements(const device &Dev, auto AspectsPropIt = getPropIt("aspects"); auto ReqdWGSizeUint32TPropIt = getPropIt("reqd_work_group_size"); - auto ReqdWGSizeUint64TTPropIt = getPropIt("reqd_work_group_size_uint64_t"); + auto ReqdWGSizeUint64TPropIt = getPropIt("reqd_work_group_size_uint64_t"); auto ReqdSubGroupSizePropIt = getPropIt("reqd_sub_group_size"); // Checking if device supports defined aspects @@ -2476,10 +2476,16 @@ checkDevSupportDeviceRequirements(const device &Dev, } // Checking if device supports defined required work group size - if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TTPropIt) { - bool usingUint64_t = ReqdWGSizeUint64TTPropIt.has_value(); + if (ReqdWGSizeUint32TPropIt || ReqdWGSizeUint64TPropIt) { + /// TODO: Before intel/llvm#10620, the reqd_work_group_size attribute + // stores its values as uint32_t, but this needed to be expanded to + // uint64_t. However, this change did not happen in ABI-breaking + // window, so we attach the required work-group size as the + // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking + // window, we can remove the logic for the 32 bit property. + bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value(); auto it = - usingUint64_t ? ReqdWGSizeUint64TTPropIt : ReqdWGSizeUint32TPropIt; + usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt; ByteArray ReqdWGSize = DeviceBinaryProperty(*(it.value())).asByteArray(); // Drop 8 bytes describing the size of the byte array. @@ -2488,8 +2494,6 @@ checkDevSupportDeviceRequirements(const device &Dev, std::vector ReqdWGSizeVec; int Dims = 0; while (!ReqdWGSize.empty()) { - // The reqd_work_group_size data is stored as uint32_t's, - // but we'll widen the result to uint64_t. uint64_t SingleDimSize = usingUint64_t ? ReqdWGSize.consume() : ReqdWGSize.consume(); if (auto res = multiply_with_overflow_check(ReqdWGSizeAllDimsTotal, From febffa0cb080910335da1d7c85a4d6afee434999 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 8 Aug 2023 14:39:39 +0000 Subject: [PATCH 12/13] clang-format --- sycl/source/detail/program_manager/program_manager.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 51780bc7a64ce..7fc32f513c982 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2484,8 +2484,7 @@ checkDevSupportDeviceRequirements(const device &Dev, // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking // window, we can remove the logic for the 32 bit property. bool usingUint64_t = ReqdWGSizeUint64TPropIt.has_value(); - auto it = - usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt; + auto it = usingUint64_t ? ReqdWGSizeUint64TPropIt : ReqdWGSizeUint32TPropIt; ByteArray ReqdWGSize = DeviceBinaryProperty(*(it.value())).asByteArray(); // Drop 8 bytes describing the size of the byte array. From 12acd6fe864e3aacae354bcf1469a279169bc9bf Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 9 Aug 2023 15:12:34 +0000 Subject: [PATCH 13/13] Use numeric_limits --- .../OptionalKernelFeatures/large-reqd-work-group-size.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp index 73e50d34a2a68..2e610c8e75510 100644 --- a/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp +++ b/sycl/test-e2e/OptionalKernelFeatures/large-reqd-work-group-size.cpp @@ -34,26 +34,26 @@ void throws_kernel_not_supported(const char *test_name, FunctorT f) { int main(int argc, char *argv[]) { throws_kernel_not_supported("nd_range<1>", [] { - constexpr uint32_t N = 4294967295; + constexpr uint32_t N = std::numeric_limits::max(); q.parallel_for(nd_range<1>(N, N), [=](auto) [[sycl::reqd_work_group_size(N)]] {}); }); throws_kernel_not_supported("nd_range<2>", [] { - constexpr uint32_t N = 4294967295; + constexpr uint32_t N = std::numeric_limits::max(); q.parallel_for(nd_range<2>({N, N}, {N, N}), [=](auto) [[sycl::reqd_work_group_size(N, N)]] {}); }); throws_kernel_not_supported("nd_range<3>", [] { - constexpr uint32_t N = 4294967295; + constexpr uint32_t N = std::numeric_limits::max(); q.parallel_for(nd_range<3>({N, N, N}, {N, N, N}), [=](auto) [[sycl::reqd_work_group_size(N, N, N)]] {}); }); throws_kernel_not_supported("uint32_max+2", [] { - constexpr uint64_t N = 4294967297; + constexpr uint64_t N = std::numeric_limits::max() + uint64_t(2); q.parallel_for(nd_range<1>(N, N), [=](auto) [[sycl::reqd_work_group_size(N)]] {}); });