From f39488caecf03a54d6776d31f4ea7f430b7a8670 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 22 Apr 2024 13:07:46 -0700 Subject: [PATCH 1/5] [SYCL] Do not attach reqd_work_group_size info when multiple are detected --- .../SYCLLowerIR/SYCLDeviceRequirements.cpp | 6 +++ .../Regression/no-split-reqd-wg-size.cpp | 39 +++++++++++++++++++ 2 files changed, 45 insertions(+) create mode 100644 sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp index 6c0f1c952030b..c2699e48f8309 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -38,6 +38,7 @@ static llvm::StringRef ExtractStringFromMDNodeOperand(const MDNode *N, SYCLDeviceRequirements llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { SYCLDeviceRequirements Reqs; + bool MultipleReqdWGSize = false; // Process all functions in the module for (const Function &F : MD.getModule()) { if (auto *MDN = F.getMetadata("sycl_used_aspects")) { @@ -64,6 +65,8 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); if (!Reqs.ReqdWorkGroupSize.has_value()) Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize; + else + MultipleReqdWGSize = true; } if (auto *MDN = F.getMetadata("sycl_joint_matrix")) { @@ -99,6 +102,9 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { assert(*Reqs.SubGroupSize == static_cast(MDValue)); } } + + if (MultipleReqdWGSize) + Reqs.ReqdWorkGroupSize.reset(); return Reqs; } diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp new file mode 100644 index 0000000000000..d32ac347f8ca1 --- /dev/null +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp @@ -0,0 +1,39 @@ +// This test checks that with -fsycl-device-code-split=off, two kernels +// with different reqd_work_group_size dimensions can be launched. +// RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out +// RUN: %{run} %t.out +#include + +constexpr int WGSIZE = 4; + +using namespace sycl; + +void kernel_launch_2(queue &q) { + range<1> globalRange(WGSIZE); + range<1> localRange(WGSIZE); + nd_range<1> NDRange(globalRange, localRange); + q.submit([&](handler &cgh) { + cgh.parallel_for( + NDRange, [=](nd_item<1> it) [[sycl::reqd_work_group_size(WGSIZE)]] {}); + }).wait(); +} + +void kernel_launch(queue &q) { + range<2> globalRange(WGSIZE, WGSIZE); + range<2> localRange(WGSIZE, WGSIZE); + nd_range<2> NDRange(globalRange, localRange); + q.submit([&](handler &cgh) { + cgh.parallel_for( + NDRange, + [=](nd_item<2> it) [[sycl::reqd_work_group_size(WGSIZE, WGSIZE)]] {}); + }).wait(); +} + +int main(int argc, char **argv) { + queue q; + + kernel_launch_2(q); + kernel_launch(q); + + return 0; +} From 65248c609a0075c9dbe7af2db48b9f6046c06f3e Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 23 Apr 2024 07:46:04 -0700 Subject: [PATCH 2/5] Fix logic --- llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp index c2699e48f8309..0b5e12b0ed8a2 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -65,7 +65,7 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); if (!Reqs.ReqdWorkGroupSize.has_value()) Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize; - else + if (Reqs.ReqdWorkGroupSize != NewReqdWorkGroupSize) MultipleReqdWGSize = true; } From 9620b2e8c6b860a1d917a517646cda20b7bce28d Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Tue, 23 Apr 2024 13:13:14 -0700 Subject: [PATCH 3/5] Tidy up test --- .../Regression/no-split-reqd-wg-size.cpp | 41 +++++++------------ 1 file changed, 14 insertions(+), 27 deletions(-) diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp index d32ac347f8ca1..a1d739fd7a224 100644 --- a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp @@ -1,39 +1,26 @@ -// This test checks that with -fsycl-device-code-split=off, two kernels +// This test checks that with -fsycl-device-code-split=off, kernels // with different reqd_work_group_size dimensions can be launched. + // RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out // RUN: %{run} %t.out -#include -constexpr int WGSIZE = 4; +#include using namespace sycl; -void kernel_launch_2(queue &q) { - range<1> globalRange(WGSIZE); - range<1> localRange(WGSIZE); - nd_range<1> NDRange(globalRange, localRange); - q.submit([&](handler &cgh) { - cgh.parallel_for( - NDRange, [=](nd_item<1> it) [[sycl::reqd_work_group_size(WGSIZE)]] {}); - }).wait(); -} - -void kernel_launch(queue &q) { - range<2> globalRange(WGSIZE, WGSIZE); - range<2> localRange(WGSIZE, WGSIZE); - nd_range<2> NDRange(globalRange, localRange); - q.submit([&](handler &cgh) { - cgh.parallel_for( - NDRange, - [=](nd_item<2> it) [[sycl::reqd_work_group_size(WGSIZE, WGSIZE)]] {}); - }).wait(); -} +#define TEST(...) \ + { \ + range globalRange(__VA_ARGS__); \ + range localRange(__VA_ARGS__); \ + nd_range NDRange(globalRange, localRange); \ + q.parallel_for(NDRange, \ + [=](auto) [[sycl::reqd_work_group_size(__VA_ARGS__)]] {}); \ + } int main(int argc, char **argv) { queue q; - - kernel_launch_2(q); - kernel_launch(q); - + TEST(4); + TEST(4, 5); + TEST(4, 5, 6); return 0; } From 087ae066e5bd07c5aece146ad1d963628fa2c94f Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 24 Apr 2024 06:47:10 -0700 Subject: [PATCH 4/5] Use core.hpp and unsupport hip --- sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp index a1d739fd7a224..fcc2764de8eaa 100644 --- a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp @@ -4,7 +4,9 @@ // RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out // RUN: %{run} %t.out -#include +// UNSUPPORTED: hip + +#include using namespace sycl; From 50887d92fd13a20f6d443fc4cd6ec5284adbd243 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 24 Apr 2024 06:57:49 -0700 Subject: [PATCH 5/5] Add comment --- llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp index 0b5e12b0ed8a2..66ae2f10367b1 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -103,6 +103,11 @@ llvm::computeDeviceRequirements(const module_split::ModuleDesc &MD) { } } + // Usually, we would only expect one ReqdWGSize, as the module passed to + // this function would be split according to that. However, when splitting + // is disabled, this cannot be guaranteed. In this case, we reset the value, + // which makes so that no value is reqd_work_group_size data is attached in + // in the device image. if (MultipleReqdWGSize) Reqs.ReqdWorkGroupSize.reset(); return Reqs;