From 6d29c30aabda0e79e3edfef1f95f83ec242bb0e4 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 2 Dec 2024 17:25:44 +0000 Subject: [PATCH 1/8] [SYCL] Fix bug when using no device split and reqd_work_group_size --- .../SYCLLowerIR/ComputeModuleRuntimeInfo.h | 3 ++- .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 13 +++++++++++-- .../lib/SYCLLowerIR/SYCLDeviceRequirements.cpp | 10 ---------- llvm/tools/sycl-post-link/sycl-post-link.cpp | 2 +- .../Regression/no-split-reqd-wg-size-2.cpp | 18 ++++++++++++++++++ 5 files changed, 32 insertions(+), 14 deletions(-) create mode 100644 sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp diff --git a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h index e7cff6c730051..fdbf2d88461b2 100644 --- a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h @@ -34,7 +34,8 @@ using EntryPointSet = SetVector; PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, - const GlobalBinImageProps &GlobProps); + const GlobalBinImageProps &GlobProps, + module_split::IRSplitMode SplitMode); std::string computeModuleSymbolTable(const Module &M, const EntryPointSet &EntryPoints); diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index cfea28538017c..364f4d000d651 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -152,7 +152,8 @@ std::optional getKernelSingleEltMetadata(const Function &Func, PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, - const GlobalBinImageProps &GlobProps) { + const GlobalBinImageProps &GlobProps, + module_split::IRSplitMode SplitMode) { PropSetRegTy PropSet; { @@ -161,8 +162,16 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); } { + // Usually, we would only expect one ReqdWGSize, as the module passed to + // this function would be split according to that. However, when splitting + // is disabled, this cannot be guaranteed. In this case, we reset the value, + // which makes so that no value is reqd_work_group_size data is attached in + // in the device image. + SYCLDeviceRequirements DeviceReqs = computeDeviceRequirements(M, EntryPoints); + if (SplitMode == module_split::SPLIT_NONE) + DeviceReqs.ReqdWorkGroupSize.reset(); PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, - computeDeviceRequirements(M, EntryPoints).asMap()); + DeviceReqs.asMap()); } // extract spec constant maps per each module diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp index b4a307707ced4..b69eef02d06ef 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -40,7 +40,6 @@ SYCLDeviceRequirements llvm::computeDeviceRequirements(const Module &M, const SetVector &EntryPoints) { SYCLDeviceRequirements Reqs; - bool MultipleReqdWGSize = false; // Process all functions in the module for (const Function &F : M) { if (auto *MDN = F.getMetadata("sycl_used_aspects")) { @@ -81,8 +80,6 @@ llvm::computeDeviceRequirements(const Module &M, ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); if (!Reqs.ReqdWorkGroupSize.has_value()) Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize; - if (Reqs.ReqdWorkGroupSize != NewReqdWorkGroupSize) - MultipleReqdWGSize = true; } if (auto *MDN = F.getMetadata("sycl_joint_matrix")) { @@ -119,13 +116,6 @@ llvm::computeDeviceRequirements(const Module &M, } } - // Usually, we would only expect one ReqdWGSize, as the module passed to - // this function would be split according to that. However, when splitting - // is disabled, this cannot be guaranteed. In this case, we reset the value, - // which makes so that no value is reqd_work_group_size data is attached in - // in the device image. - if (MultipleReqdWGSize) - Reqs.ReqdWorkGroupSize.reset(); return Reqs; } diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index e84faf464c42e..df6691c4190ae 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -308,7 +308,7 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, StringRef Suff, StringRef Target = "") { auto PropSet = - computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); + computeModuleProperties(MD.getModule(), MD.entries(), GlobProps, SplitMode); std::string NewSuff = Suff.str(); if (!Target.empty()) { diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp new file mode 100644 index 0000000000000..9c71669c38eb9 --- /dev/null +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp @@ -0,0 +1,18 @@ +// This test checks that with -fsycl-device-code-split=off, kernels +// with different reqd_work_group_size dimensions can be launched. + +// RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out +// RUN: %{run} %t.out + +// UNSUPPORTED: hip + +#include + +using namespace sycl; + +int main(int argc, char **argv) { + queue q; + q.single_task([]{}); + q.parallel_for(range<2>(24, 1), [=](auto) [[sycl::reqd_work_group_size(24,1)]] {}); + return 0; +} From 3ab0bdf6ef2ebce26f7f8610f93971c64420e1b6 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 2 Dec 2024 20:30:32 +0000 Subject: [PATCH 2/8] Rework logic to not add another argument to computeModuleProperties --- .../llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h | 3 +-- llvm/include/llvm/Support/PropertySetIO.h | 11 +++++++++++ llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 13 ++----------- llvm/tools/sycl-post-link/sycl-post-link.cpp | 13 +++++++++++-- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h index fdbf2d88461b2..e7cff6c730051 100644 --- a/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h +++ b/llvm/include/llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h @@ -34,8 +34,7 @@ using EntryPointSet = SetVector; PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, - const GlobalBinImageProps &GlobProps, - module_split::IRSplitMode SplitMode); + const GlobalBinImageProps &GlobProps); std::string computeModuleSymbolTable(const Module &M, const EntryPointSet &EntryPoints); diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 13cb687f3b08b..bfb61b4884bec 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -230,6 +230,17 @@ class PropertySetRegistry { PropSet.insert({PropName, PropertyValue(PropVal)}); } + void remove(StringRef Category, StringRef PropName) { + auto PropertySetIt = PropSetMap.find(Category); + if (PropertySetIt == PropSetMap.end()) + return; + auto &PropertySet = PropertySetIt->second; + auto PropIt = PropertySet.find(PropName); + if (PropIt == PropertySet.end()) + return; + PropertySet.erase(PropIt); + } + /// Parses from the given \p Buf a property set registry. static Expected> read(const MemoryBuffer *Buf); diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index 364f4d000d651..cfea28538017c 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -152,8 +152,7 @@ std::optional getKernelSingleEltMetadata(const Function &Func, PropSetRegTy computeModuleProperties(const Module &M, const EntryPointSet &EntryPoints, - const GlobalBinImageProps &GlobProps, - module_split::IRSplitMode SplitMode) { + const GlobalBinImageProps &GlobProps) { PropSetRegTy PropSet; { @@ -162,16 +161,8 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_DEVICELIB_REQ_MASK, RMEntry); } { - // Usually, we would only expect one ReqdWGSize, as the module passed to - // this function would be split according to that. However, when splitting - // is disabled, this cannot be guaranteed. In this case, we reset the value, - // which makes so that no value is reqd_work_group_size data is attached in - // in the device image. - SYCLDeviceRequirements DeviceReqs = computeDeviceRequirements(M, EntryPoints); - if (SplitMode == module_split::SPLIT_NONE) - DeviceReqs.ReqdWorkGroupSize.reset(); PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, - DeviceReqs.asMap()); + computeDeviceRequirements(M, EntryPoints).asMap()); } // extract spec constant maps per each module diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index df6691c4190ae..86688969ca201 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -308,15 +308,24 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, const GlobalBinImageProps &GlobProps, int I, StringRef Suff, StringRef Target = "") { auto PropSet = - computeModuleProperties(MD.getModule(), MD.entries(), GlobProps, SplitMode); + computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); + + // When the split mode is none, the required work group size will be added + // to the whole module, which will make the runtime unable to + // launch the other kernels in the module that have different + // required work group sizes or no requried work group sizes. So we need to + // remove the required work group size metadata in this case. + if (SplitMode == module_split::SPLIT_NONE) + PropSet.remove(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, + "reqd_work_group_size_uint64_t"); std::string NewSuff = Suff.str(); if (!Target.empty()) { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, "compile_target", Target); NewSuff += "_"; - NewSuff += Target; } + NewSuff += Target; std::error_code EC; std::string SCFile = makeResultFileName(".prop", I, NewSuff); From 79e097569daf7fe86dd34598b8e1aaff18d75fc8 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 2 Dec 2024 20:31:27 +0000 Subject: [PATCH 3/8] Remove erroneous change --- llvm/tools/sycl-post-link/sycl-post-link.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 86688969ca201..35ef36cfaf9df 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -324,8 +324,8 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, "compile_target", Target); NewSuff += "_"; + NewSuff += Target; } - NewSuff += Target; std::error_code EC; std::string SCFile = makeResultFileName(".prop", I, NewSuff); From c68fd1761079462f82470c76398807291e4fdc1f Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Mon, 2 Dec 2024 20:56:14 +0000 Subject: [PATCH 4/8] clang-format --- sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp index 9c71669c38eb9..f80a977083f78 100644 --- a/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp @@ -12,7 +12,8 @@ using namespace sycl; int main(int argc, char **argv) { queue q; - q.single_task([]{}); - q.parallel_for(range<2>(24, 1), [=](auto) [[sycl::reqd_work_group_size(24,1)]] {}); + q.single_task([] {}); + q.parallel_for(range<2>(24, 1), + [=](auto) [[sycl::reqd_work_group_size(24, 1)]] {}); return 0; } From e78f9375fa6e3011b22aedca5ddd309c061a2c0b Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 4 Dec 2024 18:39:56 +0000 Subject: [PATCH 5/8] Address review comments --- llvm/include/llvm/Support/PropertySetIO.h | 8 ++++---- llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp | 3 ++- llvm/tools/sycl-post-link/sycl-post-link.cpp | 4 ++-- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index bfb61b4884bec..1e098411c0884 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -211,6 +211,9 @@ class PropertySetRegistry { static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes"; static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions"; + static constexpr char PROPERTY_REQD_WORK_GROUP_SIZE[] = + "reqd_work_group_size_uint64_t"; + /// Function for bulk addition of an entire property set in the given /// \p Category . template void add(StringRef Category, const MapTy &Props) { @@ -235,10 +238,7 @@ class PropertySetRegistry { if (PropertySetIt == PropSetMap.end()) return; auto &PropertySet = PropertySetIt->second; - auto PropIt = PropertySet.find(PropName); - if (PropIt == PropertySet.end()) - return; - PropertySet.erase(PropIt); + PropertySet.erase(PropertySet.find(PropName)); } /// Parses from the given \p Buf a property set registry. diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp index b69eef02d06ef..11f675625158d 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -142,7 +142,8 @@ std::map SYCLDeviceRequirements::asMap() const { // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking // window, this can be changed back to reqd_work_group_size. if (ReqdWorkGroupSize.has_value()) - Requirements["reqd_work_group_size_uint64_t"] = *ReqdWorkGroupSize; + Requirements[util::PropertySetRegistry::PROPERTY_REQD_WORK_GROUP_SIZE] = + *ReqdWorkGroupSize; if (JointMatrix.has_value()) Requirements["joint_matrix"] = *JointMatrix; diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index 35ef36cfaf9df..91083cea614c7 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -313,11 +313,11 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, // When the split mode is none, the required work group size will be added // to the whole module, which will make the runtime unable to // launch the other kernels in the module that have different - // required work group sizes or no requried work group sizes. So we need to + // required work group sizes or no required work group sizes. So we need to // remove the required work group size metadata in this case. if (SplitMode == module_split::SPLIT_NONE) PropSet.remove(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, - "reqd_work_group_size_uint64_t"); + PropSetRegTy::PROPERTY_REQD_WORK_GROUP_SIZE); std::string NewSuff = Suff.str(); if (!Target.empty()) { From 8db26d3906598cda1b3c692f65b4a62f2157f9bc Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 4 Dec 2024 20:03:52 +0000 Subject: [PATCH 6/8] Add back check --- llvm/include/llvm/Support/PropertySetIO.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 1e098411c0884..c1106bc4b5898 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -239,6 +239,10 @@ class PropertySetRegistry { return; auto &PropertySet = PropertySetIt->second; PropertySet.erase(PropertySet.find(PropName)); + auto PropIt = PropertySet.find(PropName); + if (PropIt == PropertySet.end()) + return; + PropertySet.erase(PropIt); } /// Parses from the given \p Buf a property set registry. From a6e5326be08dd7a1ed5c721e0cae9c0f4428660c Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Wed, 4 Dec 2024 12:11:43 -0800 Subject: [PATCH 7/8] Remove leftover line --- llvm/include/llvm/Support/PropertySetIO.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index c1106bc4b5898..d6c856af53623 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -238,7 +238,6 @@ class PropertySetRegistry { if (PropertySetIt == PropSetMap.end()) return; auto &PropertySet = PropertySetIt->second; - PropertySet.erase(PropertySet.find(PropName)); auto PropIt = PropertySet.find(PropName); if (PropIt == PropertySet.end()) return; From 1b1ff0d060dd137ae9b07c02d5ad786ced6f4e37 Mon Sep 17 00:00:00 2001 From: "Cai, Justin" Date: Thu, 5 Dec 2024 07:53:26 -0800 Subject: [PATCH 8/8] Remove extra -fsycl --- sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp | 2 +- sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp index f80a977083f78..0ea4ed7a91775 100644 --- a/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp @@ -1,7 +1,7 @@ // This test checks that with -fsycl-device-code-split=off, kernels // with different reqd_work_group_size dimensions can be launched. -// RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out +// RUN: %{build} -fsycl-device-code-split=off -o %t.out // RUN: %{run} %t.out // UNSUPPORTED: hip diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp index fcc2764de8eaa..f2edbae90710c 100644 --- a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp @@ -1,7 +1,7 @@ // This test checks that with -fsycl-device-code-split=off, kernels // with different reqd_work_group_size dimensions can be launched. -// RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out +// RUN: %{build} -fsycl-device-code-split=off -o %t.out // RUN: %{run} %t.out // UNSUPPORTED: hip