diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 13cb687f3b08b..d6c856af53623 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -211,6 +211,9 @@ class PropertySetRegistry { static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes"; static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions"; + static constexpr char PROPERTY_REQD_WORK_GROUP_SIZE[] = + "reqd_work_group_size_uint64_t"; + /// Function for bulk addition of an entire property set in the given /// \p Category . template void add(StringRef Category, const MapTy &Props) { @@ -230,6 +233,17 @@ class PropertySetRegistry { PropSet.insert({PropName, PropertyValue(PropVal)}); } + void remove(StringRef Category, StringRef PropName) { + auto PropertySetIt = PropSetMap.find(Category); + if (PropertySetIt == PropSetMap.end()) + return; + auto &PropertySet = PropertySetIt->second; + auto PropIt = PropertySet.find(PropName); + if (PropIt == PropertySet.end()) + return; + PropertySet.erase(PropIt); + } + /// Parses from the given \p Buf a property set registry. static Expected> read(const MemoryBuffer *Buf); diff --git a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp index b4a307707ced4..11f675625158d 100644 --- a/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp +++ b/llvm/lib/SYCLLowerIR/SYCLDeviceRequirements.cpp @@ -40,7 +40,6 @@ SYCLDeviceRequirements llvm::computeDeviceRequirements(const Module &M, const SetVector &EntryPoints) { SYCLDeviceRequirements Reqs; - bool MultipleReqdWGSize = false; // Process all functions in the module for (const Function &F : M) { if (auto *MDN = F.getMetadata("sycl_used_aspects")) { @@ -81,8 +80,6 @@ llvm::computeDeviceRequirements(const Module &M, ExtractUnsignedIntegerFromMDNodeOperand(MDN, I)); if (!Reqs.ReqdWorkGroupSize.has_value()) Reqs.ReqdWorkGroupSize = NewReqdWorkGroupSize; - if (Reqs.ReqdWorkGroupSize != NewReqdWorkGroupSize) - MultipleReqdWGSize = true; } if (auto *MDN = F.getMetadata("sycl_joint_matrix")) { @@ -119,13 +116,6 @@ llvm::computeDeviceRequirements(const Module &M, } } - // Usually, we would only expect one ReqdWGSize, as the module passed to - // this function would be split according to that. However, when splitting - // is disabled, this cannot be guaranteed. In this case, we reset the value, - // which makes so that no value is reqd_work_group_size data is attached in - // in the device image. - if (MultipleReqdWGSize) - Reqs.ReqdWorkGroupSize.reset(); return Reqs; } @@ -152,7 +142,8 @@ std::map SYCLDeviceRequirements::asMap() const { // reqd_work_group_size_uint64_t attribute. At the next ABI-breaking // window, this can be changed back to reqd_work_group_size. if (ReqdWorkGroupSize.has_value()) - Requirements["reqd_work_group_size_uint64_t"] = *ReqdWorkGroupSize; + Requirements[util::PropertySetRegistry::PROPERTY_REQD_WORK_GROUP_SIZE] = + *ReqdWorkGroupSize; if (JointMatrix.has_value()) Requirements["joint_matrix"] = *JointMatrix; diff --git a/llvm/tools/sycl-post-link/sycl-post-link.cpp b/llvm/tools/sycl-post-link/sycl-post-link.cpp index e84faf464c42e..91083cea614c7 100644 --- a/llvm/tools/sycl-post-link/sycl-post-link.cpp +++ b/llvm/tools/sycl-post-link/sycl-post-link.cpp @@ -310,6 +310,15 @@ std::string saveModuleProperties(module_split::ModuleDesc &MD, auto PropSet = computeModuleProperties(MD.getModule(), MD.entries(), GlobProps); + // When the split mode is none, the required work group size will be added + // to the whole module, which will make the runtime unable to + // launch the other kernels in the module that have different + // required work group sizes or no required work group sizes. So we need to + // remove the required work group size metadata in this case. + if (SplitMode == module_split::SPLIT_NONE) + PropSet.remove(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, + PropSetRegTy::PROPERTY_REQD_WORK_GROUP_SIZE); + std::string NewSuff = Suff.str(); if (!Target.empty()) { PropSet.add(PropSetRegTy::SYCL_DEVICE_REQUIREMENTS, "compile_target", diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp new file mode 100644 index 0000000000000..0ea4ed7a91775 --- /dev/null +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size-2.cpp @@ -0,0 +1,19 @@ +// This test checks that with -fsycl-device-code-split=off, kernels +// with different reqd_work_group_size dimensions can be launched. + +// RUN: %{build} -fsycl-device-code-split=off -o %t.out +// RUN: %{run} %t.out + +// UNSUPPORTED: hip + +#include + +using namespace sycl; + +int main(int argc, char **argv) { + queue q; + q.single_task([] {}); + q.parallel_for(range<2>(24, 1), + [=](auto) [[sycl::reqd_work_group_size(24, 1)]] {}); + return 0; +} diff --git a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp index fcc2764de8eaa..f2edbae90710c 100644 --- a/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp +++ b/sycl/test-e2e/Regression/no-split-reqd-wg-size.cpp @@ -1,7 +1,7 @@ // This test checks that with -fsycl-device-code-split=off, kernels // with different reqd_work_group_size dimensions can be launched. -// RUN: %{build} -fsycl -fsycl-device-code-split=off -o %t.out +// RUN: %{build} -fsycl-device-code-split=off -o %t.out // RUN: %{run} %t.out // UNSUPPORTED: hip