From a3f0fd8f186da52586deacd8dbf4de5324069097 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 24 Aug 2021 13:33:15 -0700 Subject: [PATCH 1/5] seperate linking of kernel-bundles. when fsycl-device-code-split=per_kernel is used in conjunction with multiple kernels, then explicit sycl::link(sycl::compile(my-kernel-bundle)) will fail, because we link all device images in a kernel together. But, the device-images are independent, there is presently no known situation where they should be linked together. So this PR works around this limitation by linking them seperately. This may have to be revisited once sharedlibrary style linking is supported, but that is likely true whether this bug is fixed or not. Signed-off-by: Chris Perkins --- sycl/source/detail/kernel_bundle_impl.hpp | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index daa16ef08a717..7769d51a723f8 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -192,7 +192,6 @@ class kernel_bundle_impl { // TODO: Unify with c'tor for sycl::comile and sycl::build by calling // sycl::join on vector of kernel_bundles - std::vector DeviceImages; for (const kernel_bundle &ObjectBundle : ObjectBundles) { for (const device_image_plain &DeviceImage : ObjectBundle) { @@ -205,13 +204,15 @@ class kernel_bundle_impl { })) continue; - DeviceImages.insert(DeviceImages.end(), DeviceImage); + const std::vector VectorOfOneImage{DeviceImage}; + std::vector LinkedResults = + detail::ProgramManager::getInstance().link(VectorOfOneImage, + MDevices, PropList); + MDeviceImages.insert(MDeviceImages.end(), LinkedResults.begin(), + LinkedResults.end()); } } - MDeviceImages = detail::ProgramManager::getInstance().link( - std::move(DeviceImages), MDevices, PropList); - for (const kernel_bundle &Bundle : ObjectBundles) { const KernelBundleImplPtr BundlePtr = getSyclObjImpl(Bundle); for (const std::pair> From dca69d69f6586477f38df676fc74fc817e2f273f Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 30 Aug 2021 12:38:47 -0700 Subject: [PATCH 2/5] added test to cover linking multiple kernels. This depends upon PR 4407 which is not yet merged. Signed-off-by: Chris Perkins --- .../basic_tests/multiple-kernel-linking.cpp | 62 +++++++++++++++++++ 1 file changed, 62 insertions(+) create mode 100644 sycl/test/on-device/basic_tests/multiple-kernel-linking.cpp diff --git a/sycl/test/on-device/basic_tests/multiple-kernel-linking.cpp b/sycl/test/on-device/basic_tests/multiple-kernel-linking.cpp new file mode 100644 index 0000000000000..997813c30a8f5 --- /dev/null +++ b/sycl/test/on-device/basic_tests/multiple-kernel-linking.cpp @@ -0,0 +1,62 @@ +// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-device-code-split=per_kernel %s -o %t_per_kernel.out +// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-device-code-split=per_source %s -o %t_per_source.out +// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-device-code-split=off %s -o %t_off.out +// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-device-code-split=auto %s -o %t_auto.out +// RUN: %GPU_RUN_PLACEHOLDER %t_per_kernel.out %GPU_CHECK_PLACEHOLDER +// RUN: %CPU_RUN_PLACEHOLDER %t_per_kernel.out %CPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t_per_kernel.out %ACC_CHECK_PLACEHOLDER + +// RUN: %GPU_RUN_PLACEHOLDER %t_per_source.out %GPU_CHECK_PLACEHOLDER +// RUN: %CPU_RUN_PLACEHOLDER %t_per_source.out %CPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t_per_source.out %ACC_CHECK_PLACEHOLDER + +// RUN: %GPU_RUN_PLACEHOLDER %t_auto.out %GPU_CHECK_PLACEHOLDER +// RUN: %CPU_RUN_PLACEHOLDER %t_auto.out %CPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t_auto.out %ACC_CHECK_PLACEHOLDER + +// RUN: %GPU_RUN_PLACEHOLDER %t_off.out %GPU_CHECK_PLACEHOLDER +// RUN: %CPU_RUN_PLACEHOLDER %t_off.out %CPU_CHECK_PLACEHOLDER +// RUN: %ACC_RUN_PLACEHOLDER %t_off.out %ACC_CHECK_PLACEHOLDER + +#include + +using namespace sycl; + +// This function is used by two different kernels. +// We want to ensure that it does not lead so multiple symbol collision +// when building an executable via sycl::compile and sycl::link. +int foo(int a) { return a + 1; } + +template class kernel_name {}; + +int main() { + try { + queue q; + auto input = get_kernel_bundle( + q.get_context(), {q.get_device()}, + {get_kernel_id>(), get_kernel_id>()}); + + auto compiled = sycl::compile(input); + kernel_bundle linked = sycl::link(compiled); + + buffer b(range{1}); + q.submit([&](handler &cgh) { + cgh.use_kernel_bundle(linked); + auto acc = b.get_access(cgh); + cgh.single_task>([=]() { acc[0] = foo(acc[0]); }); + }); + + q.submit([&](handler &cgh) { + auto acc = b.get_access(cgh); + cgh.single_task>([=]() { acc[0] = foo(acc[0]); }); + }); + } catch (exception &e) { + std::cout << "Exception: " << e.what() << std::endl; + return 1; + } + + std::cout << "OK"; + return 0; +} + +//CHECK: OK \ No newline at end of file From ce7435b847ad02c55c0940a3e7561cbecb193605 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 2 Sep 2021 14:29:22 -0700 Subject: [PATCH 3/5] moving test to llvm-test-suite Signed-off-by: Chris Perkins --- .../basic_tests/multiple-kernel-linking.cpp | 62 ------------------- 1 file changed, 62 deletions(-) delete mode 100644 sycl/test/on-device/basic_tests/multiple-kernel-linking.cpp diff --git a/sycl/test/on-device/basic_tests/multiple-kernel-linking.cpp b/sycl/test/on-device/basic_tests/multiple-kernel-linking.cpp deleted file mode 100644 index 997813c30a8f5..0000000000000 --- a/sycl/test/on-device/basic_tests/multiple-kernel-linking.cpp +++ /dev/null @@ -1,62 +0,0 @@ -// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-device-code-split=per_kernel %s -o %t_per_kernel.out -// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-device-code-split=per_source %s -o %t_per_source.out -// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-device-code-split=off %s -o %t_off.out -// RUN: %clangxx -fsycl -fno-sycl-early-optimizations -fsycl-device-code-split=auto %s -o %t_auto.out -// RUN: %GPU_RUN_PLACEHOLDER %t_per_kernel.out %GPU_CHECK_PLACEHOLDER -// RUN: %CPU_RUN_PLACEHOLDER %t_per_kernel.out %CPU_CHECK_PLACEHOLDER -// RUN: %ACC_RUN_PLACEHOLDER %t_per_kernel.out %ACC_CHECK_PLACEHOLDER - -// RUN: %GPU_RUN_PLACEHOLDER %t_per_source.out %GPU_CHECK_PLACEHOLDER -// RUN: %CPU_RUN_PLACEHOLDER %t_per_source.out %CPU_CHECK_PLACEHOLDER -// RUN: %ACC_RUN_PLACEHOLDER %t_per_source.out %ACC_CHECK_PLACEHOLDER - -// RUN: %GPU_RUN_PLACEHOLDER %t_auto.out %GPU_CHECK_PLACEHOLDER -// RUN: %CPU_RUN_PLACEHOLDER %t_auto.out %CPU_CHECK_PLACEHOLDER -// RUN: %ACC_RUN_PLACEHOLDER %t_auto.out %ACC_CHECK_PLACEHOLDER - -// RUN: %GPU_RUN_PLACEHOLDER %t_off.out %GPU_CHECK_PLACEHOLDER -// RUN: %CPU_RUN_PLACEHOLDER %t_off.out %CPU_CHECK_PLACEHOLDER -// RUN: %ACC_RUN_PLACEHOLDER %t_off.out %ACC_CHECK_PLACEHOLDER - -#include - -using namespace sycl; - -// This function is used by two different kernels. -// We want to ensure that it does not lead so multiple symbol collision -// when building an executable via sycl::compile and sycl::link. -int foo(int a) { return a + 1; } - -template class kernel_name {}; - -int main() { - try { - queue q; - auto input = get_kernel_bundle( - q.get_context(), {q.get_device()}, - {get_kernel_id>(), get_kernel_id>()}); - - auto compiled = sycl::compile(input); - kernel_bundle linked = sycl::link(compiled); - - buffer b(range{1}); - q.submit([&](handler &cgh) { - cgh.use_kernel_bundle(linked); - auto acc = b.get_access(cgh); - cgh.single_task>([=]() { acc[0] = foo(acc[0]); }); - }); - - q.submit([&](handler &cgh) { - auto acc = b.get_access(cgh); - cgh.single_task>([=]() { acc[0] = foo(acc[0]); }); - }); - } catch (exception &e) { - std::cout << "Exception: " << e.what() << std::endl; - return 1; - } - - std::cout << "OK"; - return 0; -} - -//CHECK: OK \ No newline at end of file From 9cf1a1be1af5e2bee3ae6e17457d4345db1d0371 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 3 Sep 2021 12:56:52 -0700 Subject: [PATCH 4/5] inserted comment Signed-off-by: Chris Perkins --- sycl/source/detail/kernel_bundle_impl.hpp | 1 + 1 file changed, 1 insertion(+) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 7769d51a723f8..a669db60b5ae5 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -192,6 +192,7 @@ class kernel_bundle_impl { // TODO: Unify with c'tor for sycl::comile and sycl::build by calling // sycl::join on vector of kernel_bundles + // The loop below just links each device image separately, not linking any two device images together. This is correct so long as each device image has no unresolved symbols. That's the case when device images are created from generic SYCL APIs. There's no way in generic SYCL to create a kernel which references an undefined symbol. If we decide in the future to allow a backend interop API to create a "sycl::kernel_bundle" that references undefined symbols, then the logic in this loop will need to be changed. for (const kernel_bundle &ObjectBundle : ObjectBundles) { for (const device_image_plain &DeviceImage : ObjectBundle) { From f31d62f6831f8dca6d12ec20c8757cabfc2e1c56 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 3 Sep 2021 13:43:45 -0700 Subject: [PATCH 5/5] overlooked clang-format --- sycl/source/detail/kernel_bundle_impl.hpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index a669db60b5ae5..9d66f9a24df55 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -192,7 +192,13 @@ class kernel_bundle_impl { // TODO: Unify with c'tor for sycl::comile and sycl::build by calling // sycl::join on vector of kernel_bundles - // The loop below just links each device image separately, not linking any two device images together. This is correct so long as each device image has no unresolved symbols. That's the case when device images are created from generic SYCL APIs. There's no way in generic SYCL to create a kernel which references an undefined symbol. If we decide in the future to allow a backend interop API to create a "sycl::kernel_bundle" that references undefined symbols, then the logic in this loop will need to be changed. + // The loop below just links each device image separately, not linking any + // two device images together. This is correct so long as each device image + // has no unresolved symbols. That's the case when device images are created + // from generic SYCL APIs. There's no way in generic SYCL to create a kernel + // which references an undefined symbol. If we decide in the future to allow + // a backend interop API to create a "sycl::kernel_bundle" that references + // undefined symbols, then the logic in this loop will need to be changed. for (const kernel_bundle &ObjectBundle : ObjectBundles) { for (const device_image_plain &DeviceImage : ObjectBundle) {