Skip to content

[SYCL] Relax linking workaround and test SYCL interlinking #19171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Jul 3, 2025
30 changes: 18 additions & 12 deletions sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,21 +248,26 @@ class kernel_bundle_impl

// Due to a bug in L0, specializations with conflicting IDs will overwrite
// each other when linked together, so to avoid this issue we link
// regular offline-compiled SYCL device images in separation.
// images with specialization constants in separation.
// TODO: Remove when spec const overwriting issue has been fixed in L0.
std::vector<const DevImgPlainWithDeps *> OfflineDeviceImages;
std::vector<const DevImgPlainWithDeps *> ImagesWithSpecConsts;
std::unordered_set<std::shared_ptr<device_image_impl>>
OfflineDeviceImageSet;
ImagesWithSpecConstsSet;
for (const kernel_bundle<bundle_state::object> &ObjectBundle :
ObjectBundles) {
for (const DevImgPlainWithDeps &DeviceImageWithDeps :
getSyclObjImpl(ObjectBundle)->MDeviceImages) {
if (getSyclObjImpl(DeviceImageWithDeps.getMain())->getOriginMask() &
ImageOriginSYCLOffline) {
OfflineDeviceImages.push_back(&DeviceImageWithDeps);
for (const device_image_plain &DevImg : DeviceImageWithDeps)
OfflineDeviceImageSet.insert(getSyclObjImpl(DevImg));
}
if (std::none_of(DeviceImageWithDeps.begin(), DeviceImageWithDeps.end(),
[](const device_image_plain &DevImg) {
const RTDeviceBinaryImage *BinImg =
getSyclObjImpl(DevImg)->get_bin_image_ref();
return BinImg && BinImg->getSpecConstants().size();
}))
continue;

ImagesWithSpecConsts.push_back(&DeviceImageWithDeps);
for (const device_image_plain &DevImg : DeviceImageWithDeps)
ImagesWithSpecConstsSet.insert(getSyclObjImpl(DevImg));
}
}

Expand All @@ -274,8 +279,8 @@ class kernel_bundle_impl
ObjectBundles)
for (const device_image_plain &DevImg :
getSyclObjImpl(ObjectBundle)->MUniqueDeviceImages)
if (OfflineDeviceImageSet.find(getSyclObjImpl(DevImg)) ==
OfflineDeviceImageSet.end())
if (ImagesWithSpecConstsSet.find(getSyclObjImpl(DevImg)) ==
ImagesWithSpecConstsSet.end())
DevImagesSet.insert(getSyclObjImpl(DevImg));
DevImages.reserve(DevImagesSet.size());
for (auto It = DevImagesSet.begin(); It != DevImagesSet.end();)
Expand Down Expand Up @@ -391,7 +396,8 @@ class kernel_bundle_impl
}

// ... And link the offline images in separation. (Workaround.)
for (const DevImgPlainWithDeps *DeviceImageWithDeps : OfflineDeviceImages) {
for (const DevImgPlainWithDeps *DeviceImageWithDeps :
ImagesWithSpecConsts) {
// Skip images which are not compatible with devices provided
if (std::none_of(MDevices.begin(), MDevices.end(),
[DeviceImageWithDeps](const device &Dev) {
Expand Down
5 changes: 4 additions & 1 deletion sycl/source/detail/program_manager/program_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2598,7 +2598,10 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState(
std::lock_guard<std::mutex> KernelIDsGuard(m_KernelIDsMutex);
ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage];
}
ImgInfo.Deps = collectDeviceImageDeps(*BinImage, {DevImpl});
ImgInfo.Deps =
collectDeviceImageDeps(*BinImage, {DevImpl},
/*ErrorOnUnresolvableImport=*/TargetState ==
bundle_state::executable);
}
const bundle_state ImgState = ImgInfo.State;
const std::shared_ptr<std::vector<sycl::kernel_id>> &ImageKernelIDs =
Expand Down
20 changes: 20 additions & 0 deletions sycl/test-e2e/SYCLBIN/Inputs/link_sycl_inline.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#include <sycl/sycl.hpp>

namespace syclext = sycl::ext::oneapi;
namespace syclexp = sycl::ext::oneapi::experimental;

typedef void (*FuncPtrT)(size_t *);

struct ArgsT {
size_t *Ptr;
FuncPtrT *FuncPtr;
};

SYCL_EXTERNAL size_t GetID() {
return syclext::this_work_item::get_nd_item<1>().get_global_id();
}

extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY(
(syclexp::nd_range_kernel<1>)) void Kernel(ArgsT Args) {
(**Args.FuncPtr)(Args.Ptr);
}
82 changes: 82 additions & 0 deletions sycl/test-e2e/SYCLBIN/Inputs/link_sycl_inline.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#include "common.hpp"

#include <sycl/usm.hpp>

namespace syclexp = sycl::ext::oneapi::experimental;

typedef void (*FuncPtrT)(size_t *);

struct ArgsT {
size_t *Ptr;
FuncPtrT *FuncPtr;
};

#ifdef __SYCL_DEVICE_ONLY__
SYCL_EXTERNAL size_t GetID();
#else
// Host-side code to avoid linker problems. Will never be called.
SYCL_EXTERNAL size_t GetID() { return 0; }
#endif

SYCL_EXTERNAL
void Func(size_t *Ptr) {
size_t GlobalID = GetID();
Ptr[GlobalID] = GlobalID;
}

SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel))
void GetFuncPtr(ArgsT Args) { *Args.FuncPtr = Func; }

constexpr size_t N = 32;

int main(int argc, char *argv[]) {
assert(argc == 2);

sycl::queue Q;

int Failed = CommonLoadCheck(Q.get_context(), argv[1]);

#if defined(SYCLBIN_INPUT_STATE)
auto SYCLBINInput = syclexp::get_kernel_bundle<sycl::bundle_state::input>(
Q.get_context(), std::string{argv[1]});
auto SYCLBINObj = sycl::compile(SYCLBINInput);
#elif defined(SYCLBIN_OBJECT_STATE)
auto SYCLBINObj = syclexp::get_kernel_bundle<sycl::bundle_state::object>(
Q.get_context(), std::string{argv[1]});
#else // defined(SYCLBIN_EXECUTABLE_STATE)
#error "Test does not work with executable state."
#endif

auto KBObj =
syclexp::get_kernel_bundle<GetFuncPtr, sycl::bundle_state::object>(
Q.get_context());
auto KBExe = sycl::link({KBObj, SYCLBINObj});

ArgsT Args{};
Args.FuncPtr = sycl::malloc_shared<FuncPtrT>(N, Q);
Args.Ptr = sycl::malloc_shared<size_t>(N, Q);

sycl::kernel GetFuncPtrKern = KBExe.ext_oneapi_get_kernel<GetFuncPtr>();
Q.submit([&](sycl::handler &CGH) {
CGH.set_args(Args);
CGH.single_task(GetFuncPtrKern);
}).wait();

sycl::kernel Kern = KBExe.ext_oneapi_get_kernel("Kernel");
Q.submit([&](sycl::handler &CGH) {
CGH.set_args(Args);
CGH.parallel_for(sycl::nd_range{{N}, {N}}, Kern);
}).wait();

for (size_t I = 0; I < N; ++I) {
if (Args.Ptr[I] != I) {
std::cout << Args.Ptr[I] << " != " << I << std::endl;
++Failed;
}
}

sycl::free(Args.FuncPtr, Q);
sycl::free(Args.Ptr, Q);

return Failed;
}
31 changes: 31 additions & 0 deletions sycl/test-e2e/SYCLBIN/link_sycl_inline_input.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
//==-------- link_sycl_inline_input.cpp --- SYCLBIN extension tests --------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// REQUIRES: aspect-usm_shared_allocations

// -- Test for linking between inline SYCL code and SYCLBIN code.

// ptxas currently fails to compile images with unresolved symbols. Disable for
// other targets than SPIR-V until this has been resolved. (CMPLRLLVM-68810)
// Note: %{sycl_target_opts} should be added to the SYCLBIN compilation lines
// once fixed.
// REQUIRES: target-spir

// XFAIL: opencl && cpu
// XFAIL-TRACKER: CMPLRLLVM-68800

// XFAIL: linux && arch-intel_gpu_bmg_g21
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/19258

// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies -Xclang -fsycl-allow-func-ptr %S/Inputs/link_sycl_inline.cpp -o %t.syclbin
// RUN: %{build} -fsycl-allow-device-image-dependencies -o %t.out
// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin

#define SYCLBIN_INPUT_STATE

#include "Inputs/link_sycl_inline.hpp"
32 changes: 32 additions & 0 deletions sycl/test-e2e/SYCLBIN/link_sycl_inline_object.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//==-------- link_sycl_inline_object.cpp --- SYCLBIN extension tests
//--------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// REQUIRES: aspect-usm_shared_allocations

// -- Test for linking between inline SYCL code and SYCLBIN code.

// ptxas currently fails to compile images with unresolved symbols. Disable for
// other targets than SPIR-V until this has been resolved. (CMPLRLLVM-68810)
// Note: %{sycl_target_opts} should be added to the SYCLBIN compilation lines
// once fixed.
// REQUIRES: target-spir

// XFAIL: opencl && cpu
// XFAIL-TRACKER: CMPLRLLVM-68800

// XFAIL: linux && arch-intel_gpu_bmg_g21
// XFAIL-TRACKER: https://github.com/intel/llvm/issues/19258

// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies -Xclang -fsycl-allow-func-ptr %S/Inputs/link_sycl_inline.cpp -o %t.syclbin
// RUN: %{build} -fsycl-allow-device-image-dependencies -o %t.out
// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin

#define SYCLBIN_OBJECT_STATE

#include "Inputs/link_sycl_inline.hpp"
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
// CHECK-DAG: README.md
// CHECK-DAG: lit.cfg.py
//
// CHECK-NUM-MATCHES: 28
// CHECK-NUM-MATCHES: 29
//
// This test verifies that `<sycl/sycl.hpp>` isn't used in E2E tests. Instead,
// fine-grained includes should used, see
Expand Down
2 changes: 1 addition & 1 deletion sycl/unittests/kernel-and-program/OutOfResources.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ TEST_P(OutOfResourcesTestSuite, urProgramLink) {
auto b3 = sycl::link({b1, b2});
EXPECT_FALSE(outOfResourcesToggle);
// one restart due to out of resources, one link per each of b1 and b2.
EXPECT_EQ(nProgramLink, 3);
EXPECT_EQ(nProgramLink, 2);
// no programs should be in the cache due to out of resources.
{
detail::KernelProgramCache::ProgramCache &Cache =
Expand Down