From 88c27ff4a032d98853409c53b7ed8bf7522bdf4a Mon Sep 17 00:00:00 2001 From: Ross Brunton Date: Wed, 6 Aug 2025 14:13:28 +0100 Subject: [PATCH 1/6] Handle `MAX_WORK_GROUP_SIZE` changes (#19695) Offload changed the semantics of `OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE`, so update UR to follow. --- source/adapters/offload/device.cpp | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/source/adapters/offload/device.cpp b/source/adapters/offload/device.cpp index 9990258dae..1c026de279 100644 --- a/source/adapters/offload/device.cpp +++ b/source/adapters/offload/device.cpp @@ -85,6 +85,21 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: // TODO: Implement subgroups in Offload return ReturnValue(1); + case UR_DEVICE_INFO_MAX_WORK_GROUP_SIZE: + if (pPropSizeRet) { + *pPropSizeRet = sizeof(size_t); + } + + if (pPropValue) { + uint32_t as32; + OL_RETURN_ON_ERR(olGetDeviceInfo(hDevice->OffloadDevice, + OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE, + sizeof(as32), &as32)); + + *reinterpret_cast(pPropValue) = as32; + } + + return UR_RESULT_SUCCESS; case UR_DEVICE_INFO_MAX_WORK_ITEM_SIZES: { // OL dimensions are uint32_t while UR is size_t, so they need to be mapped if (pPropSizeRet) { @@ -94,9 +109,10 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice, if (pPropValue) { ol_dimensions_t olVec; size_t *urVec = reinterpret_cast(pPropValue); - OL_RETURN_ON_ERR(olGetDeviceInfo(hDevice->OffloadDevice, - OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE, - sizeof(olVec), &olVec)); + OL_RETURN_ON_ERR( + olGetDeviceInfo(hDevice->OffloadDevice, + OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION, + sizeof(olVec), &olVec)); urVec[0] = olVec.x; urVec[1] = olVec.y; From 26222c4b3f96c45bf24c07587f90b0ed6397840e Mon Sep 17 00:00:00 2001 From: Maosu Zhao Date: Wed, 6 Aug 2025 21:37:54 +0800 Subject: [PATCH 2/6] Move AllocInfo into DeviceInfo to support indirect access (#19634) If 
we maintain AllocInfo in ContextInfo, a pointer that was allocated by another context can't be poisoned. --- .../layers/sanitizer/tsan/tsan_buffer.cpp | 6 ++-- .../sanitizer/tsan/tsan_interceptor.cpp | 35 ++++++++++++------- .../sanitizer/tsan/tsan_interceptor.hpp | 13 ++++--- 3 files changed, 33 insertions(+), 21 deletions(-) diff --git a/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp b/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp index c42a39d7cc..d95d1c6409 100644 --- a/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp +++ b/source/loader/layers/sanitizer/tsan/tsan_buffer.cpp @@ -186,8 +186,10 @@ ur_result_t MemBuffer::getHandle(ur_device_handle_t Device, char *&Handle) { } ur_result_t MemBuffer::free() { - for (const auto &[_, Ptr] : Allocations) { - ur_result_t URes = getTsanInterceptor()->releaseMemory(Context, Ptr); + for (const auto &[Device, Ptr] : Allocations) { + ur_result_t URes = Device + ? getTsanInterceptor()->releaseMemory(Context, Ptr) + : getContext()->urDdiTable.USM.pfnFree(Context, Ptr); if (URes != UR_RESULT_SUCCESS) { UR_LOG_L(getContext()->logger, ERR, "Failed to free buffer handle {}", (void *)Ptr); diff --git a/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp b/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp index 86c12f9e89..3f9248489f 100644 --- a/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp +++ b/source/loader/layers/sanitizer/tsan/tsan_interceptor.cpp @@ -101,7 +101,7 @@ ur_result_t DeviceInfo::allocShadowMemory() { return UR_RESULT_SUCCESS; } -void ContextInfo::insertAllocInfo(TsanAllocInfo AI) { +void DeviceInfo::insertAllocInfo(TsanAllocInfo AI) { std::scoped_lock Guard(AllocInfosMutex); AllocInfos.insert(std::move(AI)); } @@ -153,7 +153,15 @@ ur_result_t TsanInterceptor::allocateMemory(ur_context_handle_t Context, auto AI = TsanAllocInfo{reinterpret_cast(Allocated), Size}; // For updating shadow memory - CI->insertAllocInfo(std::move(AI)); + if (Device) { + auto DI = 
getDeviceInfo(Device); + DI->insertAllocInfo(std::move(AI)); + } else { + for (const auto &Device : CI->DeviceList) { + auto DI = getDeviceInfo(Device); + DI->insertAllocInfo(AI); + } + } *ResultPtr = Allocated; return UR_RESULT_SUCCESS; @@ -163,11 +171,14 @@ ur_result_t TsanInterceptor::releaseMemory(ur_context_handle_t Context, void *Ptr) { auto CI = getContextInfo(Context); auto Addr = reinterpret_cast(Ptr); - { - std::scoped_lock Guard(CI->AllocInfosMutex); - auto It = std::find_if(CI->AllocInfos.begin(), CI->AllocInfos.end(), + + for (const auto &Device : CI->DeviceList) { + auto DI = getDeviceInfo(Device); + std::scoped_lock Guard(DI->AllocInfosMutex); + auto It = std::find_if(DI->AllocInfos.begin(), DI->AllocInfos.end(), [&](auto &P) { return P.AllocBegin == Addr; }); - CI->AllocInfos.erase(It); + if (It != DI->AllocInfos.end()) + DI->AllocInfos.erase(It); } UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, Ptr)); @@ -343,7 +354,7 @@ ur_result_t TsanInterceptor::preLaunchKernel(ur_kernel_handle_t Kernel, UR_CALL(prepareLaunch(CI, DI, InternalQueue, Kernel, LaunchInfo)); - UR_CALL(updateShadowMemory(CI, DI, Kernel, InternalQueue)); + UR_CALL(updateShadowMemory(DI, Kernel, InternalQueue)); UR_CALL(getContext()->urDdiTable.Queue.pfnFinish(InternalQueue)); @@ -470,12 +481,12 @@ ur_result_t TsanInterceptor::prepareLaunch(std::shared_ptr &, return UR_RESULT_SUCCESS; } -ur_result_t TsanInterceptor::updateShadowMemory( - std::shared_ptr &CI, std::shared_ptr &DI, - ur_kernel_handle_t Kernel, ur_queue_handle_t Queue) { +ur_result_t TsanInterceptor::updateShadowMemory(std::shared_ptr &DI, + ur_kernel_handle_t Kernel, + ur_queue_handle_t Queue) { auto &PI = getProgramInfo(GetProgram(Kernel)); - std::scoped_lock Guard(CI->AllocInfosMutex); - for (auto &AllocInfo : CI->AllocInfos) { + std::scoped_lock Guard(DI->AllocInfosMutex); + for (auto &AllocInfo : DI->AllocInfos) { UR_CALL(DI->Shadow->CleanShadow(Queue, AllocInfo.AllocBegin, AllocInfo.AllocSize)); } diff --git 
a/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp b/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp index e700e70294..eefcba4036 100644 --- a/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp +++ b/source/loader/layers/sanitizer/tsan/tsan_interceptor.hpp @@ -44,9 +44,14 @@ struct DeviceInfo { std::shared_ptr Shadow; + ur_shared_mutex AllocInfosMutex; + std::set AllocInfos; + explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {} ur_result_t allocShadowMemory(); + + void insertAllocInfo(TsanAllocInfo AI); }; struct ContextInfo { @@ -56,9 +61,6 @@ struct ContextInfo { std::vector DeviceList; - ur_shared_mutex AllocInfosMutex; - std::set AllocInfos; - ur_shared_mutex InternalQueueMapMutex; std::unordered_map> InternalQueueMap; @@ -80,8 +82,6 @@ struct ContextInfo { ContextInfo &operator=(const ContextInfo &) = delete; - void insertAllocInfo(TsanAllocInfo AI); - ur_queue_handle_t getInternalQueue(ur_device_handle_t); }; @@ -297,8 +297,7 @@ class TsanInterceptor { ur_shared_mutex KernelLaunchMutex; private: - ur_result_t updateShadowMemory(std::shared_ptr &CI, - std::shared_ptr &DI, + ur_result_t updateShadowMemory(std::shared_ptr &DI, ur_kernel_handle_t Kernel, ur_queue_handle_t Queue); From bed7406e263880f89378597e197014088605a76a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C5=81ukasz=20Stolarczuk?= Date: Wed, 6 Aug 2025 16:04:22 +0200 Subject: [PATCH 3/6] Properly skip L0v2 kernels' tests if UR_DPCXX is missing (#19714) Without this change, when configuring UR and `UR_DPCXX` wasn't set I was getting an error about missing `generate_device_binaries` target. 
--- test/adapters/level_zero/CMakeLists.txt | 4 ++-- test/adapters/level_zero/v2/CMakeLists.txt | 19 +++++++++++++------ 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/test/adapters/level_zero/CMakeLists.txt b/test/adapters/level_zero/CMakeLists.txt index d43ca1cdb5..f0ff0ba890 100644 --- a/test/adapters/level_zero/CMakeLists.txt +++ b/test/adapters/level_zero/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -39,7 +39,7 @@ if(NOT UR_FOUND_DPCXX) # Tests that require kernels can't be used if we aren't generating # device binaries message(WARNING - "UR_DPCXX is not defined, skipping some adapter tests for ${adapter}") + "UR_DPCXX is not defined, skipping kernels' tests for L0") else() add_conformance_kernels_test(link urProgramLink.cpp) add_l0_loader_kernels_test(kernel_create urKernelCreateWithNativeHandle.cpp) diff --git a/test/adapters/level_zero/v2/CMakeLists.txt b/test/adapters/level_zero/v2/CMakeLists.txt index bd57d78459..bc2d0b0966 100644 --- a/test/adapters/level_zero/v2/CMakeLists.txt +++ b/test/adapters/level_zero/v2/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # Part of the Unified-Runtime Project, under the Apache License v2.0 with LLVM Exceptions. 
# See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception @@ -67,8 +67,15 @@ add_l0_v2_devices_test(memory_residency ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp ) -add_l0_v2_kernels_test(deferred_kernel - deferred_kernel.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/common.cpp - ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp -) +if(NOT UR_FOUND_DPCXX) + # Tests that require kernels can't be used if we aren't generating + # device binaries + message(WARNING + "UR_DPCXX is not defined, skipping kernels' tests for L0v2") +else() + add_l0_v2_kernels_test(deferred_kernel + deferred_kernel.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/common.cpp + ${PROJECT_SOURCE_DIR}/source/adapters/level_zero/ur_level_zero.cpp + ) +endif() From daa43d8610b8e98e702eb01d3ed35c0888d773b8 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Wed, 6 Aug 2025 15:05:42 +0000 Subject: [PATCH 4/6] Support preinstalled GTest (#19720) Don't fetch and build it if a compatible version is installed on the system. GMock is considered part of GTest to CMake (and not even a component) but in some Linux distros it's a separate package, so we need to handle the case where one is installed but not the other. Issue: https://github.com/intel/llvm/issues/19635 Signed-off-by: Sarnie, Nick --- test/CMakeLists.txt | 31 ++++++++++++++++++++++--------- 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 431b46d785..8ab7543fba 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,12 +3,23 @@ # See LICENSE.TXT # SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -include(FetchContent) -FetchContent_Declare( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG v1.13.0 -) +set(GTEST_VER 1.13.0) + +find_package(GTest ${GTEST_VER} QUIET) + +if(GTest_FOUND AND NOT TARGET GTest::gmock) + message(WARNING "Found system install of GTest but not GMock. 
Building GTest and GMock from source") + set(GTest_FOUND FALSE) +endif() + +if(NOT GTest_FOUND) + include(FetchContent) + FetchContent_Declare( + googletest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG v${GTEST_VER} + ) +endif() include(FindLit) @@ -22,8 +33,10 @@ if(CMAKE_CXX_COMPILER_ID STREQUAL "Clang" AND UR_DPCXX AND UR_TEST_FUZZTESTS) set(UR_FUZZTESTING_ENABLED ON) endif() -set(INSTALL_GTEST OFF) -FetchContent_MakeAvailable(googletest) +if(NOT GTest_FOUND) + set(INSTALL_GTEST OFF) + FetchContent_MakeAvailable(googletest) +endif() enable_testing() # At the time of writing this comment, this is only used for level_zero adapter testing. @@ -112,7 +125,7 @@ function(add_gtest_test name) add_testing_binary(${TEST_TARGET_NAME} ${ARGN}) target_link_libraries(${TEST_TARGET_NAME} PRIVATE - gmock + GTest::gmock GTest::gtest_main) endfunction() From 73b9e5e5307f0480c834830bb61e24d87a2ee107 Mon Sep 17 00:00:00 2001 From: Nick Sarnie Date: Wed, 6 Aug 2025 16:55:15 +0000 Subject: [PATCH 5/6] Improve handling of UMF and hdr_histogram (#19705) First, add support for pre-installed `hdr_histogram`. Second, make `UR_USE_EXTERNAL_UMF` on by default and have it fall back to `FetchContent` if it is not found instead of erroring. The user can disable it to force `FetchContent` even if they have a preinstalled version. We don't need repo or tag variables because the user can just set `FETCHCONTENT_SOURCE_DIR_UNIFIED-MEMORY-FRAMEWORK`, and this matches how we handle other deps. 
Issue: https://github.com/intel/llvm/issues/19635 --------- Signed-off-by: Sarnie, Nick --- source/common/CMakeLists.txt | 60 +++++++++++++++++++----------------- 1 file changed, 32 insertions(+), 28 deletions(-) diff --git a/source/common/CMakeLists.txt b/source/common/CMakeLists.txt index 4f20f4a29e..487c1ff9df 100644 --- a/source/common/CMakeLists.txt +++ b/source/common/CMakeLists.txt @@ -36,26 +36,6 @@ target_include_directories(ur_common PUBLIC $ ) -message(STATUS "Download Unified Memory Framework from github.com") -if (NOT DEFINED UMF_REPO) - set(UMF_REPO "https://github.com/oneapi-src/unified-memory-framework.git") -endif() - -if (NOT DEFINED UMF_TAG) - # commit 1de269c00e46b7cbdbafa2247812c8c4bb4ed4a5 - # Author: Łukasz Stolarczuk - # Date: Mon Jul 21 15:42:59 2025 +0200 - # 1.0.0 release - set(UMF_TAG v1.0.0) -endif() - -message(STATUS "Will fetch Unified Memory Framework from ${UMF_REPO}") - -include(FetchContent) -FetchContent_Declare(unified-memory-framework - GIT_REPOSITORY ${UMF_REPO} - GIT_TAG ${UMF_TAG} -) if (UR_STATIC_ADAPTER_L0) if (UMF_BUILD_SHARED_LIBRARY) @@ -63,14 +43,34 @@ if (UR_STATIC_ADAPTER_L0) set(UMF_BUILD_SHARED_LIBRARY OFF) endif() endif() + +set(UR_USE_EXTERNAL_UMF ON CACHE BOOL "Use a pre-built UMF if available") -set(UR_USE_EXTERNAL_UMF OFF CACHE BOOL "Use a pre-built UMF") - -if (UR_USE_EXTERNAL_UMF) - find_package(umf REQUIRED) +if(UR_USE_EXTERNAL_UMF) + find_package(umf 1.0.0 QUIET) +endif() +if(umf_FOUND) + message(STATUS "Using preinstalled UMF at ${umf_DIR}, ignoring UMF build related options") # Add an alias matching the FetchContent case add_library(umf::headers ALIAS umf::umf_headers) else() + set(UMF_REPO "https://github.com/oneapi-src/unified-memory-framework.git") + + # commit 1de269c00e46b7cbdbafa2247812c8c4bb4ed4a5 + # Author: Łukasz Stolarczuk + # Date: Mon Jul 21 15:42:59 2025 +0200 + # 1.0.0 release + set(UMF_TAG v1.0.0) + + if(NOT FETCHCONTENT_SOURCE_DIR_UNIFIED-MEMORY-FRAMEWORK) + message(STATUS "Will fetch 
Unified Memory Framework from ${UMF_REPO}") + endif() + + include(FetchContent) + FetchContent_Declare(unified-memory-framework + GIT_REPOSITORY ${UMF_REPO} + GIT_TAG ${UMF_TAG} + ) set(UMF_BUILD_TESTS OFF CACHE INTERNAL "Build UMF tests") set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "Build UMF examples") set(UMF_BUILD_SHARED_LIBRARY ${UMF_BUILD_SHARED_LIBRARY} CACHE INTERNAL "Build UMF shared library") @@ -80,6 +80,10 @@ else() endif() if(UR_ENABLE_LATENCY_HISTOGRAM) + find_package(hdr_histogram QUIET) + if(hdr_histogram_FOUND) + set(hdr_histogram_SOURCE_DIR "${hdr_histogram_DIR}") + else() set(HDR_HISTOGRAM_BUILD_STATIC CACHE INTERNAL ON "") set(HDR_HISTOGRAM_BUILD_SHARED CACHE INTERNAL OFF "") @@ -91,10 +95,10 @@ if(UR_ENABLE_LATENCY_HISTOGRAM) FetchContent_MakeAvailable(hdr_histogram) FetchContent_GetProperties(hdr_histogram) - - target_link_libraries(ur_common PUBLIC hdr_histogram_static) - target_include_directories(ur_common PUBLIC $) - target_compile_options(ur_common PUBLIC -DUR_ENABLE_LATENCY_HISTOGRAM=1) + endif() + target_link_libraries(ur_common PUBLIC hdr_histogram_static) + target_include_directories(ur_common PUBLIC $) + target_compile_options(ur_common PUBLIC -DUR_ENABLE_LATENCY_HISTOGRAM=1) endif() target_link_libraries(ur_common PUBLIC From fee15a3c5096c26ca3a66addd2175fe7975dc4d6 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Thu, 7 Aug 2025 00:45:03 +0000 Subject: [PATCH 6/6] Update intel/llvm mirror base commit to 06407ab5 --- .github/intel-llvm-mirror-base-commit | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/intel-llvm-mirror-base-commit b/.github/intel-llvm-mirror-base-commit index e049aa99ec..eb32b8eefe 100644 --- a/.github/intel-llvm-mirror-base-commit +++ b/.github/intel-llvm-mirror-base-commit @@ -1 +1 @@ -542a00b45276bd9a24ba85c041b0d5535a896593 +06407ab5626faccc61fb8367ac1017667045f9e1