From 08a117890ee24db77eca0e60cc57f0adbac27426 Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Mon, 20 Jan 2025 07:42:13 +0100 Subject: [PATCH 1/6] update tag --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 72841724fa01d..3e51f5e73764c 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/yingcong-wu/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 9c18fb006a799..4683dbfcb3f83 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -4,4 +4,4 @@ # Date: Fri Jan 17 10:28:34 2025 +0000 # Merge pull request #2561 from Bensuo/ben/cmd-buffer-l0-fence # [L0][CMDBUF] Optimize fence/event waits during update -set(UNIFIED_RUNTIME_TAG 222e4b1d51536bb38e03e2000a79679af0a44a6d) +set(UNIFIED_RUNTIME_TAG yc-test-main/0120-on-shadow-per-type) From c25fe40a2adf7f354140ec45e78e5ac65112453c Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Mon, 20 Jan 2025 07:48:54 +0100 Subject: [PATCH 2/6] add test --- .../dependency/shadow-virtual-mem.cpp | 145 ++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp diff --git a/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp b/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp new file mode 100644 index 0000000000000..c1cfcc5fa14b1 --- /dev/null +++ b/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp @@ -0,0 +1,145 @@ +// REQUIRES: aspect-ext_oneapi_virtual_mem, linux, (gpu && level_zero) +// RUN: %{build} -o %t.out +// RUN: %{run} NEOReadDebugKeys=1 CreateMultipleRootDevices=2 %t.out + +// Test for the assumption behide DevASAN shadow memory for L0GPU , which is it +// is okay to access VirtualMem from different device/context. + +#include + +#include +#include + +namespace syclext = sycl::ext::oneapi::experimental; + +// Find the least common multiple of the context and device granularities. This +// value can be used for aligning both physical memory allocations and for +// reserving virtual memory ranges. +size_t GetLCMGranularity( + const sycl::device &Dev, const sycl::context &Ctx, + syclext::granularity_mode Gm = syclext::granularity_mode::recommended) { + size_t CtxGranularity = syclext::get_mem_granularity(Ctx, Gm); + size_t DevGranularity = syclext::get_mem_granularity(Dev, Ctx, Gm); + + size_t GCD = CtxGranularity; + size_t Rem = DevGranularity % GCD; + while (Rem != 0) { + std::swap(GCD, Rem); + Rem %= GCD; + } + return (DevGranularity / GCD) * CtxGranularity; +} + +size_t GetAlignedByteSize(const size_t UnalignedBytes, + const size_t AligmentGranularity) { + return ((UnalignedBytes + AligmentGranularity - 1) / AligmentGranularity) * + AligmentGranularity; +} + +bool check_for_42(std::vector &vec, int ref_result = 42) { + return vec[42] == ref_result; +} + +int main() { + // Get all available devices + auto devices = sycl::device::get_devices(); + + // Filter out GPU devices + std::vector gpuDevices; + for (const auto &dev : devices) { + if (dev.is_gpu()) { + gpuDevices.push_back(dev); + } + } + + // Check if we have at least two GPU devices + if (gpuDevices.size() < 2) { + std::cerr << "Less than two GPU devices found." << std::endl; + return 1; + } + + // Create contexts for the first two GPU devices + auto dev1 = gpuDevices[0]; + auto dev2 = gpuDevices[1]; + sycl::context context1_d1(dev1); + sycl::context context2_d1(dev1); + sycl::context context_d2(dev2); + + sycl::queue Q1_d1(context1_d1, dev1); + sycl::queue Q2_d1(context2_d1, dev1); + sycl::queue Q1_d2(context_d2, dev2); + + constexpr size_t NumberOfElements = 1000; + size_t BytesRequired = NumberOfElements * sizeof(int); + size_t UsedGranularity = GetLCMGranularity(dev1, context2_d1); + size_t AlignedByteSize = + ((BytesRequired + UsedGranularity - 1) / UsedGranularity) * + UsedGranularity; + printf("UsedGranularity: %zu\n", UsedGranularity); + printf("AlignedByteSize: %zu\n", AlignedByteSize); + + syclext::physical_mem NewPhysicalMem{dev1, context2_d1, AlignedByteSize}; + + uintptr_t VirtualMemoryPtr = + syclext::reserve_virtual_mem(0, AlignedByteSize, context2_d1); + + void *MappedPtr = + NewPhysicalMem.map(VirtualMemoryPtr, AlignedByteSize, + syclext::address_access_mode::read_write); + + int *DataPtr = reinterpret_cast(MappedPtr); + printf("DataPtr: %p\n", DataPtr); + + std::vector ResultHostData(NumberOfElements); + constexpr int ExpectedValueAfterFill = 42; + + { + // Normal case, same device, same context + sycl::buffer CheckBuffer(ResultHostData); + Q2_d1.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements) + .wait_and_throw(); + Q2_d1.submit([&](sycl::handler &Handle) { + sycl::accessor A(CheckBuffer, Handle, sycl::write_only); + Handle.parallel_for(NumberOfElements, + [=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; }); + }); + Q2_d1.wait(); + } + assert(check_for_42(ResultHostData)); + ResultHostData = std::vector(NumberOfElements); + Q2_d1.fill(DataPtr, 0, NumberOfElements).wait_and_throw(); + assert(check_for_42(ResultHostData, 0)); + + { + // !!! Same device, different context !!! + sycl::buffer CheckBuffer(ResultHostData); + Q1_d1.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements) + .wait_and_throw(); + Q1_d1.submit([&](sycl::handler &Handle) { + sycl::accessor A(CheckBuffer, Handle, sycl::write_only); + Handle.parallel_for(NumberOfElements, + [=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; }); + }); + Q1_d1.wait(); + } + assert(check_for_42(ResultHostData)); + ResultHostData = std::vector(NumberOfElements); + Q1_d1.fill(DataPtr, 0, NumberOfElements).wait_and_throw(); + assert(check_for_42(ResultHostData, 0)); + + { + // !!! Different device, different context !!! + sycl::buffer CheckBuffer(ResultHostData); + Q1_d2.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements) + .wait_and_throw(); + Q1_d2.submit([&](sycl::handler &Handle) { + sycl::accessor A(CheckBuffer, Handle, sycl::write_only); + Handle.parallel_for(NumberOfElements, + [=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; }); + }); + Q1_d2.wait(); + } + assert(check_for_42(ResultHostData)); + + return 0; +} From c1fbd92085d3c6d69f462adc89d1c660e49ae543 Mon Sep 17 00:00:00 2001 From: Wu Yingcong Date: Mon, 20 Jan 2025 15:24:11 +0800 Subject: [PATCH 3/6] Update shadow-virtual-mem.cpp --- .../test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp b/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp index c1cfcc5fa14b1..024106f0f8330 100644 --- a/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp +++ b/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp @@ -5,7 +5,7 @@ // Test for the assumption behide DevASAN shadow memory for L0GPU , which is it // is okay to access VirtualMem from different device/context. -#include +#include #include #include From 72eb9ad8c766b3fe2bac7ba5a30740fe63a49c71 Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Tue, 21 Jan 2025 13:31:28 +0000 Subject: [PATCH 4/6] [UR] Bump main tag to b074893e --- sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 3e51f5e73764c..72841724fa01d 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/yingcong-wu/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 4683dbfcb3f83..8a8efd1510c58 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit 222e4b1d51536bb38e03e2000a79679af0a44a6d -# Merge: 30d183a0 28108a7e -# Author: Kenneth Benzie (Benie) -# Date: Fri Jan 17 10:28:34 2025 +0000 -# Merge pull request #2561 from Bensuo/ben/cmd-buffer-l0-fence -# [L0][CMDBUF] Optimize fence/event waits during update -set(UNIFIED_RUNTIME_TAG yc-test-main/0120-on-shadow-per-type) +# commit b074893e854d28141cd67bc5935ed87e47eb3bb6 +# Merge: 71a5eab0 128ea023 +# Author: Ross Brunton +# Date: Tue Jan 21 11:21:50 2025 +0000 +# Merge pull request #2539 from RossBrunton/ross/specconst +# Added `DEVICE_INFO_PROGRAM_SET_SPECIALIZATION_CONSTANTS` +set(UNIFIED_RUNTIME_TAG b074893e854d28141cd67bc5935ed87e47eb3bb6) From b1714471e4ea88f67e6bf6dfb2d1a5446ba7f83b Mon Sep 17 00:00:00 2001 From: "Kenneth Benzie (Benie)" Date: Tue, 21 Jan 2025 13:33:08 +0000 Subject: [PATCH 5/6] [UR] Bump main tag to 871061f1 --- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 8a8efd1510c58..17899bdefc3dc 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1,7 @@ -# commit b074893e854d28141cd67bc5935ed87e47eb3bb6 -# Merge: 71a5eab0 128ea023 -# Author: Ross Brunton -# Date: Tue Jan 21 11:21:50 2025 +0000 -# Merge pull request #2539 from RossBrunton/ross/specconst -# Added `DEVICE_INFO_PROGRAM_SET_SPECIALIZATION_CONSTANTS` -set(UNIFIED_RUNTIME_TAG b074893e854d28141cd67bc5935ed87e47eb3bb6) +# commit 871061f1aa3b8ade57e0a2ed63d8000e257548cc +# Merge: 262ec93e 7cca93f9 +# Author: Kenneth Benzie (Benie) +# Date: Tue Jan 21 13:26:45 2025 +0000 +# Merge pull request #2588 from kbenzie/benie/ci-delete-prerelease +# Remove the prerelease.yml job +set(UNIFIED_RUNTIME_TAG 871061f1aa3b8ade57e0a2ed63d8000e257548cc) From 667787c7df810cb68edb6e7fe97cd1fa9960fc47 Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Wed, 22 Jan 2025 02:59:00 +0100 Subject: [PATCH 6/6] trigger ci