diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
index 2bfa1cbef4657..17899bdefc3dc 100644
--- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake
+++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake
@@ -1,8 +1,7 @@
-# commit b074893e854d28141cd67bc5935ed87e47eb3bb6
-# Merge: 71a5eab0 128ea023
-# Author: Ross Brunton
-# Date: Tue Jan 21 11:21:50 2025 +0000
-# Merge pull request #2539 from RossBrunton/ross/specconst
-#
-# Added `DEVICE_INFO_PROGRAM_SET_SPECIALIZATION_CONSTANTS`
-set(UNIFIED_RUNTIME_TAG b074893e854d28141cd67bc5935ed87e47eb3bb6)
+# commit 871061f1aa3b8ade57e0a2ed63d8000e257548cc
+# Merge: 262ec93e 7cca93f9
+# Author: Kenneth Benzie (Benie)
+# Date: Tue Jan 21 13:26:45 2025 +0000
+# Merge pull request #2588 from kbenzie/benie/ci-delete-prerelease
+# Remove the prerelease.yml job
+set(UNIFIED_RUNTIME_TAG 871061f1aa3b8ade57e0a2ed63d8000e257548cc)
diff --git a/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp b/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp
new file mode 100644
index 0000000000000..024106f0f8330
--- /dev/null
+++ b/sycl/test-e2e/AddressSanitizer/dependency/shadow-virtual-mem.cpp
@@ -0,0 +1,149 @@
+// REQUIRES: aspect-ext_oneapi_virtual_mem, linux, (gpu && level_zero)
+// RUN: %{build} -o %t.out
+// RUN: %{run} NEOReadDebugKeys=1 CreateMultipleRootDevices=2 %t.out
+
+// Test for the assumption behind the DevASAN shadow memory for L0 GPU, which
+// is that it is okay to access VirtualMem from a different device/context.
+
+#include <sycl/sycl.hpp>
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+#include <iostream>
+#include <utility>
+#include <vector>
+
+namespace syclext = sycl::ext::oneapi::experimental;
+
+// Find the least common multiple of the context and device granularities. This
+// value can be used for aligning both physical memory allocations and for
+// reserving virtual memory ranges.
+size_t GetLCMGranularity(
+    const sycl::device &Dev, const sycl::context &Ctx,
+    syclext::granularity_mode Gm = syclext::granularity_mode::recommended) {
+  size_t CtxGranularity = syclext::get_mem_granularity(Ctx, Gm);
+  size_t DevGranularity = syclext::get_mem_granularity(Dev, Ctx, Gm);
+
+  // Euclid's algorithm: reduce to the GCD, then LCM = (Dev / GCD) * Ctx.
+  size_t GCD = CtxGranularity;
+  size_t Rem = DevGranularity % GCD;
+  while (Rem != 0) {
+    std::swap(GCD, Rem);
+    Rem %= GCD;
+  }
+  return (DevGranularity / GCD) * CtxGranularity;
+}
+
+// Round UnalignedBytes up to the nearest multiple of AlignmentGranularity.
+size_t GetAlignedByteSize(const size_t UnalignedBytes,
+                          const size_t AlignmentGranularity) {
+  return ((UnalignedBytes + AlignmentGranularity - 1) / AlignmentGranularity) *
+         AlignmentGranularity;
+}
+
+bool check_for_42(const std::vector<int> &vec, int ref_result = 42) {
+  return vec[42] == ref_result;
+}
+
+int main() {
+  // Get all available devices.
+  auto devices = sycl::device::get_devices();
+
+  // Keep only the GPU devices.
+  std::vector<sycl::device> gpuDevices;
+  for (const auto &dev : devices) {
+    if (dev.is_gpu()) {
+      gpuDevices.push_back(dev);
+    }
+  }
+
+  // Check that we have at least two GPU devices.
+  if (gpuDevices.size() < 2) {
+    std::cerr << "Less than two GPU devices found." << std::endl;
+    return 1;
+  }
+
+  // Create contexts for the first two GPU devices.
+  auto dev1 = gpuDevices[0];
+  auto dev2 = gpuDevices[1];
+  sycl::context context1_d1(dev1);
+  sycl::context context2_d1(dev1);
+  sycl::context context_d2(dev2);
+
+  sycl::queue Q1_d1(context1_d1, dev1);
+  sycl::queue Q2_d1(context2_d1, dev1);
+  sycl::queue Q1_d2(context_d2, dev2);
+
+  constexpr size_t NumberOfElements = 1000;
+  size_t BytesRequired = NumberOfElements * sizeof(int);
+  size_t UsedGranularity = GetLCMGranularity(dev1, context2_d1);
+  size_t AlignedByteSize = GetAlignedByteSize(BytesRequired, UsedGranularity);
+  printf("UsedGranularity: %zu\n", UsedGranularity);
+  printf("AlignedByteSize: %zu\n", AlignedByteSize);
+
+  syclext::physical_mem NewPhysicalMem{dev1, context2_d1, AlignedByteSize};
+
+  uintptr_t VirtualMemoryPtr =
+      syclext::reserve_virtual_mem(0, AlignedByteSize, context2_d1);
+
+  void *MappedPtr =
+      NewPhysicalMem.map(VirtualMemoryPtr, AlignedByteSize,
+                         syclext::address_access_mode::read_write);
+
+  int *DataPtr = reinterpret_cast<int *>(MappedPtr);
+  printf("DataPtr: %p\n", DataPtr);
+
+  std::vector<int> ResultHostData(NumberOfElements);
+  constexpr int ExpectedValueAfterFill = 42;
+
+  {
+    // Normal case: same device, same context.
+    sycl::buffer CheckBuffer(ResultHostData);
+    Q2_d1.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements)
+        .wait_and_throw();
+    Q2_d1.submit([&](sycl::handler &Handle) {
+      sycl::accessor A(CheckBuffer, Handle, sycl::write_only);
+      Handle.parallel_for(NumberOfElements,
+                          [=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; });
+    });
+    Q2_d1.wait();
+  }
+  assert(check_for_42(ResultHostData));
+  ResultHostData = std::vector<int>(NumberOfElements);
+  Q2_d1.fill(DataPtr, 0, NumberOfElements).wait_and_throw();
+  assert(check_for_42(ResultHostData, 0));
+
+  {
+    // !!! Same device, different context !!!
+    sycl::buffer CheckBuffer(ResultHostData);
+    Q1_d1.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements)
+        .wait_and_throw();
+    Q1_d1.submit([&](sycl::handler &Handle) {
+      sycl::accessor A(CheckBuffer, Handle, sycl::write_only);
+      Handle.parallel_for(NumberOfElements,
+                          [=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; });
+    });
+    Q1_d1.wait();
+  }
+  assert(check_for_42(ResultHostData));
+  ResultHostData = std::vector<int>(NumberOfElements);
+  Q1_d1.fill(DataPtr, 0, NumberOfElements).wait_and_throw();
+  assert(check_for_42(ResultHostData, 0));
+
+  {
+    // !!! Different device, different context !!!
+    sycl::buffer CheckBuffer(ResultHostData);
+    Q1_d2.fill(DataPtr, ExpectedValueAfterFill, NumberOfElements)
+        .wait_and_throw();
+    Q1_d2.submit([&](sycl::handler &Handle) {
+      sycl::accessor A(CheckBuffer, Handle, sycl::write_only);
+      Handle.parallel_for(NumberOfElements,
+                          [=](sycl::id<1> Idx) { A[Idx] = DataPtr[Idx]; });
+    });
+    Q1_d2.wait();
+  }
+  assert(check_for_42(ResultHostData));
+
+  return 0;
+}
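For reference, GetLCMGranularity above hand-rolls Euclid's algorithm. Since C++17 the same result is available from the standard library; a minimal sketch, not part of the patch (the helper name is illustrative):

// Sketch only: std::lcm (C++17, <numeric>) computes the least common
// multiple of the two granularities directly.
#include <cstddef>
#include <numeric>

size_t GetLCMGranularitySketch(size_t CtxGranularity, size_t DevGranularity) {
  return std::lcm(CtxGranularity, DevGranularity);
}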
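The test lets the process exit without tearing down the mapping, which is fine for a lit test. For completeness, a teardown sketch assuming the unmap and free_virtual_mem free functions from the sycl_ext_oneapi_virtual_mem extension (helper name illustrative; parameters mirror the variables in main above):

// Sketch only: undo physical_mem::map, then release the reserved virtual
// range. The physical_mem object itself is freed by its destructor.
#include <cstdint>
#include <sycl/sycl.hpp>

namespace syclext = sycl::ext::oneapi::experimental;

void ReleaseMappingSketch(void *MappedPtr, uintptr_t VirtualMemoryPtr,
                          size_t AlignedByteSize, const sycl::context &Ctx) {
  syclext::unmap(MappedPtr, AlignedByteSize, Ctx);
  syclext::free_virtual_mem(VirtualMemoryPtr, AlignedByteSize, Ctx);
}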