From c2aa7cde6b1695d55134deda01660ea96960d16b Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Mon, 20 Jan 2025 08:17:44 +0100 Subject: [PATCH 1/6] port --- .../loader/layers/sanitizer/asan/asan_ddi.cpp | 45 +++++++++++-------- .../sanitizer/asan/asan_interceptor.cpp | 36 ++++++++++----- .../sanitizer/asan/asan_interceptor.hpp | 11 ++--- .../sanitizer_common/sanitizer_utils.cpp | 2 +- 4 files changed, 60 insertions(+), 34 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_ddi.cpp b/source/loader/layers/sanitizer/asan/asan_ddi.cpp index ff2461d48d..252ef1e004 100644 --- a/source/loader/layers/sanitizer/asan/asan_ddi.cpp +++ b/source/loader/layers/sanitizer/asan/asan_ddi.cpp @@ -29,25 +29,34 @@ ur_result_t setupContext(ur_context_handle_t Context, uint32_t numDevices, const ur_device_handle_t *phDevices) { std::shared_ptr CI; UR_CALL(getAsanInterceptor()->insertContext(Context, CI)); - for (uint32_t i = 0; i < numDevices; ++i) { - auto hDevice = phDevices[i]; - std::shared_ptr DI; - UR_CALL(getAsanInterceptor()->insertDevice(hDevice, DI)); - DI->Type = GetDeviceType(Context, hDevice); - if (DI->Type == DeviceType::UNKNOWN) { - getContext()->logger.error("Unsupport device"); - return UR_RESULT_ERROR_INVALID_DEVICE; - } - getContext()->logger.info( - "DeviceInfo {} (Type={}, IsSupportSharedSystemUSM={})", - (void *)DI->Handle, ToString(DI->Type), DI->IsSupportSharedSystemUSM); - getContext()->logger.info("Add {} into context {}", (void *)DI->Handle, - (void *)Context); - if (!DI->Shadow) { - UR_CALL(DI->allocShadowMemory(Context)); + + if (numDevices > 0) { + auto DeviceType = GetDeviceType(Context, phDevices[0]); + auto ShadowMemory = + getAsanInterceptor()->getOrCreateShadowMemory(phDevices[0], DeviceType); + + for (uint32_t i = 0; i < numDevices; ++i) { + auto hDevice = phDevices[i]; + std::shared_ptr DI; + UR_CALL(getAsanInterceptor()->insertDevice(hDevice, DI)); + DI->Type = GetDeviceType(Context, hDevice); + if (DI->Type == DeviceType::UNKNOWN) { + getContext()->logger.error("Unsupport device"); + return UR_RESULT_ERROR_INVALID_DEVICE; + } + if (DI->Type != DeviceType) { + getContext()->logger.error("Different device type in the same context"); + return UR_RESULT_ERROR_INVALID_DEVICE; + } + getContext()->logger.info( + "DeviceInfo {} (Type={}, IsSupportSharedSystemUSM={})", + (void *)DI->Handle, ToString(DI->Type), DI->IsSupportSharedSystemUSM); + getContext()->logger.info("Add {} into context {}", (void *)DI->Handle, + (void *)Context); + DI->Shadow = ShadowMemory; + CI->DeviceList.emplace_back(hDevice); + CI->AllocInfosMap[hDevice]; } - CI->DeviceList.emplace_back(hDevice); - CI->AllocInfosMap[hDevice]; } return UR_RESULT_SUCCESS; } diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index fb2a01101e..e6255f4cab 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -36,8 +36,7 @@ AsanInterceptor::~AsanInterceptor() { // We must release these objects before releasing adapters, since // they may use the adapter in their destructor for (const auto &[_, DeviceInfo] : m_DeviceMap) { - [[maybe_unused]] auto URes = DeviceInfo->Shadow->Destory(); - assert(URes == UR_RESULT_SUCCESS); + DeviceInfo->Shadow = nullptr; } m_Quarantine = nullptr; @@ -48,6 +47,11 @@ AsanInterceptor::~AsanInterceptor() { // detection depends on it. m_AllocationMap.clear(); + for (auto &[_, ShadowManager] : m_ShadowMap) { + ShadowManager->Destory(); + getContext()->urDdiTable.Context.pfnRelease(ShadowManager->Context); + } + for (auto Adapter : m_Adapters) { getContext()->urDdiTable.Global.pfnAdapterRelease(Adapter); } @@ -303,14 +307,26 @@ ur_result_t AsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, return Result; } -ur_result_t DeviceInfo::allocShadowMemory(ur_context_handle_t Context) { - Shadow = GetShadowMemory(Context, Handle, Type); - assert(Shadow && "Failed to get shadow memory"); - UR_CALL(Shadow->Setup()); - getContext()->logger.info("ShadowMemory(Global): {} - {}", - (void *)Shadow->ShadowBegin, - (void *)Shadow->ShadowEnd); - return UR_RESULT_SUCCESS; +std::shared_ptr +AsanInterceptor::getOrCreateShadowMemory(ur_device_handle_t Device, + DeviceType Type) { + if (m_ShadowMap.find(Type) == m_ShadowMap.end()) { + std::scoped_lock Guard(m_ShadowMapMutex); + if (m_ShadowMap.find(Type) == m_ShadowMap.end()) { + ur_context_handle_t InternalContext; + auto Res = getContext()->urDdiTable.Context.pfnCreate(1, &Device, nullptr, + &InternalContext); + if (Res != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to create shadow context"); + return nullptr; + } + std::shared_ptr CI; + insertContext(InternalContext, CI); + m_ShadowMap[Type] = GetShadowMemory(InternalContext, Device, Type); + m_ShadowMap[Type]->Setup(); + } + } + return m_ShadowMap[Type]; } /// Each 8 bytes of application memory are mapped into one byte of shadow memory diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp index 8d5c05cc99..2f1c712d8e 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp @@ -57,8 +57,6 @@ struct DeviceInfo { // Device handles are special and alive in the whole process lifetime, // so we needn't retain&release here. explicit DeviceInfo(ur_device_handle_t Device) : Handle(Device) {} - - ur_result_t allocShadowMemory(ur_context_handle_t Context); }; struct QueueInfo { @@ -353,6 +351,9 @@ class AsanInterceptor { bool isNormalExit() { return m_NormalExit; } + std::shared_ptr + getOrCreateShadowMemory(ur_device_handle_t Device, DeviceType Type); + private: ur_result_t updateShadowMemory(std::shared_ptr &ContextInfo, std::shared_ptr &DeviceInfo, @@ -368,9 +369,6 @@ class AsanInterceptor { ur_queue_handle_t Queue, ur_kernel_handle_t Kernel, LaunchInfo &LaunchInfo); - ur_result_t allocShadowMemory(ur_context_handle_t Context, - std::shared_ptr &DeviceInfo); - ur_result_t registerDeviceGlobals(ur_program_handle_t Program); ur_result_t registerSpirKernels(ur_program_handle_t Program); @@ -406,6 +404,9 @@ class AsanInterceptor { ur_shared_mutex m_AdaptersMutex; bool m_NormalExit = true; + + std::unordered_map> m_ShadowMap; + ur_shared_mutex m_ShadowMapMutex; }; } // namespace asan diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index c758d03266..794ffce472 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -156,7 +156,7 @@ DeviceType GetDeviceType(ur_context_handle_t Context, // by the value of device USM pointer (see "USM Allocation Range" in // asan_shadow.cpp) auto Type = DeviceType::UNKNOWN; - if (Ptr >> 48 == 0xff00U) { + if (((Ptr >> 48) & 0xff00U) == 0xff00U) { Type = DeviceType::GPU_PVC; } else { Type = DeviceType::GPU_DG2; From 6a42493231ae07f006ee4752037c2ca26592fab7 Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Mon, 20 Jan 2025 08:32:32 +0100 Subject: [PATCH 2/6] change name --- source/loader/layers/sanitizer/asan/asan_interceptor.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index e6255f4cab..5f17091b97 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -47,9 +47,9 @@ AsanInterceptor::~AsanInterceptor() { // detection depends on it. m_AllocationMap.clear(); - for (auto &[_, ShadowManager] : m_ShadowMap) { - ShadowManager->Destory(); - getContext()->urDdiTable.Context.pfnRelease(ShadowManager->Context); + for (auto &[_, ShadowMemory] : m_ShadowMap) { + ShadowMemory->Destory(); + getContext()->urDdiTable.Context.pfnRelease(ShadowMemory->Context); } for (auto Adapter : m_Adapters) { From a680cfc4d346cce46dd2f4ffed47fbee27bbf41f Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Mon, 20 Jan 2025 09:45:16 +0100 Subject: [PATCH 3/6] update comment --- .../layers/sanitizer/sanitizer_common/sanitizer_utils.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index 794ffce472..c908ed643c 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -156,6 +156,10 @@ DeviceType GetDeviceType(ur_context_handle_t Context, // by the value of device USM pointer (see "USM Allocation Range" in // asan_shadow.cpp) auto Type = DeviceType::UNKNOWN; + + // ((Ptr >> 48) & 0xff00U) is because I have seen a DeviceUSM of PVC having + // 0xff01xxxx addr. We need to confirm this with L0 team later about the + // address space mapping. if (((Ptr >> 48) & 0xff00U) == 0xff00U) { Type = DeviceType::GPU_PVC; } else { From 31fe197889b656365c714e2c518e7f55fb5b50f8 Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Tue, 21 Jan 2025 05:57:17 +0100 Subject: [PATCH 4/6] update getDeviceType --- .../layers/sanitizer/sanitizer_common/sanitizer_utils.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp index c908ed643c..83b58de407 100644 --- a/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp +++ b/source/loader/layers/sanitizer/sanitizer_common/sanitizer_utils.cpp @@ -157,10 +157,9 @@ DeviceType GetDeviceType(ur_context_handle_t Context, // asan_shadow.cpp) auto Type = DeviceType::UNKNOWN; - // ((Ptr >> 48) & 0xff00U) is because I have seen a DeviceUSM of PVC having - // 0xff01xxxx addr. We need to confirm this with L0 team later about the - // address space mapping. - if (((Ptr >> 48) & 0xff00U) == 0xff00U) { + // L0 changes their VA layout. + // TODO: update our shadow memory layout/algorithms to accordingly. + if (((Ptr >> 52) & 0xff0U) == 0xff0U) { Type = DeviceType::GPU_PVC; } else { Type = DeviceType::GPU_DG2; From 01bf246d451664eaef893aaad7f5e41e826afd68 Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Tue, 21 Jan 2025 08:25:33 +0100 Subject: [PATCH 5/6] lock directly --- .../sanitizer/asan/asan_interceptor.cpp | 24 +++++++++---------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 5f17091b97..d5d5c2c729 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -310,21 +310,19 @@ ur_result_t AsanInterceptor::postLaunchKernel(ur_kernel_handle_t Kernel, std::shared_ptr AsanInterceptor::getOrCreateShadowMemory(ur_device_handle_t Device, DeviceType Type) { + std::scoped_lock Guard(m_ShadowMapMutex); if (m_ShadowMap.find(Type) == m_ShadowMap.end()) { - std::scoped_lock Guard(m_ShadowMapMutex); - if (m_ShadowMap.find(Type) == m_ShadowMap.end()) { - ur_context_handle_t InternalContext; - auto Res = getContext()->urDdiTable.Context.pfnCreate(1, &Device, nullptr, - &InternalContext); - if (Res != UR_RESULT_SUCCESS) { - getContext()->logger.error("Failed to create shadow context"); - return nullptr; - } - std::shared_ptr CI; - insertContext(InternalContext, CI); - m_ShadowMap[Type] = GetShadowMemory(InternalContext, Device, Type); - m_ShadowMap[Type]->Setup(); + ur_context_handle_t InternalContext; + auto Res = getContext()->urDdiTable.Context.pfnCreate(1, &Device, nullptr, + &InternalContext); + if (Res != UR_RESULT_SUCCESS) { + getContext()->logger.error("Failed to create shadow context"); + return nullptr; } + std::shared_ptr CI; + insertContext(InternalContext, CI); + m_ShadowMap[Type] = GetShadowMemory(InternalContext, Device, Type); + m_ShadowMap[Type]->Setup(); } return m_ShadowMap[Type]; } From 6b161a50b95b3879f64eb53acedbad526d7ea4b9 Mon Sep 17 00:00:00 2001 From: "Wu, Yingcong" Date: Tue, 21 Jan 2025 08:36:50 +0100 Subject: [PATCH 6/6] ci