From dc588e483b4f44fee5467737e2b8bde3a9b4650c Mon Sep 17 00:00:00 2001
From: "Zhao, Maosu"
Date: Tue, 29 Oct 2024 19:37:38 -0700
Subject: [PATCH] [DeviceASAN] Re-use shadow if required size is not larger
 than last one

Move the allocation of local/private shadow memory out of prepareLaunch()
into ShadowMemory::AllocLocalShadow()/AllocPrivateShadow(). The GPU
implementation keeps the device buffer cached in the shadow object and only
re-allocates it when a launch needs more shadow than is currently cached;
the buffers are released in ShadowMemoryGPU::Destory() instead of in
~USMLaunchInfo().

Review fixes folded into this revision:
* Track the cached sizes in ShadowMemoryGPU members instead of
  function-local statics, so that they are per shadow object and are reset
  by Destory() together with the offsets.
* Return the error when zero-filling a freshly allocated shadow buffer
  fails, instead of recording the allocation and reporting success with a
  null offset.

---
Notes (not part of the commit message):
* This patch was re-flowed from a copy whose line breaks, indentation and
  angle-bracket contents had been stripped. The template arguments in the
  context lines (std::shared_ptr<AllocInfo>, the std::pair<...> of
  VirtualMemMaps) and the context whitespace are reconstructed and should
  be checked against the tree before applying.
* The blob ids on the asan_shadow.{cpp,hpp} index lines are those of the
  original revision.

 .../layers/sanitizer/asan_interceptor.cpp     | 149 ++++--------------
 .../loader/layers/sanitizer/asan_shadow.cpp   | 128 ++++++++++++++-
 .../loader/layers/sanitizer/asan_shadow.hpp   |  38 +++++
 3 files changed, 191 insertions(+), 124 deletions(-)

diff --git a/source/loader/layers/sanitizer/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan_interceptor.cpp
index 4a315588fd..fbcc401909 100644
--- a/source/loader/layers/sanitizer/asan_interceptor.cpp
+++ b/source/loader/layers/sanitizer/asan_interceptor.cpp
@@ -682,28 +682,6 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
                      LocalWorkSize[Dim];
         }
 
-        auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
-                                            Device = DeviceInfo->Handle,
-                                            Queue](size_t Size, uptr &Ptr) {
-            void *Allocated = nullptr;
-            auto URes = getContext()->urDdiTable.USM.pfnDeviceAlloc(
-                Context, Device, nullptr, nullptr, Size, &Allocated);
-            if (URes != UR_RESULT_SUCCESS) {
-                return URes;
-            }
-            // Initialize shadow memory
-            URes = EnqueueUSMBlockingSet(Queue, Allocated, 0, Size);
-            if (URes != UR_RESULT_SUCCESS) {
-                [[maybe_unused]] auto URes =
-                    getContext()->urDdiTable.USM.pfnFree(Context, Allocated);
-                assert(URes == UR_RESULT_SUCCESS &&
-                       "urUSMFree failed at allocating shadow memory");
-                Allocated = nullptr;
-            }
-            Ptr = (uptr)Allocated;
-            return URes;
-        };
-
         auto LocalMemoryUsage =
             GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
         auto PrivateMemoryUsage =
@@ -715,86 +693,45 @@ ur_result_t SanitizerInterceptor::prepareLaunch(
 
         // Write shadow memory offset for local memory
         if (getOptions().DetectLocals) {
-            // CPU needn't this
-            if (DeviceInfo->Type == DeviceType::GPU_PVC ||
-                DeviceInfo->Type == DeviceType::GPU_DG2) {
-                const size_t LocalMemorySize =
-                    GetDeviceLocalMemorySize(DeviceInfo->Handle);
-                const size_t LocalShadowMemorySize =
-                    (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
-
-                getContext()->logger.debug(
-                    "LocalMemory(WorkGroup={}, LocalMemorySize={}, "
-                    "LocalShadowMemorySize={})",
-                    NumWG, LocalMemorySize, LocalShadowMemorySize);
-
-                if (EnqueueAllocateShadowMemory(
-                        LocalShadowMemorySize,
-                        LaunchInfo.Data->LocalShadowOffset) !=
-                    UR_RESULT_SUCCESS) {
-                    getContext()->logger.warning(
-                        "Failed to allocate shadow memory for local "
-                        "memory, maybe the number of workgroup ({}) is too "
-                        "large",
-                        NumWG);
-                    getContext()->logger.warning(
-                        "Skip checking local memory of kernel <{}>",
-                        GetKernelName(Kernel));
-                } else {
-                    LaunchInfo.Data->LocalShadowOffsetEnd =
-                        LaunchInfo.Data->LocalShadowOffset +
-                        LocalShadowMemorySize - 1;
-
-                    ContextInfo->Stats.UpdateShadowMalloced(
-                        LocalShadowMemorySize);
-
-                    getContext()->logger.info(
-                        "ShadowMemory(Local, {} - {})",
-                        (void *)LaunchInfo.Data->LocalShadowOffset,
-                        (void *)LaunchInfo.Data->LocalShadowOffsetEnd);
-                }
+            if (DeviceInfo->Shadow->AllocLocalShadow(
+                    Queue, NumWG, LaunchInfo.Data->LocalShadowOffset,
+                    LaunchInfo.Data->LocalShadowOffsetEnd) !=
+                UR_RESULT_SUCCESS) {
+                getContext()->logger.warning(
+                    "Failed to allocate shadow memory for local "
+                    "memory, maybe the number of workgroup ({}) is too "
+                    "large",
+                    NumWG);
+                getContext()->logger.warning(
+                    "Skip checking local memory of kernel <{}>",
+                    GetKernelName(Kernel));
+            } else {
+                getContext()->logger.info(
+                    "ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
+                    (void *)LaunchInfo.Data->LocalShadowOffset,
+                    (void *)LaunchInfo.Data->LocalShadowOffsetEnd);
             }
         }
 
         // Write shadow memory offset for private memory
         if (getOptions().DetectPrivates) {
-            if (DeviceInfo->Type == DeviceType::CPU) {
-                LaunchInfo.Data->PrivateShadowOffset =
-                    DeviceInfo->Shadow->ShadowBegin;
-            } else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
-                       DeviceInfo->Type == DeviceType::GPU_DG2) {
-                const size_t PrivateShadowMemorySize =
-                    (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
-
-                getContext()->logger.debug("PrivateMemory(WorkGroup={}, "
-                                           "PrivateShadowMemorySize={})",
-                                           NumWG, PrivateShadowMemorySize);
-
-                if (EnqueueAllocateShadowMemory(
-                        PrivateShadowMemorySize,
-                        LaunchInfo.Data->PrivateShadowOffset) !=
-                    UR_RESULT_SUCCESS) {
-                    getContext()->logger.warning(
-                        "Failed to allocate shadow memory for private "
-                        "memory, maybe the number of workgroup ({}) is too "
-                        "large",
-                        NumWG);
-                    getContext()->logger.warning(
-                        "Skip checking private memory of kernel <{}>",
-                        GetKernelName(Kernel));
-                } else {
-                    LaunchInfo.Data->PrivateShadowOffsetEnd =
-                        LaunchInfo.Data->PrivateShadowOffset +
-                        PrivateShadowMemorySize - 1;
-
-                    ContextInfo->Stats.UpdateShadowMalloced(
-                        PrivateShadowMemorySize);
-
-                    getContext()->logger.info(
-                        "ShadowMemory(Private, {} - {})",
-                        (void *)LaunchInfo.Data->PrivateShadowOffset,
-                        (void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
-                }
+            if (DeviceInfo->Shadow->AllocPrivateShadow(
+                    Queue, NumWG, LaunchInfo.Data->PrivateShadowOffset,
+                    LaunchInfo.Data->PrivateShadowOffsetEnd) !=
+                UR_RESULT_SUCCESS) {
+                getContext()->logger.warning(
+                    "Failed to allocate shadow memory for private "
+                    "memory, maybe the number of workgroup ({}) is too "
+                    "large",
+                    NumWG);
+                getContext()->logger.warning(
+                    "Skip checking private memory of kernel <{}>",
+                    GetKernelName(Kernel));
+            } else {
+                getContext()->logger.info(
+                    "ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
+                    (void *)LaunchInfo.Data->PrivateShadowOffset,
+                    (void *)LaunchInfo.Data->PrivateShadowOffsetEnd);
             }
         }
     } while (false);
@@ -878,25 +815,7 @@ ur_result_t USMLaunchInfo::updateKernelInfo(const KernelInfo &KI) {
 USMLaunchInfo::~USMLaunchInfo() {
     [[maybe_unused]] ur_result_t Result;
     if (Data) {
-        auto Type = GetDeviceType(Context, Device);
         auto ContextInfo = getContext()->interceptor->getContextInfo(Context);
-        if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
-            if (Data->PrivateShadowOffset) {
-                ContextInfo->Stats.UpdateShadowFreed(
-                    Data->PrivateShadowOffsetEnd - Data->PrivateShadowOffset +
-                    1);
-                Result = getContext()->urDdiTable.USM.pfnFree(
-                    Context, (void *)Data->PrivateShadowOffset);
-                assert(Result == UR_RESULT_SUCCESS);
-            }
-            if (Data->LocalShadowOffset) {
-                ContextInfo->Stats.UpdateShadowFreed(
-                    Data->LocalShadowOffsetEnd - Data->LocalShadowOffset + 1);
-                Result = getContext()->urDdiTable.USM.pfnFree(
-                    Context, (void *)Data->LocalShadowOffset);
-                assert(Result == UR_RESULT_SUCCESS);
-            }
-        }
         if (Data->LocalArgs) {
             Result = getContext()->urDdiTable.USM.pfnFree(
                 Context, (void *)Data->LocalArgs);
diff --git a/source/loader/layers/sanitizer/asan_shadow.cpp b/source/loader/layers/sanitizer/asan_shadow.cpp
index 629ce3a491..f5800a694c 100644
--- a/source/loader/layers/sanitizer/asan_shadow.cpp
+++ b/source/loader/layers/sanitizer/asan_shadow.cpp
@@ -131,16 +131,27 @@ ur_result_t ShadowMemoryGPU::Setup() {
 }
 
 ur_result_t ShadowMemoryGPU::Destory() {
-    if (ShadowBegin == 0) {
-        return UR_RESULT_SUCCESS;
+    // Release the cached private/local shadow buffers and forget their sizes
+    // so that a later Alloc*Shadow() call allocates afresh.
+    if (PrivateShadowOffset != 0) {
+        UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+            Context, (void *)PrivateShadowOffset));
+        PrivateShadowOffset = 0;
+        PrivateShadowSize = 0;
     }
-    static ur_result_t Result = [this]() {
-        auto Result = getContext()->urDdiTable.VirtualMem.pfnFree(
-            Context, (const void *)ShadowBegin, GetShadowSize());
-        getContext()->urDdiTable.Context.pfnRelease(Context);
-        return Result;
-    }();
-    return Result;
+    if (LocalShadowOffset != 0) {
+        UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+            Context, (void *)LocalShadowOffset));
+        LocalShadowOffset = 0;
+        LocalShadowSize = 0;
+    }
+    if (ShadowBegin != 0) {
+        UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
+            Context, (const void *)ShadowBegin, GetShadowSize()));
+        UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context));
+        ShadowBegin = ShadowEnd = 0;
+    }
+    return UR_RESULT_SUCCESS;
 }
 
 ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
@@ -255,6 +266,105 @@ ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
     return UR_RESULT_SUCCESS;
 }
 
+/// Hand out shadow memory for the local memory of \p NumWG work groups.
+/// The buffer cached in LocalShadowOffset is re-used as is while it is large
+/// enough; otherwise it is freed and a bigger one is allocated and
+/// zero-filled through \p Queue. \p Begin and \p End receive the first and
+/// last shadow address of this launch.
+/// \return UR_RESULT_SUCCESS, or the error of the UR call that failed.
+ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
+                                              uint32_t NumWG, uptr &Begin,
+                                              uptr &End) {
+    const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device);
+    const size_t RequiredShadowSize =
+        (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
+    // LocalShadowSize is a member rather than a function-local static so that
+    // every shadow object tracks its own buffer and Destory() can reset it.
+    if (RequiredShadowSize > LocalShadowSize) {
+        auto ContextInfo = getContext()->interceptor->getContextInfo(Context);
+        if (LocalShadowOffset) {
+            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+                Context, (void *)LocalShadowOffset));
+            ContextInfo->Stats.UpdateShadowFreed(LocalShadowSize);
+            LocalShadowOffset = 0;
+            LocalShadowSize = 0;
+        }
+
+        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
+            Context, Device, nullptr, nullptr, RequiredShadowSize,
+            (void **)&LocalShadowOffset));
+
+        // Initialize shadow memory
+        ur_result_t URes = EnqueueUSMBlockingSet(
+            Queue, (void *)LocalShadowOffset, 0, RequiredShadowSize);
+        if (URes != UR_RESULT_SUCCESS) {
+            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+                Context, (void *)LocalShadowOffset));
+            LocalShadowOffset = 0;
+            // Nothing usable is cached: report the failure instead of
+            // falling through to the success path with a null offset.
+            return URes;
+        }
+
+        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
+
+        LocalShadowSize = RequiredShadowSize;
+    }
+
+    Begin = LocalShadowOffset;
+    End = LocalShadowOffset + RequiredShadowSize - 1;
+    return UR_RESULT_SUCCESS;
+}
+
+/// Hand out shadow memory for private memory, sized for \p NumWG work groups.
+/// The buffer cached in PrivateShadowOffset is re-used as is while it is
+/// large enough; otherwise it is freed and a bigger one is allocated and
+/// zero-filled through \p Queue. \p Begin and \p End receive the first and
+/// last shadow address of this launch.
+/// \return UR_RESULT_SUCCESS, or the error of the UR call that failed.
+ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
+                                                uint32_t NumWG, uptr &Begin,
+                                                uptr &End) {
+    const size_t RequiredShadowSize =
+        (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
+    // PrivateShadowSize is a member rather than a function-local static so
+    // that every shadow object tracks its own buffer and Destory() resets it.
+    if (RequiredShadowSize > PrivateShadowSize) {
+        auto ContextInfo = getContext()->interceptor->getContextInfo(Context);
+        if (PrivateShadowOffset) {
+            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+                Context, (void *)PrivateShadowOffset));
+            ContextInfo->Stats.UpdateShadowFreed(PrivateShadowSize);
+            PrivateShadowOffset = 0;
+            PrivateShadowSize = 0;
+        }
+
+        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
+            Context, Device, nullptr, nullptr, RequiredShadowSize,
+            (void **)&PrivateShadowOffset));
+
+        // Initialize shadow memory
+        ur_result_t URes = EnqueueUSMBlockingSet(
+            Queue, (void *)PrivateShadowOffset, 0, RequiredShadowSize);
+        if (URes != UR_RESULT_SUCCESS) {
+            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+                Context, (void *)PrivateShadowOffset));
+            PrivateShadowOffset = 0;
+            // Nothing usable is cached: report the failure instead of
+            // falling through to the success path with a null offset.
+            return URes;
+        }
+
+        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
+
+        PrivateShadowSize = RequiredShadowSize;
+    }
+
+    Begin = PrivateShadowOffset;
+    End = PrivateShadowOffset + RequiredShadowSize - 1;
+    return UR_RESULT_SUCCESS;
+}
+
 uptr ShadowMemoryPVC::MemToShadow(uptr Ptr) {
     if (Ptr & 0xFF00000000000000ULL) { // Device USM
         return ShadowBegin + 0x80000000000ULL +
diff --git a/source/loader/layers/sanitizer/asan_shadow.hpp b/source/loader/layers/sanitizer/asan_shadow.hpp
index 7ae095062a..d6d6e634e6 100644
--- a/source/loader/layers/sanitizer/asan_shadow.hpp
+++ b/source/loader/layers/sanitizer/asan_shadow.hpp
@@ -39,6 +39,14 @@ struct ShadowMemory {
 
     virtual size_t GetShadowSize() = 0;
 
+    virtual ur_result_t AllocLocalShadow(ur_queue_handle_t Queue,
+                                         uint32_t NumWG, uptr &Begin,
+                                         uptr &End) = 0;
+
+    virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue,
+                                           uint32_t NumWG, uptr &Begin,
+                                           uptr &End) = 0;
+
     ur_context_handle_t Context{};
 
     ur_device_handle_t Device{};
@@ -62,6 +70,20 @@ struct ShadowMemoryCPU final : public ShadowMemory {
                                     uptr Size, u8 Value) override;
 
     size_t GetShadowSize() override { return 0x80000000000ULL; }
+
+    ur_result_t AllocLocalShadow(ur_queue_handle_t, uint32_t, uptr &Begin,
+                                 uptr &End) override {
+        Begin = ShadowBegin;
+        End = ShadowEnd;
+        return UR_RESULT_SUCCESS;
+    }
+
+    ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint32_t, uptr &Begin,
+                                   uptr &End) override {
+        Begin = ShadowBegin;
+        End = ShadowEnd;
+        return UR_RESULT_SUCCESS;
+    }
 };
 
 struct ShadowMemoryGPU : public ShadowMemory {
@@ -76,12 +98,28 @@ struct ShadowMemoryGPU : public ShadowMemory {
 
     ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo> AI) override final;
 
+    ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
+                                 uptr &Begin, uptr &End) override final;
+
+    ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumWG,
+                                   uptr &Begin, uptr &End) override final;
+
     ur_mutex VirtualMemMapsMutex;
 
     std::unordered_map<
         uptr, std::pair<ur_physical_mem_handle_t,
                         std::unordered_set<std::shared_ptr<AllocInfo>>>>
         VirtualMemMaps;
+
+    /// Device buffer cached by AllocLocalShadow (0 if none) and its size in
+    /// bytes.
+    uptr LocalShadowOffset = 0;
+    size_t LocalShadowSize = 0;
+
+    /// Device buffer cached by AllocPrivateShadow (0 if none) and its size in
+    /// bytes.
+    uptr PrivateShadowOffset = 0;
+    size_t PrivateShadowSize = 0;
 };
 
 /// Shadow Memory layout of GPU PVC device