diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp index 8d2a9e5ee2..1a1185e1ba 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.cpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.cpp @@ -36,7 +36,8 @@ AsanInterceptor::~AsanInterceptor() { // We must release these objects before releasing adapters, since // they may use the adapter in their destructor for (const auto &[_, DeviceInfo] : m_DeviceMap) { - DeviceInfo->Shadow->Destory(); + [[maybe_unused]] auto URes = DeviceInfo->Shadow->Destory(); + assert(URes == UR_RESULT_SUCCESS); } m_Quarantine = nullptr; @@ -96,6 +97,10 @@ ur_result_t AsanInterceptor::allocateMemory(ur_context_handle_t Context, void *Allocated = nullptr; + if (Pool == nullptr) { + Pool = ContextInfo->getUSMPool(); + } + if (Type == AllocType::DEVICE_USM) { UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc( Context, Device, Properties, Pool, NeededSize, &Allocated)); @@ -228,16 +233,6 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context, ContextInfo->Stats.UpdateUSMRealFreed( ToFreeAllocInfo->AllocSize, ToFreeAllocInfo->getRedzoneSize()); - if (ToFreeAllocInfo->Type == AllocType::HOST_USM) { - for (auto &Device : ContextInfo->DeviceList) { - UR_CALL(getDeviceInfo(Device)->Shadow->ReleaseShadow( - ToFreeAllocInfo)); - } - } else { - UR_CALL(getDeviceInfo(ToFreeAllocInfo->Device) - ->Shadow->ReleaseShadow(ToFreeAllocInfo)); - } - UR_CALL(getContext()->urDdiTable.USM.pfnFree( Context, (void *)(ToFreeAllocInfo->AllocBegin))); @@ -436,12 +431,6 @@ ur_result_t AsanInterceptor::unregisterProgram(ur_program_handle_t Program) { auto ProgramInfo = getProgramInfo(Program); assert(ProgramInfo != nullptr && "unregistered program!"); - for (auto AI : ProgramInfo->AllocInfoForGlobals) { - UR_CALL(getDeviceInfo(AI->Device)->Shadow->ReleaseShadow(AI)); - m_AllocationMap.erase(AI->AllocBegin); - } - ProgramInfo->AllocInfoForGlobals.clear(); - ProgramInfo->InstrumentedKernels.clear(); return UR_RESULT_SUCCESS; @@ -560,10 +549,6 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) { {}}); ContextInfo->insertAllocInfo({Device}, AI); - ProgramInfo->AllocInfoForGlobals.emplace(AI); - - std::scoped_lock Guard(m_AllocationMapMutex); - m_AllocationMap.emplace(AI->AllocBegin, std::move(AI)); } } @@ -887,9 +872,14 @@ bool ProgramInfo::isKernelInstrumented(ur_kernel_handle_t Kernel) const { ContextInfo::~ContextInfo() { Stats.Print(Handle); - [[maybe_unused]] auto Result = - getContext()->urDdiTable.Context.pfnRelease(Handle); - assert(Result == UR_RESULT_SUCCESS); + [[maybe_unused]] ur_result_t URes; + if (USMPool) { + URes = getContext()->urDdiTable.USM.pfnPoolRelease(USMPool); + assert(URes == UR_RESULT_SUCCESS); + } + + URes = getContext()->urDdiTable.Context.pfnRelease(Handle); + assert(URes == UR_RESULT_SUCCESS); // check memory leaks if (getAsanInterceptor()->getOptions().DetectLeaks && @@ -905,6 +895,22 @@ ContextInfo::~ContextInfo() { } } +ur_usm_pool_handle_t ContextInfo::getUSMPool() { + std::call_once(PoolInit, [this]() { + ur_usm_pool_desc_t Desc{UR_STRUCTURE_TYPE_USM_POOL_DESC, nullptr, 0}; + auto URes = + getContext()->urDdiTable.USM.pfnPoolCreate(Handle, &Desc, &USMPool); + if (URes != UR_RESULT_SUCCESS && + URes != UR_RESULT_ERROR_UNSUPPORTED_FEATURE) { + getContext()->logger.warning( + "Failed to create USM pool, the memory overhead " + "may increase: {}", + URes); + } + }); + return USMPool; +} + AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() { [[maybe_unused]] ur_result_t Result; if (Host.LocalArgs) { diff --git a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp index 122865bd11..f1e80dae56 100644 --- a/source/loader/layers/sanitizer/asan/asan_interceptor.hpp +++ b/source/loader/layers/sanitizer/asan/asan_interceptor.hpp @@ -112,7 +112,6 @@ struct ProgramInfo { std::atomic RefCount = 1; // Program is built only once, so we don't need to lock it - std::unordered_set> AllocInfoForGlobals; std::unordered_set InstrumentedKernels; explicit ProgramInfo(ur_program_handle_t Program) : Handle(Program) { @@ -132,6 +131,10 @@ struct ProgramInfo { struct ContextInfo { ur_context_handle_t Handle; + + ur_usm_pool_handle_t USMPool{}; + std::once_flag PoolInit; + std::atomic RefCount = 1; std::vector DeviceList; @@ -155,6 +158,8 @@ struct ContextInfo { AllocInfos.List.emplace_back(AI); } } + + ur_usm_pool_handle_t getUSMPool(); }; struct AsanRuntimeDataWrapper { diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.cpp b/source/loader/layers/sanitizer/asan/asan_shadow.cpp index 8e2329ac06..de0679687b 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.cpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.cpp @@ -108,11 +108,15 @@ ur_result_t ShadowMemoryGPU::Setup() { // TODO: Protect Bad Zone auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve( Context, nullptr, ShadowSize, (void **)&ShadowBegin); - if (Result == UR_RESULT_SUCCESS) { - ShadowEnd = ShadowBegin + ShadowSize; - // Retain the context which reserves shadow memory - getContext()->urDdiTable.Context.pfnRetain(Context); + if (Result != UR_RESULT_SUCCESS) { + getContext()->logger.error( + "Shadow memory reserved failed with size {}: {}", + (void *)ShadowSize, Result); + return Result; } + ShadowEnd = ShadowBegin + ShadowSize; + // Retain the context which reserves shadow memory + getContext()->urDdiTable.Context.pfnRetain(Context); // Set shadow memory for null pointer // For GPU, wu use up to 1 page of shadow memory @@ -137,6 +141,24 @@ ur_result_t ShadowMemoryGPU::Destory() { Context, (void *)PrivateShadowOffset)); PrivateShadowOffset = 0; } + + static ur_result_t Result = [this]() { + const size_t PageSize = GetVirtualMemGranularity(Context, Device); + for (auto [MappedPtr, PhysicalMem] : VirtualMemMaps) { + UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap( + Context, (void *)MappedPtr, PageSize)); + UR_CALL( + getContext()->urDdiTable.PhysicalMem.pfnRelease(PhysicalMem)); + } + UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree( + Context, (const void *)ShadowBegin, GetShadowSize())); + UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context)); + return UR_RESULT_SUCCESS; + }(); + if (!Result) { + return Result; + } + if (LocalShadowOffset != 0) { UR_CALL(getContext()->urDdiTable.USM.pfnFree( Context, (void *)LocalShadowOffset)); @@ -205,19 +227,8 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, return URes; } - VirtualMemMaps[MappedPtr].first = PhysicalMem; + VirtualMemMaps[MappedPtr] = PhysicalMem; } - - // We don't need to record virtual memory map for null pointer, - // since it doesn't have an alloc info. - if (Ptr == 0) { - continue; - } - - auto AllocInfoIt = - getAsanInterceptor()->findAllocInfoByAddress(Ptr); - assert(AllocInfoIt); - VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second); } } @@ -235,35 +246,6 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue, return UR_RESULT_SUCCESS; } -ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr AI) { - uptr ShadowBegin = MemToShadow(AI->AllocBegin); - uptr ShadowEnd = MemToShadow(AI->AllocBegin + AI->AllocSize); - assert(ShadowBegin <= ShadowEnd); - - static const size_t PageSize = GetVirtualMemGranularity(Context, Device); - - for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize); - MappedPtr <= ShadowEnd; MappedPtr += PageSize) { - std::scoped_lock Guard(VirtualMemMapsMutex); - if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) { - continue; - } - VirtualMemMaps[MappedPtr].second.erase(AI); - if (VirtualMemMaps[MappedPtr].second.empty()) { - UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap( - Context, (void *)MappedPtr, PageSize)); - UR_CALL(getContext()->urDdiTable.PhysicalMem.pfnRelease( - VirtualMemMaps[MappedPtr].first)); - getContext()->logger.debug("urVirtualMemUnmap: {} ~ {}", - (void *)MappedPtr, - (void *)(MappedPtr + PageSize - 1)); - VirtualMemMaps.erase(MappedPtr); - } - } - - return UR_RESULT_SUCCESS; -} - ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG, uptr &Begin, uptr &End) { diff --git a/source/loader/layers/sanitizer/asan/asan_shadow.hpp b/source/loader/layers/sanitizer/asan/asan_shadow.hpp index 0658a07925..48054378fe 100644 --- a/source/loader/layers/sanitizer/asan/asan_shadow.hpp +++ b/source/loader/layers/sanitizer/asan/asan_shadow.hpp @@ -35,10 +35,6 @@ struct ShadowMemory { virtual ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value) = 0; - virtual ur_result_t ReleaseShadow(std::shared_ptr) { - return UR_RESULT_SUCCESS; - } - virtual size_t GetShadowSize() = 0; virtual ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, @@ -98,8 +94,6 @@ struct ShadowMemoryGPU : public ShadowMemory { ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr, uptr Size, u8 Value) override final; - ur_result_t ReleaseShadow(std::shared_ptr AI) override final; - ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG, uptr &Begin, uptr &End) override final; @@ -108,10 +102,7 @@ struct ShadowMemoryGPU : public ShadowMemory { ur_mutex VirtualMemMapsMutex; - std::unordered_map< - uptr, std::pair>>> - VirtualMemMaps; + std::unordered_map VirtualMemMaps; uptr LocalShadowOffset = 0;