Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 30 additions & 24 deletions source/loader/layers/sanitizer/asan/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ AsanInterceptor::~AsanInterceptor() {
// We must release these objects before releasing adapters, since
// they may use the adapter in their destructor
for (const auto &[_, DeviceInfo] : m_DeviceMap) {
DeviceInfo->Shadow->Destory();
[[maybe_unused]] auto URes = DeviceInfo->Shadow->Destory();
assert(URes == UR_RESULT_SUCCESS);
}

m_Quarantine = nullptr;
Expand Down Expand Up @@ -96,6 +97,10 @@ ur_result_t AsanInterceptor::allocateMemory(ur_context_handle_t Context,

void *Allocated = nullptr;

if (Pool == nullptr) {
Pool = ContextInfo->getUSMPool();
}

if (Type == AllocType::DEVICE_USM) {
UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
Context, Device, Properties, Pool, NeededSize, &Allocated));
Expand Down Expand Up @@ -228,16 +233,6 @@ ur_result_t AsanInterceptor::releaseMemory(ur_context_handle_t Context,
ContextInfo->Stats.UpdateUSMRealFreed(
ToFreeAllocInfo->AllocSize, ToFreeAllocInfo->getRedzoneSize());

if (ToFreeAllocInfo->Type == AllocType::HOST_USM) {
for (auto &Device : ContextInfo->DeviceList) {
UR_CALL(getDeviceInfo(Device)->Shadow->ReleaseShadow(
ToFreeAllocInfo));
}
} else {
UR_CALL(getDeviceInfo(ToFreeAllocInfo->Device)
->Shadow->ReleaseShadow(ToFreeAllocInfo));
}

UR_CALL(getContext()->urDdiTable.USM.pfnFree(
Context, (void *)(ToFreeAllocInfo->AllocBegin)));

Expand Down Expand Up @@ -436,12 +431,6 @@ ur_result_t AsanInterceptor::unregisterProgram(ur_program_handle_t Program) {
auto ProgramInfo = getProgramInfo(Program);
assert(ProgramInfo != nullptr && "unregistered program!");

for (auto AI : ProgramInfo->AllocInfoForGlobals) {
UR_CALL(getDeviceInfo(AI->Device)->Shadow->ReleaseShadow(AI));
m_AllocationMap.erase(AI->AllocBegin);
}
ProgramInfo->AllocInfoForGlobals.clear();

ProgramInfo->InstrumentedKernels.clear();

return UR_RESULT_SUCCESS;
Expand Down Expand Up @@ -560,10 +549,6 @@ AsanInterceptor::registerDeviceGlobals(ur_program_handle_t Program) {
{}});

ContextInfo->insertAllocInfo({Device}, AI);
ProgramInfo->AllocInfoForGlobals.emplace(AI);

std::scoped_lock<ur_shared_mutex> Guard(m_AllocationMapMutex);
m_AllocationMap.emplace(AI->AllocBegin, std::move(AI));
}
}

Expand Down Expand Up @@ -887,9 +872,14 @@ bool ProgramInfo::isKernelInstrumented(ur_kernel_handle_t Kernel) const {
ContextInfo::~ContextInfo() {
Stats.Print(Handle);

[[maybe_unused]] auto Result =
getContext()->urDdiTable.Context.pfnRelease(Handle);
assert(Result == UR_RESULT_SUCCESS);
[[maybe_unused]] ur_result_t URes;
if (USMPool) {
URes = getContext()->urDdiTable.USM.pfnPoolRelease(USMPool);
assert(URes == UR_RESULT_SUCCESS);
}

URes = getContext()->urDdiTable.Context.pfnRelease(Handle);
assert(URes == UR_RESULT_SUCCESS);

// check memory leaks
if (getAsanInterceptor()->getOptions().DetectLeaks &&
Expand All @@ -905,6 +895,22 @@ ContextInfo::~ContextInfo() {
}
}

// Lazily create the per-context USM pool (at most once, thread-safe via
// std::call_once) and return it. If pool creation fails, only a warning is
// logged and USMPool stays null, so callers fall back to pool-less
// allocations at the cost of extra memory overhead.
ur_usm_pool_handle_t ContextInfo::getUSMPool() {
    std::call_once(PoolInit, [this] {
        ur_usm_pool_desc_t PoolDesc{UR_STRUCTURE_TYPE_USM_POOL_DESC, nullptr, 0};
        const ur_result_t Res =
            getContext()->urDdiTable.USM.pfnPoolCreate(Handle, &PoolDesc, &USMPool);
        // UNSUPPORTED_FEATURE is an acceptable outcome (adapter has no pool
        // support); anything else unexpected is worth surfacing.
        const bool Acceptable = (Res == UR_RESULT_SUCCESS) ||
                                (Res == UR_RESULT_ERROR_UNSUPPORTED_FEATURE);
        if (!Acceptable) {
            getContext()->logger.warning(
                "Failed to create USM pool, the memory overhead "
                "may increase: {}",
                Res);
        }
    });
    return USMPool;
}

AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
[[maybe_unused]] ur_result_t Result;
if (Host.LocalArgs) {
Expand Down
7 changes: 6 additions & 1 deletion source/loader/layers/sanitizer/asan/asan_interceptor.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ struct ProgramInfo {
std::atomic<int32_t> RefCount = 1;

// Program is built only once, so we don't need to lock it
std::unordered_set<std::shared_ptr<AllocInfo>> AllocInfoForGlobals;
std::unordered_set<std::string> InstrumentedKernels;

explicit ProgramInfo(ur_program_handle_t Program) : Handle(Program) {
Expand All @@ -132,6 +131,10 @@ struct ProgramInfo {

struct ContextInfo {
ur_context_handle_t Handle;

ur_usm_pool_handle_t USMPool{};
std::once_flag PoolInit;

std::atomic<int32_t> RefCount = 1;

std::vector<ur_device_handle_t> DeviceList;
Expand All @@ -155,6 +158,8 @@ struct ContextInfo {
AllocInfos.List.emplace_back(AI);
}
}

ur_usm_pool_handle_t getUSMPool();
};

struct AsanRuntimeDataWrapper {
Expand Down
72 changes: 27 additions & 45 deletions source/loader/layers/sanitizer/asan/asan_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,15 @@ ur_result_t ShadowMemoryGPU::Setup() {
// TODO: Protect Bad Zone
auto Result = getContext()->urDdiTable.VirtualMem.pfnReserve(
Context, nullptr, ShadowSize, (void **)&ShadowBegin);
if (Result == UR_RESULT_SUCCESS) {
ShadowEnd = ShadowBegin + ShadowSize;
// Retain the context which reserves shadow memory
getContext()->urDdiTable.Context.pfnRetain(Context);
if (Result != UR_RESULT_SUCCESS) {
getContext()->logger.error(
"Shadow memory reserved failed with size {}: {}",
(void *)ShadowSize, Result);
return Result;
}
ShadowEnd = ShadowBegin + ShadowSize;
// Retain the context which reserves shadow memory
getContext()->urDdiTable.Context.pfnRetain(Context);

// Set shadow memory for null pointer
// For GPU, we use up to 1 page of shadow memory
Expand All @@ -137,6 +141,24 @@ ur_result_t ShadowMemoryGPU::Destory() {
Context, (void *)PrivateShadowOffset));
PrivateShadowOffset = 0;
}

static ur_result_t Result = [this]() {
const size_t PageSize = GetVirtualMemGranularity(Context, Device);
for (auto [MappedPtr, PhysicalMem] : VirtualMemMaps) {
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap(
Context, (void *)MappedPtr, PageSize));
UR_CALL(
getContext()->urDdiTable.PhysicalMem.pfnRelease(PhysicalMem));
}
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
Context, (const void *)ShadowBegin, GetShadowSize()));
UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context));
return UR_RESULT_SUCCESS;
}();
if (!Result) {
return Result;
}

if (LocalShadowOffset != 0) {
UR_CALL(getContext()->urDdiTable.USM.pfnFree(
Context, (void *)LocalShadowOffset));
Expand Down Expand Up @@ -205,19 +227,8 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
return URes;
}

VirtualMemMaps[MappedPtr].first = PhysicalMem;
VirtualMemMaps[MappedPtr] = PhysicalMem;
}

// We don't need to record virtual memory map for null pointer,
// since it doesn't have an alloc info.
if (Ptr == 0) {
continue;
}

auto AllocInfoIt =
getAsanInterceptor()->findAllocInfoByAddress(Ptr);
assert(AllocInfoIt);
VirtualMemMaps[MappedPtr].second.insert((*AllocInfoIt)->second);
}
}

Expand All @@ -235,35 +246,6 @@ ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
return UR_RESULT_SUCCESS;
}

// Release the shadow pages that cover the given allocation. Each mapped page
// tracks the set of allocations referencing it; a page is only unmapped and
// its physical memory released once that set becomes empty.
ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
// Shadow-address range corresponding to [AllocBegin, AllocBegin + AllocSize).
uptr ShadowBegin = MemToShadow(AI->AllocBegin);
uptr ShadowEnd = MemToShadow(AI->AllocBegin + AI->AllocSize);
assert(ShadowBegin <= ShadowEnd);

// Mapping granularity; `static` so it is queried only once.
// NOTE(review): assumes the granularity is identical for every
// (Context, Device) this object serves — confirm against callers.
static const size_t PageSize = GetVirtualMemGranularity(Context, Device);

// Visit every page-aligned shadow address overlapping the range.
for (auto MappedPtr = RoundDownTo(ShadowBegin, PageSize);
MappedPtr <= ShadowEnd; MappedPtr += PageSize) {
// Lock is taken per page iteration, keeping the critical section small.
std::scoped_lock<ur_mutex> Guard(VirtualMemMapsMutex);
// Pages never mapped (or already released) are simply skipped.
if (VirtualMemMaps.find(MappedPtr) == VirtualMemMaps.end()) {
continue;
}
// Drop this allocation's reference on the page.
VirtualMemMaps[MappedPtr].second.erase(AI);
if (VirtualMemMaps[MappedPtr].second.empty()) {
// Last reference gone: unmap the virtual range, release the backing
// physical memory, then forget the page. Order matters — the map
// entry is still needed for the PhysicalMem handle until released.
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnUnmap(
Context, (void *)MappedPtr, PageSize));
UR_CALL(getContext()->urDdiTable.PhysicalMem.pfnRelease(
VirtualMemMaps[MappedPtr].first));
getContext()->logger.debug("urVirtualMemUnmap: {} ~ {}",
(void *)MappedPtr,
(void *)(MappedPtr + PageSize - 1));
VirtualMemMaps.erase(MappedPtr);
}
}

return UR_RESULT_SUCCESS;
}

ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
uint32_t NumWG, uptr &Begin,
uptr &End) {
Expand Down
11 changes: 1 addition & 10 deletions source/loader/layers/sanitizer/asan/asan_shadow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,6 @@ struct ShadowMemory {
virtual ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr,
uptr Size, u8 Value) = 0;

virtual ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo>) {
return UR_RESULT_SUCCESS;
}

virtual size_t GetShadowSize() = 0;

virtual ur_result_t AllocLocalShadow(ur_queue_handle_t Queue,
Expand Down Expand Up @@ -98,8 +94,6 @@ struct ShadowMemoryGPU : public ShadowMemory {
ur_result_t EnqueuePoisonShadow(ur_queue_handle_t Queue, uptr Ptr,
uptr Size, u8 Value) override final;

ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo> AI) override final;

ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
uptr &Begin, uptr &End) override final;

Expand All @@ -108,10 +102,7 @@ struct ShadowMemoryGPU : public ShadowMemory {

ur_mutex VirtualMemMapsMutex;

std::unordered_map<
uptr, std::pair<ur_physical_mem_handle_t,
std::unordered_set<std::shared_ptr<AllocInfo>>>>
VirtualMemMaps;
std::unordered_map<uptr, ur_physical_mem_handle_t> VirtualMemMaps;

uptr LocalShadowOffset = 0;

Expand Down
Loading