Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
149 changes: 34 additions & 115 deletions source/loader/layers/sanitizer/asan/asan_interceptor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -773,28 +773,6 @@ ur_result_t AsanInterceptor::prepareLaunch(
LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;

auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
Device = DeviceInfo->Handle,
Queue](size_t Size, uptr &Ptr) {
void *Allocated = nullptr;
auto URes = getContext()->urDdiTable.USM.pfnDeviceAlloc(
Context, Device, nullptr, nullptr, Size, &Allocated);
if (URes != UR_RESULT_SUCCESS) {
return URes;
}
// Initialize shadow memory
URes = EnqueueUSMBlockingSet(Queue, Allocated, 0, Size);
if (URes != UR_RESULT_SUCCESS) {
[[maybe_unused]] auto URes =
getContext()->urDdiTable.USM.pfnFree(Context, Allocated);
assert(URes == UR_RESULT_SUCCESS &&
"urUSMFree failed at allocating shadow memory");
Allocated = nullptr;
}
Ptr = (uptr)Allocated;
return URes;
};

auto LocalMemoryUsage =
GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
auto PrivateMemoryUsage =
Expand All @@ -806,86 +784,45 @@ ur_result_t AsanInterceptor::prepareLaunch(

// Write shadow memory offset for local memory
if (getOptions().DetectLocals) {
// CPU needn't this
if (DeviceInfo->Type == DeviceType::GPU_PVC ||
DeviceInfo->Type == DeviceType::GPU_DG2) {
const size_t LocalMemorySize =
GetDeviceLocalMemorySize(DeviceInfo->Handle);
const size_t LocalShadowMemorySize =
(NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;

getContext()->logger.debug(
"LocalMemory(WorkGroup={}, LocalMemorySize={}, "
"LocalShadowMemorySize={})",
NumWG, LocalMemorySize, LocalShadowMemorySize);

if (EnqueueAllocateShadowMemory(
LocalShadowMemorySize,
LaunchInfo.Data.Host.LocalShadowOffset) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for local "
"memory, maybe the number of workgroup ({}) is too "
"large",
NumWG);
getContext()->logger.warning(
"Skip checking local memory of kernel <{}>",
GetKernelName(Kernel));
} else {
LaunchInfo.Data.Host.LocalShadowOffsetEnd =
LaunchInfo.Data.Host.LocalShadowOffset +
LocalShadowMemorySize - 1;

ContextInfo->Stats.UpdateShadowMalloced(
LocalShadowMemorySize);

getContext()->logger.info(
"ShadowMemory(Local, {} - {})",
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
}
if (DeviceInfo->Shadow->AllocLocalShadow(
Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
LaunchInfo.Data.Host.LocalShadowOffsetEnd) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for local "
"memory, maybe the number of workgroup ({}) is too "
"large",
NumWG);
getContext()->logger.warning(
"Skip checking local memory of kernel <{}>",
GetKernelName(Kernel));
} else {
getContext()->logger.info(
"ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
(void *)LaunchInfo.Data.Host.LocalShadowOffset,
(void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
}
}

// Write shadow memory offset for private memory
if (getOptions().DetectPrivates) {
if (DeviceInfo->Type == DeviceType::CPU) {
LaunchInfo.Data.Host.PrivateShadowOffset =
DeviceInfo->Shadow->ShadowBegin;
} else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
DeviceInfo->Type == DeviceType::GPU_DG2) {
const size_t PrivateShadowMemorySize =
(NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;

getContext()->logger.debug("PrivateMemory(WorkGroup={}, "
"PrivateShadowMemorySize={})",
NumWG, PrivateShadowMemorySize);

if (EnqueueAllocateShadowMemory(
PrivateShadowMemorySize,
LaunchInfo.Data.Host.PrivateShadowOffset) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for private "
"memory, maybe the number of workgroup ({}) is too "
"large",
NumWG);
getContext()->logger.warning(
"Skip checking private memory of kernel <{}>",
GetKernelName(Kernel));
} else {
LaunchInfo.Data.Host.PrivateShadowOffsetEnd =
LaunchInfo.Data.Host.PrivateShadowOffset +
PrivateShadowMemorySize - 1;

ContextInfo->Stats.UpdateShadowMalloced(
PrivateShadowMemorySize);

getContext()->logger.info(
"ShadowMemory(Private, {} - {})",
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
if (DeviceInfo->Shadow->AllocPrivateShadow(
Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
LaunchInfo.Data.Host.PrivateShadowOffsetEnd) !=
UR_RESULT_SUCCESS) {
getContext()->logger.warning(
"Failed to allocate shadow memory for private "
"memory, maybe the number of workgroup ({}) is too "
"large",
NumWG);
getContext()->logger.warning(
"Skip checking private memory of kernel <{}>",
GetKernelName(Kernel));
} else {
getContext()->logger.info(
"ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
(void *)LaunchInfo.Data.Host.PrivateShadowOffset,
(void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
}
}

Expand Down Expand Up @@ -970,24 +907,6 @@ ContextInfo::~ContextInfo() {

AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
[[maybe_unused]] ur_result_t Result;
auto Type = GetDeviceType(Context, Device);
auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
if (Host.PrivateShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(Host.PrivateShadowOffsetEnd -
Host.PrivateShadowOffset + 1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Host.PrivateShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
if (Host.LocalShadowOffset) {
ContextInfo->Stats.UpdateShadowFreed(Host.LocalShadowOffsetEnd -
Host.LocalShadowOffset + 1);
Result = getContext()->urDdiTable.USM.pfnFree(
Context, (void *)Host.LocalShadowOffset);
assert(Result == UR_RESULT_SUCCESS);
}
}
if (Host.LocalArgs) {
Result = getContext()->urDdiTable.USM.pfnFree(Context,
(void *)Host.LocalArgs);
Expand Down
106 changes: 97 additions & 9 deletions source/loader/layers/sanitizer/asan/asan_shadow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,23 @@ ur_result_t ShadowMemoryGPU::Setup() {
}

ur_result_t ShadowMemoryGPU::Destory() {
if (ShadowBegin == 0) {
return UR_RESULT_SUCCESS;
if (PrivateShadowOffset != 0) {
UR_CALL(getContext()->urDdiTable.USM.pfnFree(
Context, (void *)PrivateShadowOffset));
PrivateShadowOffset = 0;
}
static ur_result_t Result = [this]() {
auto Result = getContext()->urDdiTable.VirtualMem.pfnFree(
Context, (const void *)ShadowBegin, GetShadowSize());
getContext()->urDdiTable.Context.pfnRelease(Context);
return Result;
}();
return Result;
if (LocalShadowOffset != 0) {
UR_CALL(getContext()->urDdiTable.USM.pfnFree(
Context, (void *)LocalShadowOffset));
LocalShadowOffset = 0;
}
if (ShadowBegin != 0) {
UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
Context, (const void *)ShadowBegin, GetShadowSize()));
UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context));
ShadowBegin = ShadowEnd = 0;
}
return UR_RESULT_SUCCESS;
}

ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
Expand Down Expand Up @@ -257,6 +264,87 @@ ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
return UR_RESULT_SUCCESS;
}

/// Provide a shadow buffer for the local memory of a kernel launch.
///
/// The required size is (NumWG * device local memory size) >>
/// ASAN_SHADOW_SCALE. The buffer is a USM device allocation that is kept in
/// LocalShadowOffset and reused by later calls; it is only re-allocated when
/// a call needs more bytes than the last successful allocation provided.
///
/// \param Queue  Queue used to initialise a freshly allocated buffer.
/// \param NumWG  Number of work-groups of the launch.
/// \param Begin  [out] First address of the shadow range.
/// \param End    [out] Last address (inclusive) of the shadow range.
/// \return UR_RESULT_SUCCESS when Begin/End describe a usable buffer,
///         otherwise the error of the failing UR call. On failure Begin/End
///         are left untouched and no buffer is cached.
ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
                                              uint32_t NumWG, uptr &Begin,
                                              uptr &End) {
    const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device);
    const size_t RequiredShadowSize =
        (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
    // NOTE(review): this size cache is a function-local static, so it is
    // shared by every ShadowMemoryGPU object, while LocalShadowOffset is a
    // per-object member. With more than one instance (or after Destory()
    // resets LocalShadowOffset) the cache can claim a buffer that this
    // object does not have. It should become a data member; that needs a
    // header change outside this function.
    static size_t LastAllocedSize = 0;
    if (RequiredShadowSize > LastAllocedSize) {
        auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
        if (LocalShadowOffset) {
            // The cached buffer is too small: release it before growing.
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
                Context, (void *)LocalShadowOffset));
            ContextInfo->Stats.UpdateShadowFreed(LastAllocedSize);
            LocalShadowOffset = 0;
            LastAllocedSize = 0;
        }

        // Allocate into a local pointer so the member is only updated once
        // the buffer is fully usable.
        void *Allocated = nullptr;
        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
            Context, Device, nullptr, nullptr, RequiredShadowSize,
            &Allocated));

        // Initialize shadow memory
        ur_result_t URes =
            EnqueueUSMBlockingSet(Queue, Allocated, 0, RequiredShadowSize);
        if (URes != UR_RESULT_SUCCESS) {
            // Do not report success with a freed buffer: release it and
            // propagate the error so the caller skips local-memory checks.
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, Allocated));
            return URes;
        }

        LocalShadowOffset = (uptr)Allocated;
        LastAllocedSize = RequiredShadowSize;
        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
    }

    Begin = LocalShadowOffset;
    End = LocalShadowOffset + RequiredShadowSize - 1;
    return UR_RESULT_SUCCESS;
}

/// Provide a shadow buffer for the private memory of a kernel launch.
///
/// The required size is (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE.
/// The buffer is a USM device allocation that is kept in PrivateShadowOffset
/// and reused by later calls; it is only re-allocated when a call needs more
/// bytes than the last successful allocation provided.
///
/// \param Queue  Queue used to initialise a freshly allocated buffer.
/// \param NumWG  Number of work-groups of the launch.
/// \param Begin  [out] First address of the shadow range.
/// \param End    [out] Last address (inclusive) of the shadow range.
/// \return UR_RESULT_SUCCESS when Begin/End describe a usable buffer,
///         otherwise the error of the failing UR call. On failure Begin/End
///         are left untouched and no buffer is cached.
ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
                                                uint32_t NumWG, uptr &Begin,
                                                uptr &End) {
    const size_t RequiredShadowSize =
        (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
    // NOTE(review): this size cache is a function-local static, so it is
    // shared by every ShadowMemoryGPU object, while PrivateShadowOffset is a
    // per-object member. With more than one instance (or after Destory()
    // resets PrivateShadowOffset) the cache can claim a buffer that this
    // object does not have. It should become a data member; that needs a
    // header change outside this function.
    static size_t LastAllocedSize = 0;
    if (RequiredShadowSize > LastAllocedSize) {
        auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
        if (PrivateShadowOffset) {
            // The cached buffer is too small: release it before growing.
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
                Context, (void *)PrivateShadowOffset));
            ContextInfo->Stats.UpdateShadowFreed(LastAllocedSize);
            PrivateShadowOffset = 0;
            LastAllocedSize = 0;
        }

        // Allocate into a local pointer so the member is only updated once
        // the buffer is fully usable.
        void *Allocated = nullptr;
        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
            Context, Device, nullptr, nullptr, RequiredShadowSize,
            &Allocated));

        // Initialize shadow memory
        ur_result_t URes =
            EnqueueUSMBlockingSet(Queue, Allocated, 0, RequiredShadowSize);
        if (URes != UR_RESULT_SUCCESS) {
            // Do not report success with a freed buffer: release it and
            // propagate the error so the caller skips private-memory checks.
            UR_CALL(getContext()->urDdiTable.USM.pfnFree(Context, Allocated));
            return URes;
        }

        PrivateShadowOffset = (uptr)Allocated;
        LastAllocedSize = RequiredShadowSize;
        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
    }

    Begin = PrivateShadowOffset;
    End = PrivateShadowOffset + RequiredShadowSize - 1;
    return UR_RESULT_SUCCESS;
}

uptr ShadowMemoryPVC::MemToShadow(uptr Ptr) {
if (Ptr & 0xFF00000000000000ULL) { // Device USM
return ShadowBegin + 0x80000000000ULL +
Expand Down
32 changes: 32 additions & 0 deletions source/loader/layers/sanitizer/asan/asan_shadow.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,14 @@ struct ShadowMemory {

virtual size_t GetShadowSize() = 0;

/// Obtain the shadow range used for a kernel's local memory.
/// Implementations write the first and last address of the range to
/// Begin and End and return a ur_result_t status. Queue and NumWG (number
/// of work-groups) are inputs an implementation may use to size and
/// initialise the range.
virtual ur_result_t AllocLocalShadow(ur_queue_handle_t Queue,
uint32_t NumWG, uptr &Begin,
uptr &End) = 0;

/// Obtain the shadow range used for a kernel's private memory.
/// Same contract as AllocLocalShadow: Begin/End are outputs, the return
/// value is the status of the operation.
virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue,
uint32_t NumWG, uptr &Begin,
uptr &End) = 0;

ur_context_handle_t Context{};

ur_device_handle_t Device{};
Expand All @@ -64,6 +72,20 @@ struct ShadowMemoryCPU final : public ShadowMemory {
uptr Size, u8 Value) override;

size_t GetShadowSize() override { return 0x80000000000ULL; }

/// Report the local-memory shadow range.
/// Nothing is allocated here: the queue and work-group count are ignored
/// and the whole [ShadowBegin, ShadowEnd] range is handed back.
ur_result_t AllocLocalShadow(ur_queue_handle_t /*Queue*/,
                             uint32_t /*NumWG*/, uptr &Begin,
                             uptr &End) override {
    End = ShadowEnd;
    Begin = ShadowBegin;
    return UR_RESULT_SUCCESS;
}

/// Report the private-memory shadow range.
/// Nothing is allocated here: the queue and work-group count are ignored
/// and the whole [ShadowBegin, ShadowEnd] range is handed back.
ur_result_t AllocPrivateShadow(ur_queue_handle_t /*Queue*/,
                               uint32_t /*NumWG*/, uptr &Begin,
                               uptr &End) override {
    End = ShadowEnd;
    Begin = ShadowBegin;
    return UR_RESULT_SUCCESS;
}
};

struct ShadowMemoryGPU : public ShadowMemory {
Expand All @@ -78,12 +100,22 @@ struct ShadowMemoryGPU : public ShadowMemory {

ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo> AI) override final;

/// Returns the local-memory shadow range in Begin/End; sized from NumWG.
ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
uptr &Begin, uptr &End) override final;

/// Returns the private-memory shadow range in Begin/End; sized from NumWG.
ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumWG,
uptr &Begin, uptr &End) override final;

ur_mutex VirtualMemMapsMutex;

std::unordered_map<
uptr, std::pair<ur_physical_mem_handle_t,
std::unordered_set<std::shared_ptr<AllocInfo>>>>
VirtualMemMaps;

// Address of the buffer handed out by AllocLocalShadow; 0 means no buffer.
uptr LocalShadowOffset = 0;

// Address of the buffer handed out by AllocPrivateShadow; 0 means no buffer.
uptr PrivateShadowOffset = 0;
};

/// Shadow Memory layout of GPU PVC device
Expand Down
Loading