Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 42 additions & 31 deletions compiler-rt/lib/asan/asan_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1385,6 +1385,7 @@ int __asan_update_allocation_context(void* addr) {
}

#if SANITIZER_AMDGPU
DECLARE_REAL(hsa_status_t, hsa_init);
DECLARE_REAL(hsa_status_t, hsa_amd_agents_allow_access, uint32_t num_agents,
const hsa_agent_t *agents, const uint32_t *flags, const void *ptr)
DECLARE_REAL(hsa_status_t, hsa_amd_memory_pool_allocate,
Expand All @@ -1400,43 +1401,44 @@ DECLARE_REAL(hsa_status_t, hsa_amd_ipc_memory_detach, void *mapped_ptr)
DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_reserve_align, void** ptr,
size_t size, uint64_t address, uint64_t alignment, uint64_t flags)
DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size);
DECLARE_REAL(hsa_status_t, hsa_amd_register_system_event_handler,
hsa_amd_system_event_callback_t, void*)

namespace __asan {

// Always align to page boundary to match current ROCr behavior
static const size_t kPageSize_ = 4096;

// Routes an HSA memory-pool allocation through the ASan allocator so the
// block gets ASan bookkeeping (shadow, stack trace) before the real
// hsa_amd_memory_pool_allocate is invoked via the AmdgpuAllocationInfo hook.
//
// memory_pool - pool to allocate from (forwarded to the real runtime call).
// size, flags - forwarded unchanged.
// ptr         - out: user pointer (null on failure; errno set via
//               SetErrnoOnNull).
// stack       - caller stack trace recorded for reports.
// Returns the HSA status produced by the underlying runtime allocation.
hsa_status_t asan_hsa_amd_memory_pool_allocate(
    hsa_amd_memory_pool_t memory_pool, size_t size, uint32_t flags, void** ptr,
    BufferedStackTrace* stack) {
  AmdgpuAllocationInfo aa_info;
  aa_info.alloc_func =
      reinterpret_cast<void*>(asan_hsa_amd_memory_pool_allocate);
  aa_info.memory_pool = memory_pool;
  aa_info.size = size;
  aa_info.flags = flags;
  aa_info.ptr = nullptr;
  // kPageSize_ alignment matches current ROCr behavior (see comment above).
  SetErrnoOnNull(*ptr = instance.Allocate(size, kPageSize_, stack, FROM_MALLOC,
                                          false, &aa_info));
  return aa_info.status;
}

// Frees a pointer previously allocated through the ASan-wrapped pool
// allocator. If the pointer belongs to the ASan allocator, deallocate via
// ASan (which also invokes the device-side release); otherwise fall through
// to the real hsa_amd_memory_pool_free.
hsa_status_t asan_hsa_amd_memory_pool_free(void* ptr,
                                           BufferedStackTrace* stack) {
  void* p = get_allocator().GetBlockBegin(ptr);
  if (p) {
    // Known ASan chunk: let the ASan allocator handle bookkeeping + release.
    instance.Deallocate(ptr, 0, 0, stack, FROM_MALLOC);
    return HSA_STATUS_SUCCESS;
  }
  return REAL(hsa_amd_memory_pool_free)(ptr);
}

// Grants agent access for a (possibly ASan-managed) pointer. ASan hands out
// an interior user pointer, but the runtime needs the beginning of the real
// mapping, so translate via GetBlockBegin() when the pointer is ours.
hsa_status_t asan_hsa_amd_agents_allow_access(uint32_t num_agents,
                                              const hsa_agent_t* agents,
                                              const uint32_t* flags,
                                              const void* ptr,
                                              BufferedStackTrace* stack) {
  void* p = get_allocator().GetBlockBegin(ptr);
  // Pass the block start for ASan-owned memory, the original pointer
  // otherwise.
  return REAL(hsa_amd_agents_allow_access)(num_agents, agents, flags,
                                           p ? p : ptr);
}
Expand All @@ -1446,44 +1448,45 @@ hsa_status_t asan_hsa_amd_agents_allow_access(
// is always one kPageSize_
// IPC calls use static_assert to make sure kMetadataSize = 0
//
#if SANITIZER_CAN_USE_ALLOCATOR64
# if SANITIZER_CAN_USE_ALLOCATOR64
static struct AP64<LocalAddressSpaceView> AP_;
#else
# else
static struct AP32<LocalAddressSpaceView> AP_;
#endif
# endif

// Creates an IPC handle for a device allocation. For ASan-managed memory the
// real mapping starts one page (kPageSize_) before the user pointer, so both
// the base pointer and the length are adjusted before calling the real
// runtime entry point.
hsa_status_t asan_hsa_amd_ipc_memory_create(void* ptr, size_t len,
                                            hsa_amd_ipc_memory_t* handle) {
  void* ptr_;
  // Non-zero only when `ptr` is owned by the ASan allocator.
  size_t len_ = get_allocator().GetActuallyAllocatedSize(ptr);
  if (len_) {
    // The page-shift trick is only valid when no allocator metadata is
    // interposed between the mapping start and the user pointer.
    static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
    ptr_ = reinterpret_cast<void*>(reinterpret_cast<uptr>(ptr) - kPageSize_);
  } else {
    // Foreign pointer: forward unchanged.
    ptr_ = ptr;
    len_ = len;
  }
  return REAL(hsa_amd_ipc_memory_create)(ptr_, len_, handle);
}

// Attaches an imported IPC region. The exporter's mapping includes one extra
// leading page (see asan_hsa_amd_ipc_memory_create), so attach len+page and
// return a pointer advanced past that page to mirror the exporter's view.
hsa_status_t asan_hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t* handle,
                                            size_t len, uint32_t num_agents,
                                            const hsa_agent_t* mapping_agents,
                                            void** mapped_ptr) {
  // The +/- kPageSize_ adjustment assumes no interposed allocator metadata.
  static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
  size_t len_ = len + kPageSize_;
  hsa_status_t status = REAL(hsa_amd_ipc_memory_attach)(
      handle, len_, num_agents, mapping_agents, mapped_ptr);
  if (status == HSA_STATUS_SUCCESS && mapped_ptr) {
    // Skip the leading page so callers see the same user pointer layout as
    // on the exporting side.
    *mapped_ptr = reinterpret_cast<void*>(reinterpret_cast<uptr>(*mapped_ptr) +
                                          kPageSize_);
  }
  return status;
}

// Detaches an IPC mapping attached via asan_hsa_amd_ipc_memory_attach.
// Undo the one-page forward adjustment made at attach time before handing
// the pointer back to the real runtime.
hsa_status_t asan_hsa_amd_ipc_memory_detach(void* mapped_ptr) {
  // Must match the adjustment logic in attach; requires zero metadata size.
  static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
  void* mapped_ptr_ =
      reinterpret_cast<void*>(reinterpret_cast<uptr>(mapped_ptr) - kPageSize_);
  return REAL(hsa_amd_ipc_memory_detach)(mapped_ptr_);
}

Expand Down Expand Up @@ -1540,5 +1543,13 @@ hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
}
return REAL(hsa_amd_vmem_address_free)(ptr, size);
}

// ASan-side wrapper for hsa_init: run the real initializer and, on success,
// install the sanitizer's system-event handler so HSA runtime shutdown can
// be observed by the allocator.
hsa_status_t asan_hsa_init() {
  const hsa_status_t status = REAL(hsa_init)();
  if (status != HSA_STATUS_SUCCESS)
    return status;
  __sanitizer::AmdgpuMemFuncs::RegisterSystemEventHandlers();
  return status;
}

} // namespace __asan
#endif
1 change: 1 addition & 0 deletions compiler-rt/lib/asan/asan_allocator.h
Original file line number Diff line number Diff line change
Expand Up @@ -341,6 +341,7 @@ hsa_status_t asan_hsa_amd_vmem_address_reserve_align(void** ptr, size_t size,
BufferedStackTrace* stack);
hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
BufferedStackTrace* stack);
hsa_status_t asan_hsa_init();
} // namespace __asan
#endif

Expand Down
8 changes: 7 additions & 1 deletion compiler-rt/lib/asan/asan_interceptors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -948,7 +948,13 @@ INTERCEPTOR(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size) {
return asan_hsa_amd_vmem_address_free(ptr, size, &stack);
}

// Interceptor for hsa_init: ensure the ASan runtime itself is initialized
// before the HSA runtime starts, then delegate to the allocator-side wrapper
// (which calls REAL(hsa_init) and registers event handlers on success).
INTERCEPTOR(hsa_status_t, hsa_init) {
  AsanInitFromRtl();
  return asan_hsa_init();
}

void InitializeAmdgpuInterceptors() {
ASAN_INTERCEPT_FUNC(hsa_init);
ASAN_INTERCEPT_FUNC(hsa_memory_copy);
ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_allocate);
ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_free);
Expand All @@ -965,7 +971,7 @@ void InitializeAmdgpuInterceptors() {
}

// Lazily installs the AMDGPU interceptors. REAL(hsa_init) is only non-null
// after InitializeAmdgpuInterceptors() has run, so it doubles as the
// "already initialized" flag.
void ENSURE_HSA_INITED() {
  if (!REAL(hsa_init))
    InitializeAmdgpuInterceptors();
}
#endif
Expand Down
56 changes: 51 additions & 5 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,44 @@
//===----------------------------------------------------------------------===//
#if SANITIZER_AMDGPU
# include <dlfcn.h> // For dlsym

# include "sanitizer_allocator.h"
# include "sanitizer_atomic.h"

namespace __sanitizer {
// Table of HSA runtime entry points resolved at runtime with dlsym()
// (see AmdgpuMemFuncs::Init below), letting the sanitizer call the real
// runtime without linking against it.
struct HsaFunctions {
  // ---------------- Memory Functions ----------------
  hsa_status_t (*memory_pool_allocate)(hsa_amd_memory_pool_t memory_pool,
                                       size_t size, uint32_t flags, void **ptr);
  hsa_status_t (*memory_pool_free)(void *ptr);
  hsa_status_t (*pointer_info)(void *ptr, hsa_amd_pointer_info_t *info,
                               void *(*alloc)(size_t),
                               uint32_t *num_agents_accessible,
                               hsa_agent_t **accessible);
  hsa_status_t (*vmem_address_reserve_align)(void **ptr, size_t size,
                                             uint64_t address,
                                             uint64_t alignment,
                                             uint64_t flags);
  hsa_status_t (*vmem_address_free)(void *ptr, size_t size);

  // ---------------- Event Functions ----------------
  hsa_status_t (*register_system_event_handler)(
      hsa_amd_system_event_callback_t callback, void *data);
};

static HsaMemoryFunctions hsa_amd;
static HsaFunctions hsa_amd;

// Always align to page boundary to match current ROCr behavior
static const size_t kPageSize_ = 4096;

static atomic_uint8_t amdgpu_runtime_shutdown{0};
static atomic_uint8_t amdgpu_event_registered{0};

bool AmdgpuMemFuncs::GetAmdgpuRuntimeShutdown() {
return static_cast<bool>(
atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire));
}

bool AmdgpuMemFuncs::Init() {
hsa_amd.memory_pool_allocate =
(decltype(hsa_amd.memory_pool_allocate))dlsym(
Expand All @@ -47,15 +62,20 @@ bool AmdgpuMemFuncs::Init() {
RTLD_NEXT, "hsa_amd_vmem_address_reserve_align");
hsa_amd.vmem_address_free = (decltype(hsa_amd.vmem_address_free))dlsym(
RTLD_NEXT, "hsa_amd_vmem_address_free");
hsa_amd.register_system_event_handler =
(decltype(hsa_amd.register_system_event_handler))dlsym(
RTLD_NEXT, "hsa_amd_register_system_event_handler");
if (!hsa_amd.memory_pool_allocate || !hsa_amd.memory_pool_free ||
!hsa_amd.pointer_info || !hsa_amd.vmem_address_reserve_align ||
!hsa_amd.vmem_address_free)
!hsa_amd.vmem_address_free || !hsa_amd.register_system_event_handler)
return false;
return true;
}

void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
DeviceAllocationInfo *da_info) {
if (atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire))
return nullptr;
AmdgpuAllocationInfo *aa_info =
reinterpret_cast<AmdgpuAllocationInfo *>(da_info);
if (!aa_info->memory_pool.handle) {
Expand All @@ -73,6 +93,8 @@ void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
}

void AmdgpuMemFuncs::Deallocate(void *p) {
if (atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire))
return;
DevicePointerInfo DevPtrInfo;
if (AmdgpuMemFuncs::GetPointerInfo(reinterpret_cast<uptr>(p), &DevPtrInfo)) {
if (DevPtrInfo.type == HSA_EXT_POINTER_TYPE_HSA) {
Expand Down Expand Up @@ -103,6 +125,30 @@ bool AmdgpuMemFuncs::GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info) {
return true;
}

// Registers a system-event callback with the HSA runtime that flips
// amdgpu_runtime_shutdown when HSA_AMD_SYSTEM_SHUTDOWN_EVENT is delivered.
// Registration is attempted at most once successfully; a failed attempt
// leaves amdgpu_event_registered clear so a later call can retry.
// NOTE(review): the load-then-register sequence is not atomic, so two
// threads calling this concurrently could both register the handler —
// confirm callers serialize (e.g. via the hsa_init interceptor).
void AmdgpuMemFuncs::RegisterSystemEventHandlers() {
  // Register shutdown system event handler only once
  if (atomic_load(&amdgpu_event_registered, memory_order_acquire) == 0) {
    // Callback to just detect runtime shutdown
    hsa_amd_system_event_callback_t callback = [](const hsa_amd_event_t* event,
                                                  void* data) {
      if (!event)
        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
      if (event->event_type == HSA_AMD_SYSTEM_SHUTDOWN_EVENT) {
        uint8_t shutdown = 0;
        // CAS ensures the (currently empty) eviction block runs only for the
        // first observed shutdown event.
        if (atomic_compare_exchange_strong(&amdgpu_runtime_shutdown, &shutdown,
                                           1, memory_order_acq_rel)) {
          // Evict all allocations (add purge logic here).
        }
      }
      return HSA_STATUS_SUCCESS;
    };
    hsa_status_t status =
        hsa_amd.register_system_event_handler(callback, nullptr);
    // Only mark as registered when the runtime accepted the handler.
    if (status == HSA_STATUS_SUCCESS)
      atomic_store(&amdgpu_event_registered, 1, memory_order_release);
  }
}

uptr AmdgpuMemFuncs::GetPageSize() { return kPageSize_; }
} // namespace __sanitizer
#endif // SANITIZER_AMDGPU
2 changes: 2 additions & 0 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ class AmdgpuMemFuncs {
static void Deallocate(void *p);
static bool GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info);
static uptr GetPageSize();
static void RegisterSystemEventHandlers();
static bool GetAmdgpuRuntimeShutdown();
};

struct AmdgpuAllocationInfo : public DeviceAllocationInfo {
Expand Down
28 changes: 18 additions & 10 deletions compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ class DeviceAllocatorT {
CHECK_EQ(chunks_[idx], p_);
CHECK_LT(idx, n_chunks_);
h = GetHeader(chunks_[idx], &header);
CHECK(!dev_runtime_unloaded_);
if (dev_runtime_unloaded_)
return;
chunks_[idx] = chunks_[--n_chunks_];
chunks_sorted_ = false;
stats.n_frees++;
Expand All @@ -140,7 +141,8 @@ class DeviceAllocatorT {
uptr res = 0;
for (uptr i = 0; i < n_chunks_; i++) {
Header *h = GetHeader(chunks_[i], &header);
CHECK(!dev_runtime_unloaded_);
if (dev_runtime_unloaded_)
return 0;
res += RoundUpMapSize(h->map_size);
}
return res;
Expand Down Expand Up @@ -188,7 +190,6 @@ class DeviceAllocatorT {
CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
CHECK_LE(nearest_chunk, p);
if (h->map_beg + h->map_size <= p) {
CHECK(!dev_runtime_unloaded_);
return nullptr;
}
}
Expand Down Expand Up @@ -306,14 +307,21 @@ class DeviceAllocatorT {
}

// Fills `h` with mapping info for `chunk`. Normally the data comes from the
// device runtime via DeviceMemFuncs::GetPointerInfo(); once the runtime is
// known to have shut down we stop querying it and return a conservative
// single-page header so teardown paths can still make progress.
Header* GetHeader(uptr chunk, Header* h) const {
  // Device allocator has dependency on device runtime. If device runtime
  // is unloaded, GetPointerInfo() will fail. For such case, we can still
  // return a valid value for map_beg, map_size will be limited to one page.
  if (!dev_runtime_unloaded_) {
    if (DeviceMemFuncs::GetPointerInfo(chunk, h))
      return h;
    // GetPointerInfo() failed, but that alone does not prove the runtime is
    // gone: ask for the recorded shutdown state before latching the flag.
    dev_runtime_unloaded_ = DeviceMemFuncs::GetAmdgpuRuntimeShutdown();
  }
  // Fallback: conservative single-page header.
  h->map_beg = chunk;
  h->map_size = page_size_;
  return h;
}

Expand Down