diff --git a/compiler-rt/lib/asan/asan_allocator.cpp b/compiler-rt/lib/asan/asan_allocator.cpp
index 06c827c41eacc..b6de4a37a94da 100644
--- a/compiler-rt/lib/asan/asan_allocator.cpp
+++ b/compiler-rt/lib/asan/asan_allocator.cpp
@@ -1385,6 +1385,7 @@ int __asan_update_allocation_context(void* addr) {
 }
 
 #if SANITIZER_AMDGPU
+DECLARE_REAL(hsa_status_t, hsa_init);
 DECLARE_REAL(hsa_status_t, hsa_amd_agents_allow_access, uint32_t num_agents,
              const hsa_agent_t *agents, const uint32_t *flags, const void *ptr)
 DECLARE_REAL(hsa_status_t, hsa_amd_memory_pool_allocate,
@@ -1400,31 +1401,31 @@ DECLARE_REAL(hsa_status_t, hsa_amd_ipc_memory_detach, void *mapped_ptr)
 DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_reserve_align, void** ptr,
              size_t size, uint64_t address, uint64_t alignment, uint64_t flags)
 DECLARE_REAL(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size);
+DECLARE_REAL(hsa_status_t, hsa_amd_register_system_event_handler,
+             hsa_amd_system_event_callback_t, void*)
 
 namespace __asan {
-
 // Always align to page boundary to match current ROCr behavior
 static const size_t kPageSize_ = 4096;
 
 hsa_status_t asan_hsa_amd_memory_pool_allocate(
-    hsa_amd_memory_pool_t memory_pool, size_t size, uint32_t flags, void **ptr,
-    BufferedStackTrace *stack) {
+    hsa_amd_memory_pool_t memory_pool, size_t size, uint32_t flags, void** ptr,
+    BufferedStackTrace* stack) {
   AmdgpuAllocationInfo aa_info;
   aa_info.alloc_func =
-      reinterpret_cast<void *>(asan_hsa_amd_memory_pool_allocate);
+      reinterpret_cast<void*>(asan_hsa_amd_memory_pool_allocate);
   aa_info.memory_pool = memory_pool;
   aa_info.size = size;
   aa_info.flags = flags;
   aa_info.ptr = nullptr;
-  SetErrnoOnNull(*ptr = instance.Allocate(size, kPageSize_, stack,
-                                          FROM_MALLOC, false, &aa_info));
+  SetErrnoOnNull(*ptr = instance.Allocate(size, kPageSize_, stack, FROM_MALLOC,
+                                          false, &aa_info));
   return aa_info.status;
 }
 
-hsa_status_t asan_hsa_amd_memory_pool_free(
-    void *ptr,
-    BufferedStackTrace *stack) {
-  void *p = get_allocator().GetBlockBegin(ptr);
+hsa_status_t asan_hsa_amd_memory_pool_free(void* ptr,
+                                           BufferedStackTrace* stack) {
+  void* p = get_allocator().GetBlockBegin(ptr);
   if (p) {
     instance.Deallocate(ptr, 0, 0, stack, FROM_MALLOC);
     return HSA_STATUS_SUCCESS;
@@ -1432,11 +1433,12 @@ hsa_status_t asan_hsa_amd_memory_pool_free(
   return REAL(hsa_amd_memory_pool_free)(ptr);
 }
 
-hsa_status_t asan_hsa_amd_agents_allow_access(
-    uint32_t num_agents, const hsa_agent_t *agents, const uint32_t *flags,
-    const void *ptr,
-    BufferedStackTrace *stack) {
-  void *p = get_allocator().GetBlockBegin(ptr);
+hsa_status_t asan_hsa_amd_agents_allow_access(uint32_t num_agents,
+                                              const hsa_agent_t* agents,
+                                              const uint32_t* flags,
+                                              const void* ptr,
+                                              BufferedStackTrace* stack) {
+  void* p = get_allocator().GetBlockBegin(ptr);
   return REAL(hsa_amd_agents_allow_access)(num_agents, agents, flags, p ?
                                            p : ptr);
 }
@@ -1446,19 +1448,19 @@ hsa_status_t asan_hsa_amd_agents_allow_access(
 // is always one kPageSize_
 // IPC calls use static_assert to make sure kMetadataSize = 0
 //
-#if SANITIZER_CAN_USE_ALLOCATOR64
+#  if SANITIZER_CAN_USE_ALLOCATOR64
 static struct AP64 AP_;
-#else
+#  else
 static struct AP32 AP_;
-#endif
+#  endif
 
-hsa_status_t asan_hsa_amd_ipc_memory_create(void *ptr, size_t len,
-                                            hsa_amd_ipc_memory_t * handle) {
-  void *ptr_;
+hsa_status_t asan_hsa_amd_ipc_memory_create(void* ptr, size_t len,
+                                            hsa_amd_ipc_memory_t* handle) {
+  void* ptr_;
   size_t len_ = get_allocator().GetActuallyAllocatedSize(ptr);
   if (len_) {
     static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
-    ptr_ = reinterpret_cast<void *>(reinterpret_cast<uptr>(ptr) - kPageSize_);
+    ptr_ = reinterpret_cast<void*>(reinterpret_cast<uptr>(ptr) - kPageSize_);
   } else {
     ptr_ = ptr;
     len_ = len;
@@ -1466,24 +1468,25 @@ hsa_status_t asan_hsa_amd_ipc_memory_create(void *ptr, size_t len,
   return REAL(hsa_amd_ipc_memory_create)(ptr_, len_, handle);
 }
 
-hsa_status_t asan_hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t *handle,
-    size_t len, uint32_t num_agents, const hsa_agent_t *mapping_agents,
-    void **mapped_ptr) {
+hsa_status_t asan_hsa_amd_ipc_memory_attach(const hsa_amd_ipc_memory_t* handle,
+                                            size_t len, uint32_t num_agents,
+                                            const hsa_agent_t* mapping_agents,
+                                            void** mapped_ptr) {
   static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
   size_t len_ = len + kPageSize_;
   hsa_status_t status = REAL(hsa_amd_ipc_memory_attach)(
-                            handle, len_, num_agents, mapping_agents, mapped_ptr);
+      handle, len_, num_agents, mapping_agents, mapped_ptr);
   if (status == HSA_STATUS_SUCCESS && mapped_ptr) {
-    *mapped_ptr = reinterpret_cast<void *>(reinterpret_cast<uptr>(*mapped_ptr) +
-                  kPageSize_);
+    *mapped_ptr = reinterpret_cast<void*>(reinterpret_cast<uptr>(*mapped_ptr) +
+                                          kPageSize_);
   }
   return status;
 }
 
-hsa_status_t asan_hsa_amd_ipc_memory_detach(void *mapped_ptr) {
+hsa_status_t asan_hsa_amd_ipc_memory_detach(void* mapped_ptr) {
   static_assert(AP_.kMetadataSize == 0, "Expression below requires this");
-  void *mapped_ptr_ =
-      reinterpret_cast<void *>(reinterpret_cast<uptr>(mapped_ptr) - kPageSize_);
+  void* mapped_ptr_ =
+      reinterpret_cast<void*>(reinterpret_cast<uptr>(mapped_ptr) - kPageSize_);
   return REAL(hsa_amd_ipc_memory_detach)(mapped_ptr_);
 }
 
@@ -1540,5 +1543,13 @@ hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
   }
   return REAL(hsa_amd_vmem_address_free)(ptr, size);
 }
+
+hsa_status_t asan_hsa_init() {
+  hsa_status_t status = REAL(hsa_init)();
+  if (status == HSA_STATUS_SUCCESS)
+    __sanitizer::AmdgpuMemFuncs::RegisterSystemEventHandlers();
+  return status;
+}
+
 } // namespace __asan
 #endif
diff --git a/compiler-rt/lib/asan/asan_allocator.h b/compiler-rt/lib/asan/asan_allocator.h
index ced10f62b7a58..73e21b95d0f59 100644
--- a/compiler-rt/lib/asan/asan_allocator.h
+++ b/compiler-rt/lib/asan/asan_allocator.h
@@ -341,6 +341,7 @@ hsa_status_t asan_hsa_amd_vmem_address_reserve_align(void** ptr, size_t size,
                                                      BufferedStackTrace* stack);
 hsa_status_t asan_hsa_amd_vmem_address_free(void* ptr, size_t size,
                                             BufferedStackTrace* stack);
+hsa_status_t asan_hsa_init();
 } // namespace __asan
 #endif
diff --git a/compiler-rt/lib/asan/asan_interceptors.cpp b/compiler-rt/lib/asan/asan_interceptors.cpp
index 0951a77b1b93e..0627aa4e85c93 100644
--- a/compiler-rt/lib/asan/asan_interceptors.cpp
+++ b/compiler-rt/lib/asan/asan_interceptors.cpp
@@ -948,7 +948,13 @@ INTERCEPTOR(hsa_status_t, hsa_amd_vmem_address_free, void* ptr, size_t size) {
   return asan_hsa_amd_vmem_address_free(ptr, size, &stack);
 }
 
+INTERCEPTOR(hsa_status_t, hsa_init) {
+  AsanInitFromRtl();
+  return asan_hsa_init();
+}
+
 void InitializeAmdgpuInterceptors() {
+  ASAN_INTERCEPT_FUNC(hsa_init);
   ASAN_INTERCEPT_FUNC(hsa_memory_copy);
   ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_allocate);
   ASAN_INTERCEPT_FUNC(hsa_amd_memory_pool_free);
@@ -965,7 +971,7 @@ void InitializeAmdgpuInterceptors() {
 }
 
 void ENSURE_HSA_INITED() {
-  if (!REAL(hsa_memory_copy))
+  if (!REAL(hsa_init))
     InitializeAmdgpuInterceptors();
 }
 #endif
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
index cf10cb773e746..3a02a3c5ce4aa 100755
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.cpp
@@ -11,10 +11,13 @@
 //===----------------------------------------------------------------------===//
 #if SANITIZER_AMDGPU
 #  include <dlfcn.h>  // For dlsym
+
 #  include "sanitizer_allocator.h"
+#  include "sanitizer_atomic.h"
 
 namespace __sanitizer {
-struct HsaMemoryFunctions {
+struct HsaFunctions {
+  // ---------------- Memory Functions ----------------
   hsa_status_t (*memory_pool_allocate)(hsa_amd_memory_pool_t memory_pool,
                                        size_t size, uint32_t flags, void **ptr);
   hsa_status_t (*memory_pool_free)(void *ptr);
@@ -22,18 +25,30 @@ struct HsaMemoryFunctions {
   hsa_status_t (*pointer_info)(void *ptr, hsa_amd_pointer_info_t *info,
                                void *(*alloc)(size_t),
                                uint32_t *num_agents_accessible,
                                hsa_agent_t **accessible);
-  hsa_status_t (*vmem_address_reserve_align)(void** ptr, size_t size,
+  hsa_status_t (*vmem_address_reserve_align)(void **ptr, size_t size,
                                              uint64_t address,
                                              uint64_t alignment,
                                              uint64_t flags);
-  hsa_status_t (*vmem_address_free)(void* ptr, size_t size);
+  hsa_status_t (*vmem_address_free)(void *ptr, size_t size);
+
+  // ---------------- Event Functions ----------------
+  hsa_status_t (*register_system_event_handler)(
+      hsa_amd_system_event_callback_t callback, void *data);
 };
-static HsaMemoryFunctions hsa_amd;
+static HsaFunctions hsa_amd;
 
 // Always align to page boundary to match current ROCr behavior
 static const size_t kPageSize_ = 4096;
+static atomic_uint8_t amdgpu_runtime_shutdown{0};
+static atomic_uint8_t amdgpu_event_registered{0};
+
+bool AmdgpuMemFuncs::GetAmdgpuRuntimeShutdown() {
+  return static_cast<bool>(
+      atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire));
+}
+
 bool AmdgpuMemFuncs::Init() {
   hsa_amd.memory_pool_allocate = (decltype(hsa_amd.memory_pool_allocate))dlsym(
@@ -47,15 +62,20 @@ bool AmdgpuMemFuncs::Init() {
       RTLD_NEXT, "hsa_amd_vmem_address_reserve_align");
   hsa_amd.vmem_address_free = (decltype(hsa_amd.vmem_address_free))dlsym(
       RTLD_NEXT, "hsa_amd_vmem_address_free");
+  hsa_amd.register_system_event_handler =
+      (decltype(hsa_amd.register_system_event_handler))dlsym(
+          RTLD_NEXT, "hsa_amd_register_system_event_handler");
   if (!hsa_amd.memory_pool_allocate || !hsa_amd.memory_pool_free ||
       !hsa_amd.pointer_info || !hsa_amd.vmem_address_reserve_align ||
-      !hsa_amd.vmem_address_free)
+      !hsa_amd.vmem_address_free || !hsa_amd.register_system_event_handler)
     return false;
   return true;
 }
 
 void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
                                DeviceAllocationInfo *da_info) {
+  if (atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire))
+    return nullptr;
   AmdgpuAllocationInfo *aa_info =
       reinterpret_cast<AmdgpuAllocationInfo *>(da_info);
   if (!aa_info->memory_pool.handle) {
@@ -73,6 +93,8 @@ void *AmdgpuMemFuncs::Allocate(uptr size, uptr alignment,
 }
 
 void AmdgpuMemFuncs::Deallocate(void *p) {
+  if (atomic_load(&amdgpu_runtime_shutdown, memory_order_acquire))
+    return;
   DevicePointerInfo DevPtrInfo;
   if (AmdgpuMemFuncs::GetPointerInfo(reinterpret_cast<uptr>(p), &DevPtrInfo)) {
     if (DevPtrInfo.type == HSA_EXT_POINTER_TYPE_HSA) {
@@ -103,6 +125,30 @@ bool AmdgpuMemFuncs::GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info) {
   return true;
 }
 
+void AmdgpuMemFuncs::RegisterSystemEventHandlers() {
+  // Register the shutdown system event handler only once.
+  if (atomic_load(&amdgpu_event_registered, memory_order_acquire) == 0) {
+    // Callback that only detects runtime shutdown.
+    hsa_amd_system_event_callback_t callback = [](const hsa_amd_event_t* event,
+                                                  void* data) {
+      if (!event)
+        return HSA_STATUS_ERROR_INVALID_ARGUMENT;
+      if (event->event_type == HSA_AMD_SYSTEM_SHUTDOWN_EVENT) {
+        uint8_t shutdown = 0;
+        if (atomic_compare_exchange_strong(&amdgpu_runtime_shutdown, &shutdown,
+                                           1, memory_order_acq_rel)) {
+          // Evict all allocations (add purge logic here).
+        }
+      }
+      return HSA_STATUS_SUCCESS;
+    };
+    hsa_status_t status =
+        hsa_amd.register_system_event_handler(callback, nullptr);
+    if (status == HSA_STATUS_SUCCESS)
+      atomic_store(&amdgpu_event_registered, 1, memory_order_release);
+  }
+}
+
 uptr AmdgpuMemFuncs::GetPageSize() { return kPageSize_; }
 } // namespace __sanitizer
 #endif  // SANITIZER_AMDGPU
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
index 84b62964e5145..5eb3b28cae8a3 100755
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_amdgpu.h
@@ -22,6 +22,8 @@ class AmdgpuMemFuncs {
   static void Deallocate(void *p);
   static bool GetPointerInfo(uptr ptr, DevicePointerInfo* ptr_info);
   static uptr GetPageSize();
+  static void RegisterSystemEventHandlers();
+  static bool GetAmdgpuRuntimeShutdown();
 };
 
 struct AmdgpuAllocationInfo : public DeviceAllocationInfo {
diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
index f76800da79ac3..838ddcb232cb2 100755
--- a/compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
+++ b/compiler-rt/lib/sanitizer_common/sanitizer_allocator_device.h
@@ -122,7 +122,8 @@ class DeviceAllocatorT {
     CHECK_EQ(chunks_[idx], p_);
     CHECK_LT(idx, n_chunks_);
     h = GetHeader(chunks_[idx], &header);
-    CHECK(!dev_runtime_unloaded_);
+    if (dev_runtime_unloaded_)
+      return;
     chunks_[idx] = chunks_[--n_chunks_];
     chunks_sorted_ = false;
     stats.n_frees++;
@@ -140,7 +141,8 @@ class DeviceAllocatorT {
     uptr res = 0;
    for (uptr i = 0; i < n_chunks_; i++) {
      Header *h = GetHeader(chunks_[i], &header);
-     CHECK(!dev_runtime_unloaded_);
+     if (dev_runtime_unloaded_)
+       return 0;
      res += RoundUpMapSize(h->map_size);
    }
    return res;
@@ -188,7 +190,6 @@ class DeviceAllocatorT {
       CHECK_LT(nearest_chunk, h->map_beg + h->map_size);
       CHECK_LE(nearest_chunk, p);
       if (h->map_beg + h->map_size <= p) {
-        CHECK(!dev_runtime_unloaded_);
         return nullptr;
       }
     }
@@ -306,14 +307,21 @@
   }
 
   Header* GetHeader(uptr chunk, Header* h) const {
-    if (dev_runtime_unloaded_ || !DeviceMemFuncs::GetPointerInfo(chunk, h)) {
-      // Device allocator has dependency on device runtime. If device runtime
-      // is unloaded, GetPointerInfo() will fail. For such case, we can still
-      // return a valid value for map_beg, map_size will be limited to one page
-      h->map_beg = chunk;
-      h->map_size = page_size_;
-      dev_runtime_unloaded_ = true;
+    // The device allocator depends on the device runtime; once the runtime is
+    // unloaded, GetPointerInfo() fails. In that case we can still return a
+    // valid map_beg, but map_size is limited to one page.
+    if (!dev_runtime_unloaded_) {
+      if (DeviceMemFuncs::GetPointerInfo(chunk, h))
+        return h;
+      // GetPointerInfo() failed, but do not assume the runtime is unloaded
+      // yet; instead, consult the shutdown state recorded by the system
+      // event handler and remember it.
+      dev_runtime_unloaded_ = DeviceMemFuncs::GetAmdgpuRuntimeShutdown();
     }
+    // The device runtime is (or may be) gone: fall back to a conservative
+    // single-page header.
+    h->map_beg = chunk;
+    h->map_size = page_size_;
     return h;
   }