oneapi-src · martygrant · Dec 6, 2024 · Oct 30, 2024 · Nov 6, 2024 · Nov 27, 2024
@@ -773,28 +773,6 @@ ur_result_t AsanInterceptor::prepareLaunch(
         LaunchInfo.Data.Host.DeviceTy = DeviceInfo->Type;
         LaunchInfo.Data.Host.Debug = getOptions().Debug ? 1 : 0;
 
-        auto EnqueueAllocateShadowMemory = [Context = ContextInfo->Handle,
-                                            Device = DeviceInfo->Handle,
-                                            Queue](size_t Size, uptr &Ptr) {
-            void *Allocated = nullptr;
-            auto URes = getContext()->urDdiTable.USM.pfnDeviceAlloc(
-                Context, Device, nullptr, nullptr, Size, &Allocated);
-            if (URes != UR_RESULT_SUCCESS) {
-                return URes;
-            }
-            // Initialize shadow memory
-            URes = EnqueueUSMBlockingSet(Queue, Allocated, 0, Size);
-            if (URes != UR_RESULT_SUCCESS) {
-                [[maybe_unused]] auto URes =
-                    getContext()->urDdiTable.USM.pfnFree(Context, Allocated);
-                assert(URes == UR_RESULT_SUCCESS &&
-                       "urUSMFree failed at allocating shadow memory");
-                Allocated = nullptr;
-            }
-            Ptr = (uptr)Allocated;
-            return URes;
-        };
-
         auto LocalMemoryUsage =
             GetKernelLocalMemorySize(Kernel, DeviceInfo->Handle);
         auto PrivateMemoryUsage =
@@ -806,86 +784,45 @@ ur_result_t AsanInterceptor::prepareLaunch(
 
         // Write shadow memory offset for local memory
         if (getOptions().DetectLocals) {
-            // CPU needn't this
-            if (DeviceInfo->Type == DeviceType::GPU_PVC ||
-                DeviceInfo->Type == DeviceType::GPU_DG2) {
-                const size_t LocalMemorySize =
-                    GetDeviceLocalMemorySize(DeviceInfo->Handle);
-                const size_t LocalShadowMemorySize =
-                    (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
-
-                getContext()->logger.debug(
-                    "LocalMemory(WorkGroup={}, LocalMemorySize={}, "
-                    "LocalShadowMemorySize={})",
-                    NumWG, LocalMemorySize, LocalShadowMemorySize);
-
-                if (EnqueueAllocateShadowMemory(
-                        LocalShadowMemorySize,
-                        LaunchInfo.Data.Host.LocalShadowOffset) !=
-                    UR_RESULT_SUCCESS) {
-                    getContext()->logger.warning(
-                        "Failed to allocate shadow memory for local "
-                        "memory, maybe the number of workgroup ({}) is too "
-                        "large",
-                        NumWG);
-                    getContext()->logger.warning(
-                        "Skip checking local memory of kernel <{}>",
-                        GetKernelName(Kernel));
-                } else {
-                    LaunchInfo.Data.Host.LocalShadowOffsetEnd =
-                        LaunchInfo.Data.Host.LocalShadowOffset +
-                        LocalShadowMemorySize - 1;
-
-                    ContextInfo->Stats.UpdateShadowMalloced(
-                        LocalShadowMemorySize);
-
-                    getContext()->logger.info(
-                        "ShadowMemory(Local, {} - {})",
-                        (void *)LaunchInfo.Data.Host.LocalShadowOffset,
-                        (void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
-                }
+            if (DeviceInfo->Shadow->AllocLocalShadow(
+                    Queue, NumWG, LaunchInfo.Data.Host.LocalShadowOffset,
+                    LaunchInfo.Data.Host.LocalShadowOffsetEnd) !=
+                UR_RESULT_SUCCESS) {
+                getContext()->logger.warning(
+                    "Failed to allocate shadow memory for local "
+                    "memory, maybe the number of workgroup ({}) is too "
+                    "large",
+                    NumWG);
+                getContext()->logger.warning(
+                    "Skip checking local memory of kernel <{}>",
+                    GetKernelName(Kernel));
+            } else {
+                getContext()->logger.info(
+                    "ShadowMemory(Local, WorkGroup{}, {} - {})", NumWG,
+                    (void *)LaunchInfo.Data.Host.LocalShadowOffset,
+                    (void *)LaunchInfo.Data.Host.LocalShadowOffsetEnd);
             }
         }
 
         // Write shadow memory offset for private memory
         if (getOptions().DetectPrivates) {
-            if (DeviceInfo->Type == DeviceType::CPU) {
-                LaunchInfo.Data.Host.PrivateShadowOffset =
-                    DeviceInfo->Shadow->ShadowBegin;
-            } else if (DeviceInfo->Type == DeviceType::GPU_PVC ||
-                       DeviceInfo->Type == DeviceType::GPU_DG2) {
-                const size_t PrivateShadowMemorySize =
-                    (NumWG * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
-
-                getContext()->logger.debug("PrivateMemory(WorkGroup={}, "
-                                           "PrivateShadowMemorySize={})",
-                                           NumWG, PrivateShadowMemorySize);
-
-                if (EnqueueAllocateShadowMemory(
-                        PrivateShadowMemorySize,
-                        LaunchInfo.Data.Host.PrivateShadowOffset) !=
-                    UR_RESULT_SUCCESS) {
-                    getContext()->logger.warning(
-                        "Failed to allocate shadow memory for private "
-                        "memory, maybe the number of workgroup ({}) is too "
-                        "large",
-                        NumWG);
-                    getContext()->logger.warning(
-                        "Skip checking private memory of kernel <{}>",
-                        GetKernelName(Kernel));
-                } else {
-                    LaunchInfo.Data.Host.PrivateShadowOffsetEnd =
-                        LaunchInfo.Data.Host.PrivateShadowOffset +
-                        PrivateShadowMemorySize - 1;
-
-                    ContextInfo->Stats.UpdateShadowMalloced(
-                        PrivateShadowMemorySize);
-
-                    getContext()->logger.info(
-                        "ShadowMemory(Private, {} - {})",
-                        (void *)LaunchInfo.Data.Host.PrivateShadowOffset,
-                        (void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
-                }
+            if (DeviceInfo->Shadow->AllocPrivateShadow(
+                    Queue, NumWG, LaunchInfo.Data.Host.PrivateShadowOffset,
+                    LaunchInfo.Data.Host.PrivateShadowOffsetEnd) !=
+                UR_RESULT_SUCCESS) {
+                getContext()->logger.warning(
+                    "Failed to allocate shadow memory for private "
+                    "memory, maybe the number of workgroup ({}) is too "
+                    "large",
+                    NumWG);
+                getContext()->logger.warning(
+                    "Skip checking private memory of kernel <{}>",
+                    GetKernelName(Kernel));
+            } else {
+                getContext()->logger.info(
+                    "ShadowMemory(Private, WorkGroup{}, {} - {})", NumWG,
+                    (void *)LaunchInfo.Data.Host.PrivateShadowOffset,
+                    (void *)LaunchInfo.Data.Host.PrivateShadowOffsetEnd);
             }
         }
 
@@ -970,24 +907,6 @@ ContextInfo::~ContextInfo() {
 
 AsanRuntimeDataWrapper::~AsanRuntimeDataWrapper() {
     [[maybe_unused]] ur_result_t Result;
-    auto Type = GetDeviceType(Context, Device);
-    auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
-    if (Type == DeviceType::GPU_PVC || Type == DeviceType::GPU_DG2) {
-        if (Host.PrivateShadowOffset) {
-            ContextInfo->Stats.UpdateShadowFreed(Host.PrivateShadowOffsetEnd -
-                                                 Host.PrivateShadowOffset + 1);
-            Result = getContext()->urDdiTable.USM.pfnFree(
-                Context, (void *)Host.PrivateShadowOffset);
-            assert(Result == UR_RESULT_SUCCESS);
-        }
-        if (Host.LocalShadowOffset) {
-            ContextInfo->Stats.UpdateShadowFreed(Host.LocalShadowOffsetEnd -
-                                                 Host.LocalShadowOffset + 1);
-            Result = getContext()->urDdiTable.USM.pfnFree(
-                Context, (void *)Host.LocalShadowOffset);
-            assert(Result == UR_RESULT_SUCCESS);
-        }
-    }
     if (Host.LocalArgs) {
         Result = getContext()->urDdiTable.USM.pfnFree(Context,
                                                       (void *)Host.LocalArgs);

@@ -132,16 +132,23 @@ ur_result_t ShadowMemoryGPU::Setup() {
 }
 
 ur_result_t ShadowMemoryGPU::Destory() {
-    if (ShadowBegin == 0) {
-        return UR_RESULT_SUCCESS;
+    if (PrivateShadowOffset != 0) { // Free the private shadow buffer if held.
+        UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+            Context, (void *)PrivateShadowOffset));
+        PrivateShadowOffset = 0; // Cleared so a repeated call skips this free.
     }
-    static ur_result_t Result = [this]() {
-        auto Result = getContext()->urDdiTable.VirtualMem.pfnFree(
-            Context, (const void *)ShadowBegin, GetShadowSize());
-        getContext()->urDdiTable.Context.pfnRelease(Context);
-        return Result;
-    }();
-    return Result;
+    if (LocalShadowOffset != 0) { // Same for the local shadow buffer.
+        UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+            Context, (void *)LocalShadowOffset));
+        LocalShadowOffset = 0; // NOTE(review): static size caches not reset.
+    }
+    if (ShadowBegin != 0) { // Free the virtual shadow range, release Context.
+        UR_CALL(getContext()->urDdiTable.VirtualMem.pfnFree(
+            Context, (const void *)ShadowBegin, GetShadowSize()));
+        UR_CALL(getContext()->urDdiTable.Context.pfnRelease(Context));
+        ShadowBegin = ShadowEnd = 0; // Marks the shadow range as released.
+    }
+    return UR_RESULT_SUCCESS; // NOTE(review): presumably a failing UR_CALL returned earlier - confirm.
 }
 
 ur_result_t ShadowMemoryGPU::EnqueuePoisonShadow(ur_queue_handle_t Queue,
@@ -257,6 +264,87 @@ ur_result_t ShadowMemoryGPU::ReleaseShadow(std::shared_ptr<AllocInfo> AI) {
     return UR_RESULT_SUCCESS;
 }
 
+/// Provide the shadow range used to check work-group local memory.
+///
+/// One device buffer (LocalShadowOffset) is kept and reused between calls.
+/// It is freed, re-allocated and filled with 0 via EnqueueUSMBlockingSet
+/// only when the size required for NumWG work-groups exceeds the cached
+/// size, or when no buffer is currently held.
+///
+/// \param Queue  Queue used to initialize a newly allocated buffer.
+/// \param NumWG  Number of work-groups the shadow has to cover.
+/// \param Begin  Receives the first shadow address (written on success only).
+/// \param End    Receives the last shadow address, inclusive (written on
+///               success only).
+/// \return UR_RESULT_SUCCESS, or the error of the failing call. If the new
+///         allocation or its initialization fails, no buffer is left cached.
+ur_result_t ShadowMemoryGPU::AllocLocalShadow(ur_queue_handle_t Queue,
+                                              uint32_t NumWG, uptr &Begin,
+                                              uptr &End) {
+    const size_t LocalMemorySize = GetDeviceLocalMemorySize(Device);
+    const size_t RequiredShadowSize =
+        (NumWG * LocalMemorySize) >> ASAN_SHADOW_SCALE;
+    // NOTE(review): this size cache is function-static, so it is shared by
+    // every ShadowMemoryGPU instance while LocalShadowOffset is per-instance;
+    // it presumably belongs in a data member next to LocalShadowOffset.
+    static size_t LastAllocedSize = 0;
+
+    // Also allocate when no buffer is held even though the cached size looks
+    // sufficient (Destory() clears LocalShadowOffset without resetting the
+    // cache); otherwise a null range would be handed out as valid.
+    const bool NeedAlloc =
+        RequiredShadowSize > LastAllocedSize ||
+        (LocalShadowOffset == 0 && RequiredShadowSize != 0);
+    if (NeedAlloc) {
+        auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
+        if (LocalShadowOffset) {
+            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+                Context, (void *)LocalShadowOffset));
+            ContextInfo->Stats.UpdateShadowFreed(LastAllocedSize);
+            LocalShadowOffset = 0;
+        }
+        // Nothing is cached until the new buffer is allocated and initialized.
+        LastAllocedSize = 0;
+
+        // Allocate into a local so the member is only updated on success.
+        void *Allocated = nullptr;
+        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
+            Context, Device, nullptr, nullptr, RequiredShadowSize,
+            &Allocated));
+
+        // Initialize shadow memory
+        const ur_result_t URes =
+            EnqueueUSMBlockingSet(Queue, Allocated, 0, RequiredShadowSize);
+        if (URes != UR_RESULT_SUCCESS) {
+            // Release the unusable buffer and report the initialization
+            // error; previously execution fell through and returned success
+            // with a null range recorded as allocated.
+            if (getContext()->urDdiTable.USM.pfnFree(Context, Allocated) !=
+                UR_RESULT_SUCCESS) {
+                getContext()->logger.warning(
+                    "Failed to free local shadow memory after its "
+                    "initialization failed");
+            }
+            return URes;
+        }
+
+        LocalShadowOffset = (uptr)Allocated;
+        LastAllocedSize = RequiredShadowSize;
+        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
+    }
+
+    Begin = LocalShadowOffset;
+    End = LocalShadowOffset + RequiredShadowSize - 1;
+    return UR_RESULT_SUCCESS;
+}
+
+/// Provide the shadow range used to check private memory.
+///
+/// One device buffer (PrivateShadowOffset) is kept and reused between calls.
+/// It is freed, re-allocated and filled with 0 via EnqueueUSMBlockingSet
+/// only when the size required for NumWG work-groups exceeds the cached
+/// size, or when no buffer is currently held.
+///
+/// \param Queue  Queue used to initialize a newly allocated buffer.
+/// \param NumWG  Number of work-groups the shadow has to cover.
+/// \param Begin  Receives the first shadow address (written on success only).
+/// \param End    Receives the last shadow address, inclusive (written on
+///               success only).
+/// \return UR_RESULT_SUCCESS, or the error of the failing call. If the new
+///         allocation or its initialization fails, no buffer is left cached.
+ur_result_t ShadowMemoryGPU::AllocPrivateShadow(ur_queue_handle_t Queue,
+                                                uint32_t NumWG, uptr &Begin,
+                                                uptr &End) {
+    // Widen NumWG first so the product cannot wrap in 32-bit arithmetic,
+    // whatever the type of ASAN_PRIVATE_SIZE is.
+    const size_t RequiredShadowSize =
+        (static_cast<size_t>(NumWG) * ASAN_PRIVATE_SIZE) >> ASAN_SHADOW_SCALE;
+    // NOTE(review): this size cache is function-static, so it is shared by
+    // every ShadowMemoryGPU instance while PrivateShadowOffset is
+    // per-instance; it presumably belongs in a data member next to
+    // PrivateShadowOffset.
+    static size_t LastAllocedSize = 0;
+
+    // Also allocate when no buffer is held even though the cached size looks
+    // sufficient (Destory() clears PrivateShadowOffset without resetting the
+    // cache); otherwise a null range would be handed out as valid.
+    const bool NeedAlloc =
+        RequiredShadowSize > LastAllocedSize ||
+        (PrivateShadowOffset == 0 && RequiredShadowSize != 0);
+    if (NeedAlloc) {
+        auto ContextInfo = getAsanInterceptor()->getContextInfo(Context);
+        if (PrivateShadowOffset) {
+            UR_CALL(getContext()->urDdiTable.USM.pfnFree(
+                Context, (void *)PrivateShadowOffset));
+            ContextInfo->Stats.UpdateShadowFreed(LastAllocedSize);
+            PrivateShadowOffset = 0;
+        }
+        // Nothing is cached until the new buffer is allocated and initialized.
+        LastAllocedSize = 0;
+
+        // Allocate into a local so the member is only updated on success.
+        void *Allocated = nullptr;
+        UR_CALL(getContext()->urDdiTable.USM.pfnDeviceAlloc(
+            Context, Device, nullptr, nullptr, RequiredShadowSize,
+            &Allocated));
+
+        // Initialize shadow memory
+        const ur_result_t URes =
+            EnqueueUSMBlockingSet(Queue, Allocated, 0, RequiredShadowSize);
+        if (URes != UR_RESULT_SUCCESS) {
+            // Release the unusable buffer and report the initialization
+            // error; previously execution fell through and returned success
+            // with a null range recorded as allocated.
+            if (getContext()->urDdiTable.USM.pfnFree(Context, Allocated) !=
+                UR_RESULT_SUCCESS) {
+                getContext()->logger.warning(
+                    "Failed to free private shadow memory after its "
+                    "initialization failed");
+            }
+            return URes;
+        }
+
+        PrivateShadowOffset = (uptr)Allocated;
+        LastAllocedSize = RequiredShadowSize;
+        ContextInfo->Stats.UpdateShadowMalloced(RequiredShadowSize);
+    }
+
+    Begin = PrivateShadowOffset;
+    End = PrivateShadowOffset + RequiredShadowSize - 1;
+    return UR_RESULT_SUCCESS;
+}
+
 uptr ShadowMemoryPVC::MemToShadow(uptr Ptr) {
     if (Ptr & 0xFF00000000000000ULL) { // Device USM
         return ShadowBegin + 0x80000000000ULL +

@@ -41,6 +41,14 @@ struct ShadowMemory {
 
     virtual size_t GetShadowSize() = 0;
 
+    virtual ur_result_t AllocLocalShadow(ur_queue_handle_t Queue,
+                                         uint32_t NumWG, uptr &Begin,
+                                         uptr &End) = 0;
+
+    virtual ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue,
+                                           uint32_t NumWG, uptr &Begin,
+                                           uptr &End) = 0;
+
     ur_context_handle_t Context{};
 
     ur_device_handle_t Device{};
@@ -64,6 +72,20 @@ struct ShadowMemoryCPU final : public ShadowMemory {
                                     uptr Size, u8 Value) override;
 
     size_t GetShadowSize() override { return 0x80000000000ULL; }
+
+    ur_result_t AllocLocalShadow(ur_queue_handle_t, uint32_t, uptr &Begin,
+                                 uptr &End) override { // Queue/NumWG unused.
+        Begin = ShadowBegin; // Nothing is allocated: report the whole
+        End = ShadowEnd;     // [ShadowBegin, ShadowEnd] range as local shadow.
+        return UR_RESULT_SUCCESS; // This override has no failure path.
+    }
+
+    ur_result_t AllocPrivateShadow(ur_queue_handle_t, uint32_t, uptr &Begin,
+                                   uptr &End) override { // Queue/NumWG unused.
+        Begin = ShadowBegin; // Nothing is allocated: report the whole
+        End = ShadowEnd;     // [ShadowBegin, ShadowEnd] range as private shadow.
+        return UR_RESULT_SUCCESS; // This override has no failure path.
+    }
 };
 
 struct ShadowMemoryGPU : public ShadowMemory {
@@ -78,12 +100,22 @@ struct ShadowMemoryGPU : public ShadowMemory {
 
     ur_result_t ReleaseShadow(std::shared_ptr<AllocInfo> AI) override final;
 
+    ur_result_t AllocLocalShadow(ur_queue_handle_t Queue, uint32_t NumWG,
+                                 uptr &Begin, uptr &End) override final;
+
+    ur_result_t AllocPrivateShadow(ur_queue_handle_t Queue, uint32_t NumWG,
+                                   uptr &Begin, uptr &End) override final;
+
     ur_mutex VirtualMemMapsMutex;
 
     std::unordered_map<
         uptr, std::pair<ur_physical_mem_handle_t,
                         std::unordered_set<std::shared_ptr<AllocInfo>>>>
         VirtualMemMaps;
+
+    uptr LocalShadowOffset = 0;
+
+    uptr PrivateShadowOffset = 0;
 };
 
 /// Shadow Memory layout of GPU PVC device