From e7907e32143526840208a9f09d01a8043d47299c Mon Sep 17 00:00:00 2001 From: Kevin B Smith Date: Fri, 2 Oct 2020 09:35:58 -0700 Subject: [PATCH] Changes to lock and unlock _pi_queue as a single shared object, excepting any const members. --- sycl/plugins/level_zero/pi_level_zero.cpp | 149 ++++++++++++++++++---- sycl/plugins/level_zero/pi_level_zero.hpp | 24 ++-- 2 files changed, 139 insertions(+), 34 deletions(-) diff --git a/sycl/plugins/level_zero/pi_level_zero.cpp b/sycl/plugins/level_zero/pi_level_zero.cpp index 1962c4129de8b..dfcd774b48732 100644 --- a/sycl/plugins/level_zero/pi_level_zero.cpp +++ b/sycl/plugins/level_zero/pi_level_zero.cpp @@ -424,8 +424,6 @@ _pi_queue::resetCommandListFenceEntry(ze_command_list_handle_t ZeCommandList, // Event has been signaled: If the fence for the associated command list // is signalled, then reset the fence and command list and add them to the // available list for ruse in PI calls. - // NOTE: Requires the caller to already have a lock with - // ZeCommandListFenceMapMutex. ZE_CALL(zeFenceReset(this->ZeCommandListFenceMap[ZeCommandList])); ZE_CALL(zeCommandListReset(ZeCommandList)); if (MakeAvailable) { @@ -438,6 +436,7 @@ _pi_queue::resetCommandListFenceEntry(ze_command_list_handle_t ZeCommandList, } // Retrieve an available command list to be used in a PI call +// Caller must hold a lock on the Queue passed in. pi_result _pi_device::getAvailableCommandList(pi_queue Queue, ze_command_list_handle_t *ZeCommandList, @@ -458,7 +457,6 @@ _pi_device::getAvailableCommandList(pi_queue Queue, if (Queue->Device->ZeCommandListCache.size() > 0) { Queue->Device->ZeCommandListCacheMutex.lock(); *ZeCommandList = Queue->Device->ZeCommandListCache.front(); - Queue->ZeCommandListFenceMapMutex.lock(); *ZeFence = Queue->ZeCommandListFenceMap[*ZeCommandList]; if (*ZeFence == nullptr) { // If there is a command list available on this device, but no fence yet @@ -468,7 +466,6 @@ _pi_device::getAvailableCommandList(pi_queue Queue, ZE_CALL(zeFenceCreate(Queue->ZeCommandQueue, &ZeFenceDesc, ZeFence)); Queue->ZeCommandListFenceMap[*ZeCommandList] = *ZeFence; } - Queue->ZeCommandListFenceMapMutex.unlock(); Queue->Device->ZeCommandListCache.pop_front(); Queue->Device->ZeCommandListCacheMutex.unlock(); return PI_SUCCESS; @@ -480,18 +477,15 @@ _pi_device::getAvailableCommandList(pi_queue Queue, // if a command list has completed dispatch of its commands and is ready for // reuse. If a command list is found to have been signalled, then the // command list & fence are reset and we return. - Queue->ZeCommandListFenceMapMutex.lock(); for (const auto &MapEntry : Queue->ZeCommandListFenceMap) { ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus(MapEntry.second)); if (ZeResult == ZE_RESULT_SUCCESS) { Queue->resetCommandListFenceEntry(MapEntry.first, false); *ZeCommandList = MapEntry.first; *ZeFence = MapEntry.second; - Queue->ZeCommandListFenceMapMutex.unlock(); return PI_SUCCESS; } } - Queue->ZeCommandListFenceMapMutex.unlock(); // If there are no available command lists nor signalled command lists, then // we must create another command list if we have not exceed the maximum @@ -505,11 +499,9 @@ _pi_device::getAvailableCommandList(pi_queue Queue, // Increments the total number of command lists created on this platform. 
this->Platform->ZeGlobalCommandListCount++; ZE_CALL(zeFenceCreate(Queue->ZeCommandQueue, &ZeFenceDesc, ZeFence)); - Queue->ZeCommandListFenceMapMutex.lock(); Queue->ZeCommandListFenceMap.insert( std::pair(*ZeCommandList, *ZeFence)); - Queue->ZeCommandListFenceMapMutex.unlock(); pi_result = PI_SUCCESS; } @@ -1701,20 +1693,24 @@ pi_result piQueueGetInfo(pi_queue Queue, pi_queue_info ParamName, } pi_result piQueueRetain(pi_queue Queue) { + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + ++(Queue->RefCount); return PI_SUCCESS; } pi_result piQueueRelease(pi_queue Queue) { assert(Queue); + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + if (--(Queue->RefCount) == 0) { // Destroy all the fences created associated with this queue. - Queue->ZeCommandListFenceMapMutex.lock(); for (const auto &MapEntry : Queue->ZeCommandListFenceMap) { ZE_CALL(zeFenceDestroy(MapEntry.second)); } Queue->ZeCommandListFenceMap.clear(); - Queue->ZeCommandListFenceMapMutex.unlock(); ZE_CALL(zeCommandQueueDestroy(Queue->ZeCommandQueue)); Queue->ZeCommandQueue = nullptr; } @@ -1724,6 +1720,10 @@ pi_result piQueueRelease(pi_queue Queue) { pi_result piQueueFinish(pi_queue Queue) { // Wait until command lists attached to the command queue are executed. assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + ZE_CALL(zeCommandQueueSynchronize(Queue->ZeCommandQueue, UINT32_MAX)); return PI_SUCCESS; } @@ -1733,6 +1733,9 @@ pi_result piextQueueGetNativeHandle(pi_queue Queue, assert(Queue); assert(NativeHandle); + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + auto ZeQueue = pi_cast(NativeHandle); // Extract the Level Zero queue handle from the given PI queue *ZeQueue = Queue->ZeCommandQueue; @@ -2994,6 +2997,9 @@ piEnqueueKernelLaunch(pi_queue Queue, pi_kernel Kernel, pi_uint32 WorkDim, ZE_CALL(zeKernelSetGroupSize(Kernel->ZeKernel, WG[0], WG[1], WG[2])); + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; ze_fence_handle_t ZeFence = nullptr; @@ -3181,8 +3187,12 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { // Event has been signaled: If the fence for the associated command list // is signalled, then reset the fence and command list and add them to the // available list for reuse in PI calls. - if (EventList[I]->Queue->RefCount > 0) { - EventList[I]->Queue->ZeCommandListFenceMapMutex.lock(); + auto Queue = EventList[I]->Queue; + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + + if (Queue->RefCount > 0) { ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus( EventList[I] ->Queue->ZeCommandListFenceMap[EventList[I]->ZeCommandList])); @@ -3191,7 +3201,6 @@ pi_result piEventsWait(pi_uint32 NumEvents, const pi_event *EventList) { EventList[I]->ZeCommandList, true); EventList[I]->ZeCommandList = nullptr; } - EventList[I]->Queue->ZeCommandListFenceMapMutex.unlock(); } } } @@ -3224,14 +3233,17 @@ pi_result piEventRelease(pi_event Event) { // If the fence associated with this command list has signalled, then // Reset the Command List Used in this event and put it back on the // available list. 
- if (Event->Queue->RefCount > 0) { - Event->Queue->ZeCommandListFenceMapMutex.lock(); + auto Queue = Event->Queue; + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + + if (Queue->RefCount > 0) { ze_result_t ZeResult = ZE_CALL_NOCHECK(zeFenceQueryStatus( Event->Queue->ZeCommandListFenceMap[Event->ZeCommandList])); if (ZeResult == ZE_RESULT_SUCCESS) { Event->Queue->resetCommandListFenceEntry(Event->ZeCommandList, true); } - Event->Queue->ZeCommandListFenceMapMutex.unlock(); } Event->ZeCommandList = nullptr; } @@ -3419,9 +3431,11 @@ pi_result piEnqueueEventsWaitWithBarrier(pi_queue Queue, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; ze_fence_handle_t ZeFence = nullptr; @@ -3464,6 +3478,11 @@ pi_result piEnqueueMemBufferRead(pi_queue Queue, pi_mem Src, const pi_event *EventWaitList, pi_event *Event) { assert(Src); + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemCopyHelper(PI_COMMAND_TYPE_MEM_BUFFER_READ, Queue, Dst, BlockingRead, Size, pi_cast(Src->getZeHandle()) + Offset, @@ -3479,6 +3498,11 @@ pi_result piEnqueueMemBufferReadRect( pi_event *Event) { assert(Buffer); + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemCopyRectHelper( PI_COMMAND_TYPE_MEM_BUFFER_READ_RECT, Queue, Buffer->getZeHandle(), static_cast(Ptr), BufferOffset, HostOffset, Region, @@ -3489,13 +3513,13 @@ pi_result piEnqueueMemBufferReadRect( } // extern "C" // Shared by all memory read/write/copy PI interfaces. +// PI interfaces must already have queue's mutex locked on entry. static pi_result enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst, pi_bool BlockingWrite, size_t Size, const void *Src, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; ze_fence_handle_t ZeFence = nullptr; @@ -3546,6 +3570,7 @@ enqueueMemCopyHelper(pi_command_type CommandType, pi_queue Queue, void *Dst, } // Shared by all memory read/write/copy rect PI interfaces. +// PI interfaces must already have queue's mutex locked on entry. static pi_result enqueueMemCopyRectHelper( pi_command_type CommandType, pi_queue Queue, void *SrcBuffer, void *DstBuffer, pi_buff_rect_offset SrcOrigin, @@ -3557,7 +3582,6 @@ static pi_result enqueueMemCopyRectHelper( assert(Region); assert(SrcOrigin); assert(DstOrigin); - assert(Queue); // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; @@ -3655,6 +3679,11 @@ pi_result piEnqueueMemBufferWrite(pi_queue Queue, pi_mem Buffer, pi_event *Event) { assert(Buffer); + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemCopyHelper(PI_COMMAND_TYPE_MEM_BUFFER_WRITE, Queue, pi_cast(Buffer->getZeHandle()) + Offset, // dst @@ -3672,6 +3701,11 @@ pi_result piEnqueueMemBufferWriteRect( pi_event *Event) { assert(Buffer); + assert(Queue); + + // Lock automatically releases when this goes out of scope. 
+ std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemCopyRectHelper( PI_COMMAND_TYPE_MEM_BUFFER_WRITE_RECT, Queue, const_cast(static_cast(Ptr)), Buffer->getZeHandle(), @@ -3686,9 +3720,13 @@ pi_result piEnqueueMemBufferCopy(pi_queue Queue, pi_mem SrcBuffer, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(SrcBuffer); assert(DstBuffer); + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemCopyHelper( PI_COMMAND_TYPE_MEM_BUFFER_COPY, Queue, pi_cast(DstBuffer->getZeHandle()) + DstOffset, @@ -3703,9 +3741,13 @@ pi_result piEnqueueMemBufferCopyRect( pi_buff_rect_region Region, size_t SrcRowPitch, size_t SrcSlicePitch, size_t DstRowPitch, size_t DstSlicePitch, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(SrcBuffer); assert(DstBuffer); + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemCopyRectHelper( PI_COMMAND_TYPE_MEM_BUFFER_COPY_RECT, Queue, SrcBuffer->getZeHandle(), DstBuffer->getZeHandle(), SrcOrigin, DstOrigin, Region, SrcRowPitch, @@ -3716,13 +3758,15 @@ pi_result piEnqueueMemBufferCopyRect( } // extern "C" +// +// Caller of this must assure that the Queue is non-null and already had +// the lock acquired. static pi_result enqueueMemFillHelper(pi_command_type CommandType, pi_queue Queue, void *Ptr, const void *Pattern, size_t PatternSize, size_t Size, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; ze_fence_handle_t ZeFence = nullptr; @@ -3786,6 +3830,11 @@ pi_result piEnqueueMemBufferFill(pi_queue Queue, pi_mem Buffer, pi_event *Event) { assert(Buffer); + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemFillHelper(PI_COMMAND_TYPE_MEM_BUFFER_FILL, Queue, pi_cast(Buffer->getZeHandle()) + Offset, Pattern, PatternSize, Size, NumEventsInWaitList, @@ -3802,8 +3851,11 @@ piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, pi_bool BlockingMap, // TODO: we don't implement read-only or write-only, always read-write. // assert((map_flags & CL_MAP_READ) != 0); // assert((map_flags & CL_MAP_WRITE) != 0); - assert(Queue); assert(Buffer); + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; @@ -3872,6 +3924,10 @@ piEnqueueMemBufferMap(pi_queue Queue, pi_mem Buffer, pi_bool BlockingMap, pi_result piEnqueueMemUnmap(pi_queue Queue, pi_mem MemObj, void *MappedPtr, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; @@ -3981,6 +4037,7 @@ static ze_image_region_t getImageRegionHelper(pi_mem Mem, } // Helper function to implement image read/write/copy. +// Caller must hold a lock on the Queue passed in. 
static pi_result enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, const void *Src, // image or ptr @@ -3991,7 +4048,6 @@ enqueueMemImageCommandHelper(pi_command_type CommandType, pi_queue Queue, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { - assert(Queue); // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; ze_fence_handle_t ZeFence = nullptr; @@ -4104,6 +4160,10 @@ pi_result piEnqueueMemImageRead(pi_queue Queue, pi_mem Image, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); return enqueueMemImageCommandHelper( PI_COMMAND_TYPE_IMAGE_READ, Queue, @@ -4123,6 +4183,11 @@ pi_result piEnqueueMemImageWrite(pi_queue Queue, pi_mem Image, const pi_event *EventWaitList, pi_event *Event) { + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemImageCommandHelper(PI_COMMAND_TYPE_IMAGE_WRITE, Queue, Ptr, // src Image, // dst @@ -4140,6 +4205,11 @@ piEnqueueMemImageCopy(pi_queue Queue, pi_mem SrcImage, pi_mem DstImage, pi_image_region Region, pi_uint32 NumEventsInWaitList, const pi_event *EventWaitList, pi_event *Event) { + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemImageCommandHelper( PI_COMMAND_TYPE_IMAGE_COPY, Queue, SrcImage, DstImage, false, // is_blocking @@ -4156,6 +4226,11 @@ pi_result piEnqueueMemImageFill(pi_queue Queue, pi_mem Image, const pi_event *EventWaitList, pi_event *Event) { + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + die("piEnqueueMemImageFill: not implemented"); return {}; } @@ -4200,6 +4275,11 @@ pi_result piEnqueueNativeKernel(pi_queue Queue, void (*UserFunc)(void *), const pi_event *EventWaitList, pi_event *Event) { + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + die("piEnqueueNativeKernel: not implemented"); return {}; } @@ -4476,6 +4556,11 @@ pi_result piextUSMEnqueueMemset(pi_queue Queue, void *Ptr, pi_int32 Value, return PI_INVALID_VALUE; } + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemFillHelper( // TODO: do we need a new command type for USM memset? PI_COMMAND_TYPE_MEM_BUFFER_FILL, Queue, Ptr, @@ -4494,6 +4579,11 @@ pi_result piextUSMEnqueueMemcpy(pi_queue Queue, pi_bool Blocking, void *DstPtr, return PI_INVALID_VALUE; } + assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + return enqueueMemCopyHelper( // TODO: do we need a new command type for this? PI_COMMAND_TYPE_MEM_BUFFER_COPY, Queue, DstPtr, Blocking, Size, SrcPtr, @@ -4514,8 +4604,11 @@ pi_result piextUSMEnqueuePrefetch(pi_queue Queue, const void *Ptr, size_t Size, pi_uint32 NumEventsInWaitlist, const pi_event *EventsWaitlist, pi_event *Event) { - assert(Queue); assert(!(Flags & ~PI_USM_MIGRATION_TBD0)); + assert(Queue); + + // Lock automatically releases when this goes out of scope. 
+ std::lock_guard lock(Queue->PiQueueMutex); // Get a new command list to be used on this call ze_command_list_handle_t ZeCommandList = nullptr; @@ -4571,6 +4664,10 @@ pi_result piextUSMEnqueueMemAdvise(pi_queue Queue, const void *Ptr, size_t Length, pi_mem_advice Advice, pi_event *Event) { assert(Queue); + + // Lock automatically releases when this goes out of scope. + std::lock_guard lock(Queue->PiQueueMutex); + ze_memory_advice_t ZeAdvice = {}; switch (Advice) { case PI_MEM_ADVICE_SET_READ_MOSTLY: diff --git a/sycl/plugins/level_zero/pi_level_zero.hpp b/sycl/plugins/level_zero/pi_level_zero.hpp index 6d9d49f1de928..71e34a5b2a834 100644 --- a/sycl/plugins/level_zero/pi_level_zero.hpp +++ b/sycl/plugins/level_zero/pi_level_zero.hpp @@ -275,10 +275,22 @@ struct _pi_queue : _pi_object { ze_command_queue_handle_t ZeCommandQueue; // Keeps the PI context to which this queue belongs. - pi_context Context; + // This field is only set at _pi_queue creation time, and cannot change. + // Therefore it can be accessed without holding a lock on this _pi_queue. + const pi_context Context; + + // Keeps the PI device to which this queue belongs. + // This field is only set at _pi_queue creation time, and cannot change. + // Therefore it can be accessed without holding a lock on this _pi_queue. + const pi_device Device; + + // Mutex to be locked on entry to a _pi_queue API call, and unlocked + // prior to exit. Access to all state of a queue is done only after + // this lock has been acquired, and this must be released upon exit + // from a pi_queue API call. No other mutexes/locking should be + // needed/used for the queue data structures. + std::mutex PiQueueMutex; - // Mutex Lock for the Command List, Fence Map - std::mutex ZeCommandListFenceMapMutex; // Map of all Command lists created with their associated Fence used for // tracking when the command list is available for use again. std::map ZeCommandListFenceMap; @@ -286,14 +298,10 @@ struct _pi_queue : _pi_object { // Resets the Command List and Associated fence in the ZeCommandListFenceMap. // If the reset command list should be made available, then MakeAvailable // needs to be set to true. The caller must verify that this command list and - // fence have been signalled and call while holding the - // ZeCommandListFenceMapMutex lock. + // fence have been signalled. pi_result resetCommandListFenceEntry(ze_command_list_handle_t ZeCommandList, bool MakeAvailable); - // Keeps the PI device to which this queue belongs. - pi_device Device; - // Attach a command list to this queue, close, and execute it. // Note that this command list cannot be appended to after this. // The "is_blocking" tells if the wait for completion is requested.
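
For reference, below is a minimal compilable sketch (not part of the diff) of the locking discipline this patch adopts: every pi_queue API entry point takes a std::lock_guard on the queue's single PiQueueMutex for the duration of the call, helper functions invoked underneath assume the lock is already held and do no locking of their own, and const members set at construction (Context, Device) may be read without the lock. The types and the piQueueRetain body here are simplified stand-ins for the real definitions in pi_level_zero.hpp/.cpp, not the actual plugin code.

#include <map>
#include <mutex>

// Simplified stand-ins for the Level Zero handles held by the real queue.
using ze_command_list_handle_t = void *;
using ze_fence_handle_t = void *;

struct _pi_queue {
  // Set once at construction; safe to read without holding PiQueueMutex.
  const void *Context = nullptr;
  const void *Device = nullptr;

  // Single mutex guarding all mutable queue state for the duration of one
  // pi_queue API call; replaces the per-map ZeCommandListFenceMapMutex.
  std::mutex PiQueueMutex;

  // Mutable state: only touched while PiQueueMutex is held.
  int RefCount = 1;
  std::map<ze_command_list_handle_t, ze_fence_handle_t> ZeCommandListFenceMap;
};

// Pattern used for every piQueue*/piEnqueue* entry point in the patch:
// acquire the lock once at the top of the API call; it is released
// automatically when the guard goes out of scope on return.
int piQueueRetain(_pi_queue *Queue) {
  std::lock_guard<std::mutex> lock(Queue->PiQueueMutex);
  ++Queue->RefCount;
  return 0; // PI_SUCCESS
}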