Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 28 additions & 42 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,8 +92,8 @@ ur_exp_command_buffer_handle_t_::ur_exp_command_buffer_handle_t_(
ZeStruct<ze_command_list_desc_t> ZeDesc,
const ur_exp_command_buffer_desc_t *Desc)
: Context(Context), Device(Device), ZeCommandList(CommandList),
ZeCommandListDesc(ZeDesc), QueueProperties(), SyncPoints(),
NextSyncPoint(0), CommandListMap() {
ZeCommandListDesc(ZeDesc), ZeFencesList(), QueueProperties(),
SyncPoints(), NextSyncPoint(0) {
(void)Desc;
urContextRetain(Context);
urDeviceRetain(Device);
Expand Down Expand Up @@ -132,10 +132,8 @@ ur_exp_command_buffer_handle_t_::~ur_exp_command_buffer_handle_t_() {
}

// Release Fences allocated to command_buffer
for (auto it = CommandListMap.begin(); it != CommandListMap.end(); ++it) {
if (it->second.ZeFence != nullptr) {
ZE_CALL_NOCHECK(zeFenceDestroy, (it->second.ZeFence));
}
for (auto &ZeFence : ZeFencesList) {
ZE_CALL_NOCHECK(zeFenceDestroy, (ZeFence));
}
}

Expand Down Expand Up @@ -464,7 +462,6 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device,
ZE2UR_CALL(
zeCommandListAppendBarrier,
(ZeCommandList, nullptr, 1, &RetCommandBuffer->WaitEvent->ZeEvent));

return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -856,12 +853,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
ur_exp_command_buffer_handle_t CommandBuffer, ur_queue_handle_t Queue,
uint32_t NumEventsInWaitList, const ur_event_handle_t *EventWaitList,
ur_event_handle_t *Event) {
// There are issues with immediate command lists so return an error if the
// queue is in that mode.
if (Queue->UsingImmCmdLists) {
return UR_RESULT_ERROR_INVALID_QUEUE_PROPERTIES;
}

std::scoped_lock<ur_shared_mutex> lock(Queue->Mutex);
// Use compute engine rather than copy engine
const auto UseCopyEngine = false;
Expand All @@ -871,22 +862,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(

ze_fence_handle_t ZeFence;
ZeStruct<ze_fence_desc_t> ZeFenceDesc;
ur_command_list_ptr_t CommandListPtr;

ZE2UR_CALL(zeFenceCreate, (ZeCommandQueue, &ZeFenceDesc, &ZeFence));
// TODO: Refactor so that a map iterator is not needed here; it is currently
// required by executeCommandList though.
ZeStruct<ze_command_queue_desc_t> ZeQueueDesc;
ZeQueueDesc.ordinal = QueueGroupOrdinal;
CommandListPtr = CommandBuffer->CommandListMap.insert(
std::pair<ze_command_list_handle_t, ur_command_list_info_t>(
CommandBuffer->ZeCommandList,
{ZeFence, false, false, ZeCommandQueue, ZeQueueDesc}));

// Previous execution will have closed the command list, we need to reopen
// it otherwise calling `executeCommandList` will return early.
CommandListPtr->second.IsClosed = false;
CommandListPtr->second.ZeFenceInUse = true;
CommandBuffer->ZeFencesList.push_back(ZeFence);

// Create a command-list that executes before the command-buffer's main
// command-list (`CommandBuffer->ZeCommandList`) and signals when the
// `EventWaitList` dependencies are complete.
Expand All @@ -908,6 +886,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
(WaitCommandList->first, ZeEvent));
}

bool MustSignalWaitEvent = true;
if (NumEventsInWaitList) {
_ur_ze_event_list_t TmpWaitList;
UR_CALL(TmpWaitList.createAndRetainUrZeEventList(
Expand All @@ -920,17 +899,30 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
else
CommandBuffer->WaitEvent->WaitList.insert(TmpWaitList);

ZE2UR_CALL(zeCommandListAppendBarrier,
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
CommandBuffer->WaitEvent->WaitList.Length,
CommandBuffer->WaitEvent->WaitList.ZeEventList));
} else {
ZE2UR_CALL(zeCommandListAppendSignalEvent,
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent));
if (!CommandBuffer->WaitEvent->WaitList.isEmpty()) {
ZE2UR_CALL(zeCommandListAppendBarrier,
(WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent,
CommandBuffer->WaitEvent->WaitList.Length,
CommandBuffer->WaitEvent->WaitList.ZeEventList));
Queue->executeCommandList(WaitCommandList, false, false);
MustSignalWaitEvent = false;
}
}

if (MustSignalWaitEvent) {
ZE2UR_CALL(zeEventHostSignal, (CommandBuffer->WaitEvent->ZeEvent));
}

// Submit main command-list. This command-list is of a batch command-list
// type, regardless of the UR Queue type. We therefore need to submit the list
// directly using the Level-Zero API to avoid type mismatches if using UR
// functions.
ZE2UR_CALL(zeCommandQueueExecuteCommandLists,
(ZeCommandQueue, 1, &CommandBuffer->ZeCommandList, ZeFence));

// Execution event for this enqueue of the UR command-buffer
ur_event_handle_t RetEvent{};

// Create a command-list to signal RetEvent on completion
ur_command_list_ptr_t SignalCommandList{};
UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList,
Expand All @@ -943,7 +935,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
if (Event) {
UR_CALL(createEventAndAssociateQueue(Queue, &RetEvent,
UR_COMMAND_COMMAND_BUFFER_ENQUEUE_EXP,
SignalCommandList, false));
SignalCommandList, false, true));

if ((Queue->Properties & UR_QUEUE_FLAG_PROFILING_ENABLE)) {
// Multiple submissions of a command buffer implies that we need to save
Expand Down Expand Up @@ -972,13 +964,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
}
}

// Execute our command-lists asynchronously
// TODO Look using a single `zeCommandQueueExecuteCommandLists()` call
// passing all three command-lists, rather than individual calls which
// introduces latency.
UR_CALL(Queue->executeCommandList(WaitCommandList, false, false));
UR_CALL(Queue->executeCommandList(CommandListPtr, false, false));
UR_CALL(Queue->executeCommandList(SignalCommandList, false, false));
Queue->executeCommandList(SignalCommandList, false, false);

if (Event) {
*Event = RetEvent;
Expand Down
11 changes: 4 additions & 7 deletions source/adapters/level_zero/command_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,10 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
ze_command_list_handle_t ZeCommandList;
// Level Zero command list descriptor
ZeStruct<ze_command_list_desc_t> ZeCommandListDesc;
// List of Level Zero fences created when submitting a graph.
// This list is needed to release all fences retained by the
// command_buffer.
std::vector<ze_fence_handle_t> ZeFencesList;
// Queue properties from command-buffer descriptor
// TODO: Do we need these?
ur_queue_properties_t QueueProperties;
Expand All @@ -60,13 +64,6 @@ struct ur_exp_command_buffer_handle_t_ : public _ur_object {
// Next sync_point value (may need to consider ways to reuse values if 32-bits
// is not enough)
ur_exp_command_buffer_sync_point_t NextSyncPoint;
// Command list map so we can use queue::executeCommandList.
// Command list map is also used to release all the Fences retained by the
// command_buffer. CommandListMap is redefined as a multimap
// (std::unordered_multimap<ze_command_list_handle_t, ur_command_list_info_t>)
// to enable multiple commands enqueuing into the same command_buffer.
std::unordered_multimap<ze_command_list_handle_t, ur_command_list_info_t>
CommandListMap;
// Event which signals that the most recent execution of the command-buffer
// has finished
ur_event_handle_t SignalEvent = nullptr;
Expand Down