From b7b6b55769c2f4e8ffb077a9f945e371232148f2 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 2 Dec 2024 08:34:30 -0800 Subject: [PATCH] [L0] Fix cached and evicted timestamp recordings This commit fixes two issues with the level zero implementation of timestamp recording events: * Events allocated for timestamp recordings may have been previously used, which may lead the implementation to think that the recordings of the old timestamp are right. The implementation will now reset the value of it. * To avoid cases where timestamp recordings could conflict in the recordings buffer, unfinished recordings of dead events are now moved to another map, to be evicted fully on queue synchronization or death. Signed-off-by: Larsen, Steffen --- source/adapters/level_zero/event.cpp | 20 +++++++++--------- source/adapters/level_zero/queue.cpp | 31 ++++++++++++++-------------- source/adapters/level_zero/queue.hpp | 15 ++++++-------- 3 files changed, 31 insertions(+), 35 deletions(-) diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 96da4be0fd..f7472ac325 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -574,8 +574,7 @@ ur_result_t urEventGetProfilingInfo( // End time needs to be adjusted for resolution and valid bits. uint64_t ContextEndTime = - (EndTimeRecording.RecordEventEndTimestamp & TimestampMaxValue) * - ZeTimerResolution; + (EndTimeRecording & TimestampMaxValue) * ZeTimerResolution; // If the result is 0, we have not yet gotten results back and so we just // return it. @@ -748,20 +747,20 @@ ur_result_t urEnqueueTimestampRecordingExp( ze_event_handle_t ZeEvent = (*OutEvent)->ZeEvent; (*OutEvent)->WaitList = TmpWaitList; + // Reset the end timestamp, in case it has been previously used. + (*OutEvent)->RecordEventEndTimestamp = 0; + uint64_t DeviceStartTimestamp = 0; UR_CALL(ur::level_zero::urDeviceGetGlobalTimestamps( Device, &DeviceStartTimestamp, nullptr)); (*OutEvent)->RecordEventStartTimestamp = DeviceStartTimestamp; // Create a new entry in the queue's recordings. - Queue->EndTimeRecordings[*OutEvent] = - ur_queue_handle_t_::end_time_recording{}; + Queue->EndTimeRecordings[*OutEvent] = 0; ZE2UR_CALL(zeCommandListAppendWriteGlobalTimestamp, - (CommandList->first, - &Queue->EndTimeRecordings[*OutEvent].RecordEventEndTimestamp, - ZeEvent, (*OutEvent)->WaitList.Length, - (*OutEvent)->WaitList.ZeEventList)); + (CommandList->first, &Queue->EndTimeRecordings[*OutEvent], ZeEvent, + (*OutEvent)->WaitList.Length, (*OutEvent)->WaitList.ZeEventList)); UR_CALL( Queue->executeCommandList(CommandList, Blocking, false /* OkToBatch */)); @@ -1089,10 +1088,11 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event) { auto Entry = Queue->EndTimeRecordings.find(Event); if (Entry != Queue->EndTimeRecordings.end()) { auto &EndTimeRecording = Entry->second; - if (EndTimeRecording.RecordEventEndTimestamp == 0) { + if (EndTimeRecording == 0) { // If the end time recording has not finished, we tell the queue that // the event is no longer alive to avoid invalid write-backs. - EndTimeRecording.EventHasDied = true; + Queue->EvictedEndTimeRecordings.insert( + Queue->EndTimeRecordings.extract(Entry)); } else { // Otherwise we evict the entry. Queue->EndTimeRecordings.erase(Entry); diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index c4598f3472..e39786a51a 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -1563,24 +1563,23 @@ void ur_queue_handle_t_::clearEndTimeRecordings() { for (auto Entry : EndTimeRecordings) { auto &Event = Entry.first; auto &EndTimeRecording = Entry.second; - if (!Entry.second.EventHasDied) { - // Write the result back to the event if it is not dead. - uint64_t ContextEndTime = - (EndTimeRecording.RecordEventEndTimestamp & TimestampMaxValue) * - ZeTimerResolution; - - // Handle a possible wrap-around (the underlying HW counter is < 64-bit). - // Note, it will not report correct time if there were multiple wrap - // arounds, and the longer term plan is to enlarge the capacity of the - // HW timestamps. - if (ContextEndTime < Event->RecordEventStartTimestamp) - ContextEndTime += TimestampMaxValue * ZeTimerResolution; - - // Store it in the event. - Event->RecordEventEndTimestamp = ContextEndTime; - } + + // Write the result back to the event if it is not dead. + uint64_t ContextEndTime = + (EndTimeRecording & TimestampMaxValue) * ZeTimerResolution; + + // Handle a possible wrap-around (the underlying HW counter is < 64-bit). + // Note, it will not report correct time if there were multiple wrap + // arounds, and the longer term plan is to enlarge the capacity of the + // HW timestamps. + if (ContextEndTime < Event->RecordEventStartTimestamp) + ContextEndTime += TimestampMaxValue * ZeTimerResolution; + + // Store it in the event. + Event->RecordEventEndTimestamp = ContextEndTime; } EndTimeRecordings.clear(); + EvictedEndTimeRecordings.clear(); } ur_result_t urQueueReleaseInternal(ur_queue_handle_t Queue) { diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 1108e4c268..bd9a07dd5a 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -490,15 +490,12 @@ struct ur_queue_handle_t_ : _ur_object { // End-times enqueued are stored on the queue rather than on the event to // avoid the event objects having been destroyed prior to the write to the // end-time member. - struct end_time_recording { - // RecordEventEndTimestamp is not adjusted for valid bits nor resolution, as - // it is written asynchronously. - uint64_t RecordEventEndTimestamp = 0; - // The event may die before the recording has been written back. In this - // case the event will mark this for deletion when the queue sees fit. - bool EventHasDied = false; - }; - std::map EndTimeRecordings; + // RecordEventEndTimestamp is not adjusted for valid bits nor resolution, as + // it is written asynchronously. + std::map EndTimeRecordings; + // The event may die before the recording has been written back. In this case + // we move it to a separate map to avoid conflicts. + std::multimap EvictedEndTimeRecordings; // Clear the end time recording timestamps entries. void clearEndTimeRecordings();