@@ -1189,6 +1189,23 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
11891189 CurQueue->LastCommandEvent && CurQueue->LastCommandEvent ->IsDiscarded )
11901190 IncludeLastCommandEvent = false ;
11911191
1192+ if (CurQueue->Device ->useDriverInOrderLists () && CurQueue->UsingImmCmdLists ) {
1193+ auto QueueGroup = CurQueue->getQueueGroup (UseCopyEngine);
1194+ uint32_t QueueGroupOrdinal, QueueIndex;
1195+ auto NextIndex = QueueGroup.getQueueIndex (&QueueGroupOrdinal, &QueueIndex,
1196+ /* QueryOnly */ true );
1197+ auto NextImmCmdList = QueueGroup.ImmCmdLists [NextIndex];
1198+
1199+ // If we are using L0 native implementation for handling in-order queues,
1200+ // then we don't need to add the last enqueued event into the waitlist, as
1201+ // the native driver implementation will already ensure in-order semantics.
1202+ // The only exception is when a different immediate command list was last
1203+ // used on the same UR Queue.
1204+ IncludeLastCommandEvent &=
1205+ CurQueue->LastUsedCommandList != CurQueue->CommandListMap .end () &&
1206+ NextImmCmdList != CurQueue->LastUsedCommandList ;
1207+ }
1208+
11921209 try {
11931210 uint32_t TmpListLength = 0 ;
11941211
@@ -1205,6 +1222,25 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(
12051222 this ->UrEventList = new ur_event_handle_t [EventListLength];
12061223 }
12071224
1225+ auto WaitListEmptyOrAllEventsFromSameQueue = [CurQueue, EventListLength,
1226+ EventList]() {
1227+ if (!EventListLength)
1228+ return true ;
1229+ for (uint32_t i = 0 ; i < EventListLength; ++i) {
1230+ if (CurQueue != EventList[i]->UrQueue )
1231+ return false ;
1232+ }
1233+ return true ;
1234+ };
1235+
1236+ // For an in-order queue whose wait-list is empty or contains only events
1237+ // from the same queue, we don't need to wait on any additional events.
1238+ if (CurQueue->Device ->useDriverInOrderLists () &&
1239+ CurQueue->isInOrderQueue () && WaitListEmptyOrAllEventsFromSameQueue ()) {
1240+ this ->Length = TmpListLength;
1241+ return UR_RESULT_SUCCESS;
1242+ }
1243+
12081244 if (EventListLength > 0 ) {
12091245 for (uint32_t I = 0 ; I < EventListLength; I++) {
12101246 {
0 commit comments