@@ -61,7 +61,11 @@ event queue_impl::memset(const std::shared_ptr<detail::queue_impl> &Self,
6161 return event ();
6262
6363 event ResEvent = prepareUSMEvent (Self, NativeEvent);
64- addSharedEvent (ResEvent);
64+ // Track only if we won't be able to handle it with piQueueFinish.
65+ // FIXME these events are stored for Level Zero as a workaround; remove this
66+ // once piEventRelease no longer calls wait on the event in the plugin.
67+ if (!MSupportOOO || getPlugin ().getBackend () == backend::level_zero)
68+ addSharedEvent (ResEvent);
6569 return ResEvent;
6670}
6771
@@ -76,7 +80,11 @@ event queue_impl::memcpy(const std::shared_ptr<detail::queue_impl> &Self,
7680 return event ();
7781
7882 event ResEvent = prepareUSMEvent (Self, NativeEvent);
79- addSharedEvent (ResEvent);
83+ // Track only if we won't be able to handle it with piQueueFinish.
84+ // FIXME these events are stored for Level Zero as a workaround; remove this
85+ // once piEventRelease no longer calls wait on the event in the plugin.
86+ if (!MSupportOOO || getPlugin ().getBackend () == backend::level_zero)
87+ addSharedEvent (ResEvent);
8088 return ResEvent;
8189}
8290
@@ -92,7 +100,11 @@ event queue_impl::mem_advise(const std::shared_ptr<detail::queue_impl> &Self,
92100 return event ();
93101
94102 event ResEvent = prepareUSMEvent (Self, NativeEvent);
95- addSharedEvent (ResEvent);
103+ // Track only if we won't be able to handle it with piQueueFinish.
104+ // FIXME these events are stored for Level Zero as a workaround; remove this
105+ // once piEventRelease no longer calls wait on the event in the plugin.
106+ if (!MSupportOOO || getPlugin ().getBackend () == backend::level_zero)
107+ addSharedEvent (ResEvent);
96108 return ResEvent;
97109}
98110
@@ -101,8 +113,14 @@ void queue_impl::addEvent(const event &Event) {
101113 Command *Cmd = (Command *)(Eimpl->getCommand ());
102114 if (!Cmd) {
103115 // if there is no command on the event, we cannot track it with MEventsWeak
104- // as that will leave it with no owner. Track in MEventsShared
105- addSharedEvent (Event);
116+ // as that will leave it with no owner. Track in MEventsShared only if we're
117+ // unable to call piQueueFinish during wait.
118+ // FIXME these events are stored for Level Zero as a workaround;
119+ // remove this once piEventRelease no longer calls wait on the event in the
120+ // plugin.
121+ if (is_host () || !MSupportOOO ||
122+ getPlugin ().getBackend () == backend::level_zero)
123+ addSharedEvent (Event);
106124 } else {
107125 std::weak_ptr<event_impl> EventWeakPtr{Eimpl};
108126 std::lock_guard<std::mutex> Lock{MMutex};
@@ -114,6 +132,10 @@ void queue_impl::addEvent(const event &Event) {
114132// / but some events have no other owner. In this case,
115133// / addSharedEvent will have the queue track the events via a shared pointer.
116134void queue_impl::addSharedEvent (const event &Event) {
135+ // FIXME The assertion should be corrected once the Level Zero workaround is
136+ // removed.
137+ assert (is_host () || !MSupportOOO ||
138+ getPlugin ().getBackend () == backend::level_zero);
117139 std::lock_guard<std::mutex> Lock (MMutex);
118140 // Events stored in MEventsShared are not released anywhere else aside from
119141 // calls to queue::wait/wait_and_throw, which a user application might not
@@ -234,21 +256,49 @@ void queue_impl::wait(const detail::code_location &CodeLoc) {
234256 TelemetryEvent = instrumentationProlog (CodeLoc, Name, StreamID, IId);
235257#endif
236258
237- std::vector<std::weak_ptr<event_impl>> Events ;
238- std::vector<event> USMEvents ;
259+ std::vector<std::weak_ptr<event_impl>> WeakEvents ;
260+ std::vector<event> SharedEvents ;
239261 {
240- std::lock_guard<std::mutex> Lock (MMutex);
241- Events.swap (MEventsWeak);
242- USMEvents.swap (MEventsShared);
262+ std::lock_guard<mutex_class> Lock (MMutex);
263+ WeakEvents.swap (MEventsWeak);
264+ SharedEvents.swap (MEventsShared);
265+ }
266+ // If the queue is either a host one or does not support OOO (and we use
267+ // multiple in-order queues as a result of that), wait for each event
268+ // directly. Otherwise, only wait for unenqueued or host task events, starting
269+ // from the latest submitted task in order to minimize total amount of calls,
270+ // then handle the rest with piQueueFinish.
271+ bool SupportsPiFinish = !is_host () && MSupportOOO;
272+ for (auto EventImplWeakPtrIt = WeakEvents.rbegin ();
273+ EventImplWeakPtrIt != WeakEvents.rend (); ++EventImplWeakPtrIt) {
274+ if (std::shared_ptr<event_impl> EventImplSharedPtr =
275+ EventImplWeakPtrIt->lock ()) {
276+ // A nullptr PI event indicates that piQueueFinish will not cover it,
277+ // either because it's a host task event or an unenqueued one.
278+ if (!SupportsPiFinish || nullptr == EventImplSharedPtr->getHandleRef ()) {
279+ EventImplSharedPtr->wait (EventImplSharedPtr);
280+ }
281+ }
282+ }
283+ if (SupportsPiFinish) {
284+ const detail::plugin &Plugin = getPlugin ();
285+ Plugin.call <detail::PiApiKind::piQueueFinish>(getHandleRef ());
286+ for (std::weak_ptr<event_impl> &EventImplWeakPtr : WeakEvents)
287+ if (std::shared_ptr<event_impl> EventImplSharedPtr =
288+ EventImplWeakPtr.lock ())
289+ EventImplSharedPtr->cleanupCommand (EventImplSharedPtr);
290+ // FIXME these events are stored for Level Zero as a workaround;
291+ // remove this once piEventRelease no longer calls wait on the event in the
292+ // plugin.
293+ if (Plugin.getBackend () == backend::level_zero) {
294+ SharedEvents.clear ();
295+ }
296+ assert (SharedEvents.empty () && " Queues that support calling piQueueFinish "
297+ " shouldn't have shared events" );
298+ } else {
299+ for (event &Event : SharedEvents)
300+ Event.wait ();
243301 }
244-
245- for (std::weak_ptr<event_impl> &EventImplWeakPtr : Events)
246- if (std::shared_ptr<event_impl> EventImplPtr = EventImplWeakPtr.lock ())
247- EventImplPtr->wait (EventImplPtr);
248-
249- for (event &Event : USMEvents)
250- Event.wait ();
251-
252302#ifdef XPTI_ENABLE_INSTRUMENTATION
253303 instrumentationEpilog (TelemetryEvent, Name, StreamID, IId);
254304#endif
0 commit comments