@@ -1004,39 +1004,19 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
10041004 auto CommandBuffer = CurrentPartition->MCommandBuffers [Queue->get_device ()];
10051005
10061006 if (CommandBuffer) {
1007- // if previous submissions are incompleted, we automatically
1008- // add completion events of previous submissions as dependencies.
1009- // With Level-Zero backend we cannot resubmit a command-buffer until the
1010- // previous one has already completed.
1011- // Indeed, since a command-list does not accept a list a dependencies at
1012- // submission, we circumvent this lack by adding a barrier that waits on a
1013- // specific event and then define the conditions to signal this event in
1014- // another command-list. Consequently, if a second submission is
1015- // performed, the signal conditions of this single event are redefined by
1016- // this second submission. Thus, this can lead to an undefined behaviour
1017- // and potential hangs. We have therefore to expliclty wait in the host
1018- // for previous submission to complete before resubmitting the
1019- // command-buffer for level-zero backend.
1020- // TODO https://github.com/intel/llvm/issues/17734
1021- // Remove this backend specific behavior and allow multiple concurrent
1022- // submissions of the UR command-buffer.
10231007 for (std::vector<sycl::detail::EventImplPtr>::iterator It =
10241008 MExecutionEvents.begin ();
10251009 It != MExecutionEvents.end ();) {
10261010 auto Event = *It;
10271011 if (!Event->isCompleted ()) {
1028- if (Queue->get_device ().get_backend () ==
1029- sycl::backend::ext_oneapi_level_zero) {
1030- Event->wait (Event);
1031- } else {
1032- auto &AttachedEventsList = Event->getPostCompleteEvents ();
1033- CGData.MEvents .reserve (AttachedEventsList.size () + 1 );
1034- CGData.MEvents .push_back (Event);
1035- // Add events of the previous execution of all graph partitions.
1036- for (auto &AttachedEvent : AttachedEventsList) {
1037- CGData.MEvents .push_back (AttachedEvent);
1038- }
1039- }
1012+ auto &AttachedEventsList = Event->getPostCompleteEvents ();
1013+ CGData.MEvents .reserve (CGData.MEvents .size () +
1014+ AttachedEventsList.size () + 1 );
1015+ CGData.MEvents .push_back (Event);
1016+ // Add events of the previous execution of all graph partitions.
1017+ CGData.MEvents .insert (CGData.MEvents .end (),
1018+ AttachedEventsList.begin (),
1019+ AttachedEventsList.end ());
10401020 ++It;
10411021 } else {
10421022 // Remove completed events
@@ -1102,46 +1082,6 @@ exec_graph_impl::enqueue(const std::shared_ptr<sycl::detail::queue_impl> &Queue,
11021082
11031083 NewEvent = sycl::detail::Scheduler::getInstance ().addCG (
11041084 NodeImpl->getCGCopy (), Queue, /* EventNeeded=*/ true );
1105- } else {
1106- std::vector<std::shared_ptr<sycl::detail::event_impl>> ScheduledEvents;
1107- for (auto &NodeImpl : CurrentPartition->MSchedule ) {
1108- std::vector<ur_event_handle_t > RawEvents;
1109-
1110- // If the node has no requirements for accessors etc. then we skip the
1111- // scheduler and enqueue directly.
1112- if (NodeImpl->MCGType == sycl::detail::CGType::Kernel &&
1113- NodeImpl->MCommandGroup ->getRequirements ().size () +
1114- static_cast <sycl::detail::CGExecKernel *>(
1115- NodeImpl->MCommandGroup .get ())
1116- ->MStreams .size () ==
1117- 0 ) {
1118- sycl::detail::CGExecKernel *CG =
1119- static_cast <sycl::detail::CGExecKernel *>(
1120- NodeImpl->MCommandGroup .get ());
1121- auto OutEvent = CreateNewEvent ();
1122- sycl::detail::enqueueImpKernel (
1123- Queue, CG->MNDRDesc , CG->MArgs , CG->MKernelBundle ,
1124- CG->MSyclKernel , CG->MKernelName , RawEvents, OutEvent,
1125- // TODO: Pass accessor mem allocations
1126- nullptr ,
1127- // TODO: Extract from handler
1128- UR_KERNEL_CACHE_CONFIG_DEFAULT, CG->MKernelIsCooperative ,
1129- CG->MKernelUsesClusterLaunch , CG->MKernelWorkGroupMemorySize );
1130- ScheduledEvents.push_back (NewEvent);
1131- } else if (!NodeImpl->isEmpty ()) {
1132- // Empty nodes are node processed as other nodes, but only their
1133- // dependencies are propagated in findRealDeps
1134- sycl::detail::EventImplPtr EventImpl =
1135- sycl::detail::Scheduler::getInstance ().addCG (
1136- NodeImpl->getCGCopy (), Queue, /* EventNeeded=*/ true );
1137-
1138- ScheduledEvents.push_back (EventImpl);
1139- }
1140- }
1141- // Create an event which has all kernel events as dependencies
1142- NewEvent = std::make_shared<sycl::detail::event_impl>(Queue);
1143- NewEvent->setStateIncomplete ();
1144- NewEvent->getPreparedDepsEvents () = ScheduledEvents;
11451085 }
11461086 PartitionsExecutionEvents[CurrentPartition] = NewEvent;
11471087 }
0 commit comments