2121
2222namespace v2 {
2323
// Returns the wait-list view for the given events by forwarding
// phWaitEvents and numWaitEvents unchanged to
// commandListManager.getWaitListView and returning its result.
// This change replaces the declared return type
// std::pair<ze_event_handle_t *, uint32_t> with wait_list_view.
24- std::pair<ze_event_handle_t *, uint32_t >
25- ur_queue_immediate_in_order_t::getWaitListView (
24+ wait_list_view ur_queue_immediate_in_order_t::getWaitListView (
2625 const ur_event_handle_t *phWaitEvents, uint32_t numWaitEvents) {
2726 return commandListManager.getWaitListView (phWaitEvents, numWaitEvents);
2827}
@@ -291,37 +290,31 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericCopyUnlocked(
291290 ur_command_t commandType) {
292291 auto zeSignalEvent = getSignalEvent (phEvent, commandType);
293292
294- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
293+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
295294
296- bool memoryMigrated = false ;
297295 auto pSrc = ur_cast<char *>(src->getDevicePtr (
298296 hDevice, ur_mem_handle_t_::device_access_mode_t ::read_only, srcOffset,
299297 size, [&](void *src, void *dst, size_t size) {
300298 ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
301299 (commandListManager.getZeCommandList (), dst, src,
302- size, nullptr , waitList.second , waitList.first ));
303- memoryMigrated = true ;
300+ size, nullptr , waitListView.num ,
301+ waitListView.handles ));
302+ waitListView.clear ();
304303 }));
305304
306305 auto pDst = ur_cast<char *>(dst->getDevicePtr (
307306 hDevice, ur_mem_handle_t_::device_access_mode_t ::write_only, dstOffset,
308307 size, [&](void *src, void *dst, size_t size) {
309308 ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
310309 (commandListManager.getZeCommandList (), dst, src,
311- size, nullptr , waitList.second , waitList.first ));
312- memoryMigrated = true ;
310+ size, nullptr , waitListView.num ,
311+ waitListView.handles ));
312+ waitListView.clear ();
313313 }));
314314
315- if (memoryMigrated) {
316- // If memory was migrated, we don't need to pass the wait list to
317- // the copy command again.
318- waitList.first = nullptr ;
319- waitList.second = 0 ;
320- }
321-
322315 ZE2UR_CALL (zeCommandListAppendMemoryCopy,
323316 (commandListManager.getZeCommandList (), pDst, pSrc, size,
324- zeSignalEvent, waitList. second , waitList. first ));
317+ zeSignalEvent, waitListView. num , waitListView. handles ));
325318
326319 if (blocking) {
327320 ZE2UR_CALL (zeCommandListHostSynchronize,
@@ -379,38 +372,32 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueRegionCopyUnlocked(
379372
380373 auto zeSignalEvent = getSignalEvent (phEvent, commandType);
381374
382- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
375+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
383376
384- bool memoryMigrated = false ;
385377 auto pSrc = ur_cast<char *>(src->getDevicePtr (
386378 hDevice, ur_mem_handle_t_::device_access_mode_t ::read_only, 0 ,
387379 src->getSize (), [&](void *src, void *dst, size_t size) {
388380 ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
389381 (commandListManager.getZeCommandList (), dst, src,
390- size, nullptr , waitList.second , waitList.first ));
391- memoryMigrated = true ;
382+ size, nullptr , waitListView.num ,
383+ waitListView.handles ));
384+ waitListView.clear ();
392385 }));
393386 auto pDst = ur_cast<char *>(dst->getDevicePtr (
394387 hDevice, ur_mem_handle_t_::device_access_mode_t ::write_only, 0 ,
395388 dst->getSize (), [&](void *src, void *dst, size_t size) {
396389 ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
397390 (commandListManager.getZeCommandList (), dst, src,
398- size, nullptr , waitList.second , waitList.first ));
399- memoryMigrated = true ;
391+ size, nullptr , waitListView.num ,
392+ waitListView.handles ));
393+ waitListView.clear ();
400394 }));
401395
402- if (memoryMigrated) {
403- // If memory was migrated, we don't need to pass the wait list to
404- // the copy command again.
405- waitList.first = nullptr ;
406- waitList.second = 0 ;
407- }
408-
409396 ZE2UR_CALL (zeCommandListAppendMemoryCopyRegion,
410397 (commandListManager.getZeCommandList (), pDst, &zeParams.dstRegion ,
411398 zeParams.dstPitch , zeParams.dstSlicePitch , pSrc,
412399 &zeParams.srcRegion , zeParams.srcPitch , zeParams.srcSlicePitch ,
413- zeSignalEvent, waitList. second , waitList. first ));
400+ zeSignalEvent, waitListView. num , waitListView. handles ));
414401
415402 if (blocking) {
416403 ZE2UR_CALL (zeCommandListHostSynchronize,
@@ -580,23 +567,23 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemBufferMap(
580567
581568 auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_BUFFER_MAP);
582569
583- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
570+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
584571
585- bool memoryMigrated = false ;
586572 auto pDst = ur_cast<char *>(hBuffer->mapHostPtr (
587573 mapFlags, offset, size, [&](void *src, void *dst, size_t size) {
588574 ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
589575 (commandListManager.getZeCommandList (), dst, src,
590- size, nullptr , waitList.second , waitList.first ));
591- memoryMigrated = true ;
576+ size, nullptr , waitListView.num ,
577+ waitListView.handles ));
578+ waitListView.clear ();
592579 }));
593580 *ppRetMap = pDst;
594581
595- if (!memoryMigrated && waitList. second ) {
582+ if (waitListView ) {
596583 // If memory was not migrated, we need to wait on the events here.
597584 ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
598- (commandListManager.getZeCommandList (), waitList. second ,
599- waitList. first ));
585+ (commandListManager.getZeCommandList (), waitListView. num ,
586+ waitListView. handles ));
600587 }
601588
602589 if (zeSignalEvent) {
@@ -621,21 +608,20 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueMemUnmap(
621608
622609 auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_MEM_UNMAP);
623610
624- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
611+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
625612
626613 // TODO: currently unmapHostPtr deallocates memory immediately,
627614 // since the memory might be used by the user, we need to make sure
628615 // all dependencies are completed.
629- ZE2UR_CALL (
630- zeCommandListAppendWaitOnEvents,
631- (commandListManager.getZeCommandList (), waitList.second , waitList.first ));
616+ ZE2UR_CALL (zeCommandListAppendWaitOnEvents,
617+ (commandListManager.getZeCommandList (), waitListView.num ,
618+ waitListView.handles ));
619+ waitListView.clear ();
632620
633- bool memoryMigrated = false ;
634621 hMem->unmapHostPtr (pMappedPtr, [&](void *src, void *dst, size_t size) {
635622 ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
636623 (commandListManager.getZeCommandList (), dst, src, size,
637- nullptr , waitList.second , waitList.first ));
638- memoryMigrated = true ;
624+ nullptr , waitListView.num , waitListView.handles ));
639625 });
640626 if (zeSignalEvent) {
641627 ZE2UR_CALL (zeCommandListAppendSignalEvent,
@@ -652,33 +638,40 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueGenericFillUnlocked(
652638
653639 auto zeSignalEvent = getSignalEvent (phEvent, commandType);
654640
655- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
641+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
656642
657- bool memoryMigrated = false ;
658643 auto pDst = ur_cast<char *>(dst->getDevicePtr (
659644 hDevice, ur_mem_handle_t_::device_access_mode_t ::read_only, offset, size,
660645 [&](void *src, void *dst, size_t size) {
661646 ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
662647 (commandListManager.getZeCommandList (), dst, src,
663- size, nullptr , waitList.second , waitList.first ));
664- memoryMigrated = true ;
648+ size, nullptr , waitListView.num ,
649+ waitListView.handles ));
650+ waitListView.clear ();
665651 }));
666652
667- if (memoryMigrated) {
668- // If memory was migrated, we don't need to pass the wait list to
669- // the copy command again.
670- waitList.first = nullptr ;
671- waitList.second = 0 ;
672- }
673-
674- // TODO: support non-power-of-two pattern sizes
675-
676653 // PatternSize must be a power of two for zeCommandListAppendMemoryFill.
677654 // When it's not, the fill is emulated with zeCommandListAppendMemoryCopy.
678- ZE2UR_CALL (zeCommandListAppendMemoryFill,
679- (commandListManager.getZeCommandList (), pDst, pPattern,
680- patternSize, size, zeSignalEvent, waitList.second ,
681- waitList.first ));
655+ if (isPowerOf2 (patternSize)) {
656+ ZE2UR_CALL (zeCommandListAppendMemoryFill,
657+ (commandListManager.getZeCommandList (), pDst, pPattern,
658+ patternSize, size, zeSignalEvent, waitListView.num ,
659+ waitListView.handles ));
660+ } else {
661+ // Copy pattern into every entry in memory array pointed by Ptr.
662+ uint32_t numOfCopySteps = size / patternSize;
663+ const void *src = pPattern;
664+
665+ for (uint32_t step = 0 ; step < numOfCopySteps; ++step) {
666+ void *dst = reinterpret_cast <void *>(reinterpret_cast <uint8_t *>(pDst) +
667+ step * patternSize);
668+ ZE2UR_CALL (zeCommandListAppendMemoryCopy,
669+ (commandListManager.getZeCommandList (), dst, src, patternSize,
670+ step == numOfCopySteps - 1 ? zeSignalEvent : nullptr ,
671+ waitListView.num , waitListView.handles ));
672+ waitListView.clear ();
673+ }
674+ }
682675
683676 return UR_RESULT_SUCCESS;
684677}
@@ -988,33 +981,25 @@ ur_result_t ur_queue_immediate_in_order_t::enqueueCooperativeKernelLaunchExp(
988981
989982 auto zeSignalEvent = getSignalEvent (phEvent, UR_COMMAND_KERNEL_LAUNCH);
990983
991- auto waitList = getWaitListView (phEventWaitList, numEventsInWaitList);
984+ auto waitListView = getWaitListView (phEventWaitList, numEventsInWaitList);
992985
993- bool memoryMigrated = false ;
994986 auto memoryMigrate = [&](void *src, void *dst, size_t size) {
995987 ZE2UR_CALL_THROWS (zeCommandListAppendMemoryCopy,
996988 (commandListManager.getZeCommandList (), dst, src, size,
997- nullptr , waitList. second , waitList. first ));
998- memoryMigrated = true ;
989+ nullptr , waitListView. num , waitListView. handles ));
990+ waitListView. clear () ;
999991 };
1000992
1001993 UR_CALL (hKernel->prepareForSubmission (hContext, hDevice, pGlobalWorkOffset,
1002994 workDim, WG[0 ], WG[1 ], WG[2 ],
1003995 memoryMigrate));
1004996
1005- if (memoryMigrated) {
1006- // If memory was migrated, we don't need to pass the wait list to
1007- // the copy command again.
1008- waitList.first = nullptr ;
1009- waitList.second = 0 ;
1010- }
1011-
1012997 TRACK_SCOPE_LATENCY (" ur_queue_immediate_in_order_t::"
1013998 " zeCommandListAppendLaunchCooperativeKernel" );
1014999 ZE2UR_CALL (zeCommandListAppendLaunchCooperativeKernel,
10151000 (commandListManager.getZeCommandList (), hZeKernel,
1016- &zeThreadGroupDimensions, zeSignalEvent, waitList. second ,
1017- waitList. first ));
1001+ &zeThreadGroupDimensions, zeSignalEvent, waitListView. num ,
1002+ waitListView. handles ));
10181003
10191004 recordSubmittedKernel (hKernel);
10201005
0 commit comments