Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions source/adapters/level_zero/command_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -933,6 +933,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urCommandBufferEnqueueExp(
MustSignalWaitEvent = false;
}
}
// Given that WaitEvent was created without counter-based events enabled,
// this event can be signalled on the host.
if (MustSignalWaitEvent) {
ZE2UR_CALL(zeEventHostSignal, (CommandBuffer->WaitEvent->ZeEvent));
}
Expand Down
33 changes: 27 additions & 6 deletions source/adapters/level_zero/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -471,7 +471,8 @@ static const uint32_t MaxNumEventsPerPool = [] {

ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

there ought to be a better way of allocating an event that doesn't require passing around half a dozen arguments to multiple functions. Maybe some sort of flag?

auto event = Pool->allocateEvent(HOST_VISIBLE | ENABLE_PROFILER | COUNTER_BASED);

having functions that accept multiple boolean values is error-prone.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pbalcer Thanks for the feedback! We could create a new enum just for this case. However, this seems to be a bit overkill just for this one function. It is only used once in event.cpp during event creation.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not just this function. All the various boolean event parameters are being passed around in multiple functions. Just from a quick search:
createEventAndAssociateQueue, getEventFromQueueCache, EventCreate, getEventFromContextCache, getFreeSlotInExistingOrNewPool, getZeEventPoolCache

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@pbalcer This is quite a large refactoring. Functions with these booleans are abundant. Additionally, it would make sense to add this in ur_api, however, we would need to add this in spec before I can do so. Perhaps I can file a new ticket and have this be its own patch?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, this was just a suggestion.

ze_event_pool_handle_t &Pool, size_t &Index, bool HostVisible,
bool ProfilingEnabled, ur_device_handle_t Device) {
bool ProfilingEnabled, ur_device_handle_t Device,
bool CounterBasedEventEnabled, bool UsingImmCmdList) {
// Lock while updating event pool machinery.
std::scoped_lock<ur_mutex> Lock(ZeEventPoolCacheMutex);

Expand All @@ -481,7 +482,8 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
ZeDevice = Device->ZeDevice;
}
std::list<ze_event_pool_handle_t> *ZePoolCache =
getZeEventPoolCache(HostVisible, ProfilingEnabled, ZeDevice);
getZeEventPoolCache(HostVisible, ProfilingEnabled,
CounterBasedEventEnabled, UsingImmCmdList, ZeDevice);

if (!ZePoolCache->empty()) {
if (NumEventsAvailableInEventPool[ZePoolCache->front()] == 0) {
Expand All @@ -506,15 +508,27 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
Index = 0;
// Create one event ZePool per MaxNumEventsPerPool events
if (*ZePool == nullptr) {
ze_event_pool_counter_based_exp_desc_t counterBasedExt = {
ZE_STRUCTURE_TYPE_COUNTER_BASED_EVENT_POOL_EXP_DESC};
ZeStruct<ze_event_pool_desc_t> ZeEventPoolDesc;
ZeEventPoolDesc.count = MaxNumEventsPerPool;
ZeEventPoolDesc.flags = 0;
ZeEventPoolDesc.pNext = nullptr;
if (HostVisible)
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_HOST_VISIBLE;
if (ProfilingEnabled)
ZeEventPoolDesc.flags |= ZE_EVENT_POOL_FLAG_KERNEL_TIMESTAMP;
logger::debug("ze_event_pool_desc_t flags set to: {}",
ZeEventPoolDesc.flags);
if (CounterBasedEventEnabled) {
if (UsingImmCmdList) {
counterBasedExt.flags = ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_IMMEDIATE;
} else {
counterBasedExt.flags =
ZE_EVENT_POOL_COUNTER_BASED_EXP_FLAG_NON_IMMEDIATE;
}
ZeEventPoolDesc.pNext = &counterBasedExt;
}

std::vector<ze_device_handle_t> ZeDevices;
if (ZeDevice) {
Expand All @@ -540,14 +554,18 @@ ur_result_t ur_context_handle_t_::getFreeSlotInExistingOrNewPool(
}

ur_event_handle_t ur_context_handle_t_::getEventFromContextCache(
bool HostVisible, bool WithProfiling, ur_device_handle_t Device) {
bool HostVisible, bool WithProfiling, ur_device_handle_t Device,
bool CounterBasedEventEnabled) {
std::scoped_lock<ur_mutex> Lock(EventCacheMutex);
auto Cache = getEventCache(HostVisible, WithProfiling, Device);
if (Cache->empty())
return nullptr;

auto It = Cache->begin();
ur_event_handle_t Event = *It;
if (Event->CounterBasedEventsEnabled != CounterBasedEventEnabled) {
return nullptr;
}
Cache->erase(It);
// We have to reset event before using it.
Event->reset();
Expand Down Expand Up @@ -579,13 +597,16 @@ ur_context_handle_t_::decrementUnreleasedEventsInPool(ur_event_handle_t Event) {
}

ze_device_handle_t ZeDevice = nullptr;
bool UsingImmediateCommandlists =
!Event->UrQueue || Event->UrQueue->UsingImmCmdLists;

if (!Event->IsMultiDevice && Event->UrQueue) {
ZeDevice = Event->UrQueue->Device->ZeDevice;
}

std::list<ze_event_pool_handle_t> *ZePoolCache = getZeEventPoolCache(
Event->isHostVisible(), Event->isProfilingEnabled(), ZeDevice);
Event->isHostVisible(), Event->isProfilingEnabled(),
Event->CounterBasedEventsEnabled, UsingImmediateCommandlists, ZeDevice);

// Put the empty pool to the cache of the pools.
if (NumEventsUnreleasedInEventPool[Event->ZeEventPool] == 0)
Expand Down Expand Up @@ -683,8 +704,8 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList(
// Make sure to acquire the lock before checking the size, or there
// will be a race condition.
std::scoped_lock<ur_mutex> Lock(Queue->Context->ZeCommandListCacheMutex);
// Under mutex since operator[] does insertion on the first usage for every
// unique ZeDevice.
// Under mutex since operator[] does insertion on the first usage for
// every unique ZeDevice.
auto &ZeCommandListCache =
UseCopyEngine
? Queue->Context->ZeCopyCommandListCache[Queue->Device->ZeDevice]
Expand Down
85 changes: 55 additions & 30 deletions source/adapters/level_zero/context.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,9 +146,9 @@ struct ur_context_handle_t_ : _ur_object {
// head.
//
// Cache of event pools to which host-visible events are added to.
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{4};
std::vector<std::list<ze_event_pool_handle_t>> ZeEventPoolCache{12};
std::vector<std::unordered_map<ze_device_handle_t, size_t>>
ZeEventPoolCacheDeviceMap{4};
ZeEventPoolCacheDeviceMap{12};

// This map will be used to determine if a pool is full or not
// by storing number of empty slots available in the pool.
Expand Down Expand Up @@ -199,48 +199,73 @@ struct ur_context_handle_t_ : _ur_object {
ur_result_t getFreeSlotInExistingOrNewPool(ze_event_pool_handle_t &, size_t &,
bool HostVisible,
bool ProfilingEnabled,
ur_device_handle_t Device);
ur_device_handle_t Device,
bool CounterBasedEventEnabled,
bool UsingImmCmdList);

// Get ur_event_handle_t from cache.
ur_event_handle_t getEventFromContextCache(bool HostVisible,
bool WithProfiling,
ur_device_handle_t Device);
ur_device_handle_t Device,
bool CounterBasedEventEnabled);

// Add ur_event_handle_t to cache.
void addEventToContextCache(ur_event_handle_t);

enum EventPoolCacheType {
HostVisibleCacheType,
HostInvisibleCacheType,
HostVisibleCounterBasedRegularCacheType,
HostInvisibleCounterBasedRegularCacheType,
HostVisibleCounterBasedImmediateCacheType,
HostInvisibleCounterBasedImmediateCacheType
};

std::list<ze_event_pool_handle_t> *
getZeEventPoolCache(bool HostVisible, bool WithProfiling,
bool CounterBasedEventEnabled, bool UsingImmediateCmdList,
ze_device_handle_t ZeDevice) {
if (HostVisible) {
if (ZeDevice) {
auto ZeEventPoolCacheMap = WithProfiling
? &ZeEventPoolCacheDeviceMap[0]
: &ZeEventPoolCacheDeviceMap[1];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCache.emplace_back();
ZeEventPoolCacheMap->insert(
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
}
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
} else {
return WithProfiling ? &ZeEventPoolCache[0] : &ZeEventPoolCache[1];
EventPoolCacheType CacheType;

calculateCacheIndex(HostVisible, CounterBasedEventEnabled,
UsingImmediateCmdList, CacheType);
if (ZeDevice) {
auto ZeEventPoolCacheMap =
WithProfiling ? &ZeEventPoolCacheDeviceMap[CacheType * 2]
: &ZeEventPoolCacheDeviceMap[CacheType * 2 + 1];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCache.emplace_back();
ZeEventPoolCacheMap->insert(
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
}
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
} else {
if (ZeDevice) {
auto ZeEventPoolCacheMap = WithProfiling
? &ZeEventPoolCacheDeviceMap[2]
: &ZeEventPoolCacheDeviceMap[3];
if (ZeEventPoolCacheMap->find(ZeDevice) == ZeEventPoolCacheMap->end()) {
ZeEventPoolCache.emplace_back();
ZeEventPoolCacheMap->insert(
std::make_pair(ZeDevice, ZeEventPoolCache.size() - 1));
}
return &ZeEventPoolCache[(*ZeEventPoolCacheMap)[ZeDevice]];
} else {
return WithProfiling ? &ZeEventPoolCache[2] : &ZeEventPoolCache[3];
}
return WithProfiling ? &ZeEventPoolCache[CacheType * 2]
: &ZeEventPoolCache[CacheType * 2 + 1];
}
}

ur_result_t calculateCacheIndex(bool HostVisible,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

how do you differentiate between counter based events for immediate and regular command lists?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added immediate detection

bool CounterBasedEventEnabled,
bool UsingImmediateCmdList,
EventPoolCacheType &CacheType) {
if (CounterBasedEventEnabled && HostVisible && !UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
!UsingImmediateCmdList) {
CacheType = HostInvisibleCounterBasedRegularCacheType;
} else if (CounterBasedEventEnabled && HostVisible &&
UsingImmediateCmdList) {
CacheType = HostVisibleCounterBasedImmediateCacheType;
} else if (CounterBasedEventEnabled && !HostVisible &&
UsingImmediateCmdList) {
CacheType = HostInvisibleCounterBasedImmediateCacheType;
} else if (!CounterBasedEventEnabled && HostVisible) {
CacheType = HostVisibleCacheType;
} else {
CacheType = HostInvisibleCacheType;
}
return UR_RESULT_SUCCESS;
}

// Decrement number of events living in the pool upon event destroy
Expand Down
29 changes: 18 additions & 11 deletions source/adapters/level_zero/event.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueEventsWait(
if (OutEvent) {
Queue->LastCommandEvent = reinterpret_cast<ur_event_handle_t>(*OutEvent);

ZE2UR_CALL(zeEventHostSignal, ((*OutEvent)->ZeEvent));
if (!(*OutEvent)->CounterBasedEventsEnabled)
ZE2UR_CALL(zeEventHostSignal, ((*OutEvent)->ZeEvent));
(*OutEvent)->Completed = true;
}
}
Expand Down Expand Up @@ -766,7 +767,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urExtEventCreate(
UR_CALL(EventCreate(Context, nullptr, false, true, Event));

(*Event)->RefCountExternal++;
ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
if (!(*Event)->CounterBasedEventsEnabled)
ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
return UR_RESULT_SUCCESS;
}

Expand All @@ -784,7 +786,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEventCreateWithNativeHandle(
UR_CALL(EventCreate(Context, nullptr, false, true, Event));

(*Event)->RefCountExternal++;
ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
if (!(*Event)->CounterBasedEventsEnabled)
ZE2UR_CALL(zeEventHostSignal, ((*Event)->ZeEvent));
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -1061,9 +1064,11 @@ ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked,
//
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
bool IsMultiDevice, bool HostVisible,
ur_event_handle_t *RetEvent) {
ur_event_handle_t *RetEvent,
bool CounterBasedEventEnabled) {

bool ProfilingEnabled = !Queue || Queue->isProfilingEnabled();
bool UsingImmediateCommandlists = !Queue || Queue->UsingImmCmdLists;

ur_device_handle_t Device = nullptr;

Expand All @@ -1072,7 +1077,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
}

if (auto CachedEvent = Context->getEventFromContextCache(
HostVisible, ProfilingEnabled, Device)) {
HostVisible, ProfilingEnabled, Device, CounterBasedEventEnabled)) {
*RetEvent = CachedEvent;
return UR_RESULT_SUCCESS;
}
Expand All @@ -1083,14 +1088,15 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
size_t Index = 0;

if (auto Res = Context->getFreeSlotInExistingOrNewPool(
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device))
ZeEventPool, Index, HostVisible, ProfilingEnabled, Device,
CounterBasedEventEnabled, UsingImmediateCommandlists))
return Res;

ZeStruct<ze_event_desc_t> ZeEventDesc;
ZeEventDesc.index = Index;
ZeEventDesc.wait = 0;

if (HostVisible) {
if (HostVisible || CounterBasedEventEnabled) {
ZeEventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
} else {
//
Expand All @@ -1115,7 +1121,7 @@ ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
} catch (...) {
return UR_RESULT_ERROR_UNKNOWN;
}

(*RetEvent)->CounterBasedEventsEnabled = CounterBasedEventEnabled;
if (HostVisible)
(*RetEvent)->HostVisibleEvent =
reinterpret_cast<ur_event_handle_t>(*RetEvent);
Expand All @@ -1137,8 +1143,8 @@ ur_result_t ur_event_handle_t_::reset() {

if (!isHostVisible())
HostVisibleEvent = nullptr;

ZE2UR_CALL(zeEventHostReset, (ZeEvent));
if (!CounterBasedEventsEnabled)
ZE2UR_CALL(zeEventHostReset, (ZeEvent));
return UR_RESULT_SUCCESS;
}

Expand Down Expand Up @@ -1339,7 +1345,8 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList(

zeCommandListAppendWaitOnEvents(ZeCommandList, 1u,
&EventList[I]->ZeEvent);
zeEventHostSignal(MultiDeviceZeEvent);
if (!MultiDeviceEvent->CounterBasedEventsEnabled)
zeEventHostSignal(MultiDeviceZeEvent);

UR_CALL(Queue->executeCommandList(CommandList, /* IsBlocking */ false,
/* OkToBatchCommand */ true));
Expand Down
5 changes: 4 additions & 1 deletion source/adapters/level_zero/event.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ extern "C" {
ur_result_t urEventReleaseInternal(ur_event_handle_t Event);
ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue,
bool IsMultiDevice, bool HostVisible,
ur_event_handle_t *RetEvent);
ur_event_handle_t *RetEvent,
bool CounterBasedEventEnabled = false);
} // extern "C"

// This is an experimental option that allows to disable caching of events in
Expand Down Expand Up @@ -226,6 +227,8 @@ struct ur_event_handle_t_ : _ur_object {
// completion batch for this event. Only used for out-of-order immediate
// command lists.
std::optional<ur_completion_batch_it> completionBatch;
// Keeps track of whether we are using Counter-based Events.
bool CounterBasedEventsEnabled = false;
};

// Helper function to implement zeHostSynchronize.
Expand Down
8 changes: 5 additions & 3 deletions source/adapters/level_zero/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -944,7 +944,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemBufferMap(
}

// Signal this event
ZE2UR_CALL(zeEventHostSignal, (ZeEvent));
if (!(*Event)->CounterBasedEventsEnabled)
ZE2UR_CALL(zeEventHostSignal, (ZeEvent));
(*Event)->Completed = true;
return UR_RESULT_SUCCESS;
}
Expand Down Expand Up @@ -1078,8 +1079,9 @@ UR_APIEXPORT ur_result_t UR_APICALL urEnqueueMemUnmap(
if (Buffer->MapHostPtr)
memcpy(ZeHandleDst + MapInfo.Offset, MappedPtr, MapInfo.Size);

// Signal this event
ZE2UR_CALL(zeEventHostSignal, (ZeEvent));
// Signal this event if it is not using counter based events
if (!(*Event)->CounterBasedEventsEnabled)
ZE2UR_CALL(zeEventHostSignal, (ZeEvent));
(*Event)->Completed = true;
return UR_RESULT_SUCCESS;
}
Expand Down
8 changes: 8 additions & 0 deletions source/adapters/level_zero/platform.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,14 @@ ur_result_t ur_platform_handle_t_::initialize() {
ZeDriverModuleProgramExtensionFound = true;
}
}
// Check if extension is available for Counting Events.
if (strncmp(extension.name, ZE_EVENT_POOL_COUNTER_BASED_EXP_NAME,
strlen(ZE_EVENT_POOL_COUNTER_BASED_EXP_NAME) + 1) == 0) {
if (extension.version ==
ZE_EVENT_POOL_COUNTER_BASED_EXP_VERSION_CURRENT) {
ZeDriverEventPoolCountingEventsExtensionFound = true;
}
}
zeDriverExtensionMap[extension.name] = extension.version;
}

Expand Down
1 change: 1 addition & 0 deletions source/adapters/level_zero/platform.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ struct ur_platform_handle_t_ : public _ur_platform {
// Flags to tell whether various Level Zero platform extensions are available.
bool ZeDriverGlobalOffsetExtensionFound{false};
bool ZeDriverModuleProgramExtensionFound{false};
bool ZeDriverEventPoolCountingEventsExtensionFound{false};

// Cache UR devices for reuse
std::vector<std::unique_ptr<ur_device_handle_t_>> URDevicesCache;
Expand Down
Loading