Skip to content

Commit 68bc193

Browse files
authored
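[UR] Optimize initialization of ur_kernel_handle_t_::zeCommonProperties (#20351)
Remove lazy initialization of ur_kernel_handle_t_::zeCommonProperties to avoid TLS access on the hot path. The properties are now computed eagerly in ur_kernel_handle_t_::completeInitialization() and stored as a plain member.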
1 parent 2390bb6 commit 68bc193

File tree

2 files changed

+13
-14
lines changed

2 files changed

+13
-14
lines changed

unified-runtime/source/adapters/level_zero/v2/kernel.cpp

Lines changed: 12 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -135,15 +135,14 @@ void ur_kernel_handle_t_::completeInitialization() {
135135
assert(nonEmptyKernelIt != deviceKernels.end());
136136
nonEmptyKernel = &nonEmptyKernelIt->value();
137137

138-
zeCommonProperties.Compute = [kernel = nonEmptyKernel](
139-
common_properties_t &props) {
140-
size_t size = 0;
141-
ZE_CALL_NOCHECK(zeKernelGetName, (kernel->hKernel.get(), &size, nullptr));
142-
props.name.resize(size);
143-
ZE_CALL_NOCHECK(zeKernelGetName,
144-
(kernel->hKernel.get(), &size, props.name.data()));
145-
props.numKernelArgs = kernel->zeKernelProperties->numKernelArgs;
146-
};
138+
size_t size = 0;
139+
ZE_CALL_NOCHECK(zeKernelGetName,
140+
(nonEmptyKernel->hKernel.get(), &size, nullptr));
141+
zeCommonProperties.name.resize(size);
142+
ZE_CALL_NOCHECK(zeKernelGetName, (nonEmptyKernel->hKernel.get(), &size,
143+
zeCommonProperties.name.data()));
144+
zeCommonProperties.numKernelArgs =
145+
nonEmptyKernel->zeKernelProperties->numKernelArgs;
147146
}
148147

149148
size_t ur_kernel_handle_t_::deviceIndex(ur_device_handle_t hDevice) const {
@@ -185,7 +184,7 @@ ur_kernel_handle_t_::getZeHandle(ur_device_handle_t hDevice) {
185184

186185
ur_kernel_handle_t_::common_properties_t
187186
ur_kernel_handle_t_::getCommonProperties() const {
188-
return zeCommonProperties.get();
187+
return zeCommonProperties;
189188
}
190189

191190
const ze_kernel_properties_t &
@@ -198,7 +197,7 @@ ur_result_t ur_kernel_handle_t_::setArgValue(
198197
uint32_t argIndex, size_t argSize,
199198
const ur_kernel_arg_value_properties_t * /*pProperties*/,
200199
const void *pArgValue) {
201-
if (argIndex > zeCommonProperties->numKernelArgs - 1) {
200+
if (argIndex > zeCommonProperties.numKernelArgs - 1) {
202201
return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX;
203202
}
204203

@@ -315,7 +314,7 @@ ur_result_t ur_kernel_handle_t_::prepareForSubmission(
315314

316315
ur_result_t ur_kernel_handle_t_::addPendingMemoryAllocation(
317316
pending_memory_allocation_t allocation) {
318-
if (allocation.argIndex > zeCommonProperties->numKernelArgs - 1) {
317+
if (allocation.argIndex > zeCommonProperties.numKernelArgs - 1) {
319318
return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX;
320319
}
321320

@@ -327,7 +326,7 @@ ur_result_t ur_kernel_handle_t_::addPendingMemoryAllocation(
327326
ur_result_t
328327
ur_kernel_handle_t_::addPendingPointerArgument(uint32_t argIndex,
329328
const void *pArgValue) {
330-
if (argIndex > zeCommonProperties->numKernelArgs - 1) {
329+
if (argIndex > zeCommonProperties.numKernelArgs - 1) {
331330
return UR_RESULT_ERROR_INVALID_KERNEL_ARGUMENT_INDEX;
332331
}
333332

unified-runtime/source/adapters/level_zero/v2/kernel.hpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -117,7 +117,7 @@ struct ur_kernel_handle_t_ : ur_object {
117117
std::vector<std::optional<ur_single_device_kernel_t>> deviceKernels;
118118

119119
// Cache of the common kernel properties.
120-
mutable ZeCache<common_properties_t> zeCommonProperties;
120+
common_properties_t zeCommonProperties;
121121

122122
// Index of the device in the deviceKernels vector.
123123
size_t deviceIndex(ur_device_handle_t hDevice) const;

0 commit comments

Comments
 (0)