-
Notifications
You must be signed in to change notification settings - Fork 797
[SYCL] Adds support for atomic fence capabilities device queries #8586
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
435d760
d126996
fbb2998
8e07de6
8327cb0
46e315a
4bad01d
3a24574
eeb02e9
8829169
fc36ecc
0ce9beb
60132b7
36192ee
c1cf809
bf12a92
9548073
4c59edb
3ab7991
f19eb4c
9e7947c
806e054
c55f52a
b01fef5
30bc569
1571410
cda1cd3
45dd762
d4a5d37
918b923
6490a36
924b9b7
879c096
aea9486
2522df7
8d37dfb
2e1d848
38b415f
20f5e18
bef2e36
626b231
cc6166e
ffc9b92
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -77,9 +77,11 @@ | |||||||||
// 12.22 Add piGetDeviceAndHostTimer to query device wall-clock timestamp | ||||||||||
// 12.23 Added new piextEnqueueDeviceGlobalVariableWrite and | ||||||||||
// piextEnqueueDeviceGlobalVariableRead functions. | ||||||||||
// 12.24 Added PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES and | ||||||||||
// PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES for piDeviceGetInfo. | ||||||||||
|
||||||||||
#define _PI_H_VERSION_MAJOR 12 | ||||||||||
#define _PI_H_VERSION_MINOR 23 | ||||||||||
#define _PI_H_VERSION_MINOR 24 | ||||||||||
|
||||||||||
#define _PI_STRING_HELPER(a) #a | ||||||||||
#define _PI_CONCAT(a, b) _PI_STRING_HELPER(a.b) | ||||||||||
|
@@ -313,6 +315,8 @@ typedef enum { | |||||||||
PI_DEVICE_INFO_ATOMIC_64 = 0x10110, | ||||||||||
PI_DEVICE_INFO_ATOMIC_MEMORY_ORDER_CAPABILITIES = 0x10111, | ||||||||||
PI_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES = 0x11000, | ||||||||||
PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = 0x10114, | ||||||||||
PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = 0x10115, | ||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could you rename these and use contiguous values from 0x1FFFF just below
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The file already has There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It's up to you if you want to fix unrelated misses. |
||||||||||
PI_DEVICE_INFO_GPU_HW_THREADS_PER_EU = 0x10112, | ||||||||||
PI_DEVICE_INFO_BACKEND_VERSION = 0x10113, | ||||||||||
// Return whether bfloat16 math functions are supported by device | ||||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1315,6 +1315,10 @@ pi_result cuda_piDeviceGetInfo(pi_device device, pi_device_info param_name, | |
return getInfo(param_value_size, param_value, param_value_size_ret, | ||
capabilities); | ||
} | ||
case PI_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: | ||
case PI_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: | ||
// There is no way to query this in the backend | ||
return PI_ERROR_INVALID_ARG_VALUE; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How is this error reported to the end user? Should we better use set a plugin specific error and have SYCL RT use piPluginGetLastError to retrieve/report it? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||
case PI_EXT_ONEAPI_DEVICE_INFO_BFLOAT16_MATH_FUNCTIONS: { | ||
int major = 0; | ||
sycl::detail::pi::assertion( | ||
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
|
@@ -5,6 +5,7 @@ | |||||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||
// | ||||||||
//===-----------------------------------------------------------------===// | ||||||||
#include <sycl/detail/pi.h> | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. UR should not include PI
Suggested change
|
||||||||
|
||||||||
#include <algorithm> | ||||||||
#include <climits> | ||||||||
|
@@ -1164,6 +1165,26 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo( | |||||||
// bfloat16 math functions are not yet supported on Intel GPUs. | ||||||||
return ReturnValue(bool{false}); | ||||||||
} | ||||||||
case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: { | ||||||||
// There are no explicit restrictions in L0 programming guide, so assume all | ||||||||
// are supported | ||||||||
pi_memory_order_capabilities result = | ||||||||
PI_MEMORY_ORDER_RELAXED | PI_MEMORY_ORDER_ACQUIRE | | ||||||||
PI_MEMORY_ORDER_RELEASE | PI_MEMORY_ORDER_ACQ_REL | | ||||||||
PI_MEMORY_ORDER_SEQ_CST; | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You should write this in UR (extended as needed) and add a conversion in |
||||||||
|
||||||||
return ReturnValue(result); | ||||||||
} | ||||||||
case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: { | ||||||||
// There are no explicit restrictions in L0 programming guide, so assume all | ||||||||
// are supported | ||||||||
pi_memory_scope_capabilities result = | ||||||||
PI_MEMORY_SCOPE_WORK_ITEM | PI_MEMORY_SCOPE_SUB_GROUP | | ||||||||
PI_MEMORY_SCOPE_WORK_GROUP | PI_MEMORY_SCOPE_DEVICE | | ||||||||
PI_MEMORY_SCOPE_SYSTEM; | ||||||||
|
||||||||
return ReturnValue(result); | ||||||||
} | ||||||||
|
||||||||
// TODO: Implement. | ||||||||
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: | ||||||||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -43,6 +43,10 @@ const int UR_EXT_DEVICE_INFO_FREE_MEMORY = UR_EXT_DEVICE_INFO_END - 13; | |
// const int ZER_EXT_DEVICE_INFO_DEVICE_ID = UR_EXT_DEVICE_INFO_END - 14; | ||
// const int ZER_EXT_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE = | ||
// UR_DEVICE_INFO_IMAGE_MAX_ARRAY_SIZE; | ||
const int UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES = | ||
UR_EXT_DEVICE_INFO_END - 16; | ||
const int UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES = | ||
UR_EXT_DEVICE_INFO_END - 17; | ||
Comment on lines
+46
to
+49
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @kbenzie: please take this change into UR |
||
|
||
const ur_device_info_t UR_EXT_DEVICE_INFO_OPENCL_C_VERSION = | ||
(ur_device_info_t)0x103D; | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -275,6 +275,19 @@ struct get_device_info_impl<std::vector<memory_order>, | |
} | ||
}; | ||
|
||
// Specialization for atomic_fence_order_capabilities, PI returns a bitfield | ||
template <> | ||
struct get_device_info_impl<std::vector<memory_order>, | ||
info::device::atomic_fence_order_capabilities> { | ||
static std::vector<memory_order> get(RT::PiDevice dev, const plugin &Plugin) { | ||
pi_memory_order_capabilities result; | ||
Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>( | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why |
||
dev, PiInfoCode<info::device::atomic_fence_order_capabilities>::value, | ||
sizeof(pi_memory_order_capabilities), &result, nullptr); | ||
return readMemoryOrderBitfield(result); | ||
} | ||
}; | ||
|
||
// Specialization for atomic_memory_scope_capabilities, PI returns a bitfield | ||
template <> | ||
struct get_device_info_impl<std::vector<memory_scope>, | ||
|
@@ -288,6 +301,19 @@ struct get_device_info_impl<std::vector<memory_scope>, | |
} | ||
}; | ||
|
||
// Specialization for atomic_fence_scope_capabilities, PI returns a bitfield | ||
template <> | ||
struct get_device_info_impl<std::vector<memory_scope>, | ||
info::device::atomic_fence_scope_capabilities> { | ||
static std::vector<memory_scope> get(RT::PiDevice dev, const plugin &Plugin) { | ||
pi_memory_scope_capabilities result; | ||
Plugin.call_nocheck<PiApiKind::piDeviceGetInfo>( | ||
dev, PiInfoCode<info::device::atomic_fence_scope_capabilities>::value, | ||
sizeof(pi_memory_scope_capabilities), &result, nullptr); | ||
return readMemoryScopeBitfield(result); | ||
} | ||
}; | ||
|
||
// Specialization for bf16 math functions | ||
template <> | ||
struct get_device_info_impl<bool, | ||
|
@@ -1005,13 +1031,27 @@ get_device_info_host<info::device::atomic_memory_order_capabilities>() { | |
memory_order::acq_rel, memory_order::seq_cst}; | ||
} | ||
|
||
template <> | ||
inline std::vector<memory_order> | ||
get_device_info_host<info::device::atomic_fence_order_capabilities>() { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder when we will be able to stop specializing that for host device, because it doesn't really exists anymore |
||
return {memory_order::relaxed, memory_order::acquire, memory_order::release, | ||
memory_order::acq_rel}; | ||
} | ||
|
||
template <> | ||
inline std::vector<memory_scope> | ||
get_device_info_host<info::device::atomic_memory_scope_capabilities>() { | ||
return {memory_scope::work_item, memory_scope::sub_group, | ||
memory_scope::work_group, memory_scope::device, memory_scope::system}; | ||
} | ||
|
||
template <> | ||
inline std::vector<memory_scope> | ||
get_device_info_host<info::device::atomic_fence_scope_capabilities>() { | ||
return {memory_scope::work_item, memory_scope::sub_group, | ||
memory_scope::work_group, memory_scope::device, memory_scope::system}; | ||
} | ||
|
||
template <> | ||
inline bool | ||
get_device_info_host<info::device::ext_oneapi_bfloat16_math_functions>() { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@kbenzie: please take this change to Unified Runtime
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Tracking in oneapi-src/unified-runtime#399