From fa476e192dc5c9b2d68fe2f94731eb0472f64a26 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Tue, 23 May 2023 15:03:58 +0100 Subject: [PATCH 1/6] [UR] Refactor urDevicePartition to use desc struct --- include/ur.py | 95 +++-- include/ur_api.h | 120 ++++--- include/ur_ddi.h | 3 +- scripts/core/PROG.rst | 25 +- scripts/core/common.yml | 2 + scripts/core/device.yml | 103 ++++-- source/adapters/null/ur_nullddi.cpp | 10 +- source/common/ur_params.hpp | 324 +++++++++++------- source/common/ur_pool_manager.hpp | 14 +- source/loader/layers/tracing/ur_trcddi.cpp | 13 +- source/loader/layers/validation/ur_valddi.cpp | 18 +- source/loader/ur_ldrddi.cpp | 10 +- source/loader/ur_libapi.cpp | 14 +- source/ur_api.cpp | 10 +- test/conformance/device/urDevicePartition.cpp | 87 +++-- test/conformance/device/urDeviceRelease.cpp | 6 +- test/conformance/device/urDeviceRetain.cpp | 5 +- .../testing/include/uur/fixtures.h | 7 +- test/conformance/testing/include/uur/utils.h | 15 +- test/conformance/testing/source/utils.cpp | 40 ++- test/unit/utils/params.cpp | 13 +- 21 files changed, 580 insertions(+), 354 deletions(-) diff --git a/include/ur.py b/include/ur.py index 3188bf8fba..b19a15c695 100644 --- a/include/ur.py +++ b/include/ur.py @@ -225,6 +225,7 @@ class ur_structure_type_v(IntEnum): PROGRAM_NATIVE_PROPERTIES = 23 ## ::ur_program_native_properties_t SAMPLER_NATIVE_PROPERTIES = 24 ## ::ur_sampler_native_properties_t QUEUE_NATIVE_DESC = 25 ## ::ur_queue_native_desc_t + DEVICE_PARTITION_DESC = 26 ## ::ur_device_partition_desc_t class ur_structure_type_t(c_int): def __str__(self): @@ -502,15 +503,15 @@ class ur_device_info_v(IntEnum): PREFERRED_INTEROP_USER_SYNC = 74 ## [::ur_bool_t] prefer user synchronization when sharing object with ## other API PARENT_DEVICE = 75 ## [::ur_device_handle_t] return parent device handle - PARTITION_PROPERTIES = 76 ## [::ur_device_partition_property_t[]] Returns an array of partition - ## types supported by the device + PARTITION_PROPERTIES = 76 ## 
[::ur_device_partition_t[]] Returns an array of partition types + ## supported by the device PARTITION_MAX_SUB_DEVICES = 77 ## [uint32_t] maximum number of sub-devices when the device is ## partitioned PARTITION_AFFINITY_DOMAIN = 78 ## [::ur_device_affinity_domain_flags_t] Returns a bit-field of the ## supported affinity domains for partitioning. ## If the device does not support any affinity domains, then 0 will be returned. - PARTITION_TYPE = 79 ## [::ur_device_partition_property_t[]] return an array of - ## ::ur_device_partition_property_t for properties specified in + PARTITION_TYPE = 79 ## [::ur_device_partition_desc_t[]] return an array of + ## ::ur_device_partition_desc_t for properties specified in ## ::urDevicePartition MAX_NUM_SUB_GROUPS = 80 ## [uint32_t] max number of sub groups SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81 ## [::ur_bool_t] support sub group independent forward progress @@ -569,9 +570,33 @@ def __str__(self): ############################################################################### -## @brief Device partition property type -class ur_device_partition_property_t(c_intptr_t): - pass +## @brief Device affinity domain +class ur_device_affinity_domain_flags_v(IntEnum): + NUMA = UR_BIT(0) ## Split the device into sub devices comprised of compute units that + ## share a NUMA node. + L4_CACHE = UR_BIT(1) ## Split the device into sub devices comprised of compute units that + ## share a level 4 data cache. + L3_CACHE = UR_BIT(2) ## Split the device into sub devices comprised of compute units that + ## share a level 3 data cache. + L2_CACHE = UR_BIT(3) ## Split the device into sub devices comprised of compute units that + ## share a level 2 data cache. + L1_CACHE = UR_BIT(4) ## Split the device into sub devices comprised of compute units that + ## share a level 1 data cache. + NEXT_PARTITIONABLE = UR_BIT(5) ## Split the device along the next partitionable affinity domain. 
+ ## The implementation shall find the first level along which the device + ## or sub device may be further subdivided in the order: + ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, + ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, + ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, + ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, + ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, + ## and partition the device into sub devices comprised of compute units + ## that share memory subsystems at this level. + +class ur_device_affinity_domain_flags_t(c_int): + def __str__(self): + return hex(self.value) + ############################################################################### ## @brief Partition Properties @@ -587,6 +612,29 @@ def __str__(self): return str(ur_device_partition_v(self.value)) +############################################################################### +## @brief Device partition value. +class ur_device_partition_value_t(Structure): + _fields_ = [ + ("equally", c_ulong), ## [in] Number of compute units per sub-device when partitioning with + ## ::UR_DEVICE_PARTITION_EQUALLY. + ("count", c_ulong), ## [in] Number of compute units in a sub-device when partitioning with + ## ::UR_DEVICE_PARTITION_BY_COUNTS. + ("affinity_domain", ur_device_affinity_domain_flags_t) ## [in] The affinity domain to partition for when partitioning with + ## ::UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN. + ] + +############################################################################### +## @brief Device partition description +class ur_device_partition_desc_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC + ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure + ("type", ur_device_partition_t), ## [in] The partitioning type to be used. + ("value", ur_device_partition_value_t) ## [in] The partitioning value. 
+ ] + ############################################################################### ## @brief FP capabilities class ur_device_fp_capability_flags_v(IntEnum): @@ -639,35 +687,6 @@ def __str__(self): return hex(self.value) -############################################################################### -## @brief Device affinity domain -class ur_device_affinity_domain_flags_v(IntEnum): - NUMA = UR_BIT(0) ## Split the device into sub devices comprised of compute units that - ## share a NUMA node. - L4_CACHE = UR_BIT(1) ## Split the device into sub devices comprised of compute units that - ## share a level 4 data cache. - L3_CACHE = UR_BIT(2) ## Split the device into sub devices comprised of compute units that - ## share a level 3 data cache. - L2_CACHE = UR_BIT(3) ## Split the device into sub devices comprised of compute units that - ## share a level 2 data cache. - L1_CACHE = UR_BIT(4) ## Split the device into sub devices comprised of compute units that - ## share a level 1 data cache. - NEXT_PARTITIONABLE = UR_BIT(5) ## Split the device along the next partitionable affinity domain. - ## The implementation shall find the first level along which the device - ## or sub device may be further subdivided in the order: - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, - ## ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, - ## and partition the device into sub devices comprised of compute units - ## that share memory subsystems at this level. 
- -class ur_device_affinity_domain_flags_t(c_int): - def __str__(self): - return hex(self.value) - - ############################################################################### ## @brief Native device creation properties class ur_device_native_properties_t(Structure): @@ -2805,9 +2824,9 @@ class ur_usm_dditable_t(Structure): ############################################################################### ## @brief Function-pointer for urDevicePartition if __use_win_types: - _urDevicePartition_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_property_t), c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) + _urDevicePartition_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_desc_t), c_size_t, c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) else: - _urDevicePartition_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_property_t), c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) + _urDevicePartition_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_desc_t), c_size_t, c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) ############################################################################### ## @brief Function-pointer for urDeviceSelectBinary diff --git a/include/ur_api.h b/include/ur_api.h index 1aed46bc17..80735d430f 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -249,6 +249,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC = 26, ///< ::ur_device_partition_desc_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -851,15 +852,15 @@ typedef enum ur_device_info_t { UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = 74, ///< 
[::ur_bool_t] prefer user synchronization when sharing object with ///< other API UR_DEVICE_INFO_PARENT_DEVICE = 75, ///< [::ur_device_handle_t] return parent device handle - UR_DEVICE_INFO_PARTITION_PROPERTIES = 76, ///< [::ur_device_partition_property_t[]] Returns an array of partition - ///< types supported by the device + UR_DEVICE_INFO_PARTITION_PROPERTIES = 76, ///< [::ur_device_partition_t[]] Returns an array of partition types + ///< supported by the device UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = 77, ///< [uint32_t] maximum number of sub-devices when the device is ///< partitioned UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = 78, ///< [::ur_device_affinity_domain_flags_t] Returns a bit-field of the ///< supported affinity domains for partitioning. ///< If the device does not support any affinity domains, then 0 will be returned. - UR_DEVICE_INFO_PARTITION_TYPE = 79, ///< [::ur_device_partition_property_t[]] return an array of - ///< ::ur_device_partition_property_t for properties specified in + UR_DEVICE_INFO_PARTITION_TYPE = 79, ///< [::ur_device_partition_desc_t[]] return an array of + ///< ::ur_device_partition_desc_t for properties specified in ///< ::urDevicePartition UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 80, ///< [uint32_t] max number of sub groups UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81, ///< [::ur_bool_t] support sub group independent forward progress @@ -1010,8 +1011,36 @@ urDeviceRelease( ); /////////////////////////////////////////////////////////////////////////////// -/// @brief Device partition property type -typedef intptr_t ur_device_partition_property_t; +/// @brief Device affinity domain +typedef uint32_t ur_device_affinity_domain_flags_t; +typedef enum ur_device_affinity_domain_flag_t { + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA = UR_BIT(0), ///< Split the device into sub devices comprised of compute units that + ///< share a NUMA node. 
+ UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE = UR_BIT(1), ///< Split the device into sub devices comprised of compute units that + ///< share a level 4 data cache. + UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE = UR_BIT(2), ///< Split the device into sub devices comprised of compute units that + ///< share a level 3 data cache. + UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE = UR_BIT(3), ///< Split the device into sub devices comprised of compute units that + ///< share a level 2 data cache. + UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE = UR_BIT(4), ///< Split the device into sub devices comprised of compute units that + ///< share a level 1 data cache. + UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE = UR_BIT(5), ///< Split the device along the next partitionable affinity domain. + ///< The implementation shall find the first level along which the device + ///< or sub device may be further subdivided in the order: + ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, + ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, + ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, + ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, + ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, + ///< and partition the device into sub devices comprised of compute units + ///< that share memory subsystems at this level. + /// @cond + UR_DEVICE_AFFINITY_DOMAIN_FLAG_FORCE_UINT32 = 0x7fffffff + /// @endcond + +} ur_device_affinity_domain_flag_t; +/// @brief Bit Mask for validating ur_device_affinity_domain_flags_t +#define UR_DEVICE_AFFINITY_DOMAIN_FLAGS_MASK 0xffffffc0 /////////////////////////////////////////////////////////////////////////////// /// @brief Partition Properties @@ -1027,6 +1056,29 @@ typedef enum ur_device_partition_t { } ur_device_partition_t; +/////////////////////////////////////////////////////////////////////////////// +/// @brief Device partition value. 
+typedef union ur_device_partition_value_t { + uint32_t equally; ///< [in] Number of compute units per sub-device when partitioning with + ///< ::UR_DEVICE_PARTITION_EQUALLY. + uint32_t count; ///< [in] Number of compute units in a sub-device when partitioning with + ///< ::UR_DEVICE_PARTITION_BY_COUNTS. + ur_device_affinity_domain_flags_t affinity_domain; ///< [in] The affinity domain to partition for when partitioning with + ///< ::UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN. + +} ur_device_partition_value_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Device partition description +typedef struct ur_device_partition_desc_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC + const void *pNext; ///< [in][optional] pointer to extension-specific structure + ur_device_partition_t type; ///< [in] The partitioning type to be used. + ur_device_partition_value_t value; ///< [in] The partitioning value. + +} ur_device_partition_desc_t; + /////////////////////////////////////////////////////////////////////////////// /// @brief Partition the device into sub-devices /// @@ -1051,18 +1103,23 @@ typedef enum ur_device_partition_t { /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_DEVICE_PARTITION_BY_CSLICE < pProperties->type` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `DescCount == 0` UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( - ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_property_t *pProperties, ///< [in] null-terminated array of <$_device_partition_t enum, value> pairs. - uint32_t NumDevices, ///< [in] the number of sub-devices. 
- ur_device_handle_t *phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. - ///< If NumDevices is less than the number of sub-devices available, then - ///< the function shall only retrieve that number of sub-devices. - uint32_t *pNumDevicesRet ///< [out][optional] pointer to the number of sub-devices the device can be - ///< partitioned into according to the partitioning property. + ur_device_handle_t hDevice, ///< [in] handle of the device to partition. + const ur_device_partition_desc_t *pProperties, ///< [in] Array of partition descriptors. + size_t DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. + uint32_t NumDevices, ///< [in] the number of sub-devices. + ur_device_handle_t *phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. + ///< If NumDevices is less than the number of sub-devices available, then + ///< the function shall only retrieve that number of sub-devices. + uint32_t *pNumDevicesRet ///< [out][optional] pointer to the number of sub-devices the device can be + ///< partitioned into according to the partitioning property. ); /////////////////////////////////////////////////////////////////////////////// @@ -1159,38 +1216,6 @@ typedef enum ur_device_exec_capability_flag_t { /// @brief Bit Mask for validating ur_device_exec_capability_flags_t #define UR_DEVICE_EXEC_CAPABILITY_FLAGS_MASK 0xfffffffc -/////////////////////////////////////////////////////////////////////////////// -/// @brief Device affinity domain -typedef uint32_t ur_device_affinity_domain_flags_t; -typedef enum ur_device_affinity_domain_flag_t { - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA = UR_BIT(0), ///< Split the device into sub devices comprised of compute units that - ///< share a NUMA node. - UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE = UR_BIT(1), ///< Split the device into sub devices comprised of compute units that - ///< share a level 4 data cache. 
- UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE = UR_BIT(2), ///< Split the device into sub devices comprised of compute units that - ///< share a level 3 data cache. - UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE = UR_BIT(3), ///< Split the device into sub devices comprised of compute units that - ///< share a level 2 data cache. - UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE = UR_BIT(4), ///< Split the device into sub devices comprised of compute units that - ///< share a level 1 data cache. - UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE = UR_BIT(5), ///< Split the device along the next partitionable affinity domain. - ///< The implementation shall find the first level along which the device - ///< or sub device may be further subdivided in the order: - ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, - ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, - ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, - ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, - ///< ::UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, - ///< and partition the device into sub devices comprised of compute units - ///< that share memory subsystems at this level. - /// @cond - UR_DEVICE_AFFINITY_DOMAIN_FLAG_FORCE_UINT32 = 0x7fffffff - /// @endcond - -} ur_device_affinity_domain_flag_t; -/// @brief Bit Mask for validating ur_device_affinity_domain_flags_t -#define UR_DEVICE_AFFINITY_DOMAIN_FLAGS_MASK 0xffffffc0 - /////////////////////////////////////////////////////////////////////////////// /// @brief Return platform native device handle. 
/// @@ -7171,7 +7196,8 @@ typedef struct ur_device_release_params_t { /// allowing the callback the ability to modify the parameter's value typedef struct ur_device_partition_params_t { ur_device_handle_t *phDevice; - const ur_device_partition_property_t **ppProperties; + const ur_device_partition_desc_t **ppProperties; + size_t *pDescCount; uint32_t *pNumDevices; ur_device_handle_t **pphSubDevices; uint32_t **ppNumDevicesRet; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index 2e2d421e98..c301ebe471 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1394,7 +1394,8 @@ typedef ur_result_t(UR_APICALL *ur_pfnDeviceRelease_t)( /// @brief Function-pointer for urDevicePartition typedef ur_result_t(UR_APICALL *ur_pfnDevicePartition_t)( ur_device_handle_t, - const ur_device_partition_property_t *, + const ur_device_partition_desc_t *, + size_t, uint32_t, ur_device_handle_t *, uint32_t *); diff --git a/scripts/core/PROG.rst b/scripts/core/PROG.rst index 786c4eb720..81aa472039 100644 --- a/scripts/core/PROG.rst +++ b/scripts/core/PROG.rst @@ -117,29 +117,32 @@ fixed part of the parent device, which can explicitly be programmed individually .. 
parsed-literal:: ${x}_device_handle_t hDevice; - ${x}_device_partition_property_t properties[] = { - ${X}_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - ${X}_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE, - 0}; + ${x}_device_partition_desc_t properties; + properties.stype = ${X}_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; + properties.pNext = nullptr; + properties.type = ${X}_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + properties.value.affinity_domain = ${X}_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; uint32_t count = 0; std::vector<${x}_device_handle_t> subDevices; - ${x}DevicePartition(hDevice, &properties, &count, nullptr, nullptr); + ${x}DevicePartition(hDevice, &properties, 1, 0, nullptr, &count); if (count > 0) { subDevices.resize(count); - ${x}DevicePartition(Device, &properties, &count, &subDevices.data(), nullptr); + ${x}DevicePartition(hDevice, &properties, 1, count, subDevices.data(), nullptr); } The returned sub-devices may be requested for further partitioning into sub-sub-devices, and so on. -An implementation would return "0" in the count if no further partitioning is supported. +An implementation will return "0" in the count if no further partitioning is supported. .. 
parsed-literal:: - uint32_t count = 1; - ${x}_device_handle_t hSubSubDevice; - ${x}DevicePartition(subDevices[0], properties, &count, &hSubSubDevice, nullptr); - + uint32_t count; + ${x}DevicePartition(subDevices[0], &properties, 1, 0, nullptr, &count); + if(count == 0){ + // no further partitioning allowed + } + Contexts ======== diff --git a/scripts/core/common.yml b/scripts/core/common.yml index 124bd825bd..d4d275cbb6 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -314,6 +314,8 @@ etors: desc: $x_sampler_native_properties_t - name: QUEUE_NATIVE_DESC desc: $x_queue_native_desc_t + - name: DEVICE_PARTITION_DESC + desc: $x_device_partition_desc_t --- #-------------------------------------------------------------------------- type: struct desc: "Base for all properties types" diff --git a/scripts/core/device.yml b/scripts/core/device.yml index e09916b8fc..55b63cc96d 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -301,7 +301,7 @@ etors: - name: PARENT_DEVICE desc: "[$x_device_handle_t] return parent device handle" - name: PARTITION_PROPERTIES - desc: "[$x_device_partition_property_t[]] Returns an array of partition types supported by the device" + desc: "[$x_device_partition_t[]] Returns an array of partition types supported by the device" - name: PARTITION_MAX_SUB_DEVICES desc: "[uint32_t] maximum number of sub-devices when the device is partitioned" - name: PARTITION_AFFINITY_DOMAIN @@ -309,7 +309,7 @@ etors: [$x_device_affinity_domain_flags_t] Returns a bit-field of the supported affinity domains for partitioning. If the device does not support any affinity domains, then 0 will be returned. 
- name: PARTITION_TYPE - desc: "[$x_device_partition_property_t[]] return an array of $x_device_partition_property_t for properties specified in $xDevicePartition" + desc: "[$x_device_partition_desc_t[]] return an array of $x_device_partition_desc_t for properties specified in $xDevicePartition" - name: MAX_NUM_SUB_GROUPS desc: "[uint32_t] max number of sub groups" - name: SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS @@ -458,10 +458,34 @@ params: desc: | [in] handle of the device to release. --- #-------------------------------------------------------------------------- -type: typedef -desc: "Device partition property type" -name: $x_device_partition_property_t -value: intptr_t +type: enum +desc: "Device affinity domain" +class: $xDevice +name: $x_device_affinity_domain_flags_t +etors: + - name: NUMA + desc: "Split the device into sub devices comprised of compute units that share a NUMA node." + value: "$X_BIT(0)" + - name: L4_CACHE + desc: "Split the device into sub devices comprised of compute units that share a level 4 data cache." + value: "$X_BIT(1)" + - name: L3_CACHE + desc: "Split the device into sub devices comprised of compute units that share a level 3 data cache." + value: "$X_BIT(2)" + - name: L2_CACHE + desc: "Split the device into sub devices comprised of compute units that share a level 2 data cache." + value: "$X_BIT(3)" + - name: L1_CACHE + desc: "Split the device into sub devices comprised of compute units that share a level 1 data cache." + value: "$X_BIT(4)" + - name: NEXT_PARTITIONABLE + desc: | + Split the device along the next partitionable affinity domain. 
+ The implementation shall find the first level along which the device + or sub device may be further subdivided in the order: + $X_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, $X_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, $X_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, $X_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, $X_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, + and partition the device into sub devices comprised of compute units that share memory subsystems at this level. + value: "$X_BIT(5)" --- #-------------------------------------------------------------------------- type: enum desc: "Partition Properties" @@ -483,6 +507,34 @@ etors: desc: "Partition by c-slice" value: "0x1089" --- #-------------------------------------------------------------------------- +type: union +desc: "Device partition value." +name: $x_device_partition_value_t +class: $xDevice +members: + - type: uint32_t + name: equally + desc: "[in] Number of compute units per sub-device when partitioning with $X_DEVICE_PARTITION_EQUALLY." + - type: uint32_t + name: count + desc: "[in] Number of compute units in a sub-device when partitioning with $X_DEVICE_PARTITION_BY_COUNTS." + - type: $x_device_affinity_domain_flags_t + name: affinity_domain + desc: "[in] The affinity domain to partition for when partitioning with $X_DEVICE_PARTITION_BY_AFFINITY_DOMAIN." +--- #-------------------------------------------------------------------------- +type: struct +desc: "Device partition description" +name: $x_device_partition_desc_t +base: $x_base_desc_t +class: $xDevice +members: + - type: $x_device_partition_t + name: type + desc: "[in] The partitioning type to be used." + - type: $x_device_partition_value_t + name: value + desc: "[in] The partitioning value." +--- #-------------------------------------------------------------------------- type: function desc: "Partition the device into sub-devices" class: $xDevice @@ -501,10 +553,14 @@ params: name: hDevice desc: | [in] handle of the device to partition. 
- - type: const $x_device_partition_property_t* + - type: const $x_device_partition_desc_t* name: pProperties desc: | - [in] null-terminated array of <$_device_partition_t enum, value> pairs. + [in] Array of partition descriptors. + - type: size_t + name: DescCount + desc: | + [in] Number of descriptors pointed to by `pProperties`. - type: "uint32_t" name: NumDevices desc: | @@ -521,6 +577,8 @@ params: returns: - $X_RESULT_ERROR_DEVICE_PARTITION_FAILED - $X_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT + - $X_RESULT_ERROR_INVALID_SIZE: + - "`DescCount == 0`" --- #-------------------------------------------------------------------------- type: function desc: "Selects the most appropriate device binary based on runtime information and the IR characteristics." @@ -623,35 +681,6 @@ etors: desc: "Support native kernel execution" value: "$X_BIT(1)" --- #-------------------------------------------------------------------------- -type: enum -desc: "Device affinity domain" -class: $xDevice -name: $x_device_affinity_domain_flags_t -etors: - - name: NUMA - desc: "Split the device into sub devices comprised of compute units that share a NUMA node." - value: "$X_BIT(0)" - - name: L4_CACHE - desc: "Split the device into sub devices comprised of compute units that share a level 4 data cache." - value: "$X_BIT(1)" - - name: L3_CACHE - desc: "Split the device into sub devices comprised of compute units that share a level 3 data cache." - value: "$X_BIT(2)" - - name: L2_CACHE - desc: "Split the device into sub devices comprised of compute units that share a level 2 data cache." - value: "$X_BIT(3)" - - name: L1_CACHE - desc: "Split the device into sub devices comprised of compute units that share a level 1 data cache." - value: "$X_BIT(4)" - - name: NEXT_PARTITIONABLE - desc: | - Split the device along the next partitionable affinity domain. 
- The implementation shall find the first level along which the device - or sub device may be further subdivided in the order: - $X_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA, $X_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE, $X_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE, $X_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE, $X_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE, - and partition the device into sub devices comprised of compute units that share memory subsystems at this level. - value: "$X_BIT(5)" ---- #-------------------------------------------------------------------------- type: class desc: "C++ wrapper for a device" name: $xDevice diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index f624d97545..0968a88ccd 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -353,8 +353,10 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// @brief Intercept function for urDevicePartition __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_property_t * - pProperties, ///< [in] null-terminated array of <$_device_partition_t enum, value> pairs. + const ur_device_partition_desc_t + *pProperties, ///< [in] Array of partition descriptors. + size_t + DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. 
@@ -369,8 +371,8 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( // if the driver has created a custom function, then call it instead of using the generic path auto pfnPartition = d_context.urDdiTable.Device.pfnPartition; if (nullptr != pfnPartition) { - result = pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, - pNumDevicesRet); + result = pfnPartition(hDevice, pProperties, DescCount, NumDevices, + phSubDevices, pNumDevicesRet); } else { // generic implementation for (size_t i = 0; (nullptr != phSubDevices) && (i < NumDevices); ++i) { diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index 6547f89461..c9aa1bbde0 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -36,6 +36,10 @@ template <> inline void serializeTagged(std::ostream &os, const void *ptr, ur_device_info_t value, size_t size); +template <> +inline void serializeFlag(std::ostream &os, + uint32_t flag); + template <> inline void serializeFlag(std::ostream &os, uint32_t flag); @@ -44,10 +48,6 @@ template <> inline void serializeFlag(std::ostream &os, uint32_t flag); -template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag); - template <> inline void serializeFlag(std::ostream &os, uint32_t flag); @@ -179,8 +179,14 @@ inline std::ostream &operator<<(std::ostream &os, const struct ur_device_binary_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_device_type_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value); +inline std::ostream &operator<<(std::ostream &os, + enum ur_device_affinity_domain_flag_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_device_partition_t value); +inline std::ostream &operator<<(std::ostream &os, + const union ur_device_partition_value_t params); +inline std::ostream &operator<<(std::ostream &os, + const struct ur_device_partition_desc_t params); inline std::ostream &operator<<(std::ostream &os, enum 
ur_device_fp_capability_flag_t value); inline std::ostream &operator<<(std::ostream &os, @@ -189,8 +195,6 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_local_mem_type_t value); inline std::ostream &operator<<(std::ostream &os, enum ur_device_exec_capability_flag_t value); -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_affinity_domain_flag_t value); inline std::ostream & operator<<(std::ostream &os, const struct ur_device_native_properties_t params); inline std::ostream &operator<<(std::ostream &os, @@ -712,6 +716,10 @@ inline std::ostream &operator<<(std::ostream &os, case UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC: os << "UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC"; break; + + case UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC: + os << "UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC"; + break; default: os << "unknown enumerator"; break; @@ -875,6 +883,12 @@ inline void serializeStruct(std::ostream &os, const void *ptr) { (const ur_queue_native_desc_t *)ptr; ur_params::serializePtr(os, pstruct); } break; + + case UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC: { + const ur_device_partition_desc_t *pstruct = + (const ur_device_partition_desc_t *)ptr; + ur_params::serializePtr(os, pstruct); + } break; default: os << "unknown enumerator"; break; @@ -2734,10 +2748,9 @@ inline void serializeTagged(std::ostream &os, const void *ptr, case UR_DEVICE_INFO_PARTITION_PROPERTIES: { - const ur_device_partition_property_t *tptr = - (const ur_device_partition_property_t *)ptr; + const ur_device_partition_t *tptr = (const ur_device_partition_t *)ptr; os << "{"; - size_t nelems = size / sizeof(ur_device_partition_property_t); + size_t nelems = size / sizeof(ur_device_partition_t); for (size_t i = 0; i < nelems; ++i) { if (i != 0) { os << ", "; @@ -2780,10 +2793,10 @@ inline void serializeTagged(std::ostream &os, const void *ptr, case UR_DEVICE_INFO_PARTITION_TYPE: { - const ur_device_partition_property_t *tptr = - (const ur_device_partition_property_t *)ptr; + const 
ur_device_partition_desc_t *tptr = + (const ur_device_partition_desc_t *)ptr; os << "{"; - size_t nelems = size / sizeof(ur_device_partition_property_t); + size_t nelems = size / sizeof(ur_device_partition_desc_t); for (size_t i = 0; i < nelems; ++i) { if (i != 0) { os << ", "; @@ -3265,6 +3278,123 @@ inline void serializeTagged(std::ostream &os, const void *ptr, } } } // namespace ur_params +inline std::ostream &operator<<(std::ostream &os, + enum ur_device_affinity_domain_flag_t value) { + switch (value) { + + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA: + os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA"; + break; + + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE: + os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE"; + break; + + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE: + os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE"; + break; + + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE: + os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE"; + break; + + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE: + os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE"; + break; + + case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE: + os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE"; + break; + default: + os << "unknown enumerator"; + break; + } + return os; +} +namespace ur_params { + +template <> +inline void serializeFlag(std::ostream &os, + uint32_t flag) { + uint32_t val = flag; + bool first = true; + + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) == + (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) { + val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; + } + + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) == + (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) { + val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE; + } + + if ((val & 
UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) == + (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) { + val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE; + } + + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) == + (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) { + val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE; + } + + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) == + (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) { + val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE; + } + + if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) == + (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) { + val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; + if (!first) { + os << " | "; + } else { + first = false; + } + os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; + } + if (val != 0) { + std::bitset<32> bits(val); + if (!first) { + os << " | "; + } + os << "unknown bit flags " << bits; + } else if (first) { + os << "0"; + } +} +} // namespace ur_params inline std::ostream &operator<<(std::ostream &os, enum ur_device_partition_t value) { switch (value) { @@ -3294,6 +3424,54 @@ inline std::ostream &operator<<(std::ostream &os, } return os; } +inline std::ostream & +operator<<(std::ostream &os, const union ur_device_partition_value_t params) { + os << "(union ur_device_partition_value_t){"; + + os << ".equally = "; + + os << (params.equally); + + os << ", "; + os << ".count = "; + + os << (params.count); + + os << ", "; + os << ".affinity_domain = "; + + ur_params::serializeFlag( + os, (params.affinity_domain)); + + os << "}"; + return os; +} +inline std::ostream & 
+operator<<(std::ostream &os, const struct ur_device_partition_desc_t params) { + os << "(struct ur_device_partition_desc_t){"; + + os << ".stype = "; + + os << (params.stype); + + os << ", "; + os << ".pNext = "; + + ur_params::serializeStruct(os, (params.pNext)); + + os << ", "; + os << ".type = "; + + os << (params.type); + + os << ", "; + os << ".value = "; + + os << (params.value); + + os << "}"; + return os; +} inline std::ostream &operator<<(std::ostream &os, enum ur_device_fp_capability_flag_t value) { switch (value) { @@ -3541,123 +3719,6 @@ inline void serializeFlag(std::ostream &os, } } } // namespace ur_params -inline std::ostream &operator<<(std::ostream &os, - enum ur_device_affinity_domain_flag_t value) { - switch (value) { - - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA: - os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA"; - break; - - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE: - os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE"; - break; - - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE: - os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE"; - break; - - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE: - os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE"; - break; - - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE: - os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE"; - break; - - case UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE: - os << "UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE"; - break; - default: - os << "unknown enumerator"; - break; - } - return os; -} -namespace ur_params { - -template <> -inline void serializeFlag(std::ostream &os, - uint32_t flag) { - uint32_t val = flag; - bool first = true; - - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA) { - val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_NUMA; - } - - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) == - 
(uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE) { - val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L4_CACHE; - } - - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE) { - val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L3_CACHE; - } - - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE) { - val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L2_CACHE; - } - - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE) { - val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_L1_CACHE; - } - - if ((val & UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) == - (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE) { - val ^= (uint32_t)UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; - if (!first) { - os << " | "; - } else { - first = false; - } - os << UR_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; - } - if (val != 0) { - std::bitset<32> bits(val); - if (!first) { - os << " | "; - } - os << "unknown bit flags " << bits; - } else if (first) { - os << "0"; - } -} -} // namespace ur_params inline std::ostream & operator<<(std::ostream &os, const struct ur_device_native_properties_t params) { @@ -11674,6 +11735,11 @@ operator<<(std::ostream &os, ur_params::serializePtr(os, *(params->ppProperties)); + os << ", "; + os << ".DescCount = "; + + os << *(params->pDescCount); + os << ", "; os << ".NumDevices = "; diff --git a/source/common/ur_pool_manager.hpp 
b/source/common/ur_pool_manager.hpp index e40343acf2..15d99b6798 100644 --- a/source/common/ur_pool_manager.hpp +++ b/source/common/ur_pool_manager.hpp @@ -35,26 +35,28 @@ struct pool_descriptor { static inline std::pair> urGetSubDevices(ur_device_handle_t hDevice) { - size_t nComputeUnits; + uint32_t nComputeUnits; auto ret = urDeviceGetInfo(hDevice, UR_DEVICE_INFO_MAX_COMPUTE_UNITS, sizeof(nComputeUnits), &nComputeUnits, nullptr); if (ret != UR_RESULT_SUCCESS) { return {ret, {}}; } - ur_device_partition_property_t properties[] = { - UR_DEVICE_PARTITION_EQUALLY, - static_cast(nComputeUnits), 0}; + ur_device_partition_desc_t properties; + properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; + properties.pNext = nullptr; + properties.type = UR_DEVICE_PARTITION_EQUALLY; + properties.value.equally = nComputeUnits; // Get the number of devices that will be created uint32_t deviceCount; - ret = urDevicePartition(hDevice, properties, 0, nullptr, &deviceCount); + ret = urDevicePartition(hDevice, &properties, 1, 0, nullptr, &deviceCount); if (ret != UR_RESULT_SUCCESS) { return {ret, {}}; } std::vector sub_devices(deviceCount); - ret = urDevicePartition(hDevice, properties, + ret = urDevicePartition(hDevice, &properties, 1, static_cast(sub_devices.size()), sub_devices.data(), nullptr); if (ret != UR_RESULT_SUCCESS) { diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 55e0efa63c..e1ef41353e 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -395,8 +395,10 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// @brief Intercept function for urDevicePartition __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_property_t * - pProperties, ///< [in] null-terminated array of <$_device_partition_t enum, value> pairs. 
+ const ur_device_partition_desc_t + *pProperties, ///< [in] Array of partition descriptors. + size_t + DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. @@ -412,13 +414,14 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_device_partition_params_t params = {&hDevice, &pProperties, &NumDevices, + ur_device_partition_params_t params = {&hDevice, &pProperties, + &DescCount, &NumDevices, &phSubDevices, &pNumDevicesRet}; uint64_t instance = context.notify_begin(UR_FUNCTION_DEVICE_PARTITION, "urDevicePartition", &params); - ur_result_t result = pfnPartition(hDevice, pProperties, NumDevices, - phSubDevices, pNumDevicesRet); + ur_result_t result = pfnPartition(hDevice, pProperties, DescCount, + NumDevices, phSubDevices, pNumDevicesRet); context.notify_end(UR_FUNCTION_DEVICE_PARTITION, "urDevicePartition", &params, &result, instance); diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index c06ad34ea6..2b5f5130d1 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -415,8 +415,10 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// @brief Intercept function for urDevicePartition __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_property_t * - pProperties, ///< [in] null-terminated array of <$_device_partition_t enum, value> pairs. + const ur_device_partition_desc_t + *pProperties, ///< [in] Array of partition descriptors. + size_t + DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. uint32_t NumDevices, ///< [in] the number of sub-devices.
ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. @@ -440,10 +442,18 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( if (NULL == pProperties) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } + + if (UR_DEVICE_PARTITION_BY_CSLICE < pProperties->type) { + return UR_RESULT_ERROR_INVALID_ENUMERATION; + } + + if (DescCount == 0) { + return UR_RESULT_ERROR_INVALID_SIZE; + } } - ur_result_t result = pfnPartition(hDevice, pProperties, NumDevices, - phSubDevices, pNumDevicesRet); + ur_result_t result = pfnPartition(hDevice, pProperties, DescCount, + NumDevices, phSubDevices, pNumDevicesRet); return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 0e5ca08186..92586b07bf 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -461,8 +461,10 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// @brief Intercept function for urDevicePartition __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_property_t * - pProperties, ///< [in] null-terminated array of <$_device_partition_t enum, value> pairs. + const ur_device_partition_desc_t + *pProperties, ///< [in] Array of partition descriptors. + size_t + DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. 
@@ -485,8 +487,8 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( hDevice = reinterpret_cast(hDevice)->handle; // forward to device-platform - result = pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, - pNumDevicesRet); + result = pfnPartition(hDevice, pProperties, DescCount, NumDevices, + phSubDevices, pNumDevicesRet); if (UR_RESULT_SUCCESS != result) { return result; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index 0992924fe3..cfdd0869dc 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -548,12 +548,18 @@ ur_result_t UR_APICALL urDeviceRelease( /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_DEVICE_PARTITION_BY_CSLICE < pProperties->type` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `DescCount == 0` ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_property_t * - pProperties, ///< [in] null-terminated array of <$_device_partition_t enum, value> pairs. + const ur_device_partition_desc_t + *pProperties, ///< [in] Array of partition descriptors. + size_t + DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. @@ -568,8 +574,8 @@ ur_result_t UR_APICALL urDevicePartition( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, - pNumDevicesRet); + return pfnPartition(hDevice, pProperties, DescCount, NumDevices, + phSubDevices, pNumDevicesRet); } catch (...) 
{ return exceptionToResult(std::current_exception()); } diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 9670e7f02a..774fa03edd 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -454,12 +454,18 @@ ur_result_t UR_APICALL urDeviceRelease( /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` +/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION +/// + `::UR_DEVICE_PARTITION_BY_CSLICE < pProperties->type` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT +/// - ::UR_RESULT_ERROR_INVALID_SIZE +/// + `DescCount == 0` ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_property_t * - pProperties, ///< [in] null-terminated array of <$_device_partition_t enum, value> pairs. + const ur_device_partition_desc_t + *pProperties, ///< [in] Array of partition descriptors. + size_t + DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. 
diff --git a/test/conformance/device/urDevicePartition.cpp b/test/conformance/device/urDevicePartition.cpp index e7a15b1c39..53fed0df28 100644 --- a/test/conformance/device/urDevicePartition.cpp +++ b/test/conformance/device/urDevicePartition.cpp @@ -1,6 +1,7 @@ // Copyright (C) 2022-2023 Intel Corporation // SPDX-License-Identifier: MIT #include +#include using urDevicePartitionTest = uur::urAllDevicesTest; @@ -28,19 +29,20 @@ TEST_F(urDevicePartitionTest, PartitionEquallySuccess) { ASSERT_NO_FATAL_FAILURE(getNumberComputeUnits(device, n_compute_units)); for (uint32_t i = 1; i < n_compute_units; ++i) { - ur_device_partition_property_t properties[] = { - UR_DEVICE_PARTITION_EQUALLY, i, 0}; + ur_device_partition_desc_t properties = + uur::makePartitionEquallyDesc(i); // Get the number of devices that will be created uint32_t n_devices; - ASSERT_SUCCESS( - urDevicePartition(device, properties, 0, nullptr, &n_devices)); + ASSERT_SUCCESS(urDevicePartition(device, &properties, 1, 0, nullptr, + &n_devices)); ASSERT_NE(n_devices, 0); std::vector sub_devices(n_devices); - ASSERT_SUCCESS(urDevicePartition( - device, properties, static_cast(sub_devices.size()), - sub_devices.data(), nullptr)); + ASSERT_SUCCESS( + urDevicePartition(device, &properties, 1, + static_cast(sub_devices.size()), + sub_devices.data(), nullptr)); for (auto sub_device : sub_devices) { ASSERT_NE(sub_device, nullptr); ASSERT_SUCCESS(urDeviceRelease(sub_device)); @@ -77,42 +79,46 @@ TEST_F(urDevicePartitionTest, PartitionByCounts) { uint32_t n_cu_across_sub_devices; for (const auto Combination : combinations) { - std::vector properties = { - UR_DEVICE_PARTITION_BY_COUNTS}; + std::vector properties; switch (Combination) { case Combination::ONE: { n_cu_across_sub_devices = 1; - properties.insert(properties.end(), {1, 0}); + properties.push_back(uur::makePartitionByCountsDesc(1)); break; } case Combination::HALF: { n_cu_across_sub_devices = (n_cu_in_device / 2) * 2; - properties.insert(properties.end(), - 
{n_cu_in_device / 2, n_cu_in_device / 2, 0}); + properties.push_back( + uur::makePartitionByCountsDesc(n_cu_in_device / 2)); + properties.push_back( + uur::makePartitionByCountsDesc(n_cu_in_device / 2)); break; } case Combination::ALL_MINUS_ONE: { n_cu_across_sub_devices = n_cu_in_device - 1; - properties.insert(properties.end(), {n_cu_in_device - 1, 0}); + properties.push_back( + uur::makePartitionByCountsDesc(n_cu_in_device - 1)); break; } case Combination::ALL: { n_cu_across_sub_devices = n_cu_in_device; - properties.insert(properties.end(), {n_cu_in_device, 0}); + properties.push_back( + uur::makePartitionByCountsDesc(n_cu_in_device)); break; } } // Get the number of devices that will be created uint32_t n_devices; - ASSERT_SUCCESS(urDevicePartition(device, properties.data(), 0, - nullptr, &n_devices)); - ASSERT_EQ(n_devices, properties.size() - 2); + ASSERT_SUCCESS(urDevicePartition(device, properties.data(), + properties.size(), 0, nullptr, + &n_devices)); + ASSERT_EQ(n_devices, properties.size()); std::vector sub_devices(n_devices); ASSERT_SUCCESS( - urDevicePartition(device, properties.data(), + urDevicePartition(device, properties.data(), properties.size(), static_cast(sub_devices.size()), sub_devices.data(), nullptr)); @@ -159,20 +165,19 @@ TEST_P(urDevicePartitionAffinityDomainTest, PartitionByAffinityDomain) { continue; } - std::vector properties = { - UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, flag, 0}; + ur_device_partition_desc_t properties = + uur::makePartitionByAffinityDomain(flag); // Get the number of devices that will be created uint32_t n_devices = 0; - ASSERT_SUCCESS(urDevicePartition(device, properties.data(), 0, nullptr, - &n_devices)); + ASSERT_SUCCESS( + urDevicePartition(device, &properties, 1, 0, nullptr, &n_devices)); ASSERT_NE(n_devices, 0); std::vector sub_devices(n_devices); - ASSERT_SUCCESS( - urDevicePartition(device, properties.data(), - static_cast(sub_devices.size()), - sub_devices.data(), nullptr)); + 
ASSERT_SUCCESS(urDevicePartition( + device, &properties, 1, static_cast(sub_devices.size()), + sub_devices.data(), nullptr)); for (auto sub_device : sub_devices) { ASSERT_NE(sub_device, nullptr); @@ -197,12 +202,11 @@ INSTANTIATE_TEST_SUITE_P( }); TEST_F(urDevicePartitionTest, InvalidNullHandleDevice) { - ur_device_partition_property_t props[] = {UR_DEVICE_PARTITION_EQUALLY, 1, - 0}; + ur_device_partition_desc_t props = uur::makePartitionEquallyDesc(1); ur_device_handle_t sub_device = nullptr; ASSERT_EQ_RESULT( UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urDevicePartition(nullptr, props, 1, &sub_device, nullptr)); + urDevicePartition(nullptr, &props, 1, 1, &sub_device, nullptr)); } TEST_F(urDevicePartitionTest, InvalidNullPointerProperties) { @@ -210,7 +214,17 @@ TEST_F(urDevicePartitionTest, InvalidNullPointerProperties) { ur_device_handle_t sub_device = nullptr; ASSERT_EQ_RESULT( UR_RESULT_ERROR_INVALID_NULL_POINTER, - urDevicePartition(device, nullptr, 1, &sub_device, nullptr)); + urDevicePartition(device, nullptr, 1, 1, &sub_device, nullptr)); + } +} + +TEST_F(urDevicePartitionTest, InvalidSizeDescCount) { + ur_device_partition_desc_t props = uur::makePartitionEquallyDesc(1); + for (auto device : devices) { + ur_device_handle_t sub_device = nullptr; + ASSERT_EQ_RESULT( + UR_RESULT_ERROR_INVALID_SIZE, + urDevicePartition(device, &props, 0, 1, &sub_device, nullptr)); } } @@ -228,21 +242,22 @@ TEST_F(urDevicePartitionTest, SuccessSubSet) { ASSERT_NO_FATAL_FAILURE(getNumberComputeUnits(device, n_compute_units)); // partition for 1 compute unit per sub-device - ur_device_partition_property_t properties[] = { - UR_DEVICE_PARTITION_EQUALLY, 1, 0}; + ur_device_partition_desc_t properties = + uur::makePartitionEquallyDesc(1); // Get the number of devices that will be created uint32_t n_devices; ASSERT_SUCCESS( - urDevicePartition(device, properties, 0, nullptr, &n_devices)); + urDevicePartition(device, &properties, 1, 0, nullptr, &n_devices)); ASSERT_NE(n_devices, 0); // We 
can request only a subset of these devices from [0, n_devices] for (size_t subset = 0; subset <= n_devices; ++subset) { std::vector sub_devices(subset); - ASSERT_SUCCESS(urDevicePartition( - device, properties, static_cast(sub_devices.size()), - sub_devices.data(), nullptr)); + ASSERT_SUCCESS( + urDevicePartition(device, &properties, 1, + static_cast(sub_devices.size()), + sub_devices.data(), nullptr)); for (auto sub_device : sub_devices) { ASSERT_NE(sub_device, nullptr); ASSERT_SUCCESS(urDeviceRelease(sub_device)); diff --git a/test/conformance/device/urDeviceRelease.cpp b/test/conformance/device/urDeviceRelease.cpp index dde0e0aa2a..57c185ec21 100644 --- a/test/conformance/device/urDeviceRelease.cpp +++ b/test/conformance/device/urDeviceRelease.cpp @@ -25,12 +25,12 @@ TEST_F(urDeviceReleaseTest, Success) { TEST_F(urDeviceReleaseTest, SuccessSubdevices) { for (auto device : devices) { - ur_device_partition_property_t properties[] = { - UR_DEVICE_PARTITION_BY_COUNTS, 1, 0}; + ur_device_partition_desc_t properties = + uur::makePartitionEquallyDesc(1); ur_device_handle_t sub_device; ASSERT_SUCCESS( - urDevicePartition(device, properties, 1, &sub_device, nullptr)); + urDevicePartition(device, &properties, 1, 1, &sub_device, nullptr)); ASSERT_SUCCESS(urDeviceRetain(sub_device)); diff --git a/test/conformance/device/urDeviceRetain.cpp b/test/conformance/device/urDeviceRetain.cpp index d354f4a199..0f4ccfce35 100644 --- a/test/conformance/device/urDeviceRetain.cpp +++ b/test/conformance/device/urDeviceRetain.cpp @@ -26,12 +26,11 @@ TEST_F(urDeviceRetainTest, Success) { TEST_F(urDeviceRetainTest, SuccessSubdevices) { for (auto device : devices) { - ur_device_partition_property_t properties[] = { - UR_DEVICE_PARTITION_BY_COUNTS, 1, 0}; + auto properties = uur::makePartitionEquallyDesc(1); ur_device_handle_t sub_device; ASSERT_SUCCESS( - urDevicePartition(device, properties, 1, &sub_device, nullptr)); + urDevicePartition(device, &properties, 1, 1, &sub_device, nullptr)); 
uint32_t prevRefCount = 0; ASSERT_SUCCESS(uur::GetObjectReferenceCount(sub_device, prevRefCount)); diff --git a/test/conformance/testing/include/uur/fixtures.h b/test/conformance/testing/include/uur/fixtures.h index 5aa58095e0..1f5cc85dec 100644 --- a/test/conformance/testing/include/uur/fixtures.h +++ b/test/conformance/testing/include/uur/fixtures.h @@ -43,10 +43,9 @@ GetDevices(ur_platform_handle_t platform) { return {true, devices}; } -inline bool -hasDevicePartitionSupport(ur_device_handle_t device, - const ur_device_partition_property_t property) { - std::vector properties; +inline bool hasDevicePartitionSupport(ur_device_handle_t device, + const ur_device_partition_t property) { + std::vector properties; uur::GetDevicePartitionProperties(device, properties); return std::find(properties.begin(), properties.end(), property) != properties.end(); diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h index 3857a0a4bd..9b8744c209 100644 --- a/test/conformance/testing/include/uur/utils.h +++ b/test/conformance/testing/include/uur/utils.h @@ -308,9 +308,9 @@ ur_result_t GetDevicePreferredInteropUserSync(ur_device_handle_t device, bool &sync); ur_result_t GetDeviceParentDevice(ur_device_handle_t device, ur_device_handle_t &parent); -ur_result_t GetDevicePartitionProperties( - ur_device_handle_t device, - std::vector &properties); +ur_result_t +GetDevicePartitionProperties(ur_device_handle_t device, + std::vector &properties); ur_result_t GetDevicePartitionMaxSubDevices(ur_device_handle_t device, uint32_t &max_sub_devices); ur_result_t @@ -318,7 +318,7 @@ GetDevicePartitionAffinityDomainFlags(ur_device_handle_t device, ur_device_affinity_domain_flags_t &flags); ur_result_t GetDevicePartitionType(ur_device_handle_t device, - std::vector &type); + std::vector &type); ur_result_t GetDeviceMaxNumberSubGroups(ur_device_handle_t device, uint32_t &max_sub_groups); ur_result_t @@ -365,6 +365,13 @@ ur_result_t 
GetDeviceMaxComputeQueueIndices(ur_device_handle_t device, ur_result_t GetDeviceHostPipeRWSupported(ur_device_handle_t device, bool &support); +ur_device_partition_desc_t makePartitionByCountsDesc(uint32_t count); + +ur_device_partition_desc_t makePartitionEquallyDesc(uint32_t cu_per_device); + +ur_device_partition_desc_t +makePartitionByAffinityDomain(ur_device_affinity_domain_flags_t aff_domain); + } // namespace uur #endif // UR_CONFORMANCE_INCLUDE_UTILS_H_INCLUDED diff --git a/test/conformance/testing/source/utils.cpp b/test/conformance/testing/source/utils.cpp index 254d392712..8511129957 100644 --- a/test/conformance/testing/source/utils.cpp +++ b/test/conformance/testing/source/utils.cpp @@ -479,10 +479,10 @@ ur_result_t GetDeviceParentDevice(ur_device_handle_t device, device, UR_DEVICE_INFO_PARENT_DEVICE, parent); } -ur_result_t GetDevicePartitionProperties( - ur_device_handle_t device, - std::vector &properties) { - return GetDeviceVectorInfo( +ur_result_t +GetDevicePartitionProperties(ur_device_handle_t device, + std::vector &properties) { + return GetDeviceVectorInfo( device, UR_DEVICE_INFO_PARTITION_PROPERTIES, properties); } @@ -500,8 +500,8 @@ ur_result_t GetDevicePartitionAffinityDomainFlags( ur_result_t GetDevicePartitionType(ur_device_handle_t device, - std::vector &type) { - return GetDeviceVectorInfo( + std::vector &type) { + return GetDeviceVectorInfo( device, UR_DEVICE_INFO_PARTITION_TYPE, type); } @@ -633,4 +633,32 @@ ur_result_t GetDeviceHostPipeRWSupported(ur_device_handle_t device, device, UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED, support); } +ur_device_partition_desc_t makePartitionByCountsDesc(uint32_t count) { + ur_device_partition_desc_t desc; + desc.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; + desc.pNext = nullptr; + desc.type = UR_DEVICE_PARTITION_BY_COUNTS; + desc.value.count = count; + return desc; +} + +ur_device_partition_desc_t makePartitionEquallyDesc(uint32_t cu_per_device) { + ur_device_partition_desc_t desc; + 
desc.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; + desc.pNext = nullptr; + desc.type = UR_DEVICE_PARTITION_EQUALLY; + desc.value.equally = cu_per_device; + return desc; +} + +ur_device_partition_desc_t +makePartitionByAffinityDomain(ur_device_affinity_domain_flags_t aff_domain) { + ur_device_partition_desc_t desc; + desc.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; + desc.pNext = nullptr; + desc.type = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + desc.value.affinity_domain = aff_domain; + return desc; +} + } // namespace uur diff --git a/test/unit/utils/params.cpp b/test/unit/utils/params.cpp index 5f39a5a106..578ed544ae 100644 --- a/test/unit/utils/params.cpp +++ b/test/unit/utils/params.cpp @@ -239,9 +239,9 @@ struct UrDeviceGetInfoParamsInvalidSize : UrDeviceGetInfoParams { }; struct UrDeviceGetInfoParamsPartitionArray : UrDeviceGetInfoParams { - ur_device_partition_property_t props[3] = { - UR_DEVICE_PARTITION_BY_COUNTS, UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, - UR_DEVICE_PARTITION_BY_CSLICE}; + ur_device_partition_t props[3] = {UR_DEVICE_PARTITION_BY_COUNTS, + UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, + UR_DEVICE_PARTITION_BY_CSLICE}; UrDeviceGetInfoParamsPartitionArray() : UrDeviceGetInfoParams() { propName = UR_DEVICE_INFO_PARTITION_PROPERTIES; pPropValue = &props; @@ -251,9 +251,10 @@ struct UrDeviceGetInfoParamsPartitionArray : UrDeviceGetInfoParams { const char *get_expected() { return ".hDevice = nullptr, .propName = " "UR_DEVICE_INFO_PARTITION_PROPERTIES, .propSize " - "= 24, .pPropValue = \\{4231, 4232, 4233\\}, .pPropSizeRet = .+ " - "\\(24\\)"; - // TODO: should resolve type values for ur_device_partition_property_t... 
+ "= 12, .pPropValue = \\{UR_DEVICE_PARTITION_BY_COUNTS, " + "UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, " + "UR_DEVICE_PARTITION_BY_CSLICE\\}, .pPropSizeRet = .+ " + "\\(12\\)"; }; }; From 085ffd11d504f4537287a59b739db6708dfabb19 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Thu, 25 May 2023 12:00:18 +0100 Subject: [PATCH 2/6] [UR] Refactor device partition into properties struct --- include/ur.py | 28 +++-- include/ur_api.h | 50 +++++---- include/ur_ddi.h | 3 +- scripts/core/common.yml | 4 +- scripts/core/device.yml | 28 +++-- source/adapters/null/ur_nullddi.cpp | 10 +- source/common/ur_params.hpp | 59 ++++++---- source/common/ur_pool_manager.hpp | 19 ++-- source/loader/layers/tracing/ur_trcddi.cpp | 13 +-- source/loader/layers/validation/ur_valddi.cpp | 18 +-- source/loader/ur_ldrddi.cpp | 10 +- source/loader/ur_libapi.cpp | 14 +-- source/ur_api.cpp | 10 +- test/conformance/device/urDevicePartition.cpp | 106 ++++++++++-------- test/conformance/device/urDeviceRelease.cpp | 12 +- test/conformance/device/urDeviceRetain.cpp | 10 +- test/conformance/testing/include/uur/utils.h | 8 +- test/conformance/testing/source/utils.cpp | 23 ++-- 18 files changed, 229 insertions(+), 196 deletions(-) diff --git a/include/ur.py b/include/ur.py index b19a15c695..32e18a4006 100644 --- a/include/ur.py +++ b/include/ur.py @@ -225,7 +225,7 @@ class ur_structure_type_v(IntEnum): PROGRAM_NATIVE_PROPERTIES = 23 ## ::ur_program_native_properties_t SAMPLER_NATIVE_PROPERTIES = 24 ## ::ur_sampler_native_properties_t QUEUE_NATIVE_DESC = 25 ## ::ur_queue_native_desc_t - DEVICE_PARTITION_DESC = 26 ## ::ur_device_partition_desc_t + DEVICE_PARTITION_PROPERTIES = 26 ## ::ur_device_partition_properties_t class ur_structure_type_t(c_int): def __str__(self): @@ -510,8 +510,8 @@ class ur_device_info_v(IntEnum): PARTITION_AFFINITY_DOMAIN = 78 ## [::ur_device_affinity_domain_flags_t] Returns a bit-field of the ## supported affinity domains for partitioning. 
## If the device does not support any affinity domains, then 0 will be returned. - PARTITION_TYPE = 79 ## [::ur_device_partition_desc_t[]] return an array of - ## ::ur_device_partition_desc_t for properties specified in + PARTITION_TYPE = 79 ## [::ur_device_partition_property_t[]] return an array of + ## ::ur_device_partition_property_t for properties specified in ## ::urDevicePartition MAX_NUM_SUB_GROUPS = 80 ## [uint32_t] max number of sub groups SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81 ## [::ur_bool_t] support sub group independent forward progress @@ -625,16 +625,24 @@ class ur_device_partition_value_t(Structure): ] ############################################################################### -## @brief Device partition description -class ur_device_partition_desc_t(Structure): +## @brief Device partition property +class ur_device_partition_property_t(Structure): _fields_ = [ - ("stype", ur_structure_type_t), ## [in] type of this structure, must be - ## ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC - ("pNext", c_void_p), ## [in][optional] pointer to extension-specific structure ("type", ur_device_partition_t), ## [in] The partitioning type to be used. ("value", ur_device_partition_value_t) ## [in] The paritioning value. ] +############################################################################### +## @brief Device Partition Properties +class ur_device_partition_properties_t(Structure): + _fields_ = [ + ("stype", ur_structure_type_t), ## [in] type of this structure, must be + ## ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES + ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure + ("pProperties", *), ## [in] Pointer to the beginning of the properties array. + ("PropCount", c_size_t) ## [in] The length of properties pointed to by `pProperties`. 
+ ] + ############################################################################### ## @brief FP capabilities class ur_device_fp_capability_flags_v(IntEnum): @@ -2824,9 +2832,9 @@ class ur_usm_dditable_t(Structure): ############################################################################### ## @brief Function-pointer for urDevicePartition if __use_win_types: - _urDevicePartition_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_desc_t), c_size_t, c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) + _urDevicePartition_t = WINFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_properties_t), c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) else: - _urDevicePartition_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_desc_t), c_size_t, c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) + _urDevicePartition_t = CFUNCTYPE( ur_result_t, ur_device_handle_t, POINTER(ur_device_partition_properties_t), c_ulong, POINTER(ur_device_handle_t), POINTER(c_ulong) ) ############################################################################### ## @brief Function-pointer for urDeviceSelectBinary diff --git a/include/ur_api.h b/include/ur_api.h index 80735d430f..0e60857040 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -249,7 +249,7 @@ typedef enum ur_structure_type_t { UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES = 23, ///< ::ur_program_native_properties_t UR_STRUCTURE_TYPE_SAMPLER_NATIVE_PROPERTIES = 24, ///< ::ur_sampler_native_properties_t UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC = 25, ///< ::ur_queue_native_desc_t - UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC = 26, ///< ::ur_device_partition_desc_t + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES = 26, ///< ::ur_device_partition_properties_t /// @cond UR_STRUCTURE_TYPE_FORCE_UINT32 = 0x7fffffff /// @endcond @@ -859,8 +859,8 @@ typedef enum ur_device_info_t { UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN = 78, ///< 
[::ur_device_affinity_domain_flags_t] Returns a bit-field of the ///< supported affinity domains for partitioning. ///< If the device does not support any affinity domains, then 0 will be returned. - UR_DEVICE_INFO_PARTITION_TYPE = 79, ///< [::ur_device_partition_desc_t[]] return an array of - ///< ::ur_device_partition_desc_t for properties specified in + UR_DEVICE_INFO_PARTITION_TYPE = 79, ///< [::ur_device_partition_property_t[]] return an array of + ///< ::ur_device_partition_property_t for properties specified in ///< ::urDevicePartition UR_DEVICE_INFO_MAX_NUM_SUB_GROUPS = 80, ///< [uint32_t] max number of sub groups UR_DEVICE_INFO_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS = 81, ///< [::ur_bool_t] support sub group independent forward progress @@ -1069,15 +1069,23 @@ typedef union ur_device_partition_value_t { } ur_device_partition_value_t; /////////////////////////////////////////////////////////////////////////////// -/// @brief Device partition description -typedef struct ur_device_partition_desc_t { - ur_structure_type_t stype; ///< [in] type of this structure, must be - ///< ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC - const void *pNext; ///< [in][optional] pointer to extension-specific structure +/// @brief Device partition property +typedef struct ur_device_partition_property_t { ur_device_partition_t type; ///< [in] The partitioning type to be used. ur_device_partition_value_t value; ///< [in] The paritioning value. 
-} ur_device_partition_desc_t; +} ur_device_partition_property_t; + +/////////////////////////////////////////////////////////////////////////////// +/// @brief Device Partition Properties +typedef struct ur_device_partition_properties_t { + ur_structure_type_t stype; ///< [in] type of this structure, must be + ///< ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES + void *pNext; ///< [in,out][optional] pointer to extension-specific structure + const ur_device_partition_property_t *pProperties; ///< [in] Pointer to the beginning of the properties array. + size_t PropCount; ///< [in] The length of properties pointed to by `pProperties`. + +} ur_device_partition_properties_t; /////////////////////////////////////////////////////////////////////////////// /// @brief Partition the device into sub-devices @@ -1103,23 +1111,18 @@ typedef struct ur_device_partition_desc_t { /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_PARTITION_BY_CSLICE < pProperties->type` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// + `DescCount == 0` UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( - ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_desc_t *pProperties, ///< [in] Array of partition descriptors. - size_t DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. - uint32_t NumDevices, ///< [in] the number of sub-devices. - ur_device_handle_t *phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. - ///< If NumDevices is less than the number of sub-devices available, then - ///< the function shall only retrieve that number of sub-devices. 
- uint32_t *pNumDevicesRet ///< [out][optional] pointer to the number of sub-devices the device can be - ///< partitioned into according to the partitioning property. + ur_device_handle_t hDevice, ///< [in] handle of the device to partition. + const ur_device_partition_properties_t *pProperties, ///< [in] Array of partition descriptors. + uint32_t NumDevices, ///< [in] the number of sub-devices. + ur_device_handle_t *phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. + ///< If NumDevices is less than the number of sub-devices available, then + ///< the function shall only retrieve that number of sub-devices. + uint32_t *pNumDevicesRet ///< [out][optional] pointer to the number of sub-devices the device can be + ///< partitioned into according to the partitioning property. ); /////////////////////////////////////////////////////////////////////////////// @@ -7196,8 +7199,7 @@ typedef struct ur_device_release_params_t { /// allowing the callback the ability to modify the parameter's value typedef struct ur_device_partition_params_t { ur_device_handle_t *phDevice; - const ur_device_partition_desc_t **ppProperties; - size_t *pDescCount; + const ur_device_partition_properties_t **ppProperties; uint32_t *pNumDevices; ur_device_handle_t **pphSubDevices; uint32_t **ppNumDevicesRet; diff --git a/include/ur_ddi.h b/include/ur_ddi.h index c301ebe471..18d2af9a1b 100644 --- a/include/ur_ddi.h +++ b/include/ur_ddi.h @@ -1394,8 +1394,7 @@ typedef ur_result_t(UR_APICALL *ur_pfnDeviceRelease_t)( /// @brief Function-pointer for urDevicePartition typedef ur_result_t(UR_APICALL *ur_pfnDevicePartition_t)( ur_device_handle_t, - const ur_device_partition_desc_t *, - size_t, + const ur_device_partition_properties_t *, uint32_t, ur_device_handle_t *, uint32_t *); diff --git a/scripts/core/common.yml b/scripts/core/common.yml index d4d275cbb6..cdb381a7c4 100644 --- a/scripts/core/common.yml +++ b/scripts/core/common.yml @@ -314,8 +314,8 @@ etors: desc: 
$x_sampler_native_properties_t - name: QUEUE_NATIVE_DESC desc: $x_queue_native_desc_t - - name: DEVICE_PARTITION_DESC - desc: $x_device_partition_desc_t + - name: DEVICE_PARTITION_PROPERTIES + desc: $x_device_partition_properties_t --- #-------------------------------------------------------------------------- type: struct desc: "Base for all properties types" diff --git a/scripts/core/device.yml b/scripts/core/device.yml index 55b63cc96d..3f42af2e46 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -309,7 +309,7 @@ etors: [$x_device_affinity_domain_flags_t] Returns a bit-field of the supported affinity domains for partitioning. If the device does not support any affinity domains, then 0 will be returned. - name: PARTITION_TYPE - desc: "[$x_device_partition_desc_t[]] return an array of $x_device_partition_desc_t for properties specified in $xDevicePartition" + desc: "[$x_device_partition_property_t[]] return an array of $x_device_partition_property_t for properties specified in $xDevicePartition" - name: MAX_NUM_SUB_GROUPS desc: "[uint32_t] max number of sub groups" - name: SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS @@ -523,9 +523,8 @@ members: desc: "[in] The affinity domain to partition for when partitioning with $UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN." --- #-------------------------------------------------------------------------- type: struct -desc: "Device partition description" -name: $x_device_partition_desc_t -base: $x_base_desc_t +desc: "Device partition property" +name: $x_device_partition_property_t class: $xDevice members: - type: $x_device_partition_t @@ -535,6 +534,19 @@ members: name: value desc: "[in] The paritioning value." 
--- #-------------------------------------------------------------------------- +type: struct +desc: "Device Partition Properties" +name: $x_device_partition_properties_t +class: $xDevice +base: $x_base_properties_t +members: + - type: const $x_device_partition_property_t * + name: pProperties + desc: "[in] Pointer to the beginning of the properties array." + - type: size_t + name: PropCount + desc: "[in] The length of properties pointed to by `pProperties`." +--- #-------------------------------------------------------------------------- type: function desc: "Partition the device into sub-devices" class: $xDevice @@ -553,14 +565,10 @@ params: name: hDevice desc: | [in] handle of the device to partition. - - type: const $x_device_partition_desc_t* + - type: const $x_device_partition_properties_t* name: pProperties desc: | [in] Array of partition descriptors. - - type: size_t - name: DescCount - desc: | - [in] Number of descriptors pointed to by `pProperties`. - type: "uint32_t" name: NumDevices desc: | @@ -577,8 +585,6 @@ params: returns: - $X_RESULT_ERROR_DEVICE_PARTITION_FAILED - $X_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT - - $X_RESULT_ERROR_INVALID_SIZE: - - "`DescCount == 0`" --- #-------------------------------------------------------------------------- type: function desc: "Selects the most appropriate device binary based on runtime information and the IR characteristics." diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index 0968a88ccd..f0ed60d363 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -353,10 +353,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// @brief Intercept function for urDevicePartition __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_desc_t - *pProperties, ///< [in] Array of partition descriptors. 
- size_t - DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. + const ur_device_partition_properties_t + *pProperties, ///< [in] Array of partition descriptors. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. @@ -371,8 +369,8 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( // if the driver has created a custom function, then call it instead of using the generic path auto pfnPartition = d_context.urDdiTable.Device.pfnPartition; if (nullptr != pfnPartition) { - result = pfnPartition(hDevice, pProperties, DescCount, NumDevices, - phSubDevices, pNumDevicesRet); + result = pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, + pNumDevicesRet); } else { // generic implementation for (size_t i = 0; (nullptr != phSubDevices) && (i < NumDevices); ++i) { diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index c9aa1bbde0..6ee62aaec5 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -185,8 +185,12 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_partition_t value); inline std::ostream &operator<<(std::ostream &os, const union ur_device_partition_value_t params); -inline std::ostream &operator<<(std::ostream &os, - const struct ur_device_partition_desc_t params); +inline std::ostream & +operator<<(std::ostream &os, + const struct ur_device_partition_property_t params); +inline std::ostream & +operator<<(std::ostream &os, + const struct ur_device_partition_properties_t params); inline std::ostream &operator<<(std::ostream &os, enum ur_device_fp_capability_flag_t value); inline std::ostream &operator<<(std::ostream &os, @@ -717,8 +721,8 @@ inline std::ostream &operator<<(std::ostream &os, os << "UR_STRUCTURE_TYPE_QUEUE_NATIVE_DESC"; break; - case UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC: - os << "UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC"; + case 
UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES: + os << "UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES"; break; default: os << "unknown enumerator"; @@ -884,9 +888,9 @@ inline void serializeStruct(std::ostream &os, const void *ptr) { ur_params::serializePtr(os, pstruct); } break; - case UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC: { - const ur_device_partition_desc_t *pstruct = - (const ur_device_partition_desc_t *)ptr; + case UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES: { + const ur_device_partition_properties_t *pstruct = + (const ur_device_partition_properties_t *)ptr; ur_params::serializePtr(os, pstruct); } break; default: @@ -2793,10 +2797,10 @@ inline void serializeTagged(std::ostream &os, const void *ptr, case UR_DEVICE_INFO_PARTITION_TYPE: { - const ur_device_partition_desc_t *tptr = - (const ur_device_partition_desc_t *)ptr; + const ur_device_partition_property_t *tptr = + (const ur_device_partition_property_t *)ptr; os << "{"; - size_t nelems = size / sizeof(ur_device_partition_desc_t); + size_t nelems = size / sizeof(ur_device_partition_property_t); for (size_t i = 0; i < nelems; ++i) { if (i != 0) { os << ", "; @@ -3447,8 +3451,26 @@ operator<<(std::ostream &os, const union ur_device_partition_value_t params) { return os; } inline std::ostream & -operator<<(std::ostream &os, const struct ur_device_partition_desc_t params) { - os << "(struct ur_device_partition_desc_t){"; +operator<<(std::ostream &os, + const struct ur_device_partition_property_t params) { + os << "(struct ur_device_partition_property_t){"; + + os << ".type = "; + + os << (params.type); + + os << ", "; + os << ".value = "; + + os << (params.value); + + os << "}"; + return os; +} +inline std::ostream & +operator<<(std::ostream &os, + const struct ur_device_partition_properties_t params) { + os << "(struct ur_device_partition_properties_t){"; os << ".stype = "; @@ -3460,14 +3482,14 @@ operator<<(std::ostream &os, const struct ur_device_partition_desc_t params) { ur_params::serializeStruct(os, 
(params.pNext)); os << ", "; - os << ".type = "; + os << ".pProperties = "; - os << (params.type); + os << (params.pProperties); os << ", "; - os << ".value = "; + os << ".PropCount = "; - os << (params.value); + os << (params.PropCount); os << "}"; return os; @@ -11735,11 +11757,6 @@ operator<<(std::ostream &os, ur_params::serializePtr(os, *(params->ppProperties)); - os << ", "; - os << ".DescCount = "; - - os << *(params->pDescCount); - os << ", "; os << ".NumDevices = "; diff --git a/source/common/ur_pool_manager.hpp b/source/common/ur_pool_manager.hpp index 15d99b6798..610d630bbe 100644 --- a/source/common/ur_pool_manager.hpp +++ b/source/common/ur_pool_manager.hpp @@ -42,21 +42,26 @@ urGetSubDevices(ur_device_handle_t hDevice) { return {ret, {}}; } - ur_device_partition_desc_t properties; - properties.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; - properties.pNext = nullptr; - properties.type = UR_DEVICE_PARTITION_EQUALLY; - properties.value.equally = nComputeUnits; + ur_device_partition_property_t prop; + prop.type = UR_DEVICE_PARTITION_EQUALLY; + prop.value.equally = nComputeUnits; + + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &prop, + 1, + }; // Get the number of devices that will be created uint32_t deviceCount; - ret = urDevicePartition(hDevice, &properties, 1, 0, nullptr, &deviceCount); + ret = urDevicePartition(hDevice, &properties, 0, nullptr, &deviceCount); if (ret != UR_RESULT_SUCCESS) { return {ret, {}}; } std::vector sub_devices(deviceCount); - ret = urDevicePartition(hDevice, &properties, 1, + ret = urDevicePartition(hDevice, &properties, static_cast(sub_devices.size()), sub_devices.data(), nullptr); if (ret != UR_RESULT_SUCCESS) { diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index e1ef41353e..239bd699b0 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -395,10 +395,8 
@@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// @brief Intercept function for urDevicePartition __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_desc_t - *pProperties, ///< [in] Array of partition descriptors. - size_t - DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. + const ur_device_partition_properties_t + *pProperties, ///< [in] Array of partition descriptors. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. @@ -414,14 +412,13 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( return UR_RESULT_ERROR_UNSUPPORTED_FEATURE; } - ur_device_partition_params_t params = {&hDevice, &pProperties, - &DescCount, &NumDevices, + ur_device_partition_params_t params = {&hDevice, &pProperties, &NumDevices, &phSubDevices, &pNumDevicesRet}; uint64_t instance = context.notify_begin(UR_FUNCTION_DEVICE_PARTITION, "urDevicePartition", ¶ms); - ur_result_t result = pfnPartition(hDevice, pProperties, DescCount, - NumDevices, phSubDevices, pNumDevicesRet); + ur_result_t result = pfnPartition(hDevice, pProperties, NumDevices, + phSubDevices, pNumDevicesRet); context.notify_end(UR_FUNCTION_DEVICE_PARTITION, "urDevicePartition", ¶ms, &result, instance); diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index 2b5f5130d1..d595a9872c 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -415,10 +415,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// @brief Intercept function for urDevicePartition __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. 
- const ur_device_partition_desc_t - *pProperties, ///< [in] Array of partition descriptors. - size_t - DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. + const ur_device_partition_properties_t + *pProperties, ///< [in] Array of partition descriptors. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. @@ -442,18 +440,10 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( if (NULL == pProperties) { return UR_RESULT_ERROR_INVALID_NULL_POINTER; } - - if (UR_DEVICE_PARTITION_BY_CSLICE < pProperties->type) { - return UR_RESULT_ERROR_INVALID_ENUMERATION; - } - - if (DescCount == 0) { - return UR_RESULT_ERROR_INVALID_SIZE; - } } - ur_result_t result = pfnPartition(hDevice, pProperties, DescCount, - NumDevices, phSubDevices, pNumDevicesRet); + ur_result_t result = pfnPartition(hDevice, pProperties, NumDevices, + phSubDevices, pNumDevicesRet); return result; } diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index 92586b07bf..b33e9bfa12 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -461,10 +461,8 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( /// @brief Intercept function for urDevicePartition __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_desc_t - *pProperties, ///< [in] Array of partition descriptors. - size_t - DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. + const ur_device_partition_properties_t + *pProperties, ///< [in] Array of partition descriptors. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. 
@@ -487,8 +485,8 @@ __urdlllocal ur_result_t UR_APICALL urDevicePartition( hDevice = reinterpret_cast(hDevice)->handle; // forward to device-platform - result = pfnPartition(hDevice, pProperties, DescCount, NumDevices, - phSubDevices, pNumDevicesRet); + result = pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, + pNumDevicesRet); if (UR_RESULT_SUCCESS != result) { return result; diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index cfdd0869dc..b7bb8c0286 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -548,18 +548,12 @@ ur_result_t UR_APICALL urDeviceRelease( /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_PARTITION_BY_CSLICE < pProperties->type` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// + `DescCount == 0` ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_desc_t - *pProperties, ///< [in] Array of partition descriptors. - size_t - DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. + const ur_device_partition_properties_t + *pProperties, ///< [in] Array of partition descriptors. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. @@ -574,8 +568,8 @@ ur_result_t UR_APICALL urDevicePartition( return UR_RESULT_ERROR_UNINITIALIZED; } - return pfnPartition(hDevice, pProperties, DescCount, NumDevices, - phSubDevices, pNumDevicesRet); + return pfnPartition(hDevice, pProperties, NumDevices, phSubDevices, + pNumDevicesRet); } catch (...) 
{ return exceptionToResult(std::current_exception()); } diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 774fa03edd..2e53de4300 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -454,18 +454,12 @@ ur_result_t UR_APICALL urDeviceRelease( /// + `NULL == hDevice` /// - ::UR_RESULT_ERROR_INVALID_NULL_POINTER /// + `NULL == pProperties` -/// - ::UR_RESULT_ERROR_INVALID_ENUMERATION -/// + `::UR_DEVICE_PARTITION_BY_CSLICE < pProperties->type` /// - ::UR_RESULT_ERROR_DEVICE_PARTITION_FAILED /// - ::UR_RESULT_ERROR_INVALID_DEVICE_PARTITION_COUNT -/// - ::UR_RESULT_ERROR_INVALID_SIZE -/// + `DescCount == 0` ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_desc_t - *pProperties, ///< [in] Array of partition descriptors. - size_t - DescCount, ///< [in] Number of descriptors pointed to by `pProperties`. + const ur_device_partition_properties_t + *pProperties, ///< [in] Array of partition descriptors. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. 
diff --git a/test/conformance/device/urDevicePartition.cpp b/test/conformance/device/urDevicePartition.cpp index 53fed0df28..930396dda9 100644 --- a/test/conformance/device/urDevicePartition.cpp +++ b/test/conformance/device/urDevicePartition.cpp @@ -29,20 +29,26 @@ TEST_F(urDevicePartitionTest, PartitionEquallySuccess) { ASSERT_NO_FATAL_FAILURE(getNumberComputeUnits(device, n_compute_units)); for (uint32_t i = 1; i < n_compute_units; ++i) { - ur_device_partition_desc_t properties = + ur_device_partition_property_t property = uur::makePartitionEquallyDesc(i); + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &property, + 1, + }; + // Get the number of devices that will be created uint32_t n_devices; - ASSERT_SUCCESS(urDevicePartition(device, &properties, 1, 0, nullptr, - &n_devices)); + ASSERT_SUCCESS( + urDevicePartition(device, &properties, 0, nullptr, &n_devices)); ASSERT_NE(n_devices, 0); std::vector sub_devices(n_devices); - ASSERT_SUCCESS( - urDevicePartition(device, &properties, 1, - static_cast(sub_devices.size()), - sub_devices.data(), nullptr)); + ASSERT_SUCCESS(urDevicePartition( + device, &properties, static_cast(sub_devices.size()), + sub_devices.data(), nullptr)); for (auto sub_device : sub_devices) { ASSERT_NE(sub_device, nullptr); ASSERT_SUCCESS(urDeviceRelease(sub_device)); @@ -79,48 +85,53 @@ TEST_F(urDevicePartitionTest, PartitionByCounts) { uint32_t n_cu_across_sub_devices; for (const auto Combination : combinations) { - std::vector properties; + std::vector property_list; switch (Combination) { case Combination::ONE: { n_cu_across_sub_devices = 1; - properties.push_back(uur::makePartitionByCountsDesc(1)); + property_list.push_back(uur::makePartitionByCountsDesc(1)); break; } case Combination::HALF: { n_cu_across_sub_devices = (n_cu_in_device / 2) * 2; - properties.push_back( + property_list.push_back( uur::makePartitionByCountsDesc(n_cu_in_device / 2)); - properties.push_back( + 
property_list.push_back( uur::makePartitionByCountsDesc(n_cu_in_device / 2)); break; } case Combination::ALL_MINUS_ONE: { n_cu_across_sub_devices = n_cu_in_device - 1; - properties.push_back( + property_list.push_back( uur::makePartitionByCountsDesc(n_cu_in_device - 1)); break; } case Combination::ALL: { n_cu_across_sub_devices = n_cu_in_device; - properties.push_back( + property_list.push_back( uur::makePartitionByCountsDesc(n_cu_in_device)); break; } } + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + property_list.data(), + property_list.size(), + }; + // Get the number of devices that will be created uint32_t n_devices; - ASSERT_SUCCESS(urDevicePartition(device, properties.data(), - properties.size(), 0, nullptr, - &n_devices)); - ASSERT_EQ(n_devices, properties.size()); + ASSERT_SUCCESS( + urDevicePartition(device, &properties, 0, nullptr, &n_devices)); + ASSERT_EQ(n_devices, property_list.size()); std::vector sub_devices(n_devices); - ASSERT_SUCCESS( - urDevicePartition(device, properties.data(), properties.size(), - static_cast(sub_devices.size()), - sub_devices.data(), nullptr)); + ASSERT_SUCCESS(urDevicePartition( + device, &properties, static_cast(sub_devices.size()), + sub_devices.data(), nullptr)); uint32_t sum = 0; for (auto sub_device : sub_devices) { @@ -165,18 +176,25 @@ TEST_P(urDevicePartitionAffinityDomainTest, PartitionByAffinityDomain) { continue; } - ur_device_partition_desc_t properties = + ur_device_partition_property_t prop = uur::makePartitionByAffinityDomain(flag); + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &prop, + 1, + }; + // Get the number of devices that will be created uint32_t n_devices = 0; ASSERT_SUCCESS( - urDevicePartition(device, &properties, 1, 0, nullptr, &n_devices)); + urDevicePartition(device, &properties, 0, nullptr, &n_devices)); ASSERT_NE(n_devices, 0); std::vector sub_devices(n_devices); 
ASSERT_SUCCESS(urDevicePartition( - device, &properties, 1, static_cast<uint32_t>(sub_devices.size()), + device, &properties, static_cast<uint32_t>(sub_devices.size()), sub_devices.data(), nullptr)); for (auto sub_device : sub_devices) { @@ -202,11 +220,17 @@ INSTANTIATE_TEST_SUITE_P( }); TEST_F(urDevicePartitionTest, InvalidNullHandleDevice) { - ur_device_partition_desc_t props = uur::makePartitionEquallyDesc(1); + ur_device_partition_property_t prop = uur::makePartitionEquallyDesc(1); + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &prop, + 1, + }; ur_device_handle_t sub_device = nullptr; ASSERT_EQ_RESULT( UR_RESULT_ERROR_INVALID_NULL_HANDLE, - urDevicePartition(nullptr, &props, 1, 1, &sub_device, nullptr)); + urDevicePartition(nullptr, &properties, 1, &sub_device, nullptr)); } TEST_F(urDevicePartitionTest, InvalidNullPointerProperties) { @@ -214,17 +238,7 @@ TEST_F(urDevicePartitionTest, InvalidNullPointerProperties) { ur_device_handle_t sub_device = nullptr; ASSERT_EQ_RESULT( UR_RESULT_ERROR_INVALID_NULL_POINTER, - urDevicePartition(device, nullptr, 1, 1, &sub_device, nullptr)); - } -} - -TEST_F(urDevicePartitionTest, InvalidSizeDescCount) { - ur_device_partition_desc_t props = uur::makePartitionEquallyDesc(1); - for (auto device : devices) { - ur_device_handle_t sub_device = nullptr; - ASSERT_EQ_RESULT( - UR_RESULT_ERROR_INVALID_SIZE, - urDevicePartition(device, &props, 0, 1, &sub_device, nullptr)); + urDevicePartition(device, nullptr, 1, &sub_device, nullptr)); } } @@ -242,22 +256,26 @@ TEST_F(urDevicePartitionTest, SuccessSubSet) { ASSERT_NO_FATAL_FAILURE(getNumberComputeUnits(device, n_compute_units)); // partition for 1 compute unit per sub-device - ur_device_partition_desc_t properties = - uur::makePartitionEquallyDesc(1); + ur_device_partition_property_t prop = uur::makePartitionEquallyDesc(1); + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &prop, + 1,
+ }; // Get the number of devices that will be created uint32_t n_devices; ASSERT_SUCCESS( - urDevicePartition(device, &properties, 1, 0, nullptr, &n_devices)); + urDevicePartition(device, &properties, 0, nullptr, &n_devices)); ASSERT_NE(n_devices, 0); // We can request only a subset of these devices from [0, n_devices] for (size_t subset = 0; subset <= n_devices; ++subset) { std::vector<ur_device_handle_t> sub_devices(subset); - ASSERT_SUCCESS( - urDevicePartition(device, &properties, 1, - static_cast<uint32_t>(sub_devices.size()), - sub_devices.data(), nullptr)); + ASSERT_SUCCESS(urDevicePartition( + device, &properties, static_cast<uint32_t>(sub_devices.size()), + sub_devices.data(), nullptr)); for (auto sub_device : sub_devices) { ASSERT_NE(sub_device, nullptr); ASSERT_SUCCESS(urDeviceRelease(sub_device)); diff --git a/test/conformance/device/urDeviceRelease.cpp b/test/conformance/device/urDeviceRelease.cpp index 57c185ec21..1ed9828d3e 100644 --- a/test/conformance/device/urDeviceRelease.cpp +++ b/test/conformance/device/urDeviceRelease.cpp @@ -25,12 +25,18 @@ TEST_F(urDeviceReleaseTest, Success) { TEST_F(urDeviceReleaseTest, SuccessSubdevices) { for (auto device : devices) { - ur_device_partition_desc_t properties = - uur::makePartitionEquallyDesc(1); + ur_device_partition_property_t prop = uur::makePartitionEquallyDesc(1); + + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &prop, + 1, + }; ur_device_handle_t sub_device; ASSERT_SUCCESS( - urDevicePartition(device, &properties, 1, 1, &sub_device, nullptr)); + urDevicePartition(device, &properties, 1, &sub_device, nullptr)); ASSERT_SUCCESS(urDeviceRetain(sub_device)); diff --git a/test/conformance/device/urDeviceRetain.cpp b/test/conformance/device/urDeviceRetain.cpp index 0f4ccfce35..7e5a31d084 100644 --- a/test/conformance/device/urDeviceRetain.cpp +++ b/test/conformance/device/urDeviceRetain.cpp @@ -26,11 +26,17 @@ TEST_F(urDeviceRetainTest, Success) { TEST_F(urDeviceRetainTest,
SuccessSubdevices) { for (auto device : devices) { - auto properties = uur::makePartitionEquallyDesc(1); + ur_device_partition_property_t prop = uur::makePartitionEquallyDesc(1); + ur_device_partition_properties_t properties{ + UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &prop, + 1, + }; ur_device_handle_t sub_device; ASSERT_SUCCESS( - urDevicePartition(device, &properties, 1, 1, &sub_device, nullptr)); + urDevicePartition(device, &properties, 1, &sub_device, nullptr)); uint32_t prevRefCount = 0; ASSERT_SUCCESS(uur::GetObjectReferenceCount(sub_device, prevRefCount)); diff --git a/test/conformance/testing/include/uur/utils.h b/test/conformance/testing/include/uur/utils.h index 9b8744c209..c0d092dc2d 100644 --- a/test/conformance/testing/include/uur/utils.h +++ b/test/conformance/testing/include/uur/utils.h @@ -318,7 +318,7 @@ GetDevicePartitionAffinityDomainFlags(ur_device_handle_t device, ur_device_affinity_domain_flags_t &flags); ur_result_t GetDevicePartitionType(ur_device_handle_t device, - std::vector<ur_device_partition_desc_t> &type); + std::vector<ur_device_partition_property_t> &type); ur_result_t GetDeviceMaxNumberSubGroups(ur_device_handle_t device, uint32_t &max_sub_groups); ur_result_t @@ -365,11 +365,11 @@ ur_result_t GetDeviceMaxComputeQueueIndices(ur_device_handle_t device, ur_result_t GetDeviceHostPipeRWSupported(ur_device_handle_t device, bool &support); -ur_device_partition_desc_t makePartitionByCountsDesc(uint32_t count); +ur_device_partition_property_t makePartitionByCountsDesc(uint32_t count); -ur_device_partition_desc_t makePartitionEquallyDesc(uint32_t cu_per_device); +ur_device_partition_property_t makePartitionEquallyDesc(uint32_t cu_per_device); -ur_device_partition_desc_t +ur_device_partition_property_t makePartitionByAffinityDomain(ur_device_affinity_domain_flags_t aff_domain); } // namespace uur diff --git a/test/conformance/testing/source/utils.cpp b/test/conformance/testing/source/utils.cpp index 8511129957..e393f93693 100644 --- a/test/conformance/testing/source/utils.cpp +++
b/test/conformance/testing/source/utils.cpp @@ -500,8 +500,8 @@ ur_result_t GetDevicePartitionAffinityDomainFlags( ur_result_t GetDevicePartitionType(ur_device_handle_t device, - std::vector<ur_device_partition_desc_t> &type) { - return GetDeviceVectorInfo<ur_device_partition_desc_t>( + std::vector<ur_device_partition_property_t> &type) { + return GetDeviceVectorInfo<ur_device_partition_property_t>( device, UR_DEVICE_INFO_PARTITION_TYPE, type); } @@ -633,29 +633,24 @@ ur_result_t GetDeviceHostPipeRWSupported(ur_device_handle_t device, device, UR_DEVICE_INFO_HOST_PIPE_READ_WRITE_SUPPORTED, support); } -ur_device_partition_desc_t makePartitionByCountsDesc(uint32_t count) { - ur_device_partition_desc_t desc; - desc.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; - desc.pNext = nullptr; +ur_device_partition_property_t makePartitionByCountsDesc(uint32_t count) { + ur_device_partition_property_t desc; desc.type = UR_DEVICE_PARTITION_BY_COUNTS; desc.value.count = count; return desc; } -ur_device_partition_desc_t makePartitionEquallyDesc(uint32_t cu_per_device) { - ur_device_partition_desc_t desc; - desc.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; - desc.pNext = nullptr; +ur_device_partition_property_t +makePartitionEquallyDesc(uint32_t cu_per_device) { + ur_device_partition_property_t desc; desc.type = UR_DEVICE_PARTITION_EQUALLY; desc.value.equally = cu_per_device; return desc; } -ur_device_partition_desc_t +ur_device_partition_property_t makePartitionByAffinityDomain(ur_device_affinity_domain_flags_t aff_domain) { - ur_device_partition_desc_t desc; - desc.stype = UR_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; - desc.pNext = nullptr; + ur_device_partition_property_t desc; desc.type = UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; desc.value.affinity_domain = aff_domain; return desc; From ff14278a68fa7ee50704289f5c834df443581ee0 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Thu, 25 May 2023 12:09:07 +0100 Subject: [PATCH 3/6] [UR] Fix device partitioning programming guide --- scripts/core/PROG.rst | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git
a/scripts/core/PROG.rst b/scripts/core/PROG.rst index 81aa472039..bbb2209cb9 100644 --- a/scripts/core/PROG.rst +++ b/scripts/core/PROG.rst @@ -117,19 +117,23 @@ fixed part of the parent device, which can explicitly be programmed individually .. parsed-literal:: ${x}_device_handle_t hDevice; - ${x}_device_partition_desc_t properties; - properties.stype = ${X}_STRUCTURE_TYPE_DEVICE_PARTITION_DESC; - properties.pNext = nullptr; - properties.type = ${X}_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; - properties.value.affinity_domain = ${X}_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; + ${x}_device_partition_property_t prop; + prop.type = ${X}_DEVICE_PARTITION_BY_AFFINITY_DOMAIN; + prop.value.affinity_domain = ${X}_DEVICE_AFFINITY_DOMAIN_FLAG_NEXT_PARTITIONABLE; + ${x}_device_partition_properties_t properties{ + ${X}_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES, + nullptr, + &prop, + 1, + }; uint32_t count = 0; std::vector<${x}_device_handle_t> subDevices; - ${x}DevicePartition(hDevice, &properties, 1, 0, nullptr, &count); + ${x}DevicePartition(hDevice, &properties, 0, nullptr, &count); if (count > 0) { subDevices.resize(count); - ${x}DevicePartition(Device, &properties, 1, count, &subDevices.data(), nullptr); + ${x}DevicePartition(hDevice, &properties, count, subDevices.data(), nullptr); } The returned sub-devices may be requested for further partitioning into sub-sub-devices, and so on. @@ -138,7 +142,7 @@ An implementation will return "0" in the count if no further partitioning is sup ..
parsed-literal:: uint32_t count; - ${x}DevicePartition(subDevices[0], &properties, 1, 0, nullptr, &count); + ${x}DevicePartition(subDevices[0], &properties, 0, nullptr, &count); if(count == 0){ // no further partitioning allowed } From b4cba17164b9253c374321f95aa393946a4b5222 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Thu, 25 May 2023 12:22:36 +0100 Subject: [PATCH 4/6] [UR] Fix build and spec --- include/ur.py | 2 +- include/ur_api.h | 2 +- scripts/core/device.yml | 6 +++--- source/adapters/null/ur_nullddi.cpp | 2 +- source/common/ur_params.hpp | 2 +- source/loader/layers/tracing/ur_trcddi.cpp | 2 +- source/loader/layers/validation/ur_valddi.cpp | 2 +- source/loader/ur_ldrddi.cpp | 2 +- source/loader/ur_libapi.cpp | 2 +- source/ur_api.cpp | 2 +- 10 files changed, 12 insertions(+), 12 deletions(-) diff --git a/include/ur.py b/include/ur.py index 32e18a4006..ccbf6fd990 100644 --- a/include/ur.py +++ b/include/ur.py @@ -639,7 +639,7 @@ class ur_device_partition_properties_t(Structure): ("stype", ur_structure_type_t), ## [in] type of this structure, must be ## ::UR_STRUCTURE_TYPE_DEVICE_PARTITION_PROPERTIES ("pNext", c_void_p), ## [in,out][optional] pointer to extension-specific structure - ("pProperties", *), ## [in] Pointer to the beginning of the properties array. + ("pProperties", POINTER(ur_device_partition_property_t)), ## [in] Pointer to the beginning of the properties array. ("PropCount", c_size_t) ## [in] The length of properties pointed to by `pProperties`. ] diff --git a/include/ur_api.h b/include/ur_api.h index 0e60857040..b383ffbee5 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -1116,7 +1116,7 @@ typedef struct ur_device_partition_properties_t { UR_APIEXPORT ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. - const ur_device_partition_properties_t *pProperties, ///< [in] Array of partition descriptors. 
+ const ur_device_partition_properties_t *pProperties, ///< [in] Device partition properties. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t *phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. ///< If NumDevices is less than the number of sub-devices available, then diff --git a/scripts/core/device.yml b/scripts/core/device.yml index 3f42af2e46..9407f418e2 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -540,7 +540,7 @@ name: $x_device_partition_properties_t class: $xDevice base: $x_base_properties_t members: - - type: const $x_device_partition_property_t * + - type: const $x_device_partition_property_t* name: pProperties desc: "[in] Pointer to the beginning of the properties array." - type: size_t @@ -565,10 +565,10 @@ params: name: hDevice desc: | [in] handle of the device to partition. - - type: const $x_device_partition_properties_t* + - type: "const $x_device_partition_properties_t*" name: pProperties desc: | - [in] Array of partition descriptors. + [in] Device partition properties. - type: "uint32_t" name: NumDevices desc: | diff --git a/source/adapters/null/ur_nullddi.cpp b/source/adapters/null/ur_nullddi.cpp index f0ed60d363..3e60c6f4af 100644 --- a/source/adapters/null/ur_nullddi.cpp +++ b/source/adapters/null/ur_nullddi.cpp @@ -354,7 +354,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. const ur_device_partition_properties_t - *pProperties, ///< [in] Array of partition descriptors. + *pProperties, ///< [in] Device partition properties. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. 
diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index 6ee62aaec5..3a2e0c4977 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -3484,7 +3484,7 @@ operator<<(std::ostream &os, os << ", "; os << ".pProperties = "; - os << (params.pProperties); + ur_params::serializePtr(os, (params.pProperties)); os << ", "; os << ".PropCount = "; diff --git a/source/loader/layers/tracing/ur_trcddi.cpp b/source/loader/layers/tracing/ur_trcddi.cpp index 239bd699b0..0c4ae0c7b6 100644 --- a/source/loader/layers/tracing/ur_trcddi.cpp +++ b/source/loader/layers/tracing/ur_trcddi.cpp @@ -396,7 +396,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. const ur_device_partition_properties_t - *pProperties, ///< [in] Array of partition descriptors. + *pProperties, ///< [in] Device partition properties. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. diff --git a/source/loader/layers/validation/ur_valddi.cpp b/source/loader/layers/validation/ur_valddi.cpp index d595a9872c..b130013c24 100644 --- a/source/loader/layers/validation/ur_valddi.cpp +++ b/source/loader/layers/validation/ur_valddi.cpp @@ -416,7 +416,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. const ur_device_partition_properties_t - *pProperties, ///< [in] Array of partition descriptors. + *pProperties, ///< [in] Device partition properties. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. 
diff --git a/source/loader/ur_ldrddi.cpp b/source/loader/ur_ldrddi.cpp index b33e9bfa12..87bef6a410 100644 --- a/source/loader/ur_ldrddi.cpp +++ b/source/loader/ur_ldrddi.cpp @@ -462,7 +462,7 @@ __urdlllocal ur_result_t UR_APICALL urDeviceRelease( __urdlllocal ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. const ur_device_partition_properties_t - *pProperties, ///< [in] Array of partition descriptors. + *pProperties, ///< [in] Device partition properties. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. diff --git a/source/loader/ur_libapi.cpp b/source/loader/ur_libapi.cpp index b7bb8c0286..7f4b86588f 100644 --- a/source/loader/ur_libapi.cpp +++ b/source/loader/ur_libapi.cpp @@ -553,7 +553,7 @@ ur_result_t UR_APICALL urDeviceRelease( ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. const ur_device_partition_properties_t - *pProperties, ///< [in] Array of partition descriptors. + *pProperties, ///< [in] Device partition properties. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. diff --git a/source/ur_api.cpp b/source/ur_api.cpp index 2e53de4300..f71f209d1f 100644 --- a/source/ur_api.cpp +++ b/source/ur_api.cpp @@ -459,7 +459,7 @@ ur_result_t UR_APICALL urDeviceRelease( ur_result_t UR_APICALL urDevicePartition( ur_device_handle_t hDevice, ///< [in] handle of the device to partition. const ur_device_partition_properties_t - *pProperties, ///< [in] Array of partition descriptors. + *pProperties, ///< [in] Device partition properties. uint32_t NumDevices, ///< [in] the number of sub-devices. ur_device_handle_t * phSubDevices, ///< [out][optional][range(0, NumDevices)] array of handle of devices. 
From 43d4b862cbec200fa105847d0886a8eb35edf124 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Thu, 25 May 2023 13:34:22 +0100 Subject: [PATCH 5/6] [UR] Remove BY_COUNTS_LIST_END --- include/ur.py | 1 - include/ur_api.h | 1 - scripts/core/device.yml | 3 --- source/common/ur_params.hpp | 4 ---- 4 files changed, 9 deletions(-) diff --git a/include/ur.py b/include/ur.py index ccbf6fd990..5fd1c91707 100644 --- a/include/ur.py +++ b/include/ur.py @@ -603,7 +603,6 @@ def __str__(self): class ur_device_partition_v(IntEnum): EQUALLY = 0x1086 ## Partition Equally BY_COUNTS = 0x1087 ## Partition by counts - BY_COUNTS_LIST_END = 0x0 ## End of by counts list BY_AFFINITY_DOMAIN = 0x1088 ## Partition by affinity domain BY_CSLICE = 0x1089 ## Partition by c-slice diff --git a/include/ur_api.h b/include/ur_api.h index b383ffbee5..803b92fc6e 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -1047,7 +1047,6 @@ typedef enum ur_device_affinity_domain_flag_t { typedef enum ur_device_partition_t { UR_DEVICE_PARTITION_EQUALLY = 0x1086, ///< Partition Equally UR_DEVICE_PARTITION_BY_COUNTS = 0x1087, ///< Partition by counts - UR_DEVICE_PARTITION_BY_COUNTS_LIST_END = 0x0, ///< End of by counts list UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN = 0x1088, ///< Partition by affinity domain UR_DEVICE_PARTITION_BY_CSLICE = 0x1089, ///< Partition by c-slice /// @cond diff --git a/scripts/core/device.yml b/scripts/core/device.yml index 9407f418e2..b74cb28d94 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -497,9 +497,6 @@ etors: - name: BY_COUNTS desc: "Partition by counts" value: "0x1087" - - name: BY_COUNTS_LIST_END - desc: "End of by counts list" - value: "0x0" - name: BY_AFFINITY_DOMAIN desc: "Partition by affinity domain" value: "0x1088" diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index 3a2e0c4977..f4a68f9b4f 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -3411,10 +3411,6 @@ inline std::ostream 
&operator<<(std::ostream &os, os << "UR_DEVICE_PARTITION_BY_COUNTS"; break; - case UR_DEVICE_PARTITION_BY_COUNTS_LIST_END: - os << "UR_DEVICE_PARTITION_BY_COUNTS_LIST_END"; - break; - case UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN: os << "UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN"; break; From cf739e0a12992f2bfdc648f4c1f9c33c41e864f1 Mon Sep 17 00:00:00 2001 From: Petr Vesely Date: Tue, 30 May 2023 15:24:19 +0100 Subject: [PATCH 6/6] [UR] Rename UR_DEVICE_INFO_PARTITION_PROPERTIES --- include/ur.py | 2 +- include/ur_api.h | 2 +- scripts/core/device.yml | 2 +- source/common/ur_params.hpp | 6 +++--- test/conformance/device/urDeviceGetInfo.cpp | 2 +- test/conformance/testing/source/utils.cpp | 2 +- test/unit/utils/params.cpp | 4 ++-- 7 files changed, 10 insertions(+), 10 deletions(-) diff --git a/include/ur.py b/include/ur.py index 5fd1c91707..96176c34e2 100644 --- a/include/ur.py +++ b/include/ur.py @@ -503,7 +503,7 @@ class ur_device_info_v(IntEnum): PREFERRED_INTEROP_USER_SYNC = 74 ## [::ur_bool_t] prefer user synchronization when sharing object with ## other API PARENT_DEVICE = 75 ## [::ur_device_handle_t] return parent device handle - PARTITION_PROPERTIES = 76 ## [::ur_device_partition_t[]] Returns an array of partition types + SUPPORTED_PARTITIONS = 76 ## [::ur_device_partition_t[]] Returns an array of partition types ## supported by the device PARTITION_MAX_SUB_DEVICES = 77 ## [uint32_t] maximum number of sub-devices when the device is ## partitioned diff --git a/include/ur_api.h b/include/ur_api.h index 803b92fc6e..e3b14ab152 100644 --- a/include/ur_api.h +++ b/include/ur_api.h @@ -852,7 +852,7 @@ typedef enum ur_device_info_t { UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC = 74, ///< [::ur_bool_t] prefer user synchronization when sharing object with ///< other API UR_DEVICE_INFO_PARENT_DEVICE = 75, ///< [::ur_device_handle_t] return parent device handle - UR_DEVICE_INFO_PARTITION_PROPERTIES = 76, ///< [::ur_device_partition_t[]] Returns an array of partition types + 
UR_DEVICE_INFO_SUPPORTED_PARTITIONS = 76, ///< [::ur_device_partition_t[]] Returns an array of partition types ///< supported by the device UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES = 77, ///< [uint32_t] maximum number of sub-devices when the device is ///< partitioned diff --git a/scripts/core/device.yml b/scripts/core/device.yml index b74cb28d94..16579318c8 100644 --- a/scripts/core/device.yml +++ b/scripts/core/device.yml @@ -300,7 +300,7 @@ etors: desc: "[$x_bool_t] prefer user synchronization when sharing object with other API" - name: PARENT_DEVICE desc: "[$x_device_handle_t] return parent device handle" - - name: PARTITION_PROPERTIES + - name: SUPPORTED_PARTITIONS desc: "[$x_device_partition_t[]] Returns an array of partition types supported by the device" - name: PARTITION_MAX_SUB_DEVICES desc: "[uint32_t] maximum number of sub-devices when the device is partitioned" diff --git a/source/common/ur_params.hpp b/source/common/ur_params.hpp index f4a68f9b4f..a17b9d88f1 100644 --- a/source/common/ur_params.hpp +++ b/source/common/ur_params.hpp @@ -1583,8 +1583,8 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) { os << "UR_DEVICE_INFO_PARENT_DEVICE"; break; - case UR_DEVICE_INFO_PARTITION_PROPERTIES: - os << "UR_DEVICE_INFO_PARTITION_PROPERTIES"; + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: + os << "UR_DEVICE_INFO_SUPPORTED_PARTITIONS"; break; case UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES: @@ -2750,7 +2750,7 @@ inline void serializeTagged(std::ostream &os, const void *ptr, os << ")"; } break; - case UR_DEVICE_INFO_PARTITION_PROPERTIES: { + case UR_DEVICE_INFO_SUPPORTED_PARTITIONS: { const ur_device_partition_t *tptr = (const ur_device_partition_t *)ptr; os << "{"; diff --git a/test/conformance/device/urDeviceGetInfo.cpp b/test/conformance/device/urDeviceGetInfo.cpp index 4d80e93810..c8b13dffc2 100644 --- a/test/conformance/device/urDeviceGetInfo.cpp +++ b/test/conformance/device/urDeviceGetInfo.cpp @@ -198,7 +198,7 @@ 
INSTANTIATE_TEST_SUITE_P( UR_DEVICE_INFO_PRINTF_BUFFER_SIZE, // UR_DEVICE_INFO_PREFERRED_INTEROP_USER_SYNC, // UR_DEVICE_INFO_PARENT_DEVICE, // - UR_DEVICE_INFO_PARTITION_PROPERTIES, // + UR_DEVICE_INFO_SUPPORTED_PARTITIONS, // UR_DEVICE_INFO_PARTITION_MAX_SUB_DEVICES, // UR_DEVICE_INFO_PARTITION_AFFINITY_DOMAIN, // UR_DEVICE_INFO_PARTITION_TYPE, // diff --git a/test/conformance/testing/source/utils.cpp b/test/conformance/testing/source/utils.cpp index e393f93693..e6a00235e7 100644 --- a/test/conformance/testing/source/utils.cpp +++ b/test/conformance/testing/source/utils.cpp @@ -483,7 +483,7 @@ ur_result_t GetDevicePartitionProperties(ur_device_handle_t device, std::vector &properties) { return GetDeviceVectorInfo( - device, UR_DEVICE_INFO_PARTITION_PROPERTIES, properties); + device, UR_DEVICE_INFO_SUPPORTED_PARTITIONS, properties); } ur_result_t GetDevicePartitionMaxSubDevices(ur_device_handle_t device, diff --git a/test/unit/utils/params.cpp b/test/unit/utils/params.cpp index 578ed544ae..7ee0739ace 100644 --- a/test/unit/utils/params.cpp +++ b/test/unit/utils/params.cpp @@ -243,14 +243,14 @@ struct UrDeviceGetInfoParamsPartitionArray : UrDeviceGetInfoParams { UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, UR_DEVICE_PARTITION_BY_CSLICE}; UrDeviceGetInfoParamsPartitionArray() : UrDeviceGetInfoParams() { - propName = UR_DEVICE_INFO_PARTITION_PROPERTIES; + propName = UR_DEVICE_INFO_SUPPORTED_PARTITIONS; pPropValue = &props; propSize = sizeof(props); propSizeRet = sizeof(props); } const char *get_expected() { return ".hDevice = nullptr, .propName = " - "UR_DEVICE_INFO_PARTITION_PROPERTIES, .propSize " + "UR_DEVICE_INFO_SUPPORTED_PARTITIONS, .propSize " "= 12, .pPropValue = \\{UR_DEVICE_PARTITION_BY_COUNTS, " "UR_DEVICE_PARTITION_BY_AFFINITY_DOMAIN, " "UR_DEVICE_PARTITION_BY_CSLICE\\}, .pPropSizeRet = .+ "