From f39605035a73514b910f16363ab02c5fa91bb5c1 Mon Sep 17 00:00:00 2001 From: "Sabianin, Maksim" Date: Fri, 24 Feb 2023 05:08:46 -0800 Subject: [PATCH 1/3] [SYCL] Add forward declarations for Subgroup Shuffle functions --- sycl/include/sycl/detail/spirv.hpp | 109 ++++++++++++++++++----------- 1 file changed, 67 insertions(+), 42 deletions(-) diff --git a/sycl/include/sycl/detail/spirv.hpp b/sycl/include/sycl/detail/spirv.hpp index 4650d7a58bbbb..082154aa13adf 100644 --- a/sycl/include/sycl/detail/spirv.hpp +++ b/sycl/include/sycl/detail/spirv.hpp @@ -537,6 +537,48 @@ using EnableIfVectorShuffle = std::enable_if_t::value, T>; #endif // ifndef __NVPTX__ +// Bitcast shuffles can be implemented using a single SubgroupShuffle +// intrinsic, but require type-punning via an appropriate integer type +#ifndef __NVPTX__ +template +using EnableIfBitcastShuffle = + detail::enable_if_t::value && + (std::is_trivially_copyable::value && + (sizeof(T) == 1 || sizeof(T) == 2 || + sizeof(T) == 4 || sizeof(T) == 8)), + T>; +#else +template +using EnableIfBitcastShuffle = detail::enable_if_t< + !(std::is_integral::value && (sizeof(T) <= sizeof(int32_t))) && + !detail::is_vector_arithmetic::value && + (std::is_trivially_copyable::value && + (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), + T>; +#endif // ifndef __NVPTX__ + +// Generic shuffles may require multiple calls to SubgroupShuffle +// intrinsics, and should use the fewest shuffles possible: +// - Loop over 64-bit chunks until remaining bytes < 64-bit +// - At most one 32-bit, 16-bit and 8-bit chunk left over +#ifndef __NVPTX__ +template +using EnableIfGenericShuffle = + detail::enable_if_t::value && + !(std::is_trivially_copyable::value && + (sizeof(T) == 1 || sizeof(T) == 2 || + sizeof(T) == 4 || sizeof(T) == 8)), + T>; +#else +template +using EnableIfGenericShuffle = detail::enable_if_t< + !(std::is_integral::value && (sizeof(T) <= sizeof(int32_t))) && + !detail::is_vector_arithmetic::value && + 
!(std::is_trivially_copyable::value && + (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), + T>; +#endif + #ifdef __NVPTX__ inline uint32_t membermask() { // use a full mask as sync operations are required to be convergent and exited @@ -545,6 +587,31 @@ inline uint32_t membermask() { } #endif +// Forward declarations for the template overloads defined below +template +EnableIfBitcastShuffle SubgroupShuffle(T x, id<1> local_id); + +template +EnableIfBitcastShuffle SubgroupShuffleXor(T x, id<1> local_id); + +template +EnableIfBitcastShuffle SubgroupShuffleDown(T x, uint32_t delta); + +template +EnableIfBitcastShuffle SubgroupShuffleUp(T x, uint32_t delta); + +template +EnableIfGenericShuffle SubgroupShuffle(T x, id<1> local_id); + +template +EnableIfGenericShuffle SubgroupShuffleXor(T x, id<1> local_id); + +template +EnableIfGenericShuffle SubgroupShuffleDown(T x, uint32_t delta); + +template +EnableIfGenericShuffle SubgroupShuffleUp(T x, uint32_t delta); + template EnableIfNativeShuffle SubgroupShuffle(T x, id<1> local_id) { #ifndef __NVPTX__ @@ -623,26 +690,6 @@ EnableIfVectorShuffle SubgroupShuffleUp(T x, uint32_t delta) { return result; } -// Bitcast shuffles can be implemented using a single SubgroupShuffle -// intrinsic, but require type-punning via an appropriate integer type -#ifndef __NVPTX__ -template -using EnableIfBitcastShuffle = - detail::enable_if_t::value && - (std::is_trivially_copyable::value && - (sizeof(T) == 1 || sizeof(T) == 2 || - sizeof(T) == 4 || sizeof(T) == 8)), - T>; -#else -template -using EnableIfBitcastShuffle = detail::enable_if_t< - !(std::is_integral::value && (sizeof(T) <= sizeof(int32_t))) && - !detail::is_vector_arithmetic::value && - (std::is_trivially_copyable::value && - (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), - T>; -#endif - template using ConvertToNativeShuffleType_t = select_cl_scalar_integral_unsigned_t; @@ -699,28 +746,6 @@ EnableIfBitcastShuffle SubgroupShuffleUp(T x, uint32_t delta) { return bit_cast(Result); } -// 
Generic shuffles may require multiple calls to SubgroupShuffle -// intrinsics, and should use the fewest shuffles possible: -// - Loop over 64-bit chunks until remaining bytes < 64-bit -// - At most one 32-bit, 16-bit and 8-bit chunk left over -#ifndef __NVPTX__ -template -using EnableIfGenericShuffle = - detail::enable_if_t::value && - !(std::is_trivially_copyable::value && - (sizeof(T) == 1 || sizeof(T) == 2 || - sizeof(T) == 4 || sizeof(T) == 8)), - T>; -#else -template -using EnableIfGenericShuffle = detail::enable_if_t< - !(std::is_integral::value && (sizeof(T) <= sizeof(int32_t))) && - !detail::is_vector_arithmetic::value && - !(std::is_trivially_copyable::value && - (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), - T>; -#endif - template EnableIfGenericShuffle SubgroupShuffle(T x, id<1> local_id) { T Result; From f568ca26447e03a494ca10ec5f6b382beb043a30 Mon Sep 17 00:00:00 2001 From: "Sabianin, Maksim" Date: Mon, 27 Feb 2023 03:55:23 -0800 Subject: [PATCH 2/3] [SYCL] use std::enable_if_t instead of detail::enable_if_t --- sycl/include/sycl/detail/spirv.hpp | 39 +++++++++++++++--------------- 1 file changed, 20 insertions(+), 19 deletions(-) diff --git a/sycl/include/sycl/detail/spirv.hpp b/sycl/include/sycl/detail/spirv.hpp index 082154aa13adf..d81a0df4a55b9 100644 --- a/sycl/include/sycl/detail/spirv.hpp +++ b/sycl/include/sycl/detail/spirv.hpp @@ -542,19 +542,20 @@ using EnableIfVectorShuffle = #ifndef __NVPTX__ template using EnableIfBitcastShuffle = - detail::enable_if_t::value && - (std::is_trivially_copyable::value && - (sizeof(T) == 1 || sizeof(T) == 2 || - sizeof(T) == 4 || sizeof(T) == 8)), - T>; + std::enable_if_t && + (std::is_trivially_copyable_v && + (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || + sizeof(T) == 8)), + T>; #else template -using EnableIfBitcastShuffle = detail::enable_if_t< - !(std::is_integral::value && (sizeof(T) <= sizeof(int32_t))) && - !detail::is_vector_arithmetic::value && - (std::is_trivially_copyable::value 
&& - (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), - T>; +using EnableIfBitcastShuffle = + std::enable_if_t && + (sizeof(T) <= sizeof(int32_t))) && + !detail::is_vector_arithmetic_v && + (std::is_trivially_copyable_v && + (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), + T>; #endif // ifndef __NVPTX__ // Generic shuffles may require multiple calls to SubgroupShuffle @@ -564,17 +565,17 @@ using EnableIfBitcastShuffle = detail::enable_if_t< #ifndef __NVPTX__ template using EnableIfGenericShuffle = - detail::enable_if_t::value && - !(std::is_trivially_copyable::value && - (sizeof(T) == 1 || sizeof(T) == 2 || - sizeof(T) == 4 || sizeof(T) == 8)), - T>; + std::enable_if_t && + !(std::is_trivially_copyable_v && + (sizeof(T) == 1 || sizeof(T) == 2 || + sizeof(T) == 4 || sizeof(T) == 8)), + T>; #else template -using EnableIfGenericShuffle = detail::enable_if_t< +using EnableIfGenericShuffle = std::enable_if_t< !(std::is_integral::value && (sizeof(T) <= sizeof(int32_t))) && - !detail::is_vector_arithmetic::value && - !(std::is_trivially_copyable::value && + !detail::is_vector_arithmetic_v && + !(std::is_trivially_copyable_v && (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), T>; #endif From b9bde57dd23fa0468e129ac00299a5a5e7486137 Mon Sep 17 00:00:00 2001 From: "Sabianin, Maksim" Date: Mon, 27 Feb 2023 05:09:00 -0800 Subject: [PATCH 3/3] [SYCL] use detail::is_arithmetic::value instead of detail::is_arithmetic_v --- sycl/include/sycl/detail/spirv.hpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/sycl/include/sycl/detail/spirv.hpp b/sycl/include/sycl/detail/spirv.hpp index d81a0df4a55b9..a20eec899d95a 100644 --- a/sycl/include/sycl/detail/spirv.hpp +++ b/sycl/include/sycl/detail/spirv.hpp @@ -542,7 +542,7 @@ using EnableIfVectorShuffle = #ifndef __NVPTX__ template using EnableIfBitcastShuffle = - std::enable_if_t && + std::enable_if_t::value && (std::is_trivially_copyable_v && (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 
4 || sizeof(T) == 8)), @@ -552,7 +552,7 @@ template using EnableIfBitcastShuffle = std::enable_if_t && (sizeof(T) <= sizeof(int32_t))) && - !detail::is_vector_arithmetic_v && + !detail::is_vector_arithmetic::value && (std::is_trivially_copyable_v && (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), T>; @@ -565,7 +565,7 @@ using EnableIfBitcastShuffle = #ifndef __NVPTX__ template using EnableIfGenericShuffle = - std::enable_if_t && + std::enable_if_t::value && !(std::is_trivially_copyable_v && (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4 || sizeof(T) == 8)), @@ -574,7 +574,7 @@ using EnableIfGenericShuffle = template using EnableIfGenericShuffle = std::enable_if_t< !(std::is_integral::value && (sizeof(T) <= sizeof(int32_t))) && - !detail::is_vector_arithmetic_v && + !detail::is_vector_arithmetic::value && !(std::is_trivially_copyable_v && (sizeof(T) == 1 || sizeof(T) == 2 || sizeof(T) == 4)), T>;