From 261fde43e168aa81342d593a1591be0f784d1cde Mon Sep 17 00:00:00 2001
From: "Larsen, Steffen"
Date: Thu, 9 Jun 2022 04:55:48 -0700
Subject: [PATCH 1/3] [SYCL][CUDA][HIP][libclc] Rename libclc NonUniform
 binops to KHR extensions

https://github.com/intel/llvm/pull/5705 changed the use of the NonUniform
group binary operators to the KHR extension operators. This patch renames
the corresponding implementations in libclc, for both amdgcn and PTX, to
match the new naming.

Signed-off-by: Larsen, Steffen
---
 .../libspirv/group/collectives.cl | 20 ++++----
 .../libspirv/group/collectives.cl | 47 +++++++++----------
 2 files changed, 33 insertions(+), 34 deletions(-)
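Reviewer note: the first macro argument is pasted into the name of the
generated __spirv_Group* entry point (compare the FAdd instantiations with
the mangled _Z17__spirv_GroupFAddjjDF16_ entry point further down), so this
rename switches the builtins from the SPIR-V non-uniform instruction names
to the SPV_KHR_uniform_group_instructions names (OpGroupIMulKHR,
OpGroupFMulKHR, OpGroupBitwiseAndKHR, and so on). A rough before/after
sketch for the int multiply case; the unmangled signatures are for
illustration only, following the (uint scope, uint op, T x) shape used
elsewhere in these files:

  // Before: named after the group non-uniform arithmetic instruction.
  int __spirv_GroupNonUniformIMul(uint scope, uint op, int x);
  // After: named after OpGroupIMulKHR from SPV_KHR_uniform_group_instructions.
  int __spirv_GroupIMulKHR(uint scope, uint op, int x);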
diff --git a/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl b/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
index 72d9e1971ec58..d9eef4163edc8 100644
--- a/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
+++ b/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
@@ -235,16 +235,16 @@ __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)
 
 // There is no Mul group op in SPIR-V, use non-uniform variant instead.
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, char, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uchar, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, short, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ushort, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, int, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uint, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, long, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ulong, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, float, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, double, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, char, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uchar, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, short, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, ushort, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, int, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uint, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, long, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, ulong, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, float, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, double, 1)
 
 __CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
 __CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
diff --git a/libclc/ptx-nvidiacl/libspirv/group/collectives.cl b/libclc/ptx-nvidiacl/libspirv/group/collectives.cl
index 7d1761f31f6aa..65b8f5e7b9742 100644
--- a/libclc/ptx-nvidiacl/libspirv/group/collectives.cl
+++ b/libclc/ptx-nvidiacl/libspirv/group/collectives.cl
@@ -249,12 +249,12 @@ __CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, half, -HALF_MAX)
 __CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
 __CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)
 
-__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseAnd, __CLC_AND, and, uint, ~0)
-__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseOr, __CLC_OR, or, uint, 0)
-__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseXor, __CLC_XOR, xor, uint, 0)
-__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseAnd, __CLC_AND, and, int, ~0)
-__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseOr, __CLC_OR, or, int, 0)
-__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseXor, __CLC_XOR, xor, int, 0)
+__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, uint, ~0)
+__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, uint, 0)
+__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, uint, 0)
+__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, int, ~0)
+__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, int, 0)
+__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, int, 0)
 
 #undef __CLC_SUBGROUP_COLLECTIVE_BODY
 #undef __CLC_SUBGROUP_COLLECTIVE
@@ -340,18 +340,17 @@ __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, half, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)
 
-// There is no Mul group op in SPIR-V, use non-uniform variant instead.
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, char, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uchar, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, short, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ushort, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, int, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uint, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, long, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ulong, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, half, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, float, 1)
-__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, double, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, char, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uchar, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, short, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, ushort, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, int, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uint, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, long, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, ulong, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, half, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, float, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, double, 1)
 
 __CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
 __CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
@@ -377,12 +376,12 @@ __CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, half, -HALF_MAX)
 __CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
 __CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)
 
-__CLC_GROUP_COLLECTIVE(NonUniformBitwiseAnd, __CLC_AND, uint, ~0)
-__CLC_GROUP_COLLECTIVE(NonUniformBitwiseOr, __CLC_OR, uint, 0)
-__CLC_GROUP_COLLECTIVE(NonUniformBitwiseXor, __CLC_XOR, uint, 0)
-__CLC_GROUP_COLLECTIVE(NonUniformBitwiseAnd, __CLC_AND, int, ~0)
-__CLC_GROUP_COLLECTIVE(NonUniformBitwiseOr, __CLC_OR, int, 0)
-__CLC_GROUP_COLLECTIVE(NonUniformBitwiseXor, __CLC_XOR, int, 0)
+__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, uint, ~0)
+__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, uint, 0)
+__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, uint, 0)
+__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, int, ~0)
+__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, int, 0)
+__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, int, 0)
 
 // half requires additional mangled entry points
 _CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFAddjjDF16_(uint scope, uint op,

From 893b5927ea871b414d3081fa0ab61212cee23498 Mon Sep 17 00:00:00 2001
From: "Larsen, Steffen"
Date: Thu, 9 Jun 2022 07:20:01 -0700
Subject: [PATCH 2/3] Remove old comment

Signed-off-by: Larsen, Steffen
---
 libclc/amdgcn-amdhsa/libspirv/group/collectives.cl | 1 -
 1 file changed, 1 deletion(-)

diff --git a/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl b/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
index d9eef4163edc8..8a8eb55e17fe8 100644
--- a/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
+++ b/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
@@ -234,7 +234,6 @@ __CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)
 
-// There is no Mul group op in SPIR-V, use non-uniform variant instead.
 __CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, char, 1)
 __CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uchar, 1)
 __CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, short, 1)

From 429ee0ab79b467ba1afa097a340c1debff93fa38 Mon Sep 17 00:00:00 2001
From: "Larsen, Steffen"
Date: Thu, 9 Jun 2022 07:30:17 -0700
Subject: [PATCH 3/3] Rename CLC subgroup collectives

Signed-off-by: Larsen, Steffen
---
 .../libspirv/group/collectives.cl | 40 ++++++++---------
 .../libspirv/group/collectives.cl | 44 +++++++++----------
 2 files changed, 42 insertions(+), 42 deletions(-)
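Note: with the CLC-level subgroup helpers now using the KHR spellings as
well, __CLC_GROUP_COLLECTIVE no longer needs separate SPIR-V and CLC
operation names (IMulKHR vs. IMul), so the redundant second argument is
dropped and the macro returns to its four-argument form. A minimal sketch
of why a single name suffices; the macro body and the __clc__Subgroup##OP
helper spelling are hypothetical, for illustration only:

  /* Not the verbatim macro from collectives.cl; the real one also emits
   * scan variants and the per-type mangled overloads. */
  #define __CLC_GROUP_COLLECTIVE_SKETCH(OP, TYPE)                   \
    TYPE __clc__Subgroup##OP(uint scope, uint op, TYPE x);          \
    _CLC_DEF _CLC_CONVERGENT TYPE __spirv_Group##OP(                \
        uint scope, uint op, TYPE x) {                              \
      /* Delegate to the sub-group collective of the same name. */  \
      return __clc__Subgroup##OP(scope, op, x);                     \
    }

  __CLC_GROUP_COLLECTIVE_SKETCH(IMulKHR, int) /* -> __spirv_GroupIMulKHR */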
diff --git a/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl b/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
index 8a8eb55e17fe8..365dbacd882e9 100644
--- a/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
+++ b/libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
@@ -116,16 +116,16 @@ __CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, m, 0)
 __CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, f, 0)
 __CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, d, 0)
 
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, char, a, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uchar, h, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, short, s, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ushort, t, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, int, i, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uint, j, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, long, l, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ulong, m, 1)
-__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, float, f, 1)
-__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, double, d, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, a, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, h, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, s, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, t, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, i, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, j, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, l, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, m, 1)
+__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, f, 1)
+__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, d, 1)
 
 __CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, a, CHAR_MAX)
 __CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, h, UCHAR_MAX)
@@ -234,16 +234,16 @@ __CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)
 
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, char, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uchar, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, short, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, ushort, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, int, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uint, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, long, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, ulong, 1)
-__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, float, 1)
-__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, double, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, 1)
 
 __CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
 __CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
diff --git a/libclc/ptx-nvidiacl/libspirv/group/collectives.cl b/libclc/ptx-nvidiacl/libspirv/group/collectives.cl
index 65b8f5e7b9742..13b4f0d8c23c2 100644
--- a/libclc/ptx-nvidiacl/libspirv/group/collectives.cl
+++ b/libclc/ptx-nvidiacl/libspirv/group/collectives.cl
@@ -213,17 +213,17 @@ __CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, half, 0)
 __CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
 __CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)
 
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, char, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uchar, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, short, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ushort, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, int, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uint, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, long, 1)
-__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ulong, 1)
-__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, half, 1)
-__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, float, 1)
-__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, double, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, 1)
+__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, 1)
+__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, half, 1)
+__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, 1)
+__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, 1)
 
 __CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
 __CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
@@ -340,17 +340,17 @@ __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, half, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
 __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)
 
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, char, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uchar, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, short, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, ushort, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, int, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, uint, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, long, 1)
-__CLC_GROUP_COLLECTIVE(IMulKHR, IMul, __CLC_MUL, ulong, 1)
-__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, half, 1)
-__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, float, 1)
-__CLC_GROUP_COLLECTIVE(FMulKHR, FMul, __CLC_MUL, double, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, 1)
+__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, half, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, 1)
+__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, 1)
 
 __CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
 __CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)