Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 20 additions & 21 deletions libclc/amdgcn-amdhsa/libspirv/group/collectives.cl
Original file line number Diff line number Diff line change
Expand Up @@ -116,16 +116,16 @@ __CLC_SUBGROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, m, 0)
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, f, 0)
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, d, 0)

__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, char, a, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uchar, h, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, short, s, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ushort, t, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, int, i, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uint, j, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, long, l, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ulong, m, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, float, f, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, double, d, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, a, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, h, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, s, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, t, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, i, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, j, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, l, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, m, 1)
__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, f, 1)
__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, d, 1)

__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, a, CHAR_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, h, UCHAR_MAX)
Expand Down Expand Up @@ -234,17 +234,16 @@ __CLC_GROUP_COLLECTIVE(IAdd, __CLC_ADD, ulong, 0)
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)

// There is no Mul group op in SPIR-V, use non-uniform variant instead.
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, char, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uchar, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, short, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ushort, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, int, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uint, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, long, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ulong, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, float, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, double, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, 1)
__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, 1)
__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, 1)

__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
Expand Down
69 changes: 34 additions & 35 deletions libclc/ptx-nvidiacl/libspirv/group/collectives.cl
Original file line number Diff line number Diff line change
Expand Up @@ -213,17 +213,17 @@ __CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, half, 0)
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
__CLC_SUBGROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)

__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, char, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uchar, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, short, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ushort, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, int, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, uint, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, long, 1)
__CLC_SUBGROUP_COLLECTIVE(IMul, __CLC_MUL, ulong, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, half, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, float, 1)
__CLC_SUBGROUP_COLLECTIVE(FMul, __CLC_MUL, double, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, 1)
__CLC_SUBGROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, 1)
__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, half, 1)
__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, 1)
__CLC_SUBGROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, 1)

__CLC_SUBGROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
__CLC_SUBGROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
Expand All @@ -249,12 +249,12 @@ __CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, half, -HALF_MAX)
__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
__CLC_SUBGROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)

__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseAnd, __CLC_AND, and, uint, ~0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseOr, __CLC_OR, or, uint, 0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseXor, __CLC_XOR, xor, uint, 0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseAnd, __CLC_AND, and, int, ~0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseOr, __CLC_OR, or, int, 0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(NonUniformBitwiseXor, __CLC_XOR, xor, int, 0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, uint, ~0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, uint, 0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, uint, 0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseAndKHR, __CLC_AND, and, int, ~0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseOrKHR, __CLC_OR, or, int, 0)
__CLC_SUBGROUP_COLLECTIVE_REDUX(BitwiseXorKHR, __CLC_XOR, xor, int, 0)

#undef __CLC_SUBGROUP_COLLECTIVE_BODY
#undef __CLC_SUBGROUP_COLLECTIVE
Expand Down Expand Up @@ -340,18 +340,17 @@ __CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, half, 0)
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, float, 0)
__CLC_GROUP_COLLECTIVE(FAdd, __CLC_ADD, double, 0)

// There is no Mul group op in SPIR-V, use non-uniform variant instead.
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, char, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uchar, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, short, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ushort, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, int, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, uint, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, long, 1)
__CLC_GROUP_COLLECTIVE(NonUniformIMul, IMul, __CLC_MUL, ulong, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, half, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, float, 1)
__CLC_GROUP_COLLECTIVE(NonUniformFMul, FMul, __CLC_MUL, double, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, char, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uchar, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, short, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ushort, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, int, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, uint, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, long, 1)
__CLC_GROUP_COLLECTIVE(IMulKHR, __CLC_MUL, ulong, 1)
__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, half, 1)
__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, float, 1)
__CLC_GROUP_COLLECTIVE(FMulKHR, __CLC_MUL, double, 1)

__CLC_GROUP_COLLECTIVE(SMin, __CLC_MIN, char, CHAR_MAX)
__CLC_GROUP_COLLECTIVE(UMin, __CLC_MIN, uchar, UCHAR_MAX)
Expand All @@ -377,12 +376,12 @@ __CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, half, -HALF_MAX)
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, float, -FLT_MAX)
__CLC_GROUP_COLLECTIVE(FMax, __CLC_MAX, double, -DBL_MAX)

__CLC_GROUP_COLLECTIVE(NonUniformBitwiseAnd, __CLC_AND, uint, ~0)
__CLC_GROUP_COLLECTIVE(NonUniformBitwiseOr, __CLC_OR, uint, 0)
__CLC_GROUP_COLLECTIVE(NonUniformBitwiseXor, __CLC_XOR, uint, 0)
__CLC_GROUP_COLLECTIVE(NonUniformBitwiseAnd, __CLC_AND, int, ~0)
__CLC_GROUP_COLLECTIVE(NonUniformBitwiseOr, __CLC_OR, int, 0)
__CLC_GROUP_COLLECTIVE(NonUniformBitwiseXor, __CLC_XOR, int, 0)
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, uint, ~0)
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, uint, 0)
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, uint, 0)
__CLC_GROUP_COLLECTIVE(BitwiseAndKHR, __CLC_AND, int, ~0)
__CLC_GROUP_COLLECTIVE(BitwiseOrKHR, __CLC_OR, int, 0)
__CLC_GROUP_COLLECTIVE(BitwiseXorKHR, __CLC_XOR, int, 0)

// half requires additional mangled entry points
_CLC_DEF _CLC_CONVERGENT half _Z17__spirv_GroupFAddjjDF16_(uint scope, uint op,
Expand Down