-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang] Add elementwise fshl/fshr builtins #153113
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
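@llvm/pr-subscribers-clang-codegen Author: Chaitanya Koparkar (ckoparkar) Changes: This patch implements the `__builtin_elementwise_fshl` and `__builtin_elementwise_fshr` builtins. These map to the fshl/fshr intrinsics described here: https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic and https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic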
Fixes #152555. Full diff: https://github.com/llvm/llvm-project/pull/153113.diff 6 Files Affected:
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index b5bb198ca637a..ed9b334efc236 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -859,6 +859,15 @@ of different sizes and signs is forbidden in binary and ternary builtins.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
for the comparison.
+T __builtin_elementwise_fshl(T x, T y, T z) perform a funnel shift left. Concatenate x and y (x is the most integer types
+ significant bits of the wide value), the combined value is shifted
+ left by z, and the most significant bits are extracted to produce
+ a result that is the same size as the original arguments.
+
+T __builtin_elementwise_fshr(T x, T y, T z) perform a funnel shift right. Concatenate x and y (x is the most integer types
+ significant bits of the wide value), the combined value is shifted
+ right by z, and the least significant bits are extracted to produce
+ a result that is the same size as the original arguments.
============================================== ====================================================================== =========================================
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index c81714e9b009d..b3b2591a94fe3 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1514,6 +1514,18 @@ def ElementwiseSubSat : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseFshl : Builtin {
+ let Spellings = ["__builtin_elementwise_fshl"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
+def ElementwiseFshr : Builtin {
+ let Spellings = ["__builtin_elementwise_fshr"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ReduceMax : Builtin {
let Spellings = ["__builtin_reduce_max"];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 071667ac772e8..dd154e5e36230 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4030,6 +4030,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_fma:
return RValue::get(
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fma));
+ case Builtin::BI__builtin_elementwise_fshl:
+ return RValue::get(
+ emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshl));
+ case Builtin::BI__builtin_elementwise_fshr:
+ return RValue::get(
+ emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshr));
+
case Builtin::BI__builtin_elementwise_add_sat:
case Builtin::BI__builtin_elementwise_sub_sat: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 9ecee18661340..db22885d14c37 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3031,6 +3031,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
EltwiseBuiltinArgTyRestriction::IntegerTy))
return ExprError();
break;
+ case Builtin::BI__builtin_elementwise_fshl:
+ case Builtin::BI__builtin_elementwise_fshr:
+ if (BuiltinElementwiseTernaryMath(TheCall,
+ EltwiseBuiltinArgTyRestriction::IntegerTy))
+ return ExprError();
+ break;
case Builtin::BI__builtin_elementwise_min:
case Builtin::BI__builtin_elementwise_max:
if (BuiltinElementwiseMath(TheCall))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index ee8345ff51e5e..d0fc6a6b321fb 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -1176,3 +1176,89 @@ void test_builtin_elementwise_fma(float f32, double f64,
half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);
}
+
+void test_builtin_elementwise_fshl(long long int i1, long long int i2,
+ long long int i3, unsigned short us1,
+ unsigned short us2, unsigned short us3,
+ char c1, char c2, char c3,
+ unsigned char uc1, unsigned char uc2,
+ unsigned char uc3, si8 vi1, si8 vi2,
+ si8 vi3, u4 vu1, u4 vu2, u4 vu3) {
+ // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr
+ // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr
+ // CHECK-NEXT: [[I3:%.+]] = load i64, ptr %i3.addr
+ // CHECK-NEXT: [[I4:%.+]] = call i64 @llvm.fshl.i64(i64 [[I1]], i64 [[I2]], i64 [[I3]])
+ // CHECK-NEXT: store i64 [[I4]], ptr %tmp_lli_l
+ // CHECK-NEXT: [[I5:%.+]] = load i64, ptr %i1.addr
+ // CHECK-NEXT: [[I6:%.+]] = load i64, ptr %i2.addr
+ // CHECK-NEXT: [[I7:%.+]] = load i64, ptr %i3.addr
+ // CHECK-NEXT: [[I8:%.+]] = call i64 @llvm.fshr.i64(i64 [[I5]], i64 [[I6]], i64 [[I7]])
+ // CHECK-NEXT: store i64 [[I8]], ptr %tmp_lli_r
+ long long int tmp_lli_l = __builtin_elementwise_fshl(i1, i2, i3);
+ long long int tmp_lli_r = __builtin_elementwise_fshr(i1, i2, i3);
+
+ // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr
+ // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr
+ // CHECK-NEXT: [[US3:%.+]] = load i16, ptr %us3.addr
+ // CHECK-NEXT: [[US4:%.+]] = call i16 @llvm.fshl.i16(i16 [[US1]], i16 [[US2]], i16 [[US3]])
+ // CHECK-NEXT: store i16 [[US4]], ptr %tmp_usi_l
+ // CHECK-NEXT: [[US5:%.+]] = load i16, ptr %us1.addr
+ // CHECK-NEXT: [[US6:%.+]] = load i16, ptr %us2.addr
+ // CHECK-NEXT: [[US7:%.+]] = load i16, ptr %us3.addr
+ // CHECK-NEXT: [[US8:%.+]] = call i16 @llvm.fshr.i16(i16 [[US5]], i16 [[US6]], i16 [[US7]])
+ // CHECK-NEXT: store i16 [[US8]], ptr %tmp_usi_r
+ unsigned short tmp_usi_l = __builtin_elementwise_fshl(us1, us2, us3);
+ unsigned short tmp_usi_r = __builtin_elementwise_fshr(us1, us2, us3);
+
+ // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr
+ // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr
+ // CHECK-NEXT: [[C3:%.+]] = load i8, ptr %c3.addr
+ // CHECK-NEXT: [[C4:%.+]] = call i8 @llvm.fshl.i8(i8 [[C1]], i8 [[C2]], i8 [[C3]])
+ // CHECK-NEXT: store i8 [[C4]], ptr %tmp_c_l
+ // CHECK-NEXT: [[C5:%.+]] = load i8, ptr %c1.addr
+ // CHECK-NEXT: [[C6:%.+]] = load i8, ptr %c2.addr
+ // CHECK-NEXT: [[C7:%.+]] = load i8, ptr %c3.addr
+ // CHECK-NEXT: [[C8:%.+]] = call i8 @llvm.fshr.i8(i8 [[C5]], i8 [[C6]], i8 [[C7]])
+ // CHECK-NEXT: store i8 [[C8]], ptr %tmp_c_r
+ char tmp_c_l = __builtin_elementwise_fshl(c1, c2, c3);
+ char tmp_c_r = __builtin_elementwise_fshr(c1, c2, c3);
+
+ // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr
+ // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr
+ // CHECK-NEXT: [[UC3:%.+]] = load i8, ptr %uc3.addr
+ // CHECK-NEXT: [[UC4:%.+]] = call i8 @llvm.fshl.i8(i8 [[UC1]], i8 [[UC2]], i8 [[UC3]])
+ // CHECK-NEXT: store i8 [[UC4]], ptr %tmp_uc_l
+ // CHECK-NEXT: [[UC5:%.+]] = load i8, ptr %uc1.addr
+ // CHECK-NEXT: [[UC6:%.+]] = load i8, ptr %uc2.addr
+ // CHECK-NEXT: [[UC7:%.+]] = load i8, ptr %uc3.addr
+ // CHECK-NEXT: [[UC8:%.+]] = call i8 @llvm.fshr.i8(i8 [[UC5]], i8 [[UC6]], i8 [[UC7]])
+ // CHECK-NEXT: store i8 [[UC8]], ptr %tmp_uc_r
+ unsigned char tmp_uc_l = __builtin_elementwise_fshl(uc1, uc2, uc3);
+ unsigned char tmp_uc_r = __builtin_elementwise_fshr(uc1, uc2, uc3);
+
+ // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr
+ // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr
+ // CHECK-NEXT: [[VI3:%.+]] = load <8 x i16>, ptr %vi3.addr
+ // CHECK-NEXT: [[VI4:%.+]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]], <8 x i16> [[VI3]])
+ // CHECK-NEXT: store <8 x i16> [[VI4]], ptr %tmp_vi_l
+ // CHECK-NEXT: [[VI5:%.+]] = load <8 x i16>, ptr %vi1.addr
+ // CHECK-NEXT: [[VI6:%.+]] = load <8 x i16>, ptr %vi2.addr
+ // CHECK-NEXT: [[VI7:%.+]] = load <8 x i16>, ptr %vi3.addr
+ // CHECK-NEXT: [[VI8:%.+]] = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> [[VI5]], <8 x i16> [[VI6]], <8 x i16> [[VI7]])
+ // CHECK-NEXT: store <8 x i16> [[VI8]], ptr %tmp_vi_r
+ si8 tmp_vi_l = __builtin_elementwise_fshl(vi1, vi2, vi3);
+ si8 tmp_vi_r = __builtin_elementwise_fshr(vi1, vi2, vi3);
+
+ // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr
+ // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr
+ // CHECK-NEXT: [[VU3:%.+]] = load <4 x i32>, ptr %vu3.addr
+ // CHECK-NEXT: [[VU4:%.+]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]], <4 x i32> [[VU3]])
+ // CHECK-NEXT: store <4 x i32> [[VU4]], ptr %tmp_vu_l
+ // CHECK-NEXT: [[VU5:%.+]] = load <4 x i32>, ptr %vu1.addr
+ // CHECK-NEXT: [[VU6:%.+]] = load <4 x i32>, ptr %vu2.addr
+ // CHECK-NEXT: [[VU7:%.+]] = load <4 x i32>, ptr %vu3.addr
+ // CHECK-NEXT: [[VU8:%.+]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[VU5]], <4 x i32> [[VU6]], <4 x i32> [[VU7]])
+ // CHECK-NEXT: store <4 x i32> [[VU8]], ptr %tmp_vu_r
+ u4 tmp_vu_l = __builtin_elementwise_fshl(vu1, vu2, vu3);
+ u4 tmp_vu_r = __builtin_elementwise_fshr(vu1, vu2, vu3);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 8548d3be8c44a..a80ff4bed4faf 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -1294,6 +1294,42 @@ void test_builtin_elementwise_fma(int i32, int2 v2i32, short i16,
// expected-error@-1 {{3rd argument must be a scalar or vector of floating-point types (was '_Complex float')}}
}
+void test_builtin_elementwise_fsh(int i32, int2 v2i32, short i16, int3 v3i32,
+ double f64, float f32, float2 v2f32) {
+ i32 = __builtin_elementwise_fshl();
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
+
+ i32 = __builtin_elementwise_fshr();
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
+
+ i32 = __builtin_elementwise_fshl(i32, i32);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+
+ i32 = __builtin_elementwise_fshr(i32, i32);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+
+ i32 = __builtin_elementwise_fshl(i32, i32, i16);
+ // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
+
+ i16 = __builtin_elementwise_fshr(i16, i32, i16);
+ // expected-error@-1 {{arguments are of different types ('short' vs 'int')}}
+
+ f32 = __builtin_elementwise_fshl(f32, f32, f32);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float')}}
+
+ f64 = __builtin_elementwise_fshr(f64, f64, f64);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'double')}}
+
+ v2i32 = __builtin_elementwise_fshl(v2i32, v2i32, v2f32);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float2' (vector of 2 'float' values))}}
+
+ v2i32 = __builtin_elementwise_fshr(v2i32, v2i32, v3i32);
+ // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int3' (vector of 3 'int' values))}}
+
+ v3i32 = __builtin_elementwise_fshl(v3i32, v3i32, v2i32);
+ // expected-error@-1 {{arguments are of different types ('int3' (vector of 3 'int' values) vs 'int2' (vector of 2 'int' values))}}
+}
+
typedef struct {
float3 b;
} struct_float3;
|
|
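@llvm/pr-subscribers-clang Author: Chaitanya Koparkar (ckoparkar) Changes: This patch implements the `__builtin_elementwise_fshl` and `__builtin_elementwise_fshr` builtins. These map to the fshl/fshr intrinsics described here: https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic and https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic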
Fixes #152555. Full diff: https://github.com/llvm/llvm-project/pull/153113.diff 6 Files Affected:
diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst
index b5bb198ca637a..ed9b334efc236 100644
--- a/clang/docs/LanguageExtensions.rst
+++ b/clang/docs/LanguageExtensions.rst
@@ -859,6 +859,15 @@ of different sizes and signs is forbidden in binary and ternary builtins.
semantics, see `LangRef
<http://llvm.org/docs/LangRef.html#llvm-min-intrinsics-comparation>`_
for the comparison.
+T __builtin_elementwise_fshl(T x, T y, T z) perform a funnel shift left. Concatenate x and y (x is the most integer types
+ significant bits of the wide value), the combined value is shifted
+ left by z, and the most significant bits are extracted to produce
+ a result that is the same size as the original arguments.
+
+T __builtin_elementwise_fshr(T x, T y, T z) perform a funnel shift right. Concatenate x and y (x is the most integer types
+ significant bits of the wide value), the combined value is shifted
+ right by z, and the least significant bits are extracted to produce
+ a result that is the same size as the original arguments.
============================================== ====================================================================== =========================================
diff --git a/clang/include/clang/Basic/Builtins.td b/clang/include/clang/Basic/Builtins.td
index c81714e9b009d..b3b2591a94fe3 100644
--- a/clang/include/clang/Basic/Builtins.td
+++ b/clang/include/clang/Basic/Builtins.td
@@ -1514,6 +1514,18 @@ def ElementwiseSubSat : Builtin {
let Prototype = "void(...)";
}
+def ElementwiseFshl : Builtin {
+ let Spellings = ["__builtin_elementwise_fshl"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
+def ElementwiseFshr : Builtin {
+ let Spellings = ["__builtin_elementwise_fshr"];
+ let Attributes = [NoThrow, Const, CustomTypeChecking];
+ let Prototype = "void(...)";
+}
+
def ReduceMax : Builtin {
let Spellings = ["__builtin_reduce_max"];
let Attributes = [NoThrow, Const, CustomTypeChecking, Constexpr];
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 071667ac772e8..dd154e5e36230 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -4030,6 +4030,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__builtin_elementwise_fma:
return RValue::get(
emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fma));
+ case Builtin::BI__builtin_elementwise_fshl:
+ return RValue::get(
+ emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshl));
+ case Builtin::BI__builtin_elementwise_fshr:
+ return RValue::get(
+ emitBuiltinWithOneOverloadedType<3>(*this, E, Intrinsic::fshr));
+
case Builtin::BI__builtin_elementwise_add_sat:
case Builtin::BI__builtin_elementwise_sub_sat: {
Value *Op0 = EmitScalarExpr(E->getArg(0));
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 9ecee18661340..db22885d14c37 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -3031,6 +3031,12 @@ Sema::CheckBuiltinFunctionCall(FunctionDecl *FDecl, unsigned BuiltinID,
EltwiseBuiltinArgTyRestriction::IntegerTy))
return ExprError();
break;
+ case Builtin::BI__builtin_elementwise_fshl:
+ case Builtin::BI__builtin_elementwise_fshr:
+ if (BuiltinElementwiseTernaryMath(TheCall,
+ EltwiseBuiltinArgTyRestriction::IntegerTy))
+ return ExprError();
+ break;
case Builtin::BI__builtin_elementwise_min:
case Builtin::BI__builtin_elementwise_max:
if (BuiltinElementwiseMath(TheCall))
diff --git a/clang/test/CodeGen/builtins-elementwise-math.c b/clang/test/CodeGen/builtins-elementwise-math.c
index ee8345ff51e5e..d0fc6a6b321fb 100644
--- a/clang/test/CodeGen/builtins-elementwise-math.c
+++ b/clang/test/CodeGen/builtins-elementwise-math.c
@@ -1176,3 +1176,89 @@ void test_builtin_elementwise_fma(float f32, double f64,
half2 tmp2_v2f16 = __builtin_elementwise_fma(v2f16, v2f16, (half2)4.0);
}
+
+void test_builtin_elementwise_fshl(long long int i1, long long int i2,
+ long long int i3, unsigned short us1,
+ unsigned short us2, unsigned short us3,
+ char c1, char c2, char c3,
+ unsigned char uc1, unsigned char uc2,
+ unsigned char uc3, si8 vi1, si8 vi2,
+ si8 vi3, u4 vu1, u4 vu2, u4 vu3) {
+ // CHECK: [[I1:%.+]] = load i64, ptr %i1.addr
+ // CHECK-NEXT: [[I2:%.+]] = load i64, ptr %i2.addr
+ // CHECK-NEXT: [[I3:%.+]] = load i64, ptr %i3.addr
+ // CHECK-NEXT: [[I4:%.+]] = call i64 @llvm.fshl.i64(i64 [[I1]], i64 [[I2]], i64 [[I3]])
+ // CHECK-NEXT: store i64 [[I4]], ptr %tmp_lli_l
+ // CHECK-NEXT: [[I5:%.+]] = load i64, ptr %i1.addr
+ // CHECK-NEXT: [[I6:%.+]] = load i64, ptr %i2.addr
+ // CHECK-NEXT: [[I7:%.+]] = load i64, ptr %i3.addr
+ // CHECK-NEXT: [[I8:%.+]] = call i64 @llvm.fshr.i64(i64 [[I5]], i64 [[I6]], i64 [[I7]])
+ // CHECK-NEXT: store i64 [[I8]], ptr %tmp_lli_r
+ long long int tmp_lli_l = __builtin_elementwise_fshl(i1, i2, i3);
+ long long int tmp_lli_r = __builtin_elementwise_fshr(i1, i2, i3);
+
+ // CHECK: [[US1:%.+]] = load i16, ptr %us1.addr
+ // CHECK-NEXT: [[US2:%.+]] = load i16, ptr %us2.addr
+ // CHECK-NEXT: [[US3:%.+]] = load i16, ptr %us3.addr
+ // CHECK-NEXT: [[US4:%.+]] = call i16 @llvm.fshl.i16(i16 [[US1]], i16 [[US2]], i16 [[US3]])
+ // CHECK-NEXT: store i16 [[US4]], ptr %tmp_usi_l
+ // CHECK-NEXT: [[US5:%.+]] = load i16, ptr %us1.addr
+ // CHECK-NEXT: [[US6:%.+]] = load i16, ptr %us2.addr
+ // CHECK-NEXT: [[US7:%.+]] = load i16, ptr %us3.addr
+ // CHECK-NEXT: [[US8:%.+]] = call i16 @llvm.fshr.i16(i16 [[US5]], i16 [[US6]], i16 [[US7]])
+ // CHECK-NEXT: store i16 [[US8]], ptr %tmp_usi_r
+ unsigned short tmp_usi_l = __builtin_elementwise_fshl(us1, us2, us3);
+ unsigned short tmp_usi_r = __builtin_elementwise_fshr(us1, us2, us3);
+
+ // CHECK: [[C1:%.+]] = load i8, ptr %c1.addr
+ // CHECK-NEXT: [[C2:%.+]] = load i8, ptr %c2.addr
+ // CHECK-NEXT: [[C3:%.+]] = load i8, ptr %c3.addr
+ // CHECK-NEXT: [[C4:%.+]] = call i8 @llvm.fshl.i8(i8 [[C1]], i8 [[C2]], i8 [[C3]])
+ // CHECK-NEXT: store i8 [[C4]], ptr %tmp_c_l
+ // CHECK-NEXT: [[C5:%.+]] = load i8, ptr %c1.addr
+ // CHECK-NEXT: [[C6:%.+]] = load i8, ptr %c2.addr
+ // CHECK-NEXT: [[C7:%.+]] = load i8, ptr %c3.addr
+ // CHECK-NEXT: [[C8:%.+]] = call i8 @llvm.fshr.i8(i8 [[C5]], i8 [[C6]], i8 [[C7]])
+ // CHECK-NEXT: store i8 [[C8]], ptr %tmp_c_r
+ char tmp_c_l = __builtin_elementwise_fshl(c1, c2, c3);
+ char tmp_c_r = __builtin_elementwise_fshr(c1, c2, c3);
+
+ // CHECK: [[UC1:%.+]] = load i8, ptr %uc1.addr
+ // CHECK-NEXT: [[UC2:%.+]] = load i8, ptr %uc2.addr
+ // CHECK-NEXT: [[UC3:%.+]] = load i8, ptr %uc3.addr
+ // CHECK-NEXT: [[UC4:%.+]] = call i8 @llvm.fshl.i8(i8 [[UC1]], i8 [[UC2]], i8 [[UC3]])
+ // CHECK-NEXT: store i8 [[UC4]], ptr %tmp_uc_l
+ // CHECK-NEXT: [[UC5:%.+]] = load i8, ptr %uc1.addr
+ // CHECK-NEXT: [[UC6:%.+]] = load i8, ptr %uc2.addr
+ // CHECK-NEXT: [[UC7:%.+]] = load i8, ptr %uc3.addr
+ // CHECK-NEXT: [[UC8:%.+]] = call i8 @llvm.fshr.i8(i8 [[UC5]], i8 [[UC6]], i8 [[UC7]])
+ // CHECK-NEXT: store i8 [[UC8]], ptr %tmp_uc_r
+ unsigned char tmp_uc_l = __builtin_elementwise_fshl(uc1, uc2, uc3);
+ unsigned char tmp_uc_r = __builtin_elementwise_fshr(uc1, uc2, uc3);
+
+ // CHECK: [[VI1:%.+]] = load <8 x i16>, ptr %vi1.addr
+ // CHECK-NEXT: [[VI2:%.+]] = load <8 x i16>, ptr %vi2.addr
+ // CHECK-NEXT: [[VI3:%.+]] = load <8 x i16>, ptr %vi3.addr
+ // CHECK-NEXT: [[VI4:%.+]] = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> [[VI1]], <8 x i16> [[VI2]], <8 x i16> [[VI3]])
+ // CHECK-NEXT: store <8 x i16> [[VI4]], ptr %tmp_vi_l
+ // CHECK-NEXT: [[VI5:%.+]] = load <8 x i16>, ptr %vi1.addr
+ // CHECK-NEXT: [[VI6:%.+]] = load <8 x i16>, ptr %vi2.addr
+ // CHECK-NEXT: [[VI7:%.+]] = load <8 x i16>, ptr %vi3.addr
+ // CHECK-NEXT: [[VI8:%.+]] = call <8 x i16> @llvm.fshr.v8i16(<8 x i16> [[VI5]], <8 x i16> [[VI6]], <8 x i16> [[VI7]])
+ // CHECK-NEXT: store <8 x i16> [[VI8]], ptr %tmp_vi_r
+ si8 tmp_vi_l = __builtin_elementwise_fshl(vi1, vi2, vi3);
+ si8 tmp_vi_r = __builtin_elementwise_fshr(vi1, vi2, vi3);
+
+ // CHECK: [[VU1:%.+]] = load <4 x i32>, ptr %vu1.addr
+ // CHECK-NEXT: [[VU2:%.+]] = load <4 x i32>, ptr %vu2.addr
+ // CHECK-NEXT: [[VU3:%.+]] = load <4 x i32>, ptr %vu3.addr
+ // CHECK-NEXT: [[VU4:%.+]] = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> [[VU1]], <4 x i32> [[VU2]], <4 x i32> [[VU3]])
+ // CHECK-NEXT: store <4 x i32> [[VU4]], ptr %tmp_vu_l
+ // CHECK-NEXT: [[VU5:%.+]] = load <4 x i32>, ptr %vu1.addr
+ // CHECK-NEXT: [[VU6:%.+]] = load <4 x i32>, ptr %vu2.addr
+ // CHECK-NEXT: [[VU7:%.+]] = load <4 x i32>, ptr %vu3.addr
+ // CHECK-NEXT: [[VU8:%.+]] = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> [[VU5]], <4 x i32> [[VU6]], <4 x i32> [[VU7]])
+ // CHECK-NEXT: store <4 x i32> [[VU8]], ptr %tmp_vu_r
+ u4 tmp_vu_l = __builtin_elementwise_fshl(vu1, vu2, vu3);
+ u4 tmp_vu_r = __builtin_elementwise_fshr(vu1, vu2, vu3);
+}
diff --git a/clang/test/Sema/builtins-elementwise-math.c b/clang/test/Sema/builtins-elementwise-math.c
index 8548d3be8c44a..a80ff4bed4faf 100644
--- a/clang/test/Sema/builtins-elementwise-math.c
+++ b/clang/test/Sema/builtins-elementwise-math.c
@@ -1294,6 +1294,42 @@ void test_builtin_elementwise_fma(int i32, int2 v2i32, short i16,
// expected-error@-1 {{3rd argument must be a scalar or vector of floating-point types (was '_Complex float')}}
}
+void test_builtin_elementwise_fsh(int i32, int2 v2i32, short i16, int3 v3i32,
+ double f64, float f32, float2 v2f32) {
+ i32 = __builtin_elementwise_fshl();
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
+
+ i32 = __builtin_elementwise_fshr();
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 0}}
+
+ i32 = __builtin_elementwise_fshl(i32, i32);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+
+ i32 = __builtin_elementwise_fshr(i32, i32);
+ // expected-error@-1 {{too few arguments to function call, expected 3, have 2}}
+
+ i32 = __builtin_elementwise_fshl(i32, i32, i16);
+ // expected-error@-1 {{arguments are of different types ('int' vs 'short')}}
+
+ i16 = __builtin_elementwise_fshr(i16, i32, i16);
+ // expected-error@-1 {{arguments are of different types ('short' vs 'int')}}
+
+ f32 = __builtin_elementwise_fshl(f32, f32, f32);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float')}}
+
+ f64 = __builtin_elementwise_fshr(f64, f64, f64);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'double')}}
+
+ v2i32 = __builtin_elementwise_fshl(v2i32, v2i32, v2f32);
+ // expected-error@-1 {{argument must be a scalar or vector of integer types (was 'float2' (vector of 2 'float' values))}}
+
+ v2i32 = __builtin_elementwise_fshr(v2i32, v2i32, v3i32);
+ // expected-error@-1 {{arguments are of different types ('int2' (vector of 2 'int' values) vs 'int3' (vector of 3 'int' values))}}
+
+ v3i32 = __builtin_elementwise_fshl(v3i32, v3i32, v2i32);
+ // expected-error@-1 {{arguments are of different types ('int3' (vector of 3 'int' values) vs 'int2' (vector of 2 'int' values))}}
+}
+
typedef struct {
float3 b;
} struct_float3;
|
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
d268061 to
84cc6b4
Compare
Fznamznon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Perhaps it also makes sense to add a release note.
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - a brief release note would be great - thanks!
84cc6b4 to
ebda1db
Compare
This patch implements `__builtin_elementwise_fshl` and `__builtin_elementwise_fshr` builtins. These map to the fshl/fshr intrinsics described here: - https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic - https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic
ebda1db to
b002f7c
Compare
|
Rebased and added a release note :-) |
RKSimon
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM - cheers
|
Could you please help with the merge as well? I don't have commit access. |
|
Thanks, and thanks everyone for the reviews. |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/27/builds/14456 Here is the relevant piece of the build log for the reference |
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/154/builds/20119 Here is the relevant piece of the build log for the reference |
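This patch implements the `__builtin_elementwise_fshl` and `__builtin_elementwise_fshr` builtins.
These map to the fshl/fshr intrinsics described here: https://llvm.org/docs/LangRef.html#llvm-fshl-intrinsic and https://llvm.org/docs/LangRef.html#llvm-fshr-intrinsic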
Fixes #152555.