-
Notifications
You must be signed in to change notification settings - Fork 15.2k
[clang][bytecode][X86] Allow AVX512 funnel shift by scalar immediate intrinsics to be used in constexpr #157681
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
…intrinsics to be used in constexpr. Extends interp__builtin_elementwise_triop to handle (vector, vector, scalar) trinary op intrinsics. Fixes llvm#153152.
Member
|
@llvm/pr-subscribers-backend-x86 @llvm/pr-subscribers-clang Author: Simon Pilgrim (RKSimon). Changes: Extends interp__builtin_elementwise_triop to handle (vector, vector, scalar) trinary op intrinsics. Fixes #153152. Patch is 45.42 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/157681.diff 5 Files Affected:
diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td
index b4ff550d27279..597d6fb334f31 100644
--- a/clang/include/clang/Basic/BuiltinsX86.td
+++ b/clang/include/clang/Basic/BuiltinsX86.td
@@ -1772,75 +1772,30 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512
def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">;
}
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
def vpshldd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+ def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
def vpshldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
+ def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
def vpshldw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
+ def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
}
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
- def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
+ def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
def vpshrdd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+ def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
def vpshrdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
- def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
- def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">;
-}
-
-let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
+ def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
def vpshrdw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">;
}
-let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
+let Features = "avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
+ def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
+ def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">;
+ def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+ def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">;
+ def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
def vpshrdw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">;
}
diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
index a0dcdace854b9..08cd100e79d35 100644
--- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp
+++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp
@@ -2851,11 +2851,32 @@ static bool interp__builtin_elementwise_triop(
return true;
}
- // Vector type.
const auto *VecT = Arg0Type->castAs<VectorType>();
const PrimType &ElemT = *S.getContext().classify(VecT->getElementType());
unsigned NumElems = VecT->getNumElements();
+ bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
+
+ // Vector + Vector + Scalar case.
+ if (!Arg2Type->isVectorType()) {
+ APSInt Op2 = popToAPSInt(
+ S.Stk, *S.getContext().classify(Call->getArg(2)->getType()));
+
+ const Pointer &Op1 = S.Stk.pop<Pointer>();
+ const Pointer &Op0 = S.Stk.pop<Pointer>();
+ const Pointer &Dst = S.Stk.peek<Pointer>();
+ for (unsigned I = 0; I != NumElems; ++I) {
+ INT_TYPE_SWITCH_NO_BOOL(ElemT, {
+ Dst.elem<T>(I) = static_cast<T>(APSInt(
+ Fn(Op0.elem<T>(I).toAPSInt(), Op1.elem<T>(I).toAPSInt(), Op2),
+ DestUnsigned));
+ });
+ }
+ Dst.initializeAllElements();
+
+ return true;
+ }
+ // Vector type.
const Pointer &Op2 = S.Stk.pop<Pointer>();
const Pointer &Op1 = S.Stk.pop<Pointer>();
const Pointer &Op0 = S.Stk.pop<Pointer>();
@@ -3421,6 +3442,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
return F;
});
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512:
+ return interp__builtin_elementwise_triop(
+ S, OpPC, Call,
+ [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) {
+ return llvm::APIntOps::fshl(Hi, Lo, Amt);
+ });
+
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512:
+ // NOTE: Reversed Hi/Lo operands.
+ return interp__builtin_elementwise_triop(
+ S, OpPC, Call,
+ [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) {
+ return llvm::APIntOps::fshr(Hi, Lo, Amt);
+ });
+
case clang::X86::BI__builtin_ia32_blendvpd:
case clang::X86::BI__builtin_ia32_blendvpd256:
case clang::X86::BI__builtin_ia32_blendvps:
diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp
index ca930737474df..07eabbe79cbb8 100644
--- a/clang/lib/AST/ExprConstant.cpp
+++ b/clang/lib/AST/ExprConstant.cpp
@@ -11860,6 +11860,69 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
return Success(APValue(ResultElements.data(), ResultElements.size()), E);
}
+ case X86::BI__builtin_ia32_vpshldd128:
+ case X86::BI__builtin_ia32_vpshldd256:
+ case X86::BI__builtin_ia32_vpshldd512:
+ case X86::BI__builtin_ia32_vpshldq128:
+ case X86::BI__builtin_ia32_vpshldq256:
+ case X86::BI__builtin_ia32_vpshldq512:
+ case X86::BI__builtin_ia32_vpshldw128:
+ case X86::BI__builtin_ia32_vpshldw256:
+ case X86::BI__builtin_ia32_vpshldw512: {
+ APValue SourceHi, SourceLo, SourceAmt;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceHi) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceLo) ||
+ !EvaluateAsRValue(Info, E->getArg(2), SourceAmt))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ unsigned SourceLen = SourceHi.getVectorLength();
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APInt Amt = SourceAmt.getInt();
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ APInt Hi = SourceHi.getVectorElt(EltNum).getInt();
+ APInt Lo = SourceLo.getVectorElt(EltNum).getInt();
+ APInt R = llvm::APIntOps::fshl(Hi, Lo, Amt);
+ ResultElements.push_back(
+ APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType())));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
+ case X86::BI__builtin_ia32_vpshrdd128:
+ case X86::BI__builtin_ia32_vpshrdd256:
+ case X86::BI__builtin_ia32_vpshrdd512:
+ case X86::BI__builtin_ia32_vpshrdq128:
+ case X86::BI__builtin_ia32_vpshrdq256:
+ case X86::BI__builtin_ia32_vpshrdq512:
+ case X86::BI__builtin_ia32_vpshrdw128:
+ case X86::BI__builtin_ia32_vpshrdw256:
+ case X86::BI__builtin_ia32_vpshrdw512: {
+ // NOTE: Reversed Hi/Lo operands.
+ APValue SourceHi, SourceLo, SourceAmt;
+ if (!EvaluateAsRValue(Info, E->getArg(0), SourceLo) ||
+ !EvaluateAsRValue(Info, E->getArg(1), SourceHi) ||
+ !EvaluateAsRValue(Info, E->getArg(2), SourceAmt))
+ return false;
+
+ QualType DestEltTy = E->getType()->castAs<VectorType>()->getElementType();
+ unsigned SourceLen = SourceHi.getVectorLength();
+ SmallVector<APValue, 32> ResultElements;
+ ResultElements.reserve(SourceLen);
+
+ APInt Amt = SourceAmt.getInt();
+ for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
+ APInt Hi = SourceHi.getVectorElt(EltNum).getInt();
+ APInt Lo = SourceLo.getVectorElt(EltNum).getInt();
+ APInt R = llvm::APIntOps::fshr(Hi, Lo, Amt);
+ ResultElements.push_back(
+ APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType())));
+ }
+
+ return Success(APValue(ResultElements.data(), ResultElements.size()), E);
+ }
case X86::BI__builtin_ia32_blendvpd:
case X86::BI__builtin_ia32_blendvpd256:
case X86::BI__builtin_ia32_blendvps:
diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c
index aecd965551ce1..560035598a6e4 100644
--- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c
@@ -96,6 +96,7 @@ __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_shldi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 38280596832649216LL, -40532396646334464LL, 999, 999, 999, -47287796087390209LL, 999));
__m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shldi_epi64
@@ -103,12 +104,14 @@ __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_shldi_epi64(__U, __A, __B, 63);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_shldi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 38280596832649216LL, -40532396646334464LL, 0, 0, 0, -47287796087390209LL, 0));
__m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_shldi_epi64
// CHECK: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31))
return _mm512_shldi_epi64(__A, __B, 31);
}
+TEST_CONSTEXPR(match_v8di(_mm512_shldi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 38280596832649215LL, 38280596832649216LL, -40532396646334464LL, 45035996273704959LL, -42784196460019713LL, 47287796087390208LL, -47287796087390209LL, 54043195528445951LL));
__m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_shldi_epi32
@@ -116,6 +119,7 @@ __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7);
}
+TEST_CONSTEXPR(match_v16si(_mm512_mask_shldi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 999, 999, 73727, 999, -73729, -75777, 81919, 999, 86015, 999, 999, 999, 94207, -92161, 999));
__m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shldi_epi32
@@ -123,12 +127,14 @@ __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) {
// CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}}
return _mm512_maskz_shldi_epi32(__U, __A, __B, 15);
}
+TEST_CONSTEXPR(match_v16si(_mm512_maskz_shldi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 0, 0, 73727, 0, -73729, -75777, 81919, 0, 86015, 0, 0, 0, 94207, -92161, 0));
__m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_shldi_epi32
// CHECK: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31))
return _mm512_shldi_epi32(__A, __B, 31);
}
+TEST_CONSTEXPR(match_v16si(_mm512_shldi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 67584, -69632, 73727, 73728, -73729, -75777, 81919, -79873, 86015, 88063, -86017, -88065, 94207, -92161, -96256));
__m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_shldi_epi16
@@ -136,6 +142,7 @@ __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_mask_shldi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 999, -8321, 999, 8704, -8832, 999, -8961, 9216, 9344, 999, 999, -9728, 9983, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 11136, 999, 999, 999, 11775, 11903, 999, -11905, 999));
__m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shldi_epi16
@@ -143,12 +150,14 @@ __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) {
// CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}}
return _mm512_maskz_shldi_epi16(__U, __A, __B, 7);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shldi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 0, -8321, 0, 8704, -8832, 0, -8961, 9216, 9344, 0, 0, -9728, 9983, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 11136, 0, 0, 0, 11775, 11903, 0, -11905, 0));
__m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_shldi_epi16
// CHECK: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15))
return _mm512_shldi_epi16(__A, __B, 15);
}
+TEST_CONSTEXPR(match_v32hi(_mm512_shldi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 8320, -8321, 8576, 8704, -8832, 8960, -8961, 9216, 9344, -9472, 9727, -9728, 9983, 10111, -10112, -10240, -10241, 10623, -10497, 10752, 11007, 11008, 11136, -11137, -11392, 11647, 11775, 11903, -11777, -11905, 12160));
__m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_mask_shrdi_epi64
@@ -156,6 +165,7 @@ __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47);
}
+TEST_CONSTEXPR(match_v8di(_mm512_mask_shrdi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 16384, 32767, 999, 999, 999, -49153, 999));
__m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_maskz_shrdi_epi64
@@ -163,12 +173,14 @@ __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) {
// CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}}
return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63);
}
+TEST_CONSTEXPR(match_v8di(_mm512_maskz_shrdi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 16384, 32767, 0, 0, 0, -49153, 0));
__m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) {
// CHECK-LABEL: test_mm512_shrdi_epi64
// CHECK: call <8 x i64> @llvm...
[truncated]
|
tbaederr
approved these changes
Sep 9, 2025
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
backend:X86
clang:bytecode
Issues for the clang bytecode constexpr interpreter
clang:frontend
Language frontend issues, e.g. anything involving "Sema"
clang
Clang issues not falling into any other category
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Extends interp__builtin_elementwise_triop to handle (vector, vector, scalar) trinary op intrinsics
Fixes #153152