diff --git a/clang/include/clang/Basic/BuiltinsX86.td b/clang/include/clang/Basic/BuiltinsX86.td index 9680c2b5108a2..72001e097550e 100644 --- a/clang/include/clang/Basic/BuiltinsX86.td +++ b/clang/include/clang/Basic/BuiltinsX86.td @@ -1772,75 +1772,30 @@ let Features = "avx512bw", Attributes = [NoThrow, Const, RequiredVectorWidth<512 def vpermi2varhi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Vector<32, short>)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in { def vpshldd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { + def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; def vpshldq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { + def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; def vpshldw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; + def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; } -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { - def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { +let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in { + def vpshldd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; def vpshrdd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { + def vpshldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; def vpshrdq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<4, long long int>, _Constant int)">; -} - -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { - def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in { - def vpshrdw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>, _Constant int)">; -} - -let Features = "avx512vl,avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in { + def vpshldw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; def vpshrdw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>, _Constant int)">; } -let Features = "avx512vbmi2", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in { +let Features = "avx512vbmi2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in { + def vpshldd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; + def vpshrdd512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, _Vector<16, int>, _Constant int)">; + def vpshldq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; + def vpshrdq512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Vector<8, long long int>, _Constant int)">; + def vpshldw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; def vpshrdw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>, _Constant int)">; } diff --git a/clang/lib/AST/ByteCode/InterpBuiltin.cpp b/clang/lib/AST/ByteCode/InterpBuiltin.cpp index 6f0987b0adb99..910b459930765 100644 --- a/clang/lib/AST/ByteCode/InterpBuiltin.cpp +++ b/clang/lib/AST/ByteCode/InterpBuiltin.cpp @@ -2851,11 +2851,32 @@ static bool interp__builtin_elementwise_triop( return true; } - // Vector type. const auto *VecT = Arg0Type->castAs(); const PrimType &ElemT = *S.getContext().classify(VecT->getElementType()); unsigned NumElems = VecT->getNumElements(); + bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType(); + + // Vector + Vector + Scalar case. + if (!Arg2Type->isVectorType()) { + APSInt Op2 = popToAPSInt( + S.Stk, *S.getContext().classify(Call->getArg(2)->getType())); + + const Pointer &Op1 = S.Stk.pop(); + const Pointer &Op0 = S.Stk.pop(); + const Pointer &Dst = S.Stk.peek(); + for (unsigned I = 0; I != NumElems; ++I) { + INT_TYPE_SWITCH_NO_BOOL(ElemT, { + Dst.elem(I) = static_cast(APSInt( + Fn(Op0.elem(I).toAPSInt(), Op1.elem(I).toAPSInt(), Op2), + DestUnsigned)); + }); + } + Dst.initializeAllElements(); + + return true; + } + // Vector type. const Pointer &Op2 = S.Stk.pop(); const Pointer &Op1 = S.Stk.pop(); const Pointer &Op0 = S.Stk.pop(); @@ -3435,6 +3456,37 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call, return F; }); + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: + return interp__builtin_elementwise_triop( + S, OpPC, Call, + [](const APSInt &Hi, const APSInt &Lo, const APSInt &Amt) { + return llvm::APIntOps::fshl(Hi, Lo, Amt); + }); + + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: + // NOTE: Reversed Hi/Lo operands. + return interp__builtin_elementwise_triop( + S, OpPC, Call, + [](const APSInt &Lo, const APSInt &Hi, const APSInt &Amt) { + return llvm::APIntOps::fshr(Hi, Lo, Amt); + }); + case clang::X86::BI__builtin_ia32_blendvpd: case clang::X86::BI__builtin_ia32_blendvpd256: case clang::X86::BI__builtin_ia32_blendvps: diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 16f03c84384ae..13e025e56c547 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -11874,6 +11874,69 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) { return Success(APValue(ResultElements.data(), ResultElements.size()), E); } + case X86::BI__builtin_ia32_vpshldd128: + case X86::BI__builtin_ia32_vpshldd256: + case X86::BI__builtin_ia32_vpshldd512: + case X86::BI__builtin_ia32_vpshldq128: + case X86::BI__builtin_ia32_vpshldq256: + case X86::BI__builtin_ia32_vpshldq512: + case X86::BI__builtin_ia32_vpshldw128: + case X86::BI__builtin_ia32_vpshldw256: + case X86::BI__builtin_ia32_vpshldw512: { + APValue SourceHi, SourceLo, SourceAmt; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceHi) || + !EvaluateAsRValue(Info, E->getArg(1), SourceLo) || + !EvaluateAsRValue(Info, E->getArg(2), SourceAmt)) + return false; + + QualType DestEltTy = E->getType()->castAs()->getElementType(); + unsigned SourceLen = SourceHi.getVectorLength(); + SmallVector ResultElements; + ResultElements.reserve(SourceLen); + + APInt Amt = SourceAmt.getInt(); + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + APInt Hi = SourceHi.getVectorElt(EltNum).getInt(); + APInt Lo = SourceLo.getVectorElt(EltNum).getInt(); + APInt R = llvm::APIntOps::fshl(Hi, Lo, Amt); + ResultElements.push_back( + APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType()))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } + case X86::BI__builtin_ia32_vpshrdd128: + case X86::BI__builtin_ia32_vpshrdd256: + case X86::BI__builtin_ia32_vpshrdd512: + case X86::BI__builtin_ia32_vpshrdq128: + case X86::BI__builtin_ia32_vpshrdq256: + case X86::BI__builtin_ia32_vpshrdq512: + case X86::BI__builtin_ia32_vpshrdw128: + case X86::BI__builtin_ia32_vpshrdw256: + case X86::BI__builtin_ia32_vpshrdw512: { + // NOTE: Reversed Hi/Lo operands. + APValue SourceHi, SourceLo, SourceAmt; + if (!EvaluateAsRValue(Info, E->getArg(0), SourceLo) || + !EvaluateAsRValue(Info, E->getArg(1), SourceHi) || + !EvaluateAsRValue(Info, E->getArg(2), SourceAmt)) + return false; + + QualType DestEltTy = E->getType()->castAs()->getElementType(); + unsigned SourceLen = SourceHi.getVectorLength(); + SmallVector ResultElements; + ResultElements.reserve(SourceLen); + + APInt Amt = SourceAmt.getInt(); + for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) { + APInt Hi = SourceHi.getVectorElt(EltNum).getInt(); + APInt Lo = SourceLo.getVectorElt(EltNum).getInt(); + APInt R = llvm::APIntOps::fshr(Hi, Lo, Amt); + ResultElements.push_back( + APValue(APSInt(R, DestEltTy->isUnsignedIntegerOrEnumerationType()))); + } + + return Success(APValue(ResultElements.data(), ResultElements.size()), E); + } case X86::BI__builtin_ia32_blendvpd: case X86::BI__builtin_ia32_blendvpd256: case X86::BI__builtin_ia32_blendvps: diff --git a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c index aecd965551ce1..560035598a6e4 100644 --- a/clang/test/CodeGen/X86/avx512vbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vbmi2-builtins.c @@ -96,6 +96,7 @@ __m512i test_mm512_mask_shldi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shldi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 38280596832649216LL, -40532396646334464LL, 999, 999, 999, -47287796087390209LL, 999)); __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi64 @@ -103,12 +104,14 @@ __m512i test_mm512_maskz_shldi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shldi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 38280596832649216LL, -40532396646334464LL, 0, 0, 0, -47287796087390209LL, 0)); __m512i test_mm512_shldi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi64 // CHECK: call <8 x i64> @llvm.fshl.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v8di(_mm512_shldi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 38280596832649215LL, 38280596832649216LL, -40532396646334464LL, 45035996273704959LL, -42784196460019713LL, 47287796087390208LL, -47287796087390209LL, 54043195528445951LL)); __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldi_epi32 @@ -116,6 +119,7 @@ __m512i test_mm512_mask_shldi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shldi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 999, 999, 73727, 999, -73729, -75777, 81919, 999, 86015, 999, 999, 999, 94207, -92161, 999)); __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi32 @@ -123,12 +127,14 @@ __m512i test_mm512_maskz_shldi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shldi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 0, 0, 73727, 0, -73729, -75777, 81919, 0, 86015, 0, 0, 0, 94207, -92161, 0)); __m512i test_mm512_shldi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi32 // CHECK: call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v16si(_mm512_shldi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 65536, 67584, -69632, 73727, 73728, -73729, -75777, 81919, -79873, 86015, 88063, -86017, -88065, 94207, -92161, -96256)); __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldi_epi16 @@ -136,6 +142,7 @@ __m512i test_mm512_mask_shldi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shldi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 999, -8321, 999, 8704, -8832, 999, -8961, 9216, 9344, 999, 999, -9728, 9983, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 11136, 999, 999, 999, 11775, 11903, 999, -11905, 999)); __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shldi_epi16 @@ -143,12 +150,14 @@ __m512i test_mm512_maskz_shldi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shldi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 0, -8321, 0, 8704, -8832, 0, -8961, 9216, 9344, 0, 0, -9728, 9983, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 11136, 0, 0, 0, 11775, 11903, 0, -11905, 0)); __m512i test_mm512_shldi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shldi_epi16 // CHECK: call <32 x i16> @llvm.fshl.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 15)) return _mm512_shldi_epi16(__A, __B, 15); } +TEST_CONSTEXPR(match_v32hi(_mm512_shldi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), 8319, 8320, -8321, 8576, 8704, -8832, 8960, -8961, 9216, 9344, -9472, 9727, -9728, 9983, 10111, -10112, -10240, -10241, 10623, -10497, 10752, 11007, 11008, 11136, -11137, -11392, 11647, 11775, 11903, -11777, -11905, 12160)); __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi64 @@ -156,6 +165,7 @@ __m512i test_mm512_mask_shrdi_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m5 // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v8di(_mm512_mask_shrdi_epi64(((__m512i)(__v8di){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 999, 16384, 32767, 999, 999, 999, -49153, 999)); __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi64 @@ -163,12 +173,14 @@ __m512i test_mm512_maskz_shrdi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> %{{.*}} return _mm512_maskz_shrdi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v8di(_mm512_maskz_shrdi_epi64(0x46, ((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), 0, 16384, 32767, 0, 0, 0, -49153, 0)); __m512i test_mm512_shrdi_epi64(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi64 // CHECK: call <8 x i64> @llvm.fshr.v8i64(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}, <8 x i64> splat (i64 31)) return _mm512_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v8di(_mm512_shrdi_epi64(((__m512i)(__v8di){ 16, 17, -18, 19, -20, 21, -22, 23}), ((__m512i)(__v8di){ -1, 2, 3, -4, -5, 6, -7, -8}), 51), -8192, 16384, 32767, -32768, -32769, 49152, -49153, -65536)); __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi32 @@ -176,6 +188,7 @@ __m512i test_mm512_mask_shrdi_epi32(__m512i __S, __mmask16 __U, __m512i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_mask_shrdi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16si(_mm512_mask_shrdi_epi32(((__m512i)(__v16si){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 999, 999, -8388608, 999, -10485761, -12582913, -16777216, 999, -20971520, 999, 999, 999, -29360128, -29360129, 999)); __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi32 @@ -183,12 +196,14 @@ __m512i test_mm512_maskz_shrdi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> %{{.*}} return _mm512_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v16si(_mm512_maskz_shrdi_epi32(0x62E9, ((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 0, 0, -8388608, 0, -10485761, -12582913, -16777216, 0, -20971520, 0, 0, 0, -29360128, -29360129, 0)); __m512i test_mm512_shrdi_epi32(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi32 // CHECK: call <16 x i32> @llvm.fshr.v16i32(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}, <16 x i32> splat (i32 31)) return _mm512_shrdi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v16si(_mm512_shrdi_epi32(((__m512i)(__v16si){ 32, 33, -34, 35, 36, -37, -38, 39, -40, 41, 42, -43, -44, 45, -46, -47}), ((__m512i)(__v16si){ 1, 2, 3, -4, 5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, 16}), 11), 2097152, 4194304, 8388607, -8388608, 10485760, -10485761, -12582913, -16777216, -16777217, -20971520, -23068672, -23068673, -25165825, -29360128, -29360129, 35651583)); __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shrdi_epi16 @@ -196,6 +211,7 @@ __m512i test_mm512_mask_shrdi_epi16(__m512i __S, __mmask32 __U, __m512i __A, __m // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v32hi(_mm512_mask_shrdi_epi16(((__m512i)(__v32hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 999, -1025, 999, 2560, 3583, 999, -3585, 4608, 5120, 999, 999, 7167, -7168, 999, 999, 999, 999, 999, 999, 10752, 999, 999, 12288, 999, 999, 999, -14336, -14848, 999, -15361, 999)); __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_maskz_shrdi_epi16 @@ -203,12 +219,14 @@ __m512i test_mm512_maskz_shrdi_epi16(__mmask32 __U, __m512i __A, __m512i __B) { // CHECK: select <32 x i1> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> %{{.*}} return _mm512_maskz_shrdi_epi16(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v32hi(_mm512_maskz_shrdi_epi16(0x589033B5, ((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 0, -1025, 0, 2560, 3583, 0, -3585, 4608, 5120, 0, 0, 7167, -7168, 0, 0, 0, 0, 0, 0, 10752, 0, 0, 12288, 0, 0, 0, -14336, -14848, 0, -15361, 0)); __m512i test_mm512_shrdi_epi16(__m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_shrdi_epi16 // CHECK: call <32 x i16> @llvm.fshr.v32i16(<32 x i16> %{{.*}}, <32 x i16> %{{.*}}, <32 x i16> splat (i16 31)) return _mm512_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v32hi(_mm512_shrdi_epi16(((__m512i)(__v32hi){ 64, 65, -66, 67, 68, -69, 70, -71, 72, 73, -74, 75, -76, 77, 78, -79, -80, -81, 82, -83, 84, 85, 86, 87, -88, -89, 90, 91, 92, -93, -94, 95}), ((__m512i)(__v32hi){ -1, 2, -3, 4, 5, 6, 7, -8, 9, 10, 11, -12, 13, -14, -15, 16, 17, -18, -19, -20, 21, -22, 23, 24, -25, 26, -27, -28, -29, -30, -31, 32}), 7), -512, 1024, -1025, 2048, 2560, 3583, 3584, -3585, 4608, 5120, 6143, -6144, 7167, -7168, -7680, 8703, 9215, -8705, -9728, -9729, 10752, -11264, 11776, 12288, -12289, 13823, -13824, -14336, -14848, -14849, -15361, 16384)); __m512i test_mm512_mask_shldv_epi64(__m512i __S, __mmask8 __U, __m512i __A, __m512i __B) { // CHECK-LABEL: test_mm512_mask_shldv_epi64 diff --git a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c index 913264fe877cd..e1e8578ea414a 100644 --- a/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c +++ b/clang/test/CodeGen/X86/avx512vlvbmi2-builtins.c @@ -180,6 +180,7 @@ __m256i test_mm256_mask_shldi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shldi_epi64(((__m256i)(__v4di){ 999, 999, 999, 999}), 0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 999, 12384898975268864LL)); __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi64 @@ -187,12 +188,14 @@ __m256i test_mm256_maskz_shldi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shldi_epi64(0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 0, 12384898975268864LL)); __m256i test_mm256_shldi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldi_epi64 // CHECK: call <4 x i64> @llvm.fshl.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31)) return _mm256_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v4di(_mm256_shldi_epi64(((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -7881299347898369LL, -10133099161583616LL, 11258999068426240LL, 12384898975268864LL)); __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldi_epi64 @@ -200,6 +203,7 @@ __m128i test_mm_mask_shldi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shldi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shldi_epi64(((__m128i)(__v2di){ 999, 999}), 0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 999, -160)); __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi64 @@ -207,12 +211,14 @@ __m128i test_mm_maskz_shldi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shldi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shldi_epi64(0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 0, -160)); __m128i test_mm_shldi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi64 // CHECK: call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shldi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v2di(_mm_shldi_epi64(((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), -97, -160)); __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldi_epi32 @@ -220,6 +226,7 @@ __m256i test_mm256_mask_shldi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shldi_epi32(((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}), 0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 999, 999, 9216, -9217, 10240, 999, -11264, -11776)); __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi32 @@ -227,12 +234,14 @@ __m256i test_mm256_maskz_shldi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shldi_epi32(0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 0, 0, 9216, -9217, 10240, 0, -11264, -11776)); __m256i test_mm256_shldi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldi_epi32 // CHECK: call <8 x i32> @llvm.fshl.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31)) return _mm256_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v8si(_mm256_shldi_epi32(((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), -8192, 9215, 9216, -9217, 10240, 10752, -11264, -11776)); __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldi_epi32 @@ -240,6 +249,7 @@ __m128i test_mm_mask_shldi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shldi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shldi_epi32(((__m128i)(__v4si){ 999, 999, 999, 999}), 0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 999, 11263, -11264)); __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi32 @@ -247,12 +257,14 @@ __m128i test_mm_maskz_shldi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shldi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shldi_epi32(0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 0, 11263, -11264)); __m128i test_mm_shldi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi32 // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shldi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v4si(_mm_shldi_epi32(((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 8192, 9216, 11263, -11264)); __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldi_epi16 @@ -260,6 +272,7 @@ __m256i test_mm256_mask_shldi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shldi_epi16(((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 999, 999, 999, 999, -27648, 999, -24577, 25599, 999, 22528, 999, 21503, 999, 999, 999)); __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shldi_epi16 @@ -267,12 +280,14 @@ __m256i test_mm256_maskz_shldi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shldi_epi16(0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 0, 0, 0, 0, -27648, 0, -24577, 25599, 0, 22528, 0, 21503, 0, 0, 0)); __m256i test_mm256_shldi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shldi_epi16 // CHECK: call <16 x i16> @llvm.fshl.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shldi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v16hi(_mm256_shldi_epi16(((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), -32768, 32767, 30720, -28673, 29695, -27648, 27647, -24577, 25599, 23552, 22528, 21504, 21503, 20479, 19455, 18431)); __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shldi_epi16 @@ -280,6 +295,7 @@ __m128i test_mm_mask_shldi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shldi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shldi_epi16(((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 999, 999, -4608, -4864, 5375, 999, 999, 6143)); __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shldi_epi16 @@ -287,12 +303,14 @@ __m128i test_mm_maskz_shldi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shldi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shldi_epi16(0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 0, 0, -4608, -4864, 5375, 0, 0, 6143)); __m128i test_mm_shldi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shldi_epi16 // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shldi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v8hi(_mm_shldi_epi16(((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 4351, 4607, -4608, -4864, 5375, -5376, -5632, 6143)); __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi64 @@ -300,6 +318,7 @@ __m256i test_mm256_mask_shrdi_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v4di(_mm256_mask_shrdi_epi64(((__m256i)(__v4di){ 999, 999, 999, 999}), 0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 999, 65536)); __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi64 @@ -307,12 +326,14 @@ __m256i test_mm256_maskz_shrdi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} return _mm256_maskz_shrdi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v4di(_mm256_maskz_shrdi_epi64(0xB, ((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 0, 65536)); __m256i test_mm256_shrdi_epi64(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi64 // CHECK: call <4 x i64> @llvm.fshr.v4i64(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> splat (i64 31) return _mm256_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v4di(_mm256_shrdi_epi64(((__m256i)(__v4di){ -8, -9, 10, 11}), ((__m256i)(__v4di){ -1, 2, 3, 4}), 50), -1, 49151, 49152, 65536)); __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi64 @@ -320,6 +341,7 @@ __m128i test_mm_mask_shrdi_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_mask_shrdi_epi64(__S, __U, __A, __B, 47); } +TEST_CONSTEXPR(match_v2di(_mm_mask_shrdi_epi64(((__m128i)(__v2di){ 999, 999}), 0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 999, 1729382256910270463LL)); __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdi_epi64 @@ -327,12 +349,14 @@ __m128i test_mm_maskz_shrdi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} return _mm_maskz_shrdi_epi64(__U, __A, __B, 63); } +TEST_CONSTEXPR(match_v2di(_mm_maskz_shrdi_epi64(0x2, ((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), 0, 1729382256910270463LL)); __m128i test_mm_shrdi_epi64(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi64 // CHECK: call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 31)) return _mm_shrdi_epi64(__A, __B, 31); } +TEST_CONSTEXPR(match_v2di(_mm_shrdi_epi64(((__m128i)(__v2di){ -4, -5}), ((__m128i)(__v2di){ -1, 2}), 5), -1, 1729382256910270463LL)); __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi32 @@ -340,6 +364,7 @@ __m256i test_mm256_mask_shrdi_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m2 // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_mask_shrdi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8si(_mm256_mask_shrdi_epi32(((__m256i)(__v8si){ 999, 999, 999, 999, 999, 999, 999, 999}), 0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 999, 999, 25165824, -25165825, 41943040, 999, 67108863, 75497471)); __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi32 @@ -347,12 +372,14 @@ __m256i test_mm256_maskz_shrdi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} return _mm256_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v8si(_mm256_maskz_shrdi_epi32(0xDC, ((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 0, 0, 25165824, -25165825, 41943040, 0, 67108863, 75497471)); __m256i test_mm256_shrdi_epi32(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi32 // CHECK: call <8 x i32> @llvm.fshr.v8i32(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> splat (i32 31) return _mm256_shrdi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v8si(_mm256_shrdi_epi32(((__m256i)(__v8si){ -16, 17, 18, -19, 20, 21, -22, -23}), ((__m256i)(__v8si){ 1, -2, 3, -4, 5, 6, 7, 8}), 9), 16777215, -16777216, 25165824, -25165825, 41943040, 50331648, 67108863, 75497471)); __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi32 @@ -360,6 +387,7 @@ __m128i test_mm_mask_shrdi_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_mask_shrdi_epi32(__S, __U, __A, __B, 7); } +TEST_CONSTEXPR(match_v4si(_mm_mask_shrdi_epi32(((__m128i)(__v4si){ 999, 999, 999, 999}), 0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 999, -12582912, 20971519)); __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdi_epi32 @@ -367,12 +395,14 @@ __m128i test_mm_maskz_shrdi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} return _mm_maskz_shrdi_epi32(__U, __A, __B, 15); } +TEST_CONSTEXPR(match_v4si(_mm_maskz_shrdi_epi32(0xD, ((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 0, -12582912, 20971519)); __m128i test_mm_shrdi_epi32(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi32 // CHECK: call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 31)) return _mm_shrdi_epi32(__A, __B, 31); } +TEST_CONSTEXPR(match_v4si(_mm_shrdi_epi32(((__m128i)(__v4si){ 8, 9, 10, -11}), ((__m128i)(__v4si){ 1, 2, -3, 4}), 10), 4194304, 8388608, -12582912, 20971519)); __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shrdi_epi16 @@ -380,6 +410,7 @@ __m256i test_mm256_mask_shrdi_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v16hi(_mm256_mask_shrdi_epi16(((__m256i)(__v16hi){ 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999, 999}), 0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, 999, 999, 999, 999, 384, 999, -512, -513, 999, 767, 999, -769, 999, 999, 999)); __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_maskz_shrdi_epi16 @@ -387,12 +418,14 @@ __m256i test_mm256_maskz_shrdi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { // CHECK: select <16 x i1> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> %{{.*}} return _mm256_maskz_shrdi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v16hi(_mm256_maskz_shrdi_epi16(0x15A1, ((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, 0, 0, 0, 0, 384, 0, -512, -513, 0, 767, 0, -769, 0, 0, 0)); __m256i test_mm256_shrdi_epi16(__m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_shrdi_epi16 // CHECK: call <16 x i16> @llvm.fshr.v16i16(<16 x i16> %{{.*}}, <16 x i16> %{{.*}}, <16 x i16> splat (i16 31)) return _mm256_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v16hi(_mm256_shrdi_epi16(((__m256i)(__v16hi){ 32, -33, -34, 35, -36, 37, -38, 39, -40, -41, -42, -43, -44, -45, -46, -47}), ((__m256i)(__v16hi){ 1, -2, 3, -4, -5, 6, -7, -8, -9, 10, 11, 12, -13, -14, -15, -16}), 10), 64, -65, 255, -256, -257, 384, -385, -512, -513, 703, 767, 831, -769, -833, -897, -961)); __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_mask_shrdi_epi16 @@ -400,6 +433,7 @@ __m128i test_mm_mask_shrdi_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_mask_shrdi_epi16(__S, __U, __A, __B, 3); } +TEST_CONSTEXPR(match_v8hi(_mm_mask_shrdi_epi16(((__m128i)(__v8hi){ 999, 999, 999, 999, 999, 999, 999, 999}), 0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 999, 999, 1023, 1279, -1280, 999, 999, -2048)); __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_maskz_shrdi_epi16 @@ -407,12 +441,14 @@ __m128i test_mm_maskz_shrdi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { // CHECK: select <8 x i1> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}} return _mm_maskz_shrdi_epi16(__U, __A, __B, 7); } +TEST_CONSTEXPR(match_v8hi(_mm_maskz_shrdi_epi16(0x9C, ((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), 0, 0, 1023, 1279, -1280, 0, 0, -2048)); __m128i test_mm_shrdi_epi16(__m128i __A, __m128i __B) { // CHECK-LABEL: test_mm_shrdi_epi16 // CHECK: call <8 x i16> @llvm.fshr.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 31)) return _mm_shrdi_epi16(__A, __B, 31); } +TEST_CONSTEXPR(match_v8hi(_mm_shrdi_epi16(((__m128i)(__v8hi){ 16, 17, -18, -19, 20, -21, -22, 23}), ((__m128i)(__v8hi){ -1, -2, 3, 4, -5, 6, 7, -8}), 8), -256, -512, 1023, 1279, -1280, 1791, 2047, -2048)); __m256i test_mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { // CHECK-LABEL: test_mm256_mask_shldv_epi64