Skip to content

Commit a6f4a86

Browse files
committed
[Headers][X86] VisitCallExpr constexpr immediate shifts (#154293)
Implement VectorExprEvaluator::VisitCallExpr constexpr support for the immediate-count shift builtins (left shift, logical right shift, and arithmetic right shift) behind the MMX/SSE/AVX2/AVX512 intrinsics. Also implement them in the experimental-new-constant-interpreter. Adds support and tests for _mm*_slli_epi*, _mm*_srli_epi*, _mm*_srai_epi*, _mm*_mask_slli_epi*, and _mm*_maskz_slli_epi*. NOTE: not all intrinsics exist at every element width, i.e. _mm_srli_pi32 doesn't have a pi64 counterpart, etc.
1 parent fcc7867 commit a6f4a86

18 files changed

+546
-288
lines changed

clang/include/clang/Basic/BuiltinsX86.td

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -276,21 +276,25 @@ let Features = "sse2", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] i
276276
def psllw128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
277277
def pslld128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>)">;
278278
def psllq128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Vector<2, long long int>)">;
279+
def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">;
280+
def pslldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
281+
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
282+
}
283+
284+
let Features = "sse2",
285+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
286+
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
287+
279288
def psllwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
280289
def pslldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
281290
def psllqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
291+
282292
def psrlwi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
283293
def psrldi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
284294
def psrlqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
295+
285296
def psrawi128 : X86Builtin<"_Vector<8, short>(_Vector<8, short>, int)">;
286297
def psradi128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, int)">;
287-
def pmaddwd128 : X86Builtin<"_Vector<4, int>(_Vector<8, short>, _Vector<8, short>)">;
288-
def pslldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
289-
def psrldqi128_byteshift : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, _Constant int)">;
290-
}
291-
292-
let Features = "sse2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
293-
def pmuludq128 : X86Builtin<"_Vector<2, long long int>(_Vector<4, int>, _Vector<4, int>)">;
294298
}
295299

296300
let Features = "sse3", Attributes = [NoThrow] in {
@@ -595,23 +599,15 @@ let Features = "avx2", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] i
595599
def psignb256 : X86Builtin<"_Vector<32, char>(_Vector<32, char>, _Vector<32, char>)">;
596600
def psignw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
597601
def psignd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>)">;
598-
def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
599602
def psllw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
600603
def pslldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
601-
def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
602604
def pslld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
603-
def psllqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
604605
def psllq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
605-
def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
606606
def psraw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
607-
def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
608607
def psrad256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
609608
def psrldqi256_byteshift : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Constant int)">;
610-
def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
611609
def psrlw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<8, short>)">;
612-
def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
613610
def psrld256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<4, int>)">;
614-
def psrlqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
615611
def psrlq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
616612
def pblendd128 : X86Builtin<"_Vector<4, int>(_Vector<4, int>, _Vector<4, int>, _Constant int)">;
617613
def pblendd256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, _Vector<8, int>, _Constant int)">;
@@ -628,6 +624,19 @@ let Features = "avx2", Attributes = [NoThrow, Const, Constexpr, RequiredVectorWi
628624
def pmuldq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
629625
def pmuludq256 : X86Builtin<"_Vector<4, long long int>(_Vector<8, int>, _Vector<8, int>)">;
630626

627+
def psllwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
628+
def pslldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
629+
def psllqi256
630+
: X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
631+
632+
def psrlwi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
633+
def psrldi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
634+
def psrlqi256
635+
: X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
636+
637+
def psrawi256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, int)">;
638+
def psradi256 : X86Builtin<"_Vector<8, int>(_Vector<8, int>, int)">;
639+
631640
def pmulhuw256 : X86Builtin<"_Vector<16, unsigned short>(_Vector<16, unsigned short>, _Vector<16, unsigned short>)">;
632641
def pmulhw256 : X86Builtin<"_Vector<16, short>(_Vector<16, short>, _Vector<16, short>)">;
633642

@@ -2062,7 +2071,6 @@ let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorW
20622071
def pshuflw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Constant int)">;
20632072
def psllv32hi : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<32, short>)">;
20642073
def psllw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
2065-
def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
20662074
}
20672075

20682076
let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
@@ -2073,7 +2081,9 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
20732081
def psllv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
20742082
}
20752083

2076-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2084+
let Features = "avx512f,evex512",
2085+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
2086+
def psllwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
20772087
def pslldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
20782088
def psllqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
20792089
}
@@ -2090,7 +2100,9 @@ let Features = "avx512bw,avx512vl", Attributes = [NoThrow, Const, RequiredVector
20902100
def psrlv8hi : X86Builtin<"_Vector<8, short>(_Vector<8, short>, _Vector<8, short>)">;
20912101
}
20922102

2093-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2103+
let Features = "avx512f,evex512",
2104+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
2105+
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
20942106
def psrldi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
20952107
def psrlqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
20962108
}
@@ -2116,10 +2128,10 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
21162128
}
21172129

21182130
let Features = "avx512bw,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2119-
def psraw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
2120-
def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
2121-
def psrlw512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
2122-
def psrlwi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
2131+
def psraw512
2132+
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
2133+
def psrlw512
2134+
: X86Builtin<"_Vector<32, short>(_Vector<32, short>, _Vector<8, short>)">;
21232135
def pslldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
21242136
def psrldqi512_byteshift : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, _Constant int)">;
21252137
}
@@ -2435,7 +2447,9 @@ let Features = "avx512f", Attributes = [NoThrow, Const, RequiredVectorWidth<128>
24352447
def scalefss_round_mask : X86Builtin<"_Vector<4, float>(_Vector<4, float>, _Vector<4, float>, _Vector<4, float>, unsigned char, _Constant int)">;
24362448
}
24372449

2438-
let Features = "avx512f,evex512", Attributes = [NoThrow, Const, RequiredVectorWidth<512>] in {
2450+
let Features = "avx512f,evex512",
2451+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<512>] in {
2452+
def psrawi512 : X86Builtin<"_Vector<32, short>(_Vector<32, short>, int)">;
24392453
def psradi512 : X86Builtin<"_Vector<16, int>(_Vector<16, int>, int)">;
24402454
def psraqi512 : X86Builtin<"_Vector<8, long long int>(_Vector<8, long long int>, int)">;
24412455
}
@@ -2448,11 +2462,13 @@ let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256
24482462
def psraq256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, _Vector<2, long long int>)">;
24492463
}
24502464

2451-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<128>] in {
2465+
let Features = "avx512vl",
2466+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<128>] in {
24522467
def psraqi128 : X86Builtin<"_Vector<2, long long int>(_Vector<2, long long int>, int)">;
24532468
}
24542469

2455-
let Features = "avx512vl", Attributes = [NoThrow, Const, RequiredVectorWidth<256>] in {
2470+
let Features = "avx512vl",
2471+
Attributes = [NoThrow, Const, Constexpr, RequiredVectorWidth<256>] in {
24562472
def psraqi256 : X86Builtin<"_Vector<4, long long int>(_Vector<4, long long int>, int)">;
24572473
}
24582474

clang/lib/AST/ByteCode/InterpBuiltin.cpp

Lines changed: 61 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2565,10 +2565,34 @@ static bool interp__builtin_elementwise_int_binop(
25652565
return true;
25662566
}
25672567

2568+
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
2569+
assert(VT->getElementType()->isIntegralOrEnumerationType());
2570+
PrimType ElemT = *S.getContext().classify(VT->getElementType());
2571+
unsigned NumElems = VT->getNumElements();
2572+
bool DestUnsigned = Call->getType()->isUnsignedIntegerOrEnumerationType();
2573+
2574+
// Vector + Scalar case.
2575+
if (!Call->getArg(1)->getType()->isVectorType()) {
2576+
assert(Call->getArg(1)->getType()->isIntegralOrEnumerationType());
2577+
2578+
APSInt RHS = popToAPSInt(
2579+
S.Stk, *S.getContext().classify(Call->getArg(1)->getType()));
2580+
const Pointer &LHS = S.Stk.pop<Pointer>();
2581+
const Pointer &Dst = S.Stk.peek<Pointer>();
2582+
2583+
for (unsigned I = 0; I != NumElems; ++I) {
2584+
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2585+
Dst.elem<T>(I) = static_cast<T>(
2586+
APSInt(Fn(LHS.elem<T>(I).toAPSInt(), RHS), DestUnsigned));
2587+
});
2588+
}
2589+
Dst.initializeAllElements();
2590+
return true;
2591+
}
2592+
25682593
// Vector case.
25692594
assert(Call->getArg(0)->getType()->isVectorType() &&
25702595
Call->getArg(1)->getType()->isVectorType());
2571-
const auto *VT = Call->getArg(0)->getType()->castAs<VectorType>();
25722596
assert(VT->getElementType() ==
25732597
Call->getArg(1)->getType()->castAs<VectorType>()->getElementType());
25742598
assert(VT->getNumElements() ==
@@ -2578,22 +2602,12 @@ static bool interp__builtin_elementwise_int_binop(
25782602
const Pointer &RHS = S.Stk.pop<Pointer>();
25792603
const Pointer &LHS = S.Stk.pop<Pointer>();
25802604
const Pointer &Dst = S.Stk.peek<Pointer>();
2581-
PrimType ElemT = *S.getContext().classify(VT->getElementType());
2582-
unsigned NumElems = VT->getNumElements();
25832605
for (unsigned I = 0; I != NumElems; ++I) {
2584-
APSInt Elem1;
2585-
APSInt Elem2;
25862606
INT_TYPE_SWITCH_NO_BOOL(ElemT, {
2587-
Elem1 = LHS.elem<T>(I).toAPSInt();
2588-
Elem2 = RHS.elem<T>(I).toAPSInt();
2607+
APSInt Elem1 = LHS.elem<T>(I).toAPSInt();
2608+
APSInt Elem2 = RHS.elem<T>(I).toAPSInt();
2609+
Dst.elem<T>(I) = static_cast<T>(APSInt(Fn(Elem1, Elem2), DestUnsigned));
25892610
});
2590-
2591-
APSInt Result =
2592-
APSInt(Fn(Elem1, Elem2),
2593-
Call->getType()->isUnsignedIntegerOrEnumerationType());
2594-
2595-
INT_TYPE_SWITCH_NO_BOOL(ElemT,
2596-
{ Dst.elem<T>(I) = static_cast<T>(Result); });
25972611
}
25982612
Dst.initializeAllElements();
25992613

@@ -3254,20 +3268,38 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
32543268
case clang::X86::BI__builtin_ia32_psllv4di:
32553269
case clang::X86::BI__builtin_ia32_psllv4si:
32563270
case clang::X86::BI__builtin_ia32_psllv8si:
3271+
case clang::X86::BI__builtin_ia32_psllwi128:
3272+
case clang::X86::BI__builtin_ia32_psllwi256:
3273+
case clang::X86::BI__builtin_ia32_psllwi512:
3274+
case clang::X86::BI__builtin_ia32_pslldi128:
3275+
case clang::X86::BI__builtin_ia32_pslldi256:
3276+
case clang::X86::BI__builtin_ia32_pslldi512:
3277+
case clang::X86::BI__builtin_ia32_psllqi128:
3278+
case clang::X86::BI__builtin_ia32_psllqi256:
3279+
case clang::X86::BI__builtin_ia32_psllqi512:
32573280
return interp__builtin_elementwise_int_binop(
32583281
S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
3259-
if (RHS.uge(RHS.getBitWidth())) {
3260-
return APInt::getZero(RHS.getBitWidth());
3282+
if (RHS.uge(LHS.getBitWidth())) {
3283+
return APInt::getZero(LHS.getBitWidth());
32613284
}
32623285
return LHS.shl(RHS.getZExtValue());
32633286
});
32643287

32653288
case clang::X86::BI__builtin_ia32_psrav4si:
32663289
case clang::X86::BI__builtin_ia32_psrav8si:
3290+
case clang::X86::BI__builtin_ia32_psrawi128:
3291+
case clang::X86::BI__builtin_ia32_psrawi256:
3292+
case clang::X86::BI__builtin_ia32_psrawi512:
3293+
case clang::X86::BI__builtin_ia32_psradi128:
3294+
case clang::X86::BI__builtin_ia32_psradi256:
3295+
case clang::X86::BI__builtin_ia32_psradi512:
3296+
case clang::X86::BI__builtin_ia32_psraqi128:
3297+
case clang::X86::BI__builtin_ia32_psraqi256:
3298+
case clang::X86::BI__builtin_ia32_psraqi512:
32673299
return interp__builtin_elementwise_int_binop(
32683300
S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
3269-
if (RHS.uge(RHS.getBitWidth())) {
3270-
return LHS.ashr(RHS.getBitWidth() - 1);
3301+
if (RHS.uge(LHS.getBitWidth())) {
3302+
return LHS.ashr(LHS.getBitWidth() - 1);
32713303
}
32723304
return LHS.ashr(RHS.getZExtValue());
32733305
});
@@ -3276,10 +3308,19 @@ bool InterpretBuiltin(InterpState &S, CodePtr OpPC, const CallExpr *Call,
32763308
case clang::X86::BI__builtin_ia32_psrlv4di:
32773309
case clang::X86::BI__builtin_ia32_psrlv4si:
32783310
case clang::X86::BI__builtin_ia32_psrlv8si:
3311+
case clang::X86::BI__builtin_ia32_psrlwi128:
3312+
case clang::X86::BI__builtin_ia32_psrlwi256:
3313+
case clang::X86::BI__builtin_ia32_psrlwi512:
3314+
case clang::X86::BI__builtin_ia32_psrldi128:
3315+
case clang::X86::BI__builtin_ia32_psrldi256:
3316+
case clang::X86::BI__builtin_ia32_psrldi512:
3317+
case clang::X86::BI__builtin_ia32_psrlqi128:
3318+
case clang::X86::BI__builtin_ia32_psrlqi256:
3319+
case clang::X86::BI__builtin_ia32_psrlqi512:
32793320
return interp__builtin_elementwise_int_binop(
32803321
S, OpPC, Call, BuiltinID, [](const APSInt &LHS, const APSInt &RHS) {
3281-
if (RHS.uge(RHS.getBitWidth())) {
3282-
return APInt::getZero(RHS.getBitWidth());
3322+
if (RHS.uge(LHS.getBitWidth())) {
3323+
return APInt::getZero(LHS.getBitWidth());
32833324
}
32843325
return LHS.lshr(RHS.getZExtValue());
32853326
});

clang/lib/AST/ExprConstant.cpp

Lines changed: 90 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11632,7 +11632,38 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1163211632
case clang::X86::BI__builtin_ia32_psrlv2di:
1163311633
case clang::X86::BI__builtin_ia32_psrlv4di:
1163411634
case clang::X86::BI__builtin_ia32_psrlv4si:
11635-
case clang::X86::BI__builtin_ia32_psrlv8si:{
11635+
case clang::X86::BI__builtin_ia32_psrlv8si:
11636+
11637+
case clang::X86::BI__builtin_ia32_psllwi128:
11638+
case clang::X86::BI__builtin_ia32_pslldi128:
11639+
case clang::X86::BI__builtin_ia32_psllqi128:
11640+
case clang::X86::BI__builtin_ia32_psllwi256:
11641+
case clang::X86::BI__builtin_ia32_pslldi256:
11642+
case clang::X86::BI__builtin_ia32_psllqi256:
11643+
case clang::X86::BI__builtin_ia32_psllwi512:
11644+
case clang::X86::BI__builtin_ia32_pslldi512:
11645+
case clang::X86::BI__builtin_ia32_psllqi512:
11646+
11647+
case clang::X86::BI__builtin_ia32_psrlwi128:
11648+
case clang::X86::BI__builtin_ia32_psrldi128:
11649+
case clang::X86::BI__builtin_ia32_psrlqi128:
11650+
case clang::X86::BI__builtin_ia32_psrlwi256:
11651+
case clang::X86::BI__builtin_ia32_psrldi256:
11652+
case clang::X86::BI__builtin_ia32_psrlqi256:
11653+
case clang::X86::BI__builtin_ia32_psrlwi512:
11654+
case clang::X86::BI__builtin_ia32_psrldi512:
11655+
case clang::X86::BI__builtin_ia32_psrlqi512:
11656+
11657+
case clang::X86::BI__builtin_ia32_psrawi128:
11658+
case clang::X86::BI__builtin_ia32_psradi128:
11659+
case clang::X86::BI__builtin_ia32_psraqi128:
11660+
case clang::X86::BI__builtin_ia32_psrawi256:
11661+
case clang::X86::BI__builtin_ia32_psradi256:
11662+
case clang::X86::BI__builtin_ia32_psraqi256:
11663+
case clang::X86::BI__builtin_ia32_psrawi512:
11664+
case clang::X86::BI__builtin_ia32_psradi512:
11665+
case clang::X86::BI__builtin_ia32_psraqi512: {
11666+
1163611667
APValue SourceLHS, SourceRHS;
1163711668
if (!EvaluateAsRValue(Info, E->getArg(0), SourceLHS) ||
1163811669
!EvaluateAsRValue(Info, E->getArg(1), SourceRHS))
@@ -11646,6 +11677,64 @@ bool VectorExprEvaluator::VisitCallExpr(const CallExpr *E) {
1164611677

1164711678
for (unsigned EltNum = 0; EltNum < SourceLen; ++EltNum) {
1164811679
APSInt LHS = SourceLHS.getVectorElt(EltNum).getInt();
11680+
11681+
if (SourceRHS.isInt()) {
11682+
const unsigned LaneBitWidth = LHS.getBitWidth();
11683+
const unsigned ShiftAmount = SourceRHS.getInt().getZExtValue();
11684+
11685+
switch (E->getBuiltinCallee()) {
11686+
case clang::X86::BI__builtin_ia32_psllwi128:
11687+
case clang::X86::BI__builtin_ia32_psllwi256:
11688+
case clang::X86::BI__builtin_ia32_psllwi512:
11689+
case clang::X86::BI__builtin_ia32_pslldi128:
11690+
case clang::X86::BI__builtin_ia32_pslldi256:
11691+
case clang::X86::BI__builtin_ia32_pslldi512:
11692+
case clang::X86::BI__builtin_ia32_psllqi128:
11693+
case clang::X86::BI__builtin_ia32_psllqi256:
11694+
case clang::X86::BI__builtin_ia32_psllqi512:
11695+
if (ShiftAmount >= LaneBitWidth) {
11696+
ResultElements.push_back(
11697+
APValue(APSInt(APInt::getZero(LaneBitWidth), DestUnsigned)));
11698+
} else {
11699+
ResultElements.push_back(
11700+
APValue(APSInt(LHS.shl(ShiftAmount), DestUnsigned)));
11701+
}
11702+
break;
11703+
case clang::X86::BI__builtin_ia32_psrlwi128:
11704+
case clang::X86::BI__builtin_ia32_psrlwi256:
11705+
case clang::X86::BI__builtin_ia32_psrlwi512:
11706+
case clang::X86::BI__builtin_ia32_psrldi128:
11707+
case clang::X86::BI__builtin_ia32_psrldi256:
11708+
case clang::X86::BI__builtin_ia32_psrldi512:
11709+
case clang::X86::BI__builtin_ia32_psrlqi128:
11710+
case clang::X86::BI__builtin_ia32_psrlqi256:
11711+
case clang::X86::BI__builtin_ia32_psrlqi512:
11712+
if (ShiftAmount >= LaneBitWidth) {
11713+
ResultElements.push_back(
11714+
APValue(APSInt(APInt::getZero(LaneBitWidth), DestUnsigned)));
11715+
} else {
11716+
ResultElements.push_back(
11717+
APValue(APSInt(LHS.lshr(ShiftAmount), DestUnsigned)));
11718+
}
11719+
break;
11720+
case clang::X86::BI__builtin_ia32_psrawi128:
11721+
case clang::X86::BI__builtin_ia32_psrawi256:
11722+
case clang::X86::BI__builtin_ia32_psrawi512:
11723+
case clang::X86::BI__builtin_ia32_psradi128:
11724+
case clang::X86::BI__builtin_ia32_psradi256:
11725+
case clang::X86::BI__builtin_ia32_psradi512:
11726+
case clang::X86::BI__builtin_ia32_psraqi128:
11727+
case clang::X86::BI__builtin_ia32_psraqi256:
11728+
case clang::X86::BI__builtin_ia32_psraqi512:
11729+
ResultElements.push_back(
11730+
APValue(APSInt(LHS.ashr(std::min(ShiftAmount, LaneBitWidth - 1)),
11731+
DestUnsigned)));
11732+
break;
11733+
default:
11734+
llvm_unreachable("Unexpected builtin callee");
11735+
}
11736+
continue;
11737+
}
1164911738
APSInt RHS = SourceRHS.getVectorElt(EltNum).getInt();
1165011739
switch (E->getBuiltinCallee()) {
1165111740
case Builtin::BI__builtin_elementwise_add_sat:

0 commit comments

Comments
 (0)