Skip to content

Commit 0f99a73

Browse files
topperc authored and tstellar committed
[X86] Teach combineVectorShiftImm to constant fold undef elements to 0 not undef.
Shifts are supposed to always shift in zeros or sign bits regardless of their inputs. It's possible the input value may have been replaced with undef by SimplifyDemandedBits, but the shifted-in zeros are still demanded. This issue was reported to me by ispc from 10.0. Unfortunately their failing test does not fail on trunk. This seems to be because the shl is optimized out earlier now and doesn't become VSHLI. ispc bug: ispc/ispc#1771. Differential Revision: https://reviews.llvm.org/D81212 (cherry picked from commit 7c9a89f)
1 parent 756b482 commit 0f99a73

File tree

2 files changed

+12
-4
lines changed

2 files changed

+12
-4
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39699,14 +39699,22 @@ static SDValue combineVectorShiftImm(SDNode *N, SelectionDAG &DAG,
3969939699
getTargetConstantBitsFromNode(N0, NumBitsPerElt, UndefElts, EltBits)) {
3970039700
assert(EltBits.size() == VT.getVectorNumElements() &&
3970139701
"Unexpected shift value type");
39702-
for (APInt &Elt : EltBits) {
39703-
if (X86ISD::VSHLI == Opcode)
39702+
// Undef elements need to fold to 0. It's possible SimplifyDemandedBits
39703+
// created an undef input due to no input bits being demanded, but user
39704+
// still expects 0 in other bits.
39705+
for (unsigned i = 0, e = EltBits.size(); i != e; ++i) {
39706+
APInt &Elt = EltBits[i];
39707+
if (UndefElts[i])
39708+
Elt = 0;
39709+
else if (X86ISD::VSHLI == Opcode)
3970439710
Elt <<= ShiftVal;
3970539711
else if (X86ISD::VSRAI == Opcode)
3970639712
Elt.ashrInPlace(ShiftVal);
3970739713
else
3970839714
Elt.lshrInPlace(ShiftVal);
3970939715
}
39716+
// Reset undef elements since they were zeroed above.
39717+
UndefElts = 0;
3971039718
return getConstVector(EltBits, UndefElts, VT.getSimpleVT(), DAG, SDLoc(N));
3971139719
}
3971239720

llvm/test/CodeGen/X86/vec_shift5.ll

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -149,7 +149,7 @@ define <4 x i32> @test10() {
149149
define <2 x i64> @test11() {
150150
; X32-LABEL: test11:
151151
; X32: # %bb.0:
152-
; X32-NEXT: movaps {{.*#+}} xmm0 = <u,u,3,0>
152+
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,0,3,0]
153153
; X32-NEXT: retl
154154
;
155155
; X64-LABEL: test11:
@@ -219,7 +219,7 @@ define <4 x i32> @test15() {
219219
define <2 x i64> @test16() {
220220
; X32-LABEL: test16:
221221
; X32: # %bb.0:
222-
; X32-NEXT: movaps {{.*#+}} xmm0 = <u,u,248,0>
222+
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,0,248,0]
223223
; X32-NEXT: retl
224224
;
225225
; X64-LABEL: test16:

0 commit comments

Comments
 (0)