Skip to content

Commit ea65b89

Browse files
topperc authored and tstellar committed
[X86] Fold undef elts to 0 in getTargetVShiftByConstNode.
Similar to D81212. Differential Revision: https://reviews.llvm.org/D81292 (cherry picked from commit 3408dcb)
1 parent 0f99a73 commit ea65b89

File tree

2 files changed

+20
-17
lines changed

2 files changed

+20
-17
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -23319,7 +23319,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
2331923319
for (unsigned i = 0; i != NumElts; ++i) {
2332023320
SDValue CurrentOp = SrcOp->getOperand(i);
2332123321
if (CurrentOp->isUndef()) {
23322-
Elts.push_back(CurrentOp);
23322+
// Must produce 0s in the correct bits.
23323+
Elts.push_back(DAG.getConstant(0, dl, ElementType));
2332323324
continue;
2332423325
}
2332523326
auto *ND = cast<ConstantSDNode>(CurrentOp);
@@ -23331,7 +23332,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
2333123332
for (unsigned i = 0; i != NumElts; ++i) {
2333223333
SDValue CurrentOp = SrcOp->getOperand(i);
2333323334
if (CurrentOp->isUndef()) {
23334-
Elts.push_back(CurrentOp);
23335+
// Must produce 0s in the correct bits.
23336+
Elts.push_back(DAG.getConstant(0, dl, ElementType));
2333523337
continue;
2333623338
}
2333723339
auto *ND = cast<ConstantSDNode>(CurrentOp);
@@ -23343,7 +23345,8 @@ static SDValue getTargetVShiftByConstNode(unsigned Opc, const SDLoc &dl, MVT VT,
2334323345
for (unsigned i = 0; i != NumElts; ++i) {
2334423346
SDValue CurrentOp = SrcOp->getOperand(i);
2334523347
if (CurrentOp->isUndef()) {
23346-
Elts.push_back(CurrentOp);
23348+
// All shifted in bits must be the same so use 0.
23349+
Elts.push_back(DAG.getConstant(0, dl, ElementType));
2334723350
continue;
2334823351
}
2334923352
auto *ND = cast<ConstantSDNode>(CurrentOp);

llvm/test/CodeGen/X86/vec_shift5.ll

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -121,12 +121,12 @@ define <2 x i64> @test8() {
121121
define <8 x i16> @test9() {
122122
; X32-LABEL: test9:
123123
; X32: # %bb.0:
124-
; X32-NEXT: movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
124+
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
125125
; X32-NEXT: retl
126126
;
127127
; X64-LABEL: test9:
128128
; X64: # %bb.0:
129-
; X64-NEXT: movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
129+
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
130130
; X64-NEXT: retq
131131
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
132132
ret <8 x i16> %1
@@ -135,12 +135,12 @@ define <8 x i16> @test9() {
135135
define <4 x i32> @test10() {
136136
; X32-LABEL: test10:
137137
; X32: # %bb.0:
138-
; X32-NEXT: movaps {{.*#+}} xmm0 = <u,1,u,4>
138+
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
139139
; X32-NEXT: retl
140140
;
141141
; X64-LABEL: test10:
142142
; X64: # %bb.0:
143-
; X64-NEXT: movaps {{.*#+}} xmm0 = <u,1,u,4>
143+
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
144144
; X64-NEXT: retq
145145
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
146146
ret <4 x i32> %1
@@ -154,7 +154,7 @@ define <2 x i64> @test11() {
154154
;
155155
; X64-LABEL: test11:
156156
; X64: # %bb.0:
157-
; X64-NEXT: movaps {{.*#+}} xmm0 = <u,3>
157+
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
158158
; X64-NEXT: retq
159159
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
160160
ret <2 x i64> %1
@@ -163,12 +163,12 @@ define <2 x i64> @test11() {
163163
define <8 x i16> @test12() {
164164
; X32-LABEL: test12:
165165
; X32: # %bb.0:
166-
; X32-NEXT: movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
166+
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
167167
; X32-NEXT: retl
168168
;
169169
; X64-LABEL: test12:
170170
; X64: # %bb.0:
171-
; X64-NEXT: movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
171+
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
172172
; X64-NEXT: retq
173173
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
174174
ret <8 x i16> %1
@@ -177,12 +177,12 @@ define <8 x i16> @test12() {
177177
define <4 x i32> @test13() {
178178
; X32-LABEL: test13:
179179
; X32: # %bb.0:
180-
; X32-NEXT: movaps {{.*#+}} xmm0 = <u,1,u,4>
180+
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
181181
; X32-NEXT: retl
182182
;
183183
; X64-LABEL: test13:
184184
; X64: # %bb.0:
185-
; X64-NEXT: movaps {{.*#+}} xmm0 = <u,1,u,4>
185+
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,1,0,4]
186186
; X64-NEXT: retq
187187
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
188188
ret <4 x i32> %1
@@ -191,12 +191,12 @@ define <4 x i32> @test13() {
191191
define <8 x i16> @test14() {
192192
; X32-LABEL: test14:
193193
; X32: # %bb.0:
194-
; X32-NEXT: movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
194+
; X32-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
195195
; X32-NEXT: retl
196196
;
197197
; X64-LABEL: test14:
198198
; X64: # %bb.0:
199-
; X64-NEXT: movaps {{.*#+}} xmm0 = <1,1,u,u,3,u,8,16>
199+
; X64-NEXT: movaps {{.*#+}} xmm0 = [1,1,0,0,3,0,8,16]
200200
; X64-NEXT: retq
201201
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> <i16 15, i16 8, i16 undef, i16 undef, i16 31, i16 undef, i16 64, i16 128>, i32 3)
202202
ret <8 x i16> %1
@@ -205,12 +205,12 @@ define <8 x i16> @test14() {
205205
define <4 x i32> @test15() {
206206
; X32-LABEL: test15:
207207
; X32: # %bb.0:
208-
; X32-NEXT: movaps {{.*#+}} xmm0 = <u,64,u,256>
208+
; X32-NEXT: movaps {{.*#+}} xmm0 = [0,64,0,256]
209209
; X32-NEXT: retl
210210
;
211211
; X64-LABEL: test15:
212212
; X64: # %bb.0:
213-
; X64-NEXT: movaps {{.*#+}} xmm0 = <u,64,u,256>
213+
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,64,0,256]
214214
; X64-NEXT: retq
215215
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> <i32 undef, i32 8, i32 undef, i32 32>, i32 3)
216216
ret <4 x i32> %1
@@ -224,7 +224,7 @@ define <2 x i64> @test16() {
224224
;
225225
; X64-LABEL: test16:
226226
; X64: # %bb.0:
227-
; X64-NEXT: movaps {{.*#+}} xmm0 = <u,248>
227+
; X64-NEXT: movaps {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,248,0,0,0,0,0,0,0]
228228
; X64-NEXT: retq
229229
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> <i64 undef, i64 31>, i32 3)
230230
ret <2 x i64> %1

0 commit comments

Comments
 (0)