Skip to content

Commit e3aece0

Browse files
committed
[X86] Improve (vzmovl (insert_subvector)) combine to handle a bitcast between the vzmovl and insert
This combine tries to shrink a vzmovl if its input is an insert_subvector. This patch improves it to turn (vzmovl (bitcast (insert_subvector))) into (insert_subvector (vzmovl (bitcast))), potentially allowing the bitcast to be folded with a load.
1 parent 22987ba commit e3aece0

File tree

3 files changed

+24
-25
lines changed

3 files changed

+24
-25
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -36576,16 +36576,21 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG,
3657636576
// insert into a zero vector. This helps get VZEXT_MOVL closer to
3657736577
// scalar_to_vectors where 256/512 are canonicalized to an insert and a
3657836578
// 128-bit scalar_to_vector. This reduces the number of isel patterns.
36579-
if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() &&
36580-
N->getOperand(0).getOpcode() == ISD::INSERT_SUBVECTOR &&
36581-
N->getOperand(0).hasOneUse() &&
36582-
N->getOperand(0).getOperand(0).isUndef() &&
36583-
isNullConstant(N->getOperand(0).getOperand(2))) {
36584-
SDValue In = N->getOperand(0).getOperand(1);
36585-
SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, In.getValueType(), In);
36586-
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
36587-
getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
36588-
Movl, N->getOperand(0).getOperand(2));
36579+
if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps()) {
36580+
SDValue V = peekThroughOneUseBitcasts(N->getOperand(0));
36581+
36582+
if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.hasOneUse() &&
36583+
V.getOperand(0).isUndef() && isNullConstant(V.getOperand(2))) {
36584+
SDValue In = V.getOperand(1);
36585+
MVT SubVT =
36586+
MVT::getVectorVT(VT.getSimpleVT().getVectorElementType(),
36587+
In.getValueSizeInBits() / VT.getScalarSizeInBits());
36588+
In = DAG.getBitcast(SubVT, In);
36589+
SDValue Movl = DAG.getNode(X86ISD::VZEXT_MOVL, dl, SubVT, In);
36590+
return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, VT,
36591+
getZeroVector(VT.getSimpleVT(), Subtarget, DAG, dl),
36592+
Movl, V.getOperand(2));
36593+
}
3658936594
}
3659036595

3659136596
return SDValue();

llvm/test/CodeGen/X86/2012-01-12-extract-sv.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,12 @@ define void @endless_loop() {
55
; CHECK-LABEL: endless_loop:
66
; CHECK: # %bb.0: # %entry
77
; CHECK-NEXT: vmovaps (%eax), %xmm0
8-
; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
9-
; CHECK-NEXT: vblendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
10-
; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
11-
; CHECK-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
8+
; CHECK-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
9+
; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1
1210
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
13-
; CHECK-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3,4,5,6],ymm0[7]
11+
; CHECK-NEXT: vblendps {{.*#+}} ymm1 = ymm2[0,1,2,3,4,5,6],ymm1[7]
12+
; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
13+
; CHECK-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
1414
; CHECK-NEXT: vmovaps %ymm0, (%eax)
1515
; CHECK-NEXT: vmovaps %ymm1, (%eax)
1616
; CHECK-NEXT: vzeroupper

llvm/test/CodeGen/X86/vec-strict-fptoint-128.ll

Lines changed: 4 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -687,7 +687,6 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64(<2 x float> %a) #0 {
687687
;
688688
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i64:
689689
; AVX512DQ: # %bb.0:
690-
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
691690
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
692691
; AVX512DQ-NEXT: vcvttps2qq %ymm0, %zmm0
693692
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
@@ -866,17 +865,15 @@ define <2 x i64> @strict_vector_fptosi_v2f32_to_v2i64_load128(<4 x float>* %x) s
866865
; AVX512DQ-32-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
867866
; AVX512DQ-32: # %bb.0:
868867
; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
869-
; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
870-
; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
868+
; AVX512DQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
871869
; AVX512DQ-32-NEXT: vcvttps2qq %ymm0, %zmm0
872870
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
873871
; AVX512DQ-32-NEXT: vzeroupper
874872
; AVX512DQ-32-NEXT: retl
875873
;
876874
; AVX512DQ-64-LABEL: strict_vector_fptosi_v2f32_to_v2i64_load128:
877875
; AVX512DQ-64: # %bb.0:
878-
; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
879-
; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
876+
; AVX512DQ-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
880877
; AVX512DQ-64-NEXT: vcvttps2qq %ymm0, %zmm0
881878
; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
882879
; AVX512DQ-64-NEXT: vzeroupper
@@ -1201,7 +1198,6 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64(<2 x float> %a) #0 {
12011198
;
12021199
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i64:
12031200
; AVX512DQ: # %bb.0:
1204-
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
12051201
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
12061202
; AVX512DQ-NEXT: vcvttps2uqq %ymm0, %zmm0
12071203
; AVX512DQ-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
@@ -1528,17 +1524,15 @@ define <2 x i64> @strict_vector_fptoui_v2f32_to_v2i64_load128(<4 x float>* %x) s
15281524
; AVX512DQ-32-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
15291525
; AVX512DQ-32: # %bb.0:
15301526
; AVX512DQ-32-NEXT: movl {{[0-9]+}}(%esp), %eax
1531-
; AVX512DQ-32-NEXT: vmovdqa (%eax), %xmm0
1532-
; AVX512DQ-32-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1527+
; AVX512DQ-32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
15331528
; AVX512DQ-32-NEXT: vcvttps2uqq %ymm0, %zmm0
15341529
; AVX512DQ-32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
15351530
; AVX512DQ-32-NEXT: vzeroupper
15361531
; AVX512DQ-32-NEXT: retl
15371532
;
15381533
; AVX512DQ-64-LABEL: strict_vector_fptoui_v2f32_to_v2i64_load128:
15391534
; AVX512DQ-64: # %bb.0:
1540-
; AVX512DQ-64-NEXT: vmovdqa (%rdi), %xmm0
1541-
; AVX512DQ-64-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
1535+
; AVX512DQ-64-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
15421536
; AVX512DQ-64-NEXT: vcvttps2uqq %ymm0, %zmm0
15431537
; AVX512DQ-64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
15441538
; AVX512DQ-64-NEXT: vzeroupper

0 commit comments

Comments
 (0)