Skip to content

Commit eb0e197

Browse files
committed
[TargetLowering] SimplifyDemandedBits - call SimplifyMultipleUseDemandedBits for ISD::EXTRACT_VECTOR_ELT (REAPPLIED)
This patch attempts to peek through vectors based on the demanded bits/elts of a particular ISD::EXTRACT_VECTOR_ELT node, allowing us to avoid dependencies on ops that have no impact on the extract. In particular, this helps remove some unnecessary scalar->vector->scalar patterns. The wasm shift patterns are annoying - @tlively has indicated that the wasm vector shift codegen is to be refactored in the near term, so this isn't considered a major issue. Reapplied after reversion at rL368660 due to PR42982, which was fixed at rGca7fdd41bda0. Differential Revision: https://reviews.llvm.org/D65887
1 parent 1737cc7 commit eb0e197

File tree

12 files changed

+151
-150
lines changed

12 files changed

+151
-150
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1836,6 +1836,17 @@ bool TargetLowering::SimplifyDemandedBits(
18361836
Depth + 1))
18371837
return true;
18381838

1839+
// Attempt to avoid multi-use ops if we don't need anything from them.
1840+
if (!DemandedSrcBits.isAllOnesValue() ||
1841+
!DemandedSrcElts.isAllOnesValue()) {
1842+
if (SDValue DemandedSrc = SimplifyMultipleUseDemandedBits(
1843+
Src, DemandedSrcBits, DemandedSrcElts, TLO.DAG, Depth + 1)) {
1844+
SDValue NewOp =
1845+
TLO.DAG.getNode(Op.getOpcode(), dl, VT, DemandedSrc, Idx);
1846+
return TLO.CombineTo(Op, NewOp);
1847+
}
1848+
}
1849+
18391850
Known = Known2;
18401851
if (BitWidth > EltBitWidth)
18411852
Known = Known.zext(BitWidth, false /* => any extend */);

llvm/test/CodeGen/AArch64/vecreduce-and-legalization.ll

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -105,17 +105,17 @@ define i8 @test_v9i8(<9 x i8> %a) nounwind {
105105
; CHECK-NEXT: mov v0.b[14], w8
106106
; CHECK-NEXT: mov v0.b[15], w8
107107
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
108-
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
109-
; CHECK-NEXT: umov w8, v0.b[1]
110-
; CHECK-NEXT: umov w9, v0.b[0]
108+
; CHECK-NEXT: and v1.8b, v0.8b, v1.8b
109+
; CHECK-NEXT: umov w8, v1.b[1]
110+
; CHECK-NEXT: umov w9, v1.b[0]
111111
; CHECK-NEXT: and w8, w9, w8
112-
; CHECK-NEXT: umov w9, v0.b[2]
112+
; CHECK-NEXT: umov w9, v1.b[2]
113113
; CHECK-NEXT: and w8, w8, w9
114-
; CHECK-NEXT: umov w9, v0.b[3]
114+
; CHECK-NEXT: umov w9, v1.b[3]
115115
; CHECK-NEXT: and w8, w8, w9
116-
; CHECK-NEXT: umov w9, v0.b[4]
116+
; CHECK-NEXT: umov w9, v1.b[4]
117117
; CHECK-NEXT: and w8, w8, w9
118-
; CHECK-NEXT: umov w9, v0.b[5]
118+
; CHECK-NEXT: umov w9, v1.b[5]
119119
; CHECK-NEXT: and w8, w8, w9
120120
; CHECK-NEXT: umov w9, v0.b[6]
121121
; CHECK-NEXT: and w8, w8, w9
@@ -132,9 +132,9 @@ define i32 @test_v3i32(<3 x i32> %a) nounwind {
132132
; CHECK-NEXT: mov w8, #-1
133133
; CHECK-NEXT: mov v0.s[3], w8
134134
; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
135-
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
135+
; CHECK-NEXT: and v1.8b, v0.8b, v1.8b
136136
; CHECK-NEXT: mov w8, v0.s[1]
137-
; CHECK-NEXT: fmov w9, s0
137+
; CHECK-NEXT: fmov w9, s1
138138
; CHECK-NEXT: and w0, w9, w8
139139
; CHECK-NEXT: ret
140140
%b = call i32 @llvm.experimental.vector.reduce.and.v3i32(<3 x i32> %a)

llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -113,17 +113,14 @@ define float @k(<8 x i8>* nocapture %in) {
113113
}
114114

115115
define float @KnownUpperZero(<4 x i16> %v) {
116-
; FIXME: uxtb are not required
117116
; CHECK-LABEL: KnownUpperZero:
118117
; CHECK: @ %bb.0:
119-
; CHECK-NEXT: vmov.i16 d16, #0x3
120-
; CHECK-NEXT: vmov d17, r0, r1
121-
; CHECK-NEXT: vand d16, d17, d16
118+
; CHECK-NEXT: vmov d16, r0, r1
122119
; CHECK-NEXT: vmov.u16 r0, d16[0]
123120
; CHECK-NEXT: vmov.u16 r1, d16[3]
124-
; CHECK-NEXT: uxtb r0, r0
121+
; CHECK-NEXT: and r0, r0, #3
125122
; CHECK-NEXT: vmov s0, r0
126-
; CHECK-NEXT: uxtb r0, r1
123+
; CHECK-NEXT: and r0, r1, #3
127124
; CHECK-NEXT: vmov s2, r0
128125
; CHECK-NEXT: vcvt.f32.s32 s0, s0
129126
; CHECK-NEXT: vcvt.f32.s32 s2, s2

llvm/test/CodeGen/Thumb2/lsll0.ll

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,18 +5,17 @@ define void @_Z4loopPxS_iS_i(i64* %d) {
55
; CHECK-LABEL: _Z4loopPxS_iS_i:
66
; CHECK: @ %bb.0: @ %entry
77
; CHECK-NEXT: vldrw.u32 q0, [r0]
8-
; CHECK-NEXT: vmov r1, s2
9-
; CHECK-NEXT: vmov r2, s0
10-
; CHECK-NEXT: sxth r1, r1
8+
; CHECK-NEXT: vmov r2, s2
9+
; CHECK-NEXT: vmov r1, s0
1110
; CHECK-NEXT: sxth r2, r2
1211
; CHECK-NEXT: rsbs r1, r1, #0
1312
; CHECK-NEXT: rsbs r2, r2, #0
1413
; CHECK-NEXT: sxth r1, r1
1514
; CHECK-NEXT: sxth r2, r2
16-
; CHECK-NEXT: asr.w r12, r1, #31
17-
; CHECK-NEXT: asrs r3, r2, #31
18-
; CHECK-NEXT: strd r2, r3, [r0]
19-
; CHECK-NEXT: strd r1, r12, [r0, #8]
15+
; CHECK-NEXT: asrs r3, r1, #31
16+
; CHECK-NEXT: asr.w r12, r2, #31
17+
; CHECK-NEXT: strd r1, r3, [r0]
18+
; CHECK-NEXT: strd r2, r12, [r0, #8]
2019
; CHECK-NEXT: bx lr
2120
entry:
2221
%wide.load = load <2 x i64>, <2 x i64>* undef, align 8

llvm/test/CodeGen/Thumb2/mve-vld3.ll

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -7,23 +7,23 @@ define void @vld3_v2i32(<6 x i32> *%src, <2 x i32> *%dst) {
77
; CHECK-LABEL: vld3_v2i32:
88
; CHECK: @ %bb.0: @ %entry
99
; CHECK-NEXT: ldrd r2, r3, [r0, #16]
10-
; CHECK-NEXT: vldrw.u32 q1, [r0]
10+
; CHECK-NEXT: vldrw.u32 q0, [r0]
1111
; CHECK-NEXT: vmov.32 q2[0], r2
12-
; CHECK-NEXT: vmov.f64 d0, d2
12+
; CHECK-NEXT: vmov.f64 d2, d0
1313
; CHECK-NEXT: vmov.32 q2[2], r3
14-
; CHECK-NEXT: vmov.f32 s12, s5
14+
; CHECK-NEXT: vmov.32 r0, q0[2]
15+
; CHECK-NEXT: vmov.f32 s12, s1
16+
; CHECK-NEXT: vmov.f32 s6, s3
1517
; CHECK-NEXT: vmov.f32 s14, s8
16-
; CHECK-NEXT: vmov.f32 s2, s7
17-
; CHECK-NEXT: vmov.f32 s8, s6
18-
; CHECK-NEXT: vmov r3, s12
19-
; CHECK-NEXT: vmov r2, s14
20-
; CHECK-NEXT: vmov r0, s2
21-
; CHECK-NEXT: add r0, r2
22-
; CHECK-NEXT: vmov r2, s10
23-
; CHECK-NEXT: add r0, r2
24-
; CHECK-NEXT: vmov r2, s0
18+
; CHECK-NEXT: vmov r2, s12
19+
; CHECK-NEXT: vmov r12, s6
20+
; CHECK-NEXT: vdup.32 q1, r0
21+
; CHECK-NEXT: vmov r0, s14
22+
; CHECK-NEXT: add r0, r12
23+
; CHECK-NEXT: add r0, r3
24+
; CHECK-NEXT: vmov r3, s0
2525
; CHECK-NEXT: add r2, r3
26-
; CHECK-NEXT: vmov r3, s8
26+
; CHECK-NEXT: vmov r3, s4
2727
; CHECK-NEXT: add r2, r3
2828
; CHECK-NEXT: strd r2, r0, [r1]
2929
; CHECK-NEXT: bx lr
@@ -258,27 +258,27 @@ define void @vld3_v2i16(<6 x i16> *%src, <2 x i16> *%dst) {
258258
; CHECK: @ %bb.0: @ %entry
259259
; CHECK-NEXT: .pad #8
260260
; CHECK-NEXT: sub sp, #8
261+
; CHECK-NEXT: vldrh.u32 q0, [r0]
261262
; CHECK-NEXT: ldr r2, [r0, #8]
262-
; CHECK-NEXT: vldrh.u32 q1, [r0]
263263
; CHECK-NEXT: mov r3, sp
264264
; CHECK-NEXT: str r2, [sp]
265-
; CHECK-NEXT: vldrh.u32 q2, [r3]
266-
; CHECK-NEXT: vmov.f64 d0, d2
267-
; CHECK-NEXT: vmov.f32 s12, s5
268-
; CHECK-NEXT: vmov.f32 s2, s7
269-
; CHECK-NEXT: vmov.f32 s14, s8
270-
; CHECK-NEXT: vmov.f32 s4, s6
271-
; CHECK-NEXT: vmov.f32 s6, s9
272-
; CHECK-NEXT: vmov r0, s2
273-
; CHECK-NEXT: vmov r2, s14
265+
; CHECK-NEXT: vmov.f64 d2, d0
266+
; CHECK-NEXT: vmov.f32 s6, s3
267+
; CHECK-NEXT: vmov.f32 s8, s1
268+
; CHECK-NEXT: vmov.f64 d6, d1
269+
; CHECK-NEXT: vmov r0, s6
270+
; CHECK-NEXT: vldrh.u32 q1, [r3]
271+
; CHECK-NEXT: vmov.f32 s10, s4
272+
; CHECK-NEXT: vmov.f32 s14, s5
273+
; CHECK-NEXT: vmov r2, s10
274274
; CHECK-NEXT: add r0, r2
275-
; CHECK-NEXT: vmov r2, s6
275+
; CHECK-NEXT: vmov r2, s14
276276
; CHECK-NEXT: add r0, r2
277277
; CHECK-NEXT: strh r0, [r1, #2]
278-
; CHECK-NEXT: vmov r0, s0
279-
; CHECK-NEXT: vmov r2, s12
278+
; CHECK-NEXT: vmov r0, s8
279+
; CHECK-NEXT: vmov r2, s0
280280
; CHECK-NEXT: add r0, r2
281-
; CHECK-NEXT: vmov r2, s4
281+
; CHECK-NEXT: vmov r2, s12
282282
; CHECK-NEXT: add r0, r2
283283
; CHECK-NEXT: strh r0, [r1]
284284
; CHECK-NEXT: add sp, #8

llvm/test/CodeGen/Thumb2/mve-vld4.ll

Lines changed: 20 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,33 +6,32 @@
66
define void @vld4_v2i32(<8 x i32> *%src, <2 x i32> *%dst) {
77
; CHECK-LABEL: vld4_v2i32:
88
; CHECK: @ %bb.0: @ %entry
9-
; CHECK-NEXT: .vsave {d8, d9}
10-
; CHECK-NEXT: vpush {d8, d9}
119
; CHECK-NEXT: vldrw.u32 q0, [r0]
12-
; CHECK-NEXT: vldrw.u32 q2, [r0, #16]
13-
; CHECK-NEXT: vmov.f32 s4, s3
14-
; CHECK-NEXT: vmov.f64 d6, d1
15-
; CHECK-NEXT: vmov.f32 s6, s11
16-
; CHECK-NEXT: vmov.f32 s14, s10
17-
; CHECK-NEXT: vmov.f32 s16, s1
18-
; CHECK-NEXT: vmov.f32 s2, s8
19-
; CHECK-NEXT: vmov.f32 s18, s9
20-
; CHECK-NEXT: vmov r0, s6
21-
; CHECK-NEXT: vmov r2, s14
22-
; CHECK-NEXT: vmov r3, s2
10+
; CHECK-NEXT: vldrw.u32 q1, [r0, #16]
11+
; CHECK-NEXT: vmov.f32 s8, s3
12+
; CHECK-NEXT: vmov.32 r3, q1[0]
13+
; CHECK-NEXT: vmov.f32 s10, s7
14+
; CHECK-NEXT: vmov r2, s6
15+
; CHECK-NEXT: vmov.f32 s12, s1
16+
; CHECK-NEXT: vmov.f32 s14, s5
17+
; CHECK-NEXT: vdup.32 q1, r3
18+
; CHECK-NEXT: vmov r3, s6
19+
; CHECK-NEXT: vmov r0, s10
2320
; CHECK-NEXT: add r0, r2
24-
; CHECK-NEXT: vmov r2, s18
21+
; CHECK-NEXT: vmov r2, s14
2522
; CHECK-NEXT: add r2, r3
26-
; CHECK-NEXT: vmov r3, s12
27-
; CHECK-NEXT: add.w r12, r2, r0
23+
; CHECK-NEXT: vmov r3, s0
24+
; CHECK-NEXT: add r0, r2
25+
; CHECK-NEXT: str r0, [r1, #4]
26+
; CHECK-NEXT: vmov.32 r2, q0[2]
27+
; CHECK-NEXT: vmov r0, s8
28+
; CHECK-NEXT: vdup.32 q1, r2
2829
; CHECK-NEXT: vmov r2, s4
29-
; CHECK-NEXT: vmov r0, s0
30+
; CHECK-NEXT: add r0, r2
31+
; CHECK-NEXT: vmov r2, s12
3032
; CHECK-NEXT: add r2, r3
31-
; CHECK-NEXT: vmov r3, s16
32-
; CHECK-NEXT: add r0, r3
3333
; CHECK-NEXT: add r0, r2
34-
; CHECK-NEXT: strd r0, r12, [r1]
35-
; CHECK-NEXT: vpop {d8, d9}
34+
; CHECK-NEXT: str r0, [r1]
3635
; CHECK-NEXT: bx lr
3736
entry:
3837
%l1 = load <8 x i32>, <8 x i32>* %src, align 4

0 commit comments

Comments
 (0)