Skip to content

Commit 07a4e07

Browse files
committed
[DAG] visitTRUNCATE - more aggressively fold trunc(add(x,x)) -> add(trunc(x),trunc(x))
We're very careful not to truncate binary arithmetic ops if it will affect legality or cause additional truncation instructions, hence we limit this to cases where at least one operand is constant. But if both operands are the same (e.g. for add/mul) then we wouldn't increase the number of truncations, so we can be slightly more aggressive at folding the truncation.
1 parent 96688d4 commit 07a4e07

File tree

5 files changed

+68
-99
lines changed

5 files changed

+68
-99
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16433,7 +16433,8 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
1643316433
case ISD::OR:
1643416434
case ISD::XOR:
1643516435
if (!LegalOperations && N0.hasOneUse() &&
16436-
(isConstantOrConstantVector(N0.getOperand(0), true) ||
16436+
(N0.getOperand(0) == N0.getOperand(1) ||
16437+
isConstantOrConstantVector(N0.getOperand(0), true) ||
1643716438
isConstantOrConstantVector(N0.getOperand(1), true))) {
1643816439
// TODO: We already restricted this to pre-legalization, but for vectors
1643916440
// we are extra cautious to not create an unsupported operation.

llvm/test/CodeGen/AArch64/avoid-pre-trunc.ll

Lines changed: 44 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -5,32 +5,30 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16
55
; CHECK-LABEL: lower_trunc_16xi8:
66
; CHECK: // %bb.0:
77
; CHECK-NEXT: fmov s0, w0
8-
; CHECK-NEXT: ldr h1, [sp]
8+
; CHECK-NEXT: mov x8, sp
9+
; CHECK-NEXT: mov v0.b[1], w1
10+
; CHECK-NEXT: mov v0.b[2], w2
11+
; CHECK-NEXT: mov v0.b[3], w3
12+
; CHECK-NEXT: mov v0.b[4], w4
13+
; CHECK-NEXT: mov v0.b[5], w5
14+
; CHECK-NEXT: mov v0.b[6], w6
15+
; CHECK-NEXT: mov v0.b[7], w7
16+
; CHECK-NEXT: ld1 { v0.b }[8], [x8]
917
; CHECK-NEXT: add x8, sp, #8
10-
; CHECK-NEXT: ld1 { v1.h }[1], [x8]
18+
; CHECK-NEXT: ld1 { v0.b }[9], [x8]
1119
; CHECK-NEXT: add x8, sp, #16
12-
; CHECK-NEXT: mov v0.h[1], w1
13-
; CHECK-NEXT: ld1 { v1.h }[2], [x8]
20+
; CHECK-NEXT: ld1 { v0.b }[10], [x8]
1421
; CHECK-NEXT: add x8, sp, #24
15-
; CHECK-NEXT: mov v0.h[2], w2
16-
; CHECK-NEXT: ld1 { v1.h }[3], [x8]
22+
; CHECK-NEXT: ld1 { v0.b }[11], [x8]
1723
; CHECK-NEXT: add x8, sp, #32
18-
; CHECK-NEXT: mov v0.h[3], w3
19-
; CHECK-NEXT: ld1 { v1.h }[4], [x8]
24+
; CHECK-NEXT: ld1 { v0.b }[12], [x8]
2025
; CHECK-NEXT: add x8, sp, #40
21-
; CHECK-NEXT: ld1 { v1.h }[5], [x8]
26+
; CHECK-NEXT: ld1 { v0.b }[13], [x8]
2227
; CHECK-NEXT: add x8, sp, #48
23-
; CHECK-NEXT: mov v0.h[4], w4
24-
; CHECK-NEXT: ld1 { v1.h }[6], [x8]
28+
; CHECK-NEXT: ld1 { v0.b }[14], [x8]
2529
; CHECK-NEXT: add x8, sp, #56
26-
; CHECK-NEXT: mov v0.h[5], w5
27-
; CHECK-NEXT: ld1 { v1.h }[7], [x8]
28-
; CHECK-NEXT: mov v0.h[6], w6
29-
; CHECK-NEXT: add v2.8h, v1.8h, v1.8h
30-
; CHECK-NEXT: mov v0.h[7], w7
31-
; CHECK-NEXT: add v3.8h, v0.8h, v0.8h
32-
; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b
33-
; CHECK-NEXT: uzp1 v1.16b, v3.16b, v2.16b
30+
; CHECK-NEXT: ld1 { v0.b }[15], [x8]
31+
; CHECK-NEXT: add v1.16b, v0.16b, v0.16b
3432
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
3533
; CHECK-NEXT: ret
3634
%a1 = insertelement <16 x i16> poison, i16 %a, i16 0
@@ -59,18 +57,15 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16
5957
define <8 x i16> @lower_trunc_8xi16(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h) {
6058
; CHECK-LABEL: lower_trunc_8xi16:
6159
; CHECK: // %bb.0:
62-
; CHECK-NEXT: fmov s0, w4
63-
; CHECK-NEXT: fmov s1, w0
64-
; CHECK-NEXT: mov v0.s[1], w5
65-
; CHECK-NEXT: mov v1.s[1], w1
66-
; CHECK-NEXT: mov v0.s[2], w6
67-
; CHECK-NEXT: mov v1.s[2], w2
68-
; CHECK-NEXT: mov v0.s[3], w7
69-
; CHECK-NEXT: mov v1.s[3], w3
70-
; CHECK-NEXT: add v2.4s, v0.4s, v0.4s
71-
; CHECK-NEXT: add v3.4s, v1.4s, v1.4s
72-
; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
73-
; CHECK-NEXT: uzp1 v1.8h, v3.8h, v2.8h
60+
; CHECK-NEXT: fmov s0, w0
61+
; CHECK-NEXT: mov v0.h[1], w1
62+
; CHECK-NEXT: mov v0.h[2], w2
63+
; CHECK-NEXT: mov v0.h[3], w3
64+
; CHECK-NEXT: mov v0.h[4], w4
65+
; CHECK-NEXT: mov v0.h[5], w5
66+
; CHECK-NEXT: mov v0.h[6], w6
67+
; CHECK-NEXT: mov v0.h[7], w7
68+
; CHECK-NEXT: add v1.8h, v0.8h, v0.8h
7469
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
7570
; CHECK-NEXT: ret
7671
%a1 = insertelement <8 x i32> poison, i32 %a, i32 0
@@ -91,14 +86,11 @@ define <8 x i16> @lower_trunc_8xi16(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32
9186
define <4 x i32> @lower_trunc_4xi32(i64 %a, i64 %b, i64 %c, i64 %d) {
9287
; CHECK-LABEL: lower_trunc_4xi32:
9388
; CHECK: // %bb.0:
94-
; CHECK-NEXT: fmov d0, x2
95-
; CHECK-NEXT: fmov d1, x0
96-
; CHECK-NEXT: mov v0.d[1], x3
97-
; CHECK-NEXT: mov v1.d[1], x1
98-
; CHECK-NEXT: add v2.2d, v0.2d, v0.2d
99-
; CHECK-NEXT: add v3.2d, v1.2d, v1.2d
100-
; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s
101-
; CHECK-NEXT: uzp1 v1.4s, v3.4s, v2.4s
89+
; CHECK-NEXT: fmov s0, w0
90+
; CHECK-NEXT: mov v0.s[1], w1
91+
; CHECK-NEXT: mov v0.s[2], w2
92+
; CHECK-NEXT: mov v0.s[3], w3
93+
; CHECK-NEXT: add v1.4s, v0.4s, v0.4s
10294
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
10395
; CHECK-NEXT: ret
10496
%a1 = insertelement <4 x i64> poison, i64 %a, i64 0
@@ -115,24 +107,20 @@ define <4 x i32> @lower_trunc_4xi32(i64 %a, i64 %b, i64 %c, i64 %d) {
115107
define <8 x i32> @lower_trunc_8xi32(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f, i64 %g, i64 %h) {
116108
; CHECK-LABEL: lower_trunc_8xi32:
117109
; CHECK: // %bb.0:
118-
; CHECK-NEXT: fmov d0, x2
119-
; CHECK-NEXT: fmov d1, x0
120-
; CHECK-NEXT: fmov d2, x6
121-
; CHECK-NEXT: fmov d3, x4
122-
; CHECK-NEXT: mov v0.d[1], x3
123-
; CHECK-NEXT: mov v1.d[1], x1
124-
; CHECK-NEXT: mov v2.d[1], x7
125-
; CHECK-NEXT: mov v3.d[1], x5
126-
; CHECK-NEXT: add v4.2d, v0.2d, v0.2d
127-
; CHECK-NEXT: add v5.2d, v1.2d, v1.2d
128-
; CHECK-NEXT: add v6.2d, v2.2d, v2.2d
129-
; CHECK-NEXT: add v7.2d, v3.2d, v3.2d
110+
; CHECK-NEXT: fmov d0, x6
111+
; CHECK-NEXT: fmov d1, x4
112+
; CHECK-NEXT: fmov d2, x2
113+
; CHECK-NEXT: fmov d3, x0
114+
; CHECK-NEXT: mov v0.d[1], x7
115+
; CHECK-NEXT: mov v1.d[1], x5
116+
; CHECK-NEXT: mov v2.d[1], x3
117+
; CHECK-NEXT: mov v3.d[1], x1
118+
; CHECK-NEXT: uzp1 v1.4s, v1.4s, v0.4s
130119
; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
131-
; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s
132-
; CHECK-NEXT: uzp1 v3.4s, v5.4s, v4.4s
133-
; CHECK-NEXT: uzp1 v1.4s, v7.4s, v6.4s
134-
; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
135-
; CHECK-NEXT: eor v1.16b, v2.16b, v1.16b
120+
; CHECK-NEXT: add v3.4s, v1.4s, v1.4s
121+
; CHECK-NEXT: add v0.4s, v2.4s, v2.4s
122+
; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b
123+
; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
136124
; CHECK-NEXT: ret
137125
%a1 = insertelement <8 x i64> poison, i64 %a, i64 0
138126
%b1 = insertelement <8 x i64> %a1, i64 %b, i64 1

llvm/test/CodeGen/AArch64/zext-shuffle.ll

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -674,10 +674,8 @@ define <4 x i32> @isUndefDeInterleave_t1_bad(<8 x i16> %a) {
674674
define i16 @undeftop(<8 x i16> %0) {
675675
; CHECK-LABEL: undeftop:
676676
; CHECK: // %bb.0:
677-
; CHECK-NEXT: dup v0.8h, v0.h[4]
678-
; CHECK-NEXT: uaddl v0.4s, v0.4h, v0.4h
679-
; CHECK-NEXT: xtn v0.4h, v0.4s
680-
; CHECK-NEXT: umov w0, v0.h[0]
677+
; CHECK-NEXT: add v0.8h, v0.8h, v0.8h
678+
; CHECK-NEXT: umov w0, v0.h[4]
681679
; CHECK-NEXT: ret
682680
%2 = shufflevector <8 x i16> %0, <8 x i16> zeroinitializer, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 9, i32 7, i32 5, i32 3>
683681
%3 = zext <8 x i16> %2 to <8 x i64>

llvm/test/CodeGen/SystemZ/int-conv-14.ll

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,8 @@ define i128 @f4(ptr %ptr) {
5858
define i64 @f5(i128 %a) {
5959
; CHECK-LABEL: f5:
6060
; CHECK: # %bb.0:
61-
; CHECK-NEXT: vl %v0, 0(%r2), 3
62-
; CHECK-NEXT: vaq %v0, %v0, %v0
63-
; CHECK-NEXT: vlgvg %r2, %v0, 1
61+
; CHECK-NEXT: lg %r1, 8(%r2)
62+
; CHECK-NEXT: la %r2, 0(%r1,%r1)
6463
; CHECK-NEXT: br %r14
6564
%op = add i128 %a, %a
6665
%res = trunc i128 %op to i64
@@ -137,10 +136,8 @@ define i128 @f10(ptr %ptr) {
137136
define i32 @f11(i128 %a) {
138137
; CHECK-LABEL: f11:
139138
; CHECK: # %bb.0:
140-
; CHECK-NEXT: vl %v0, 0(%r2), 3
141-
; CHECK-NEXT: vaq %v0, %v0, %v0
142-
; CHECK-NEXT: vlgvf %r2, %v0, 3
143-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
139+
; CHECK-NEXT: l %r2, 12(%r2)
140+
; CHECK-NEXT: ar %r2, %r2
144141
; CHECK-NEXT: br %r14
145142
%op = add i128 %a, %a
146143
%res = trunc i128 %op to i32
@@ -218,10 +215,8 @@ define i128 @f16(ptr %ptr) {
218215
define i16 @f17(i128 %a) {
219216
; CHECK-LABEL: f17:
220217
; CHECK: # %bb.0:
221-
; CHECK-NEXT: vl %v0, 0(%r2), 3
222-
; CHECK-NEXT: vaq %v0, %v0, %v0
223-
; CHECK-NEXT: vlgvf %r2, %v0, 3
224-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
218+
; CHECK-NEXT: l %r2, 12(%r2)
219+
; CHECK-NEXT: ar %r2, %r2
225220
; CHECK-NEXT: br %r14
226221
%op = add i128 %a, %a
227222
%res = trunc i128 %op to i16
@@ -299,10 +294,8 @@ define i128 @f22(ptr %ptr) {
299294
define i8 @f23(i128 %a) {
300295
; CHECK-LABEL: f23:
301296
; CHECK: # %bb.0:
302-
; CHECK-NEXT: vl %v0, 0(%r2), 3
303-
; CHECK-NEXT: vaq %v0, %v0, %v0
304-
; CHECK-NEXT: vlgvf %r2, %v0, 3
305-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
297+
; CHECK-NEXT: l %r2, 12(%r2)
298+
; CHECK-NEXT: ar %r2, %r2
306299
; CHECK-NEXT: br %r14
307300
%op = add i128 %a, %a
308301
%res = trunc i128 %op to i8
@@ -388,10 +381,8 @@ define i128 @f28(ptr %ptr) {
388381
define i1 @f29(i128 %a) {
389382
; CHECK-LABEL: f29:
390383
; CHECK: # %bb.0:
391-
; CHECK-NEXT: vl %v0, 0(%r2), 3
392-
; CHECK-NEXT: vaq %v0, %v0, %v0
393-
; CHECK-NEXT: vlgvf %r2, %v0, 3
394-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
384+
; CHECK-NEXT: l %r2, 12(%r2)
385+
; CHECK-NEXT: ar %r2, %r2
395386
; CHECK-NEXT: br %r14
396387
%op = add i128 %a, %a
397388
%res = trunc i128 %op to i1

llvm/test/CodeGen/SystemZ/int-conv-15.ll

Lines changed: 10 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -58,9 +58,8 @@ define i128 @f4(ptr %ptr) {
5858
define i64 @f5(i128 %a) {
5959
; CHECK-LABEL: f5:
6060
; CHECK: # %bb.0:
61-
; CHECK-NEXT: vl %v0, 0(%r2), 3
62-
; CHECK-NEXT: vaq %v0, %v0, %v0
63-
; CHECK-NEXT: vlgvg %r2, %v0, 1
61+
; CHECK-NEXT: lg %r1, 8(%r2)
62+
; CHECK-NEXT: la %r2, 0(%r1,%r1)
6463
; CHECK-NEXT: br %r14
6564
%op = add i128 %a, %a
6665
%res = trunc i128 %op to i64
@@ -137,10 +136,8 @@ define i128 @f10(ptr %ptr) {
137136
define i32 @f11(i128 %a) {
138137
; CHECK-LABEL: f11:
139138
; CHECK: # %bb.0:
140-
; CHECK-NEXT: vl %v0, 0(%r2), 3
141-
; CHECK-NEXT: vaq %v0, %v0, %v0
142-
; CHECK-NEXT: vlgvf %r2, %v0, 3
143-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
139+
; CHECK-NEXT: l %r2, 12(%r2)
140+
; CHECK-NEXT: ar %r2, %r2
144141
; CHECK-NEXT: br %r14
145142
%op = add i128 %a, %a
146143
%res = trunc i128 %op to i32
@@ -218,10 +215,8 @@ define i128 @f16(ptr %ptr) {
218215
define i16 @f17(i128 %a) {
219216
; CHECK-LABEL: f17:
220217
; CHECK: # %bb.0:
221-
; CHECK-NEXT: vl %v0, 0(%r2), 3
222-
; CHECK-NEXT: vaq %v0, %v0, %v0
223-
; CHECK-NEXT: vlgvf %r2, %v0, 3
224-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
218+
; CHECK-NEXT: l %r2, 12(%r2)
219+
; CHECK-NEXT: ar %r2, %r2
225220
; CHECK-NEXT: br %r14
226221
%op = add i128 %a, %a
227222
%res = trunc i128 %op to i16
@@ -299,10 +294,8 @@ define i128 @f22(ptr %ptr) {
299294
define i8 @f23(i128 %a) {
300295
; CHECK-LABEL: f23:
301296
; CHECK: # %bb.0:
302-
; CHECK-NEXT: vl %v0, 0(%r2), 3
303-
; CHECK-NEXT: vaq %v0, %v0, %v0
304-
; CHECK-NEXT: vlgvf %r2, %v0, 3
305-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
297+
; CHECK-NEXT: l %r2, 12(%r2)
298+
; CHECK-NEXT: ar %r2, %r2
306299
; CHECK-NEXT: br %r14
307300
%op = add i128 %a, %a
308301
%res = trunc i128 %op to i8
@@ -386,10 +379,8 @@ define i128 @f28(ptr %ptr) {
386379
define i1 @f29(i128 %a) {
387380
; CHECK-LABEL: f29:
388381
; CHECK: # %bb.0:
389-
; CHECK-NEXT: vl %v0, 0(%r2), 3
390-
; CHECK-NEXT: vaq %v0, %v0, %v0
391-
; CHECK-NEXT: vlgvf %r2, %v0, 3
392-
; CHECK-NEXT: # kill: def $r2l killed $r2l killed $r2d
382+
; CHECK-NEXT: l %r2, 12(%r2)
383+
; CHECK-NEXT: ar %r2, %r2
393384
; CHECK-NEXT: br %r14
394385
%op = add i128 %a, %a
395386
%res = trunc i128 %op to i1

0 commit comments

Comments
 (0)