@@ -5,32 +5,30 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16
55; CHECK-LABEL: lower_trunc_16xi8:
66; CHECK: // %bb.0:
77; CHECK-NEXT: fmov s0, w0
8- ; CHECK-NEXT: ldr h1, [sp]
8+ ; CHECK-NEXT: mov x8, sp
9+ ; CHECK-NEXT: mov v0.b[1], w1
10+ ; CHECK-NEXT: mov v0.b[2], w2
11+ ; CHECK-NEXT: mov v0.b[3], w3
12+ ; CHECK-NEXT: mov v0.b[4], w4
13+ ; CHECK-NEXT: mov v0.b[5], w5
14+ ; CHECK-NEXT: mov v0.b[6], w6
15+ ; CHECK-NEXT: mov v0.b[7], w7
16+ ; CHECK-NEXT: ld1 { v0.b }[8], [x8]
917; CHECK-NEXT: add x8, sp, #8
10- ; CHECK-NEXT: ld1 { v1.h }[1 ], [x8]
18+ ; CHECK-NEXT: ld1 { v0.b }[9 ], [x8]
1119; CHECK-NEXT: add x8, sp, #16
12- ; CHECK-NEXT: mov v0.h[1], w1
13- ; CHECK-NEXT: ld1 { v1.h }[2], [x8]
20+ ; CHECK-NEXT: ld1 { v0.b }[10], [x8]
1421; CHECK-NEXT: add x8, sp, #24
15- ; CHECK-NEXT: mov v0.h[2], w2
16- ; CHECK-NEXT: ld1 { v1.h }[3], [x8]
22+ ; CHECK-NEXT: ld1 { v0.b }[11], [x8]
1723; CHECK-NEXT: add x8, sp, #32
18- ; CHECK-NEXT: mov v0.h[3], w3
19- ; CHECK-NEXT: ld1 { v1.h }[4], [x8]
24+ ; CHECK-NEXT: ld1 { v0.b }[12], [x8]
2025; CHECK-NEXT: add x8, sp, #40
21- ; CHECK-NEXT: ld1 { v1.h }[5 ], [x8]
26+ ; CHECK-NEXT: ld1 { v0.b }[13 ], [x8]
2227; CHECK-NEXT: add x8, sp, #48
23- ; CHECK-NEXT: mov v0.h[4], w4
24- ; CHECK-NEXT: ld1 { v1.h }[6], [x8]
28+ ; CHECK-NEXT: ld1 { v0.b }[14], [x8]
2529; CHECK-NEXT: add x8, sp, #56
26- ; CHECK-NEXT: mov v0.h[5], w5
27- ; CHECK-NEXT: ld1 { v1.h }[7], [x8]
28- ; CHECK-NEXT: mov v0.h[6], w6
29- ; CHECK-NEXT: add v2.8h, v1.8h, v1.8h
30- ; CHECK-NEXT: mov v0.h[7], w7
31- ; CHECK-NEXT: add v3.8h, v0.8h, v0.8h
32- ; CHECK-NEXT: uzp1 v0.16b, v0.16b, v1.16b
33- ; CHECK-NEXT: uzp1 v1.16b, v3.16b, v2.16b
30+ ; CHECK-NEXT: ld1 { v0.b }[15], [x8]
31+ ; CHECK-NEXT: add v1.16b, v0.16b, v0.16b
3432; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
3533; CHECK-NEXT: ret
3634 %a1 = insertelement <16 x i16 > poison, i16 %a , i16 0
@@ -59,18 +57,15 @@ define <16 x i8> @lower_trunc_16xi8(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e, i16
5957define <8 x i16 > @lower_trunc_8xi16 (i32 %a , i32 %b , i32 %c , i32 %d , i32 %e , i32 %f , i32 %g , i32 %h ) {
6058; CHECK-LABEL: lower_trunc_8xi16:
6159; CHECK: // %bb.0:
62- ; CHECK-NEXT: fmov s0, w4
63- ; CHECK-NEXT: fmov s1, w0
64- ; CHECK-NEXT: mov v0.s[1], w5
65- ; CHECK-NEXT: mov v1.s[1], w1
66- ; CHECK-NEXT: mov v0.s[2], w6
67- ; CHECK-NEXT: mov v1.s[2], w2
68- ; CHECK-NEXT: mov v0.s[3], w7
69- ; CHECK-NEXT: mov v1.s[3], w3
70- ; CHECK-NEXT: add v2.4s, v0.4s, v0.4s
71- ; CHECK-NEXT: add v3.4s, v1.4s, v1.4s
72- ; CHECK-NEXT: uzp1 v0.8h, v1.8h, v0.8h
73- ; CHECK-NEXT: uzp1 v1.8h, v3.8h, v2.8h
60+ ; CHECK-NEXT: fmov s0, w0
61+ ; CHECK-NEXT: mov v0.h[1], w1
62+ ; CHECK-NEXT: mov v0.h[2], w2
63+ ; CHECK-NEXT: mov v0.h[3], w3
64+ ; CHECK-NEXT: mov v0.h[4], w4
65+ ; CHECK-NEXT: mov v0.h[5], w5
66+ ; CHECK-NEXT: mov v0.h[6], w6
67+ ; CHECK-NEXT: mov v0.h[7], w7
68+ ; CHECK-NEXT: add v1.8h, v0.8h, v0.8h
7469; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
7570; CHECK-NEXT: ret
7671 %a1 = insertelement <8 x i32 > poison, i32 %a , i32 0
@@ -91,14 +86,11 @@ define <8 x i16> @lower_trunc_8xi16(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32
9186define <4 x i32 > @lower_trunc_4xi32 (i64 %a , i64 %b , i64 %c , i64 %d ) {
9287; CHECK-LABEL: lower_trunc_4xi32:
9388; CHECK: // %bb.0:
94- ; CHECK-NEXT: fmov d0, x2
95- ; CHECK-NEXT: fmov d1, x0
96- ; CHECK-NEXT: mov v0.d[1], x3
97- ; CHECK-NEXT: mov v1.d[1], x1
98- ; CHECK-NEXT: add v2.2d, v0.2d, v0.2d
99- ; CHECK-NEXT: add v3.2d, v1.2d, v1.2d
100- ; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s
101- ; CHECK-NEXT: uzp1 v1.4s, v3.4s, v2.4s
89+ ; CHECK-NEXT: fmov s0, w0
90+ ; CHECK-NEXT: mov v0.s[1], w1
91+ ; CHECK-NEXT: mov v0.s[2], w2
92+ ; CHECK-NEXT: mov v0.s[3], w3
93+ ; CHECK-NEXT: add v1.4s, v0.4s, v0.4s
10294; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
10395; CHECK-NEXT: ret
10496 %a1 = insertelement <4 x i64 > poison, i64 %a , i64 0
@@ -115,24 +107,20 @@ define <4 x i32> @lower_trunc_4xi32(i64 %a, i64 %b, i64 %c, i64 %d) {
115107define <8 x i32 > @lower_trunc_8xi32 (i64 %a , i64 %b , i64 %c , i64 %d , i64 %e , i64 %f , i64 %g , i64 %h ) {
116108; CHECK-LABEL: lower_trunc_8xi32:
117109; CHECK: // %bb.0:
118- ; CHECK-NEXT: fmov d0, x2
119- ; CHECK-NEXT: fmov d1, x0
120- ; CHECK-NEXT: fmov d2, x6
121- ; CHECK-NEXT: fmov d3, x4
122- ; CHECK-NEXT: mov v0.d[1], x3
123- ; CHECK-NEXT: mov v1.d[1], x1
124- ; CHECK-NEXT: mov v2.d[1], x7
125- ; CHECK-NEXT: mov v3.d[1], x5
126- ; CHECK-NEXT: add v4.2d, v0.2d, v0.2d
127- ; CHECK-NEXT: add v5.2d, v1.2d, v1.2d
128- ; CHECK-NEXT: add v6.2d, v2.2d, v2.2d
129- ; CHECK-NEXT: add v7.2d, v3.2d, v3.2d
110+ ; CHECK-NEXT: fmov d0, x6
111+ ; CHECK-NEXT: fmov d1, x4
112+ ; CHECK-NEXT: fmov d2, x2
113+ ; CHECK-NEXT: fmov d3, x0
114+ ; CHECK-NEXT: mov v0.d[1], x7
115+ ; CHECK-NEXT: mov v1.d[1], x5
116+ ; CHECK-NEXT: mov v2.d[1], x3
117+ ; CHECK-NEXT: mov v3.d[1], x1
118+ ; CHECK-NEXT: uzp1 v1.4s, v1.4s, v0.4s
130119; CHECK-NEXT: uzp1 v2.4s, v3.4s, v2.4s
131- ; CHECK-NEXT: uzp1 v0.4s, v1.4s, v0.4s
132- ; CHECK-NEXT: uzp1 v3.4s, v5.4s, v4.4s
133- ; CHECK-NEXT: uzp1 v1.4s, v7.4s, v6.4s
134- ; CHECK-NEXT: eor v0.16b, v0.16b, v3.16b
135- ; CHECK-NEXT: eor v1.16b, v2.16b, v1.16b
120+ ; CHECK-NEXT: add v3.4s, v1.4s, v1.4s
121+ ; CHECK-NEXT: add v0.4s, v2.4s, v2.4s
122+ ; CHECK-NEXT: eor v1.16b, v1.16b, v3.16b
123+ ; CHECK-NEXT: eor v0.16b, v2.16b, v0.16b
136124; CHECK-NEXT: ret
137125 %a1 = insertelement <8 x i64 > poison, i64 %a , i64 0
138126 %b1 = insertelement <8 x i64 > %a1 , i64 %b , i64 1
0 commit comments