; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -verify-machineinstrs -mtriple=arm64-none-linux-gnu -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; CHECK-GI: warning: Instruction selection used fallback path for udiv_v2i64

; Signed division of <16 x i8> by a splat of 25.
; The SelectionDAG run expects a vector multiply-high sequence (smull/smull2 +
; uzp2) followed by shifts; the GlobalISel run expects every lane to be
; extracted with smov, divided with a scalar sdiv and re-inserted.
define <16 x i8> @div16xi8(<16 x i8> %x) {
; CHECK-SD-LABEL: div16xi8:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    movi v1.16b, #41
; CHECK-SD-NEXT:    smull2 v2.8h, v0.16b, v1.16b
; CHECK-SD-NEXT:    smull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    uzp2 v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    sshr v0.16b, v0.16b, #2
; CHECK-SD-NEXT:    usra v0.16b, v0.16b, #7
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: div16xi8:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    smov w9, v0.b[0]
; CHECK-GI-NEXT:    mov w8, #25 // =0x19
; CHECK-GI-NEXT:    smov w10, v0.b[1]
; CHECK-GI-NEXT:    smov w11, v0.b[2]
; CHECK-GI-NEXT:    smov w12, v0.b[3]
; CHECK-GI-NEXT:    smov w13, v0.b[4]
; CHECK-GI-NEXT:    smov w14, v0.b[5]
; CHECK-GI-NEXT:    smov w15, v0.b[6]
; CHECK-GI-NEXT:    smov w16, v0.b[7]
; CHECK-GI-NEXT:    smov w17, v0.b[8]
; CHECK-GI-NEXT:    smov w18, v0.b[9]
; CHECK-GI-NEXT:    sdiv w9, w9, w8
; CHECK-GI-NEXT:    sdiv w10, w10, w8
; CHECK-GI-NEXT:    fmov s1, w9
; CHECK-GI-NEXT:    sdiv w11, w11, w8
; CHECK-GI-NEXT:    mov v1.b[1], w10
; CHECK-GI-NEXT:    smov w10, v0.b[10]
; CHECK-GI-NEXT:    sdiv w12, w12, w8
; CHECK-GI-NEXT:    mov v1.b[2], w11
; CHECK-GI-NEXT:    smov w11, v0.b[11]
; CHECK-GI-NEXT:    sdiv w13, w13, w8
; CHECK-GI-NEXT:    mov v1.b[3], w12
; CHECK-GI-NEXT:    smov w12, v0.b[12]
; CHECK-GI-NEXT:    sdiv w14, w14, w8
; CHECK-GI-NEXT:    mov v1.b[4], w13
; CHECK-GI-NEXT:    smov w13, v0.b[13]
; CHECK-GI-NEXT:    sdiv w15, w15, w8
; CHECK-GI-NEXT:    mov v1.b[5], w14
; CHECK-GI-NEXT:    sdiv w16, w16, w8
; CHECK-GI-NEXT:    mov v1.b[6], w15
; CHECK-GI-NEXT:    sdiv w17, w17, w8
; CHECK-GI-NEXT:    mov v1.b[7], w16
; CHECK-GI-NEXT:    sdiv w9, w18, w8
; CHECK-GI-NEXT:    mov v1.b[8], w17
; CHECK-GI-NEXT:    sdiv w10, w10, w8
; CHECK-GI-NEXT:    mov v1.b[9], w9
; CHECK-GI-NEXT:    smov w9, v0.b[14]
; CHECK-GI-NEXT:    sdiv w11, w11, w8
; CHECK-GI-NEXT:    mov v1.b[10], w10
; CHECK-GI-NEXT:    smov w10, v0.b[15]
; CHECK-GI-NEXT:    sdiv w12, w12, w8
; CHECK-GI-NEXT:    mov v1.b[11], w11
; CHECK-GI-NEXT:    sdiv w13, w13, w8
; CHECK-GI-NEXT:    mov v1.b[12], w12
; CHECK-GI-NEXT:    sdiv w9, w9, w8
; CHECK-GI-NEXT:    mov v1.b[13], w13
; CHECK-GI-NEXT:    sdiv w8, w10, w8
; CHECK-GI-NEXT:    mov v1.b[14], w9
; CHECK-GI-NEXT:    mov v1.b[15], w8
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %div = sdiv <16 x i8> %x, <i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25, i8 25>
  ret <16 x i8> %div
}

; Signed division of <8 x i16> by a splat of 6577.
; The SelectionDAG run expects a vector multiply-high sequence (smull/smull2 +
; uzp2, then add and shifts); the GlobalISel run expects per-lane smov, scalar
; sdiv and lane re-insertion.
define <8 x i16> @div8xi16(<8 x i16> %x) {
; CHECK-SD-LABEL: div8xi16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w8, #40815 // =0x9f6f
; CHECK-SD-NEXT:    dup v1.8h, w8
; CHECK-SD-NEXT:    smull2 v2.4s, v0.8h, v1.8h
; CHECK-SD-NEXT:    smull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
; CHECK-SD-NEXT:    add v0.8h, v1.8h, v0.8h
; CHECK-SD-NEXT:    sshr v0.8h, v0.8h, #12
; CHECK-SD-NEXT:    usra v0.8h, v0.8h, #15
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: div8xi16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    smov w9, v0.h[0]
; CHECK-GI-NEXT:    mov w8, #6577 // =0x19b1
; CHECK-GI-NEXT:    smov w10, v0.h[1]
; CHECK-GI-NEXT:    smov w11, v0.h[2]
; CHECK-GI-NEXT:    smov w12, v0.h[3]
; CHECK-GI-NEXT:    smov w13, v0.h[4]
; CHECK-GI-NEXT:    smov w14, v0.h[5]
; CHECK-GI-NEXT:    sdiv w9, w9, w8
; CHECK-GI-NEXT:    sdiv w10, w10, w8
; CHECK-GI-NEXT:    fmov s1, w9
; CHECK-GI-NEXT:    sdiv w11, w11, w8
; CHECK-GI-NEXT:    mov v1.h[1], w10
; CHECK-GI-NEXT:    smov w10, v0.h[6]
; CHECK-GI-NEXT:    sdiv w12, w12, w8
; CHECK-GI-NEXT:    mov v1.h[2], w11
; CHECK-GI-NEXT:    smov w11, v0.h[7]
; CHECK-GI-NEXT:    sdiv w13, w13, w8
; CHECK-GI-NEXT:    mov v1.h[3], w12
; CHECK-GI-NEXT:    sdiv w9, w14, w8
; CHECK-GI-NEXT:    mov v1.h[4], w13
; CHECK-GI-NEXT:    sdiv w10, w10, w8
; CHECK-GI-NEXT:    mov v1.h[5], w9
; CHECK-GI-NEXT:    sdiv w8, w11, w8
; CHECK-GI-NEXT:    mov v1.h[6], w10
; CHECK-GI-NEXT:    mov v1.h[7], w8
; CHECK-GI-NEXT:    mov v0.16b, v1.16b
; CHECK-GI-NEXT:    ret
  %div = sdiv <8 x i16> %x, <i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577, i16 6577>
  ret <8 x i16> %div
}

; Signed division of <4 x i32> by a splat of 9542677.
; The SelectionDAG run expects a vector multiply-high sequence (smull/smull2 +
; uzp2, then shifts); the GlobalISel run expects each lane to be moved to a
; GPR, divided with a scalar sdiv and written back into the vector.
define <4 x i32> @div32xi4(<4 x i32> %x) {
; CHECK-SD-LABEL: div32xi4:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w8, #7527 // =0x1d67
; CHECK-SD-NEXT:    movk w8, #28805, lsl #16
; CHECK-SD-NEXT:    dup v1.4s, w8
; CHECK-SD-NEXT:    smull2 v2.2d, v0.4s, v1.4s
; CHECK-SD-NEXT:    smull v0.2d, v0.2s, v1.2s
; CHECK-SD-NEXT:    uzp2 v1.4s, v0.4s, v2.4s
; CHECK-SD-NEXT:    sshr v0.4s, v1.4s, #22
; CHECK-SD-NEXT:    usra v0.4s, v1.4s, #31
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: div32xi4:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    fmov w9, s0
; CHECK-GI-NEXT:    mov w8, #39957 // =0x9c15
; CHECK-GI-NEXT:    mov w10, v0.s[1]
; CHECK-GI-NEXT:    movk w8, #145, lsl #16
; CHECK-GI-NEXT:    mov w11, v0.s[2]
; CHECK-GI-NEXT:    mov w12, v0.s[3]
; CHECK-GI-NEXT:    sdiv w9, w9, w8
; CHECK-GI-NEXT:    sdiv w10, w10, w8
; CHECK-GI-NEXT:    mov v0.s[0], w9
; CHECK-GI-NEXT:    sdiv w11, w11, w8
; CHECK-GI-NEXT:    mov v0.s[1], w10
; CHECK-GI-NEXT:    sdiv w8, w12, w8
; CHECK-GI-NEXT:    mov v0.s[2], w11
; CHECK-GI-NEXT:    mov v0.s[3], w8
; CHECK-GI-NEXT:    ret
  %div = sdiv <4 x i32> %x, <i32 9542677, i32 9542677, i32 9542677, i32 9542677>
  ret <4 x i32> %div
}
@@ -61,32 +166,78 @@ define <16 x i8> @udiv16xi8(<16 x i8> %x) {
61166}
62167
; Unsigned division of <8 x i16> by a splat of 6537.
; Both runs expect the same umull/umull2 + uzp2 multiply-high sequence; they
; differ only in how the multiplier vector is produced (mov + dup for
; SelectionDAG, a constant-pool load for GlobalISel).
define <8 x i16> @udiv8xi16(<8 x i16> %x) {
; CHECK-SD-LABEL: udiv8xi16:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w8, #16593 // =0x40d1
; CHECK-SD-NEXT:    dup v1.8h, w8
; CHECK-SD-NEXT:    umull2 v2.4s, v0.8h, v1.8h
; CHECK-SD-NEXT:    umull v1.4s, v0.4h, v1.4h
; CHECK-SD-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
; CHECK-SD-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-SD-NEXT:    usra v1.8h, v0.8h, #1
; CHECK-SD-NEXT:    ushr v0.8h, v1.8h, #12
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: udiv8xi16:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI4_0
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI4_0]
; CHECK-GI-NEXT:    umull2 v2.4s, v0.8h, v1.8h
; CHECK-GI-NEXT:    umull v1.4s, v0.4h, v1.4h
; CHECK-GI-NEXT:    uzp2 v1.8h, v1.8h, v2.8h
; CHECK-GI-NEXT:    sub v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    usra v1.8h, v0.8h, #1
; CHECK-GI-NEXT:    ushr v0.8h, v1.8h, #12
; CHECK-GI-NEXT:    ret
  %div = udiv <8 x i16> %x, <i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537, i16 6537>
  ret <8 x i16> %div
}

; Unsigned division of <4 x i32> by a splat of 8743143.
; Both runs expect the same umull/umull2 + uzp2 + ushr sequence; they differ
; only in how the multiplier vector is produced (mov/movk + dup for
; SelectionDAG, a constant-pool load for GlobalISel).
define <4 x i32> @udiv32xi4(<4 x i32> %x) {
; CHECK-SD-LABEL: udiv32xi4:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    mov w8, #16747 // =0x416b
; CHECK-SD-NEXT:    movk w8, #31439, lsl #16
; CHECK-SD-NEXT:    dup v1.4s, w8
; CHECK-SD-NEXT:    umull2 v2.2d, v0.4s, v1.4s
; CHECK-SD-NEXT:    umull v0.2d, v0.2s, v1.2s
; CHECK-SD-NEXT:    uzp2 v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT:    ushr v0.4s, v0.4s, #22
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: udiv32xi4:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    adrp x8, .LCPI5_0
; CHECK-GI-NEXT:    ldr q1, [x8, :lo12:.LCPI5_0]
; CHECK-GI-NEXT:    umull2 v2.2d, v0.4s, v1.4s
; CHECK-GI-NEXT:    umull v0.2d, v0.2s, v1.2s
; CHECK-GI-NEXT:    uzp2 v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    ushr v0.4s, v0.4s, #22
; CHECK-GI-NEXT:    ret
  %div = udiv <4 x i32> %x, <i32 8743143, i32 8743143, i32 8743143, i32 8743143>
  ret <4 x i32> %div
}

; Unsigned division of <2 x i64> by a splat of 7.
; Both run lines share the plain CHECK prefix for this function: each lane is
; moved to a GPR, divided via umulh plus sub/add/lsr fix-ups, and re-inserted.
; The GlobalISel run is expected to report a fallback for this function (see
; the fallback-warning check at the top of the file).
define <2 x i64> @udiv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: udiv_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov x8, #9363 // =0x2493
; CHECK-NEXT:    fmov x10, d0
; CHECK-NEXT:    mov x9, v0.d[1]
; CHECK-NEXT:    movk x8, #37449, lsl #16
; CHECK-NEXT:    movk x8, #18724, lsl #32
; CHECK-NEXT:    movk x8, #9362, lsl #48
; CHECK-NEXT:    umulh x11, x10, x8
; CHECK-NEXT:    umulh x8, x9, x8
; CHECK-NEXT:    sub x10, x10, x11
; CHECK-NEXT:    add x10, x11, x10, lsr #1
; CHECK-NEXT:    sub x9, x9, x8
; CHECK-NEXT:    add x8, x8, x9, lsr #1
; CHECK-NEXT:    lsr x9, x10, #2
; CHECK-NEXT:    fmov d0, x9
; CHECK-NEXT:    lsr x8, x8, #2
; CHECK-NEXT:    mov v0.d[1], x8
; CHECK-NEXT:    ret
  %r = udiv <2 x i64> %a, splat (i64 7)
  ret <2 x i64> %r
}