44
55; WITH VSCALE RANGE
66
7- define i64 @ctz_nxv8i1 (<vscale x 8 x i1 > %a ) #0 {
8- ; CHECK-LABEL: ctz_nxv8i1:
9- ; CHECK: // %bb.0:
10- ; CHECK-NEXT: index z0.h, #0, #-1
11- ; CHECK-NEXT: mov z1.h, p0/z, #-1 // =0xffffffffffffffff
12- ; CHECK-NEXT: ptrue p0.h
13- ; CHECK-NEXT: cnth x9
14- ; CHECK-NEXT: inch z0.h
15- ; CHECK-NEXT: and z0.d, z0.d, z1.d
16- ; CHECK-NEXT: and z0.h, z0.h, #0xff
17- ; CHECK-NEXT: umaxv h0, p0, z0.h
18- ; CHECK-NEXT: fmov w8, s0
19- ; CHECK-NEXT: sub w8, w9, w8
20- ; CHECK-NEXT: and x0, x8, #0xff
21- ; CHECK-NEXT: ret
22- %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1 (<vscale x 8 x i1 > %a , i1 0 )
23- ret i64 %res
24- }
25-
267define i32 @ctz_nxv32i1 (<vscale x 32 x i1 > %a ) #0 {
278; CHECK-LABEL: ctz_nxv32i1:
289; CHECK: // %bb.0:
@@ -156,53 +137,178 @@ define i64 @vscale_4096_poison(<vscale x 16 x i8> %a) #1 {
156137 ret i64 %res
157138}
158139
159- ; NO VSCALE RANGE
140+ ; MATCH WITH BRKB + CNTP
160141
161- define i32 @ctz_nxv8i1_no_range (<vscale x 8 x i1 > %a ) {
162- ; CHECK-LABEL: ctz_nxv8i1_no_range :
142+ define i32 @ctz_nxv2i1 (<vscale x 2 x i1 > %a ) {
143+ ; CHECK-LABEL: ctz_nxv2i1 :
163144; CHECK: // %bb.0:
164- ; CHECK-NEXT: index z0.s, #0, #-1
165- ; CHECK-NEXT: cntw x8
166- ; CHECK-NEXT: punpklo p1.h, p0.b
167- ; CHECK-NEXT: neg x8, x8
168- ; CHECK-NEXT: punpkhi p0.h, p0.b
169- ; CHECK-NEXT: cnth x9
170- ; CHECK-NEXT: mov z1.s, w8
171- ; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
172- ; CHECK-NEXT: mov z3.s, p0/z, #-1 // =0xffffffffffffffff
173- ; CHECK-NEXT: ptrue p0.s
174- ; CHECK-NEXT: incw z0.s, all, mul #2
175- ; CHECK-NEXT: add z1.s, z0.s, z1.s
176- ; CHECK-NEXT: and z0.d, z0.d, z2.d
177- ; CHECK-NEXT: and z1.d, z1.d, z3.d
178- ; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
179- ; CHECK-NEXT: umaxv s0, p0, z0.s
180- ; CHECK-NEXT: fmov w8, s0
181- ; CHECK-NEXT: sub w0, w9, w8
145+ ; CHECK-NEXT: ptrue p1.d
146+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
147+ ; CHECK-NEXT: cntp x0, p0, p0.d
148+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
149+ ; CHECK-NEXT: ret
150+ %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1 (<vscale x 2 x i1 > %a , i1 0 )
151+ ret i32 %res
152+ }
153+
; ctz_nxv2i1_poison: calls llvm.experimental.cttz.elts.i32.nxv2i1 on the
; <vscale x 2 x i1> argument with the trailing i1 operand set to 1 and returns
; the i32 result. The expected output is ptrue p1.d, brkb, then cntp on .d
; elements, the same sequence expected for ctz_nxv2i1 (trailing operand 0).
; NOTE(review): the trailing i1 is presumably the intrinsic's zero-is-poison
; flag, as the _poison suffix suggests; confirm against the LangRef.
154+ define i32 @ctz_nxv2i1_poison (<vscale x 2 x i1 > %a ) {
155+ ; CHECK-LABEL: ctz_nxv2i1_poison:
156+ ; CHECK: // %bb.0:
157+ ; CHECK-NEXT: ptrue p1.d
158+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
159+ ; CHECK-NEXT: cntp x0, p0, p0.d
160+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
161+ ; CHECK-NEXT: ret
162+ %res = call i32 @llvm.experimental.cttz.elts.i32.nxv2i1 (<vscale x 2 x i1 > %a , i1 1 )
163+ ret i32 %res
164+ }
165+
; add_i64_ctz_nxv2i1_poison: takes the i64 result of
; llvm.experimental.cttz.elts.i64.nxv2i1 (trailing i1 operand 1) and adds it
; to %b. The expected output has no separate cntp or add: after ptrue p1.d and
; brkb, a single incp x0, p0.d is expected before ret.
166+ define i64 @add_i64_ctz_nxv2i1_poison (<vscale x 2 x i1 > %a , i64 %b ) {
167+ ; CHECK-LABEL: add_i64_ctz_nxv2i1_poison:
168+ ; CHECK: // %bb.0:
169+ ; CHECK-NEXT: ptrue p1.d
170+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
171+ ; CHECK-NEXT: incp x0, p0.d
172+ ; CHECK-NEXT: ret
173+ %res = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1 (<vscale x 2 x i1 > %a , i1 1 )
174+ %add = add i64 %res , %b
175+ ret i64 %add
176+ }
177+
; add_i32_ctz_nxv2i1_poison: calls the i64 form of the intrinsic
; (llvm.experimental.cttz.elts.i64.nxv2i1, trailing i1 operand 1), truncates
; the result to i32 and adds the i32 argument %b. The expected output still
; uses incp x0, p0.d, bracketed by kill annotations that relate w0 to x0
; on entry and on return.
178+ define i32 @add_i32_ctz_nxv2i1_poison (<vscale x 2 x i1 > %a , i32 %b ) {
179+ ; CHECK-LABEL: add_i32_ctz_nxv2i1_poison:
180+ ; CHECK: // %bb.0:
181+ ; CHECK-NEXT: ptrue p1.d
182+ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
183+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
184+ ; CHECK-NEXT: incp x0, p0.d
185+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
186+ ; CHECK-NEXT: ret
187+ %res = call i64 @llvm.experimental.cttz.elts.i64.nxv2i1 (<vscale x 2 x i1 > %a , i1 1 )
188+ %trunc = trunc i64 %res to i32
189+ %add = add i32 %trunc , %b
190+ ret i32 %add
191+ }
192+
; ctz_nxv4i1: calls llvm.experimental.cttz.elts.i32.nxv4i1 on the
; <vscale x 4 x i1> argument with the trailing i1 operand set to 0 and returns
; the i32 result. The expected output is ptrue p1.s, brkb on .b predicates,
; then cntp on .s elements, with the w0 result taken from x0.
193+ define i32 @ctz_nxv4i1 (<vscale x 4 x i1 > %a ) {
194+ ; CHECK-LABEL: ctz_nxv4i1:
195+ ; CHECK: // %bb.0:
196+ ; CHECK-NEXT: ptrue p1.s
197+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
198+ ; CHECK-NEXT: cntp x0, p0, p0.s
199+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
200+ ; CHECK-NEXT: ret
201+ %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1 (<vscale x 4 x i1 > %a , i1 0 )
202+ ret i32 %res
203+ }
204+
; ctz_nxv4i1_poison: same call as ctz_nxv4i1 but with the trailing i1 operand
; set to 1. The expected instruction sequence (ptrue p1.s, brkb, cntp on .s)
; is identical to the operand-0 variant.
; NOTE(review): the trailing i1 is presumably the intrinsic's zero-is-poison
; flag, as the _poison suffix suggests; confirm against the LangRef.
205+ define i32 @ctz_nxv4i1_poison (<vscale x 4 x i1 > %a ) {
206+ ; CHECK-LABEL: ctz_nxv4i1_poison:
207+ ; CHECK: // %bb.0:
208+ ; CHECK-NEXT: ptrue p1.s
209+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
210+ ; CHECK-NEXT: cntp x0, p0, p0.s
211+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
212+ ; CHECK-NEXT: ret
213+ %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i1 (<vscale x 4 x i1 > %a , i1 1 )
214+ ret i32 %res
215+ }
216+
; add_i64_ctz_nxv4i1_poison: adds the i64 result of
; llvm.experimental.cttz.elts.i64.nxv4i1 (trailing i1 operand 1) to %b.
; The expected output has no separate cntp or add: after ptrue p1.s and brkb,
; a single incp x0, p0.s is expected before ret.
217+ define i64 @add_i64_ctz_nxv4i1_poison (<vscale x 4 x i1 > %a , i64 %b ) {
218+ ; CHECK-LABEL: add_i64_ctz_nxv4i1_poison:
219+ ; CHECK: // %bb.0:
220+ ; CHECK-NEXT: ptrue p1.s
221+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
222+ ; CHECK-NEXT: incp x0, p0.s
223+ ; CHECK-NEXT: ret
224+ %res = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1 (<vscale x 4 x i1 > %a , i1 1 )
225+ %add = add i64 %res , %b
226+ ret i64 %add
227+ }
228+
; add_i32_ctz_nxv4i1_poison: calls the i64 form of the intrinsic
; (llvm.experimental.cttz.elts.i64.nxv4i1, trailing i1 operand 1), truncates
; the result to i32 and adds the i32 argument %b. The expected output still
; uses incp x0, p0.s, bracketed by kill annotations that relate w0 to x0
; on entry and on return.
229+ define i32 @add_i32_ctz_nxv4i1_poison (<vscale x 4 x i1 > %a , i32 %b ) {
230+ ; CHECK-LABEL: add_i32_ctz_nxv4i1_poison:
231+ ; CHECK: // %bb.0:
232+ ; CHECK-NEXT: ptrue p1.s
233+ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
234+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
235+ ; CHECK-NEXT: incp x0, p0.s
236+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
237+ ; CHECK-NEXT: ret
238+ %res = call i64 @llvm.experimental.cttz.elts.i64.nxv4i1 (<vscale x 4 x i1 > %a , i1 1 )
239+ %trunc = trunc i64 %res to i32
240+ %add = add i32 %trunc , %b
241+ ret i32 %add
242+ }
243+
; ctz_nxv8i1: calls llvm.experimental.cttz.elts.i32.nxv8i1 on the
; <vscale x 8 x i1> argument with the trailing i1 operand set to 0 and returns
; the i32 result. This definition carries no attribute group. The expected
; output is ptrue p1.h, brkb on .b predicates, then cntp on .h elements.
244+ define i32 @ctz_nxv8i1 (<vscale x 8 x i1 > %a ) {
245+ ; CHECK-LABEL: ctz_nxv8i1:
246+ ; CHECK: // %bb.0:
247+ ; CHECK-NEXT: ptrue p1.h
248+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
249+ ; CHECK-NEXT: cntp x0, p0, p0.h
250+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
182251; CHECK-NEXT: ret
183252 %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1 (<vscale x 8 x i1 > %a , i1 0 )
184253 ret i32 %res
185254}
186255
187- ; MATCH WITH BRKB + CNTP
; ctz_nxv8i1_poison: calls llvm.experimental.cttz.elts.i32.nxv8i1 on the
; <vscale x 8 x i1> argument with the trailing i1 operand set to 1 and returns
; the i32 result. The expected output is ptrue p1.h, brkb, then cntp on .h
; elements, with the w0 result taken from x0.
; NOTE(review): the trailing i1 is presumably the intrinsic's zero-is-poison
; flag, as the _poison suffix suggests; confirm against the LangRef.
256+ define i32 @ctz_nxv8i1_poison (<vscale x 8 x i1 > %a ) {
257+ ; CHECK-LABEL: ctz_nxv8i1_poison:
258+ ; CHECK: // %bb.0:
259+ ; CHECK-NEXT: ptrue p1.h
260+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
261+ ; CHECK-NEXT: cntp x0, p0, p0.h
262+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
263+ ; CHECK-NEXT: ret
264+ %res = call i32 @llvm.experimental.cttz.elts.i32.nxv8i1 (<vscale x 8 x i1 > %a , i1 1 )
265+ ret i32 %res
266+ }
267+
; add_i64_ctz_nxv8i1_poison: adds the i64 result of
; llvm.experimental.cttz.elts.i64.nxv8i1 (trailing i1 operand 1) to %b.
; The expected output has no separate cntp or add: after ptrue p1.h and brkb,
; a single incp x0, p0.h is expected before ret.
268+ define i64 @add_i64_ctz_nxv8i1_poison (<vscale x 8 x i1 > %a , i64 %b ) {
269+ ; CHECK-LABEL: add_i64_ctz_nxv8i1_poison:
270+ ; CHECK: // %bb.0:
271+ ; CHECK-NEXT: ptrue p1.h
272+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
273+ ; CHECK-NEXT: incp x0, p0.h
274+ ; CHECK-NEXT: ret
275+ %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1 (<vscale x 8 x i1 > %a , i1 1 )
276+ %add = add i64 %res , %b
277+ ret i64 %add
278+ }
188279
189- define i32 @ctz_nxv16i1 (<vscale x 16 x i1 > %pg , <vscale x 16 x i1 > %a ) {
; add_i32_ctz_nxv8i1_poison: calls the i64 form of the intrinsic
; (llvm.experimental.cttz.elts.i64.nxv8i1, trailing i1 operand 1), truncates
; the result to i32 and adds the i32 argument %b. The expected output still
; uses incp x0, p0.h, bracketed by kill annotations that relate w0 to x0
; on entry and on return.
280+ define i32 @add_i32_ctz_nxv8i1_poison (<vscale x 8 x i1 > %a , i32 %b ) {
281+ ; CHECK-LABEL: add_i32_ctz_nxv8i1_poison:
282+ ; CHECK: // %bb.0:
283+ ; CHECK-NEXT: ptrue p1.h
284+ ; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
285+ ; CHECK-NEXT: brkb p0.b, p1/z, p0.b
286+ ; CHECK-NEXT: incp x0, p0.h
287+ ; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
288+ ; CHECK-NEXT: ret
289+ %res = call i64 @llvm.experimental.cttz.elts.i64.nxv8i1 (<vscale x 8 x i1 > %a , i1 1 )
290+ %trunc = trunc i64 %res to i32
291+ %add = add i32 %trunc , %b
292+ ret i32 %add
293+ }
294+
295+ define i32 @ctz_nxv16i1 (<vscale x 16 x i1 > %a ) {
190296; CHECK-LABEL: ctz_nxv16i1:
191297; CHECK: // %bb.0:
192- ; CHECK-NEXT: ptrue p0 .b
193- ; CHECK-NEXT: brkb p0.b, p0 /z, p1 .b
298+ ; CHECK-NEXT: ptrue p1 .b
299+ ; CHECK-NEXT: brkb p0.b, p1 /z, p0 .b
194300; CHECK-NEXT: cntp x0, p0, p0.b
195301; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
196302; CHECK-NEXT: ret
197303 %res = call i32 @llvm.experimental.cttz.elts.i32.nxv16i1 (<vscale x 16 x i1 > %a , i1 0 )
198304 ret i32 %res
199305}
200306
201- define i32 @ctz_nxv16i1_poison (<vscale x 16 x i1 > %pg , <vscale x 16 x i1 > % a ) {
307+ define i32 @ctz_nxv16i1_poison (<vscale x 16 x i1 > %a ) {
202308; CHECK-LABEL: ctz_nxv16i1_poison:
203309; CHECK: // %bb.0:
204- ; CHECK-NEXT: ptrue p0 .b
205- ; CHECK-NEXT: brkb p0.b, p0 /z, p1 .b
310+ ; CHECK-NEXT: ptrue p1 .b
311+ ; CHECK-NEXT: brkb p0.b, p1 /z, p0 .b
206312; CHECK-NEXT: cntp x0, p0, p0.b
207313; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
208314; CHECK-NEXT: ret
@@ -226,24 +332,24 @@ define i32 @ctz_and_nxv16i1(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vsca
226332 ret i32 %res
227333}
228334
229- define i64 @add_i64_ctz_nxv16i1_poison (<vscale x 16 x i1 > %pg , <vscale x 16 x i1 > % a , i64 %b ) {
335+ define i64 @add_i64_ctz_nxv16i1_poison (<vscale x 16 x i1 > %a , i64 %b ) {
230336; CHECK-LABEL: add_i64_ctz_nxv16i1_poison:
231337; CHECK: // %bb.0:
232- ; CHECK-NEXT: ptrue p0 .b
233- ; CHECK-NEXT: brkb p0.b, p0 /z, p1 .b
338+ ; CHECK-NEXT: ptrue p1 .b
339+ ; CHECK-NEXT: brkb p0.b, p1 /z, p0 .b
234340; CHECK-NEXT: incp x0, p0.b
235341; CHECK-NEXT: ret
236342 %res = call i64 @llvm.experimental.cttz.elts.i64.nxv16i1 (<vscale x 16 x i1 > %a , i1 1 )
237343 %add = add i64 %res , %b
238344 ret i64 %add
239345}
240346
241- define i32 @add_i32_ctz_nxv16i1_poison (<vscale x 16 x i1 > %pg , <vscale x 16 x i1 > % a , i32 %b ) {
347+ define i32 @add_i32_ctz_nxv16i1_poison (<vscale x 16 x i1 > %a , i32 %b ) {
242348; CHECK-LABEL: add_i32_ctz_nxv16i1_poison:
243349; CHECK: // %bb.0:
244- ; CHECK-NEXT: ptrue p0 .b
350+ ; CHECK-NEXT: ptrue p1 .b
245351; CHECK-NEXT: // kill: def $w0 killed $w0 def $x0
246- ; CHECK-NEXT: brkb p0.b, p0 /z, p1 .b
352+ ; CHECK-NEXT: brkb p0.b, p1 /z, p0 .b
247353; CHECK-NEXT: incp x0, p0.b
248354; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
249355; CHECK-NEXT: ret
0 commit comments