@@ -37,10 +37,6 @@ define void @select_v16f16(ptr %a, ptr %b) vscale_range(2,0) #0 {
37
37
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
38
38
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
39
39
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
40
- ; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
41
- ; CHECK-NEXT: ptrue p1.h
42
- ; CHECK-NEXT: and z2.h, z2.h, #0x1
43
- ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
44
40
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
45
41
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
46
42
; CHECK-NEXT: ret
@@ -63,15 +59,8 @@ define void @select_v32f16(ptr %a, ptr %b) #0 {
63
59
; VBITS_GE_256-NEXT: ld1h { z3.h }, p0/z, [x1]
64
60
; VBITS_GE_256-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
65
61
; VBITS_GE_256-NEXT: fcmeq p2.h, p0/z, z2.h, z3.h
66
- ; VBITS_GE_256-NEXT: mov z4.h, p1/z, #-1 // =0xffffffffffffffff
67
- ; VBITS_GE_256-NEXT: ptrue p1.h
68
- ; VBITS_GE_256-NEXT: mov z5.h, p2/z, #-1 // =0xffffffffffffffff
69
- ; VBITS_GE_256-NEXT: and z4.h, z4.h, #0x1
70
- ; VBITS_GE_256-NEXT: and z5.h, z5.h, #0x1
71
- ; VBITS_GE_256-NEXT: cmpne p2.h, p1/z, z4.h, #0
72
- ; VBITS_GE_256-NEXT: cmpne p1.h, p1/z, z5.h, #0
73
- ; VBITS_GE_256-NEXT: sel z0.h, p2, z0.h, z1.h
74
- ; VBITS_GE_256-NEXT: sel z1.h, p1, z2.h, z3.h
62
+ ; VBITS_GE_256-NEXT: sel z0.h, p1, z0.h, z1.h
63
+ ; VBITS_GE_256-NEXT: sel z1.h, p2, z2.h, z3.h
75
64
; VBITS_GE_256-NEXT: st1h { z0.h }, p0, [x0, x8, lsl #1]
76
65
; VBITS_GE_256-NEXT: st1h { z1.h }, p0, [x0]
77
66
; VBITS_GE_256-NEXT: ret
@@ -82,10 +71,6 @@ define void @select_v32f16(ptr %a, ptr %b) #0 {
82
71
; VBITS_GE_512-NEXT: ld1h { z0.h }, p0/z, [x0]
83
72
; VBITS_GE_512-NEXT: ld1h { z1.h }, p0/z, [x1]
84
73
; VBITS_GE_512-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
85
- ; VBITS_GE_512-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
86
- ; VBITS_GE_512-NEXT: ptrue p1.h
87
- ; VBITS_GE_512-NEXT: and z2.h, z2.h, #0x1
88
- ; VBITS_GE_512-NEXT: cmpne p1.h, p1/z, z2.h, #0
89
74
; VBITS_GE_512-NEXT: sel z0.h, p1, z0.h, z1.h
90
75
; VBITS_GE_512-NEXT: st1h { z0.h }, p0, [x0]
91
76
; VBITS_GE_512-NEXT: ret
@@ -104,10 +89,6 @@ define void @select_v64f16(ptr %a, ptr %b) vscale_range(8,0) #0 {
104
89
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
105
90
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
106
91
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
107
- ; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
108
- ; CHECK-NEXT: ptrue p1.h
109
- ; CHECK-NEXT: and z2.h, z2.h, #0x1
110
- ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
111
92
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
112
93
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
113
94
; CHECK-NEXT: ret
@@ -126,10 +107,6 @@ define void @select_v128f16(ptr %a, ptr %b) vscale_range(16,0) #0 {
126
107
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
127
108
; CHECK-NEXT: ld1h { z1.h }, p0/z, [x1]
128
109
; CHECK-NEXT: fcmeq p1.h, p0/z, z0.h, z1.h
129
- ; CHECK-NEXT: mov z2.h, p1/z, #-1 // =0xffffffffffffffff
130
- ; CHECK-NEXT: ptrue p1.h
131
- ; CHECK-NEXT: and z2.h, z2.h, #0x1
132
- ; CHECK-NEXT: cmpne p1.h, p1/z, z2.h, #0
133
110
; CHECK-NEXT: sel z0.h, p1, z0.h, z1.h
134
111
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
135
112
; CHECK-NEXT: ret
@@ -173,10 +150,6 @@ define void @select_v8f32(ptr %a, ptr %b) vscale_range(2,0) #0 {
173
150
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
174
151
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
175
152
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
176
- ; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
177
- ; CHECK-NEXT: ptrue p1.s
178
- ; CHECK-NEXT: and z2.s, z2.s, #0x1
179
- ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
180
153
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
181
154
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
182
155
; CHECK-NEXT: ret
@@ -199,15 +172,8 @@ define void @select_v16f32(ptr %a, ptr %b) #0 {
199
172
; VBITS_GE_256-NEXT: ld1w { z3.s }, p0/z, [x1]
200
173
; VBITS_GE_256-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
201
174
; VBITS_GE_256-NEXT: fcmeq p2.s, p0/z, z2.s, z3.s
202
- ; VBITS_GE_256-NEXT: mov z4.s, p1/z, #-1 // =0xffffffffffffffff
203
- ; VBITS_GE_256-NEXT: ptrue p1.s
204
- ; VBITS_GE_256-NEXT: mov z5.s, p2/z, #-1 // =0xffffffffffffffff
205
- ; VBITS_GE_256-NEXT: and z4.s, z4.s, #0x1
206
- ; VBITS_GE_256-NEXT: and z5.s, z5.s, #0x1
207
- ; VBITS_GE_256-NEXT: cmpne p2.s, p1/z, z4.s, #0
208
- ; VBITS_GE_256-NEXT: cmpne p1.s, p1/z, z5.s, #0
209
- ; VBITS_GE_256-NEXT: sel z0.s, p2, z0.s, z1.s
210
- ; VBITS_GE_256-NEXT: sel z1.s, p1, z2.s, z3.s
175
+ ; VBITS_GE_256-NEXT: sel z0.s, p1, z0.s, z1.s
176
+ ; VBITS_GE_256-NEXT: sel z1.s, p2, z2.s, z3.s
211
177
; VBITS_GE_256-NEXT: st1w { z0.s }, p0, [x0, x8, lsl #2]
212
178
; VBITS_GE_256-NEXT: st1w { z1.s }, p0, [x0]
213
179
; VBITS_GE_256-NEXT: ret
@@ -218,10 +184,6 @@ define void @select_v16f32(ptr %a, ptr %b) #0 {
218
184
; VBITS_GE_512-NEXT: ld1w { z0.s }, p0/z, [x0]
219
185
; VBITS_GE_512-NEXT: ld1w { z1.s }, p0/z, [x1]
220
186
; VBITS_GE_512-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
221
- ; VBITS_GE_512-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
222
- ; VBITS_GE_512-NEXT: ptrue p1.s
223
- ; VBITS_GE_512-NEXT: and z2.s, z2.s, #0x1
224
- ; VBITS_GE_512-NEXT: cmpne p1.s, p1/z, z2.s, #0
225
187
; VBITS_GE_512-NEXT: sel z0.s, p1, z0.s, z1.s
226
188
; VBITS_GE_512-NEXT: st1w { z0.s }, p0, [x0]
227
189
; VBITS_GE_512-NEXT: ret
@@ -240,10 +202,6 @@ define void @select_v32f32(ptr %a, ptr %b) vscale_range(8,0) #0 {
240
202
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
241
203
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
242
204
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
243
- ; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
244
- ; CHECK-NEXT: ptrue p1.s
245
- ; CHECK-NEXT: and z2.s, z2.s, #0x1
246
- ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
247
205
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
248
206
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
249
207
; CHECK-NEXT: ret
@@ -262,10 +220,6 @@ define void @select_v64f32(ptr %a, ptr %b) vscale_range(16,0) #0 {
262
220
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
263
221
; CHECK-NEXT: ld1w { z1.s }, p0/z, [x1]
264
222
; CHECK-NEXT: fcmeq p1.s, p0/z, z0.s, z1.s
265
- ; CHECK-NEXT: mov z2.s, p1/z, #-1 // =0xffffffffffffffff
266
- ; CHECK-NEXT: ptrue p1.s
267
- ; CHECK-NEXT: and z2.s, z2.s, #0x1
268
- ; CHECK-NEXT: cmpne p1.s, p1/z, z2.s, #0
269
223
; CHECK-NEXT: sel z0.s, p1, z0.s, z1.s
270
224
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
271
225
; CHECK-NEXT: ret
@@ -310,10 +264,6 @@ define void @select_v4f64(ptr %a, ptr %b) vscale_range(2,0) #0 {
310
264
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
311
265
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
312
266
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
313
- ; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
314
- ; CHECK-NEXT: ptrue p1.d
315
- ; CHECK-NEXT: and z2.d, z2.d, #0x1
316
- ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
317
267
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
318
268
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
319
269
; CHECK-NEXT: ret
@@ -336,15 +286,8 @@ define void @select_v8f64(ptr %a, ptr %b) #0 {
336
286
; VBITS_GE_256-NEXT: ld1d { z3.d }, p0/z, [x1]
337
287
; VBITS_GE_256-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
338
288
; VBITS_GE_256-NEXT: fcmeq p2.d, p0/z, z2.d, z3.d
339
- ; VBITS_GE_256-NEXT: mov z4.d, p1/z, #-1 // =0xffffffffffffffff
340
- ; VBITS_GE_256-NEXT: ptrue p1.d
341
- ; VBITS_GE_256-NEXT: mov z5.d, p2/z, #-1 // =0xffffffffffffffff
342
- ; VBITS_GE_256-NEXT: and z4.d, z4.d, #0x1
343
- ; VBITS_GE_256-NEXT: and z5.d, z5.d, #0x1
344
- ; VBITS_GE_256-NEXT: cmpne p2.d, p1/z, z4.d, #0
345
- ; VBITS_GE_256-NEXT: cmpne p1.d, p1/z, z5.d, #0
346
- ; VBITS_GE_256-NEXT: sel z0.d, p2, z0.d, z1.d
347
- ; VBITS_GE_256-NEXT: sel z1.d, p1, z2.d, z3.d
289
+ ; VBITS_GE_256-NEXT: sel z0.d, p1, z0.d, z1.d
290
+ ; VBITS_GE_256-NEXT: sel z1.d, p2, z2.d, z3.d
348
291
; VBITS_GE_256-NEXT: st1d { z0.d }, p0, [x0, x8, lsl #3]
349
292
; VBITS_GE_256-NEXT: st1d { z1.d }, p0, [x0]
350
293
; VBITS_GE_256-NEXT: ret
@@ -355,10 +298,6 @@ define void @select_v8f64(ptr %a, ptr %b) #0 {
355
298
; VBITS_GE_512-NEXT: ld1d { z0.d }, p0/z, [x0]
356
299
; VBITS_GE_512-NEXT: ld1d { z1.d }, p0/z, [x1]
357
300
; VBITS_GE_512-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
358
- ; VBITS_GE_512-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
359
- ; VBITS_GE_512-NEXT: ptrue p1.d
360
- ; VBITS_GE_512-NEXT: and z2.d, z2.d, #0x1
361
- ; VBITS_GE_512-NEXT: cmpne p1.d, p1/z, z2.d, #0
362
301
; VBITS_GE_512-NEXT: sel z0.d, p1, z0.d, z1.d
363
302
; VBITS_GE_512-NEXT: st1d { z0.d }, p0, [x0]
364
303
; VBITS_GE_512-NEXT: ret
@@ -377,10 +316,6 @@ define void @select_v16f64(ptr %a, ptr %b) vscale_range(8,0) #0 {
377
316
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
378
317
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
379
318
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
380
- ; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
381
- ; CHECK-NEXT: ptrue p1.d
382
- ; CHECK-NEXT: and z2.d, z2.d, #0x1
383
- ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
384
319
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
385
320
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
386
321
; CHECK-NEXT: ret
@@ -399,10 +334,6 @@ define void @select_v32f64(ptr %a, ptr %b) vscale_range(16,0) #0 {
399
334
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
400
335
; CHECK-NEXT: ld1d { z1.d }, p0/z, [x1]
401
336
; CHECK-NEXT: fcmeq p1.d, p0/z, z0.d, z1.d
402
- ; CHECK-NEXT: mov z2.d, p1/z, #-1 // =0xffffffffffffffff
403
- ; CHECK-NEXT: ptrue p1.d
404
- ; CHECK-NEXT: and z2.d, z2.d, #0x1
405
- ; CHECK-NEXT: cmpne p1.d, p1/z, z2.d, #0
406
337
; CHECK-NEXT: sel z0.d, p1, z0.d, z1.d
407
338
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
408
339
; CHECK-NEXT: ret
0 commit comments