@@ -203,89 +203,93 @@ define <12 x float> @abp90c12(<12 x float> %a, <12 x float> %b, <12 x float> %c)
203
203
; CHECK: // %bb.0: // %entry
204
204
; CHECK-NEXT: // kill: def $s1 killed $s1 def $q1
205
205
; CHECK-NEXT: // kill: def $s3 killed $s3 def $q3
206
- ; CHECK-NEXT: ldr s17, [sp, #40]
207
- ; CHECK-NEXT: add x10, sp, #56
208
206
; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
207
+ ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
208
+ ; CHECK-NEXT: ldr s17, [sp, #32]
209
+ ; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5
209
210
; CHECK-NEXT: add x9, sp, #48
211
+ ; CHECK-NEXT: add x10, sp, #64
210
212
; CHECK-NEXT: mov v1.s[1], v3.s[0]
211
- ; CHECK-NEXT: ldr s3, [sp, #32]
212
- ; CHECK-NEXT: // kill: def $s2 killed $s2 def $q2
213
213
; CHECK-NEXT: mov v0.s[1], v2.s[0]
214
- ; CHECK-NEXT: ld1 { v17.s }[1], [x10]
215
- ; CHECK-NEXT: // kill: def $s5 killed $s5 def $q5
216
- ; CHECK-NEXT: ldr s16, [sp, #8]
217
214
; CHECK-NEXT: // kill: def $s4 killed $s4 def $q4
218
- ; CHECK-NEXT: add x10, sp, #24
219
- ; CHECK-NEXT: ld1 { v3.s }[1], [x9]
220
- ; CHECK-NEXT: add x9, sp, #72
221
- ; CHECK-NEXT: // kill: def $s7 killed $s7 def $q7
215
+ ; CHECK-NEXT: add x11, sp, #72
216
+ ; CHECK-NEXT: ld1 { v17.s }[1], [x9]
217
+ ; CHECK-NEXT: ldr s18, [x10]
218
+ ; CHECK-NEXT: add x9, sp, #80
219
+ ; CHECK-NEXT: add x10, sp, #56
222
220
; CHECK-NEXT: // kill: def $s6 killed $s6 def $q6
221
+ ; CHECK-NEXT: // kill: def $s7 killed $s7 def $q7
222
+ ; CHECK-NEXT: ldr s16, [sp, #8]
223
+ ; CHECK-NEXT: ldr s3, [sp, #96]
224
+ ; CHECK-NEXT: ld1 { v18.s }[1], [x9]
225
+ ; CHECK-NEXT: add x9, sp, #88
223
226
; CHECK-NEXT: ldr s2, [sp]
224
- ; CHECK-NEXT: ld1 { v16.s }[1], [x10]
225
- ; CHECK-NEXT: add x10, sp, #112
226
- ; CHECK-NEXT: ldr s20, [sp, #136]
227
227
; CHECK-NEXT: mov v1.s[2], v5.s[0]
228
- ; CHECK-NEXT: ld1 { v17.s }[2], [x9]
229
- ; CHECK-NEXT: add x9, sp, #64
230
- ; CHECK-NEXT: ldr s5, [sp, #96]
231
- ; CHECK-NEXT: ld1 { v3.s }[2], [x9]
228
+ ; CHECK-NEXT: ldr s5, [sp, #40]
232
229
; CHECK-NEXT: mov v0.s[2], v4.s[0]
233
- ; CHECK-NEXT: add x9, sp, #88
234
- ; CHECK-NEXT: ldr s4, [sp, #104]
235
- ; CHECK-NEXT: ldr s19, [sp, #192]
236
230
; CHECK-NEXT: ld1 { v5.s }[1], [x10]
237
- ; CHECK-NEXT: add x10, sp, #80
238
- ; CHECK-NEXT: ld1 { v17.s }[3], [x9]
239
- ; CHECK-NEXT: mov v1.s[3], v7.s[0]
240
- ; CHECK-NEXT: add x9, sp, #120
241
- ; CHECK-NEXT: ld1 { v3.s }[3], [x10]
242
- ; CHECK-NEXT: ld1 { v4.s }[1], [x9]
243
- ; CHECK-NEXT: ldr s7, [sp, #128]
231
+ ; CHECK-NEXT: ldr s19, [x11]
244
232
; CHECK-NEXT: add x10, sp, #144
233
+ ; CHECK-NEXT: zip1 v4.2d, v17.2d, v18.2d
234
+ ; CHECK-NEXT: add x11, sp, #160
235
+ ; CHECK-NEXT: ldr s18, [sp, #136]
236
+ ; CHECK-NEXT: ld1 { v19.s }[1], [x9]
245
237
; CHECK-NEXT: mov v0.s[3], v6.s[0]
246
- ; CHECK-NEXT: add x9, sp, #16
238
+ ; CHECK-NEXT: ldr s6, [sp, #128]
239
+ ; CHECK-NEXT: mov v1.s[3], v7.s[0]
240
+ ; CHECK-NEXT: add x9, sp, #24
241
+ ; CHECK-NEXT: ldr s7, [sp, #104]
242
+ ; CHECK-NEXT: ld1 { v16.s }[1], [x9]
243
+ ; CHECK-NEXT: add x9, sp, #112
244
+ ; CHECK-NEXT: ld1 { v6.s }[1], [x10]
245
+ ; CHECK-NEXT: zip1 v5.2d, v5.2d, v19.2d
246
+ ; CHECK-NEXT: add x10, sp, #120
247
+ ; CHECK-NEXT: ld1 { v3.s }[1], [x9]
247
248
; CHECK-NEXT: ld1 { v7.s }[1], [x10]
248
- ; CHECK-NEXT: ld1 { v2.s }[1], [x9]
249
- ; CHECK-NEXT: add x9, sp, #160
250
- ; CHECK-NEXT: fmul v6.4s, v17.4s, v1.4s
251
- ; CHECK-NEXT: fmul v18.4s, v4.4s, v16.4s
252
- ; CHECK-NEXT: fmul v16.4s, v5.4s, v16.4s
253
- ; CHECK-NEXT: fmul v1.4s, v3.4s, v1.4s
254
- ; CHECK-NEXT: add x10, sp, #208
255
- ; CHECK-NEXT: ld1 { v7.s }[2], [x9]
256
- ; CHECK-NEXT: add x9, sp, #152
257
- ; CHECK-NEXT: ld1 { v19.s }[1], [x10]
258
- ; CHECK-NEXT: ld1 { v20.s }[1], [x9]
249
+ ; CHECK-NEXT: ldr s17, [x11]
259
250
; CHECK-NEXT: add x9, sp, #176
260
- ; CHECK-NEXT: add x10, sp, #184
261
- ; CHECK-NEXT: fneg v6.4s, v6.4s
262
- ; CHECK-NEXT: fneg v18.4s, v18.4s
263
- ; CHECK-NEXT: fmla v16.4s, v2.4s, v4.4s
264
- ; CHECK-NEXT: fmla v1.4s, v0.4s, v17.4s
265
- ; CHECK-NEXT: ld1 { v7.s }[3], [x9]
266
- ; CHECK-NEXT: add x9, sp, #168
267
- ; CHECK-NEXT: ld1 { v20.s }[2], [x9]
268
- ; CHECK-NEXT: ldr s4, [sp, #200]
251
+ ; CHECK-NEXT: add x10, sp, #16
252
+ ; CHECK-NEXT: add x11, sp, #168
253
+ ; CHECK-NEXT: ld1 { v17.s }[1], [x9]
254
+ ; CHECK-NEXT: ld1 { v2.s }[1], [x10]
255
+ ; CHECK-NEXT: add x9, sp, #152
256
+ ; CHECK-NEXT: fmul v19.4s, v5.4s, v1.4s
257
+ ; CHECK-NEXT: fmul v20.4s, v7.4s, v16.4s
258
+ ; CHECK-NEXT: fmul v16.4s, v3.4s, v16.4s
259
+ ; CHECK-NEXT: fmul v1.4s, v4.4s, v1.4s
260
+ ; CHECK-NEXT: ld1 { v18.s }[1], [x9]
261
+ ; CHECK-NEXT: ldr s21, [x11]
262
+ ; CHECK-NEXT: zip1 v6.2d, v6.2d, v17.2d
263
+ ; CHECK-NEXT: ldr s17, [sp, #192]
264
+ ; CHECK-NEXT: add x9, sp, #184
265
+ ; CHECK-NEXT: add x10, sp, #208
266
+ ; CHECK-NEXT: ld1 { v21.s }[1], [x9]
269
267
; CHECK-NEXT: add x9, sp, #216
270
- ; CHECK-NEXT: fmla v6.4s, v0.4s, v3.4s
271
- ; CHECK-NEXT: fmla v18.4s, v2.4s, v5.4s
272
- ; CHECK-NEXT: ld1 { v4.s }[1], [x9]
273
- ; CHECK-NEXT: fsub v0.4s, v7.4s, v1.4s
274
- ; CHECK-NEXT: fsub v1.4s, v19.4s, v16.4s
275
- ; CHECK-NEXT: ld1 { v20.s }[3], [x10]
276
- ; CHECK-NEXT: fadd v2.4s, v4.4s, v18.4s
277
- ; CHECK-NEXT: fadd v3.4s, v20.4s, v6.4s
268
+ ; CHECK-NEXT: fneg v19.4s, v19.4s
269
+ ; CHECK-NEXT: fneg v20.4s, v20.4s
270
+ ; CHECK-NEXT: fmla v16.4s, v2.4s, v7.4s
271
+ ; CHECK-NEXT: fmla v1.4s, v0.4s, v5.4s
272
+ ; CHECK-NEXT: ld1 { v17.s }[1], [x10]
273
+ ; CHECK-NEXT: ldr s5, [sp, #200]
274
+ ; CHECK-NEXT: zip1 v7.2d, v18.2d, v21.2d
275
+ ; CHECK-NEXT: ld1 { v5.s }[1], [x9]
276
+ ; CHECK-NEXT: fmla v19.4s, v0.4s, v4.4s
277
+ ; CHECK-NEXT: fmla v20.4s, v2.4s, v3.4s
278
+ ; CHECK-NEXT: fsub v0.4s, v6.4s, v1.4s
279
+ ; CHECK-NEXT: fsub v1.4s, v17.4s, v16.4s
280
+ ; CHECK-NEXT: fadd v2.4s, v7.4s, v19.4s
281
+ ; CHECK-NEXT: fadd v3.4s, v5.4s, v20.4s
278
282
; CHECK-NEXT: ext v4.16b, v0.16b, v1.16b, #12
279
- ; CHECK-NEXT: ext v5.16b, v3.16b, v2.16b, #12
280
- ; CHECK-NEXT: trn2 v1.4s, v1.4s, v2.4s
283
+ ; CHECK-NEXT: ext v5.16b, v2.16b, v3.16b, #12
284
+ ; CHECK-NEXT: trn2 v1.4s, v1.4s, v3.4s
281
285
; CHECK-NEXT: ext v4.16b, v0.16b, v4.16b, #12
282
- ; CHECK-NEXT: ext v5.16b, v3.16b, v5.16b, #8
286
+ ; CHECK-NEXT: ext v5.16b, v2.16b, v5.16b, #8
283
287
; CHECK-NEXT: rev64 v4.4s, v4.4s
284
- ; CHECK-NEXT: trn2 v2.4s, v4.4s, v5.4s
285
- ; CHECK-NEXT: zip2 v4.4s, v0.4s, v3.4s
286
- ; CHECK-NEXT: zip1 v0.4s, v0.4s, v3.4s
287
- ; CHECK-NEXT: ext v1.16b, v2.16b, v1.16b, #8
288
- ; CHECK-NEXT: mov v4.d[1], v2.d[0]
288
+ ; CHECK-NEXT: trn2 v3.4s, v4.4s, v5.4s
289
+ ; CHECK-NEXT: zip2 v4.4s, v0.4s, v2.4s
290
+ ; CHECK-NEXT: zip1 v0.4s, v0.4s, v2.4s
291
+ ; CHECK-NEXT: ext v1.16b, v3.16b, v1.16b, #8
292
+ ; CHECK-NEXT: mov v4.d[1], v3.d[0]
289
293
; CHECK-NEXT: str q0, [x8]
290
294
; CHECK-NEXT: stp q4, q1, [x8, #16]
291
295
; CHECK-NEXT: ret
0 commit comments