@@ -262,54 +262,37 @@ define <4 x float> @merge_4f32_f32_45zz(ptr %ptr) nounwind uwtable noinline ssp
262
262
define <4 x float > @merge_4f32_f32_012u (ptr %ptr ) nounwind uwtable noinline ssp {
263
263
; SSE2-LABEL: merge_4f32_f32_012u:
264
264
; SSE2: # %bb.0:
265
- ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
266
265
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
267
- ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
268
- ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
269
- ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
270
- ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
266
+ ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
267
+ ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
271
268
; SSE2-NEXT: retq
272
269
;
273
270
; SSE41-LABEL: merge_4f32_f32_012u:
274
271
; SSE41: # %bb.0:
275
- ; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
276
- ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
277
- ; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
278
- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
279
- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
280
- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
272
+ ; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
273
+ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
281
274
; SSE41-NEXT: retq
282
275
;
283
276
; AVX-LABEL: merge_4f32_f32_012u:
284
277
; AVX: # %bb.0:
285
- ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
286
- ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
287
- ; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
288
- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
289
- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
290
- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
278
+ ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
279
+ ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
291
280
; AVX-NEXT: retq
292
281
;
293
282
; X86-SSE1-LABEL: merge_4f32_f32_012u:
294
283
; X86-SSE1: # %bb.0:
295
284
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
296
- ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
285
+ ; X86-SSE1-NEXT: xorps %xmm0, %xmm0
286
+ ; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
297
287
; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
298
- ; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
299
- ; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
300
- ; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
301
- ; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
288
+ ; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
302
289
; X86-SSE1-NEXT: retl
303
290
;
304
291
; X86-SSE41-LABEL: merge_4f32_f32_012u:
305
292
; X86-SSE41: # %bb.0:
306
293
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
307
- ; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
308
- ; X86-SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
309
- ; X86-SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
310
- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
311
- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
312
- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
294
+ ; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
295
+ ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
313
296
; X86-SSE41-NEXT: retl
314
297
%ptr1 = getelementptr inbounds float , ptr %ptr , i64 1
315
298
%ptr2 = getelementptr inbounds float , ptr %ptr , i64 2
@@ -326,54 +309,37 @@ define <4 x float> @merge_4f32_f32_012u(ptr %ptr) nounwind uwtable noinline ssp
326
309
define <4 x float > @merge_4f32_f32_019u (ptr %ptr ) nounwind uwtable noinline ssp {
327
310
; SSE2-LABEL: merge_4f32_f32_019u:
328
311
; SSE2: # %bb.0:
329
- ; SSE2-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
330
312
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
331
- ; SSE2-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
332
- ; SSE2-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
333
- ; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
334
- ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
313
+ ; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
314
+ ; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
335
315
; SSE2-NEXT: retq
336
316
;
337
317
; SSE41-LABEL: merge_4f32_f32_019u:
338
318
; SSE41: # %bb.0:
339
- ; SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
340
- ; SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
341
- ; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
342
- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
343
- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
344
- ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
319
+ ; SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
320
+ ; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
345
321
; SSE41-NEXT: retq
346
322
;
347
323
; AVX-LABEL: merge_4f32_f32_019u:
348
324
; AVX: # %bb.0:
349
- ; AVX-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
350
- ; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
351
- ; AVX-NEXT: vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
352
- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
353
- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
354
- ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
325
+ ; AVX-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
326
+ ; AVX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
355
327
; AVX-NEXT: retq
356
328
;
357
329
; X86-SSE1-LABEL: merge_4f32_f32_019u:
358
330
; X86-SSE1: # %bb.0:
359
331
; X86-SSE1-NEXT: movl {{[0-9]+}}(%esp), %eax
360
- ; X86-SSE1-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
332
+ ; X86-SSE1-NEXT: xorps %xmm0, %xmm0
333
+ ; X86-SSE1-NEXT: movlps {{.*#+}} xmm0 = mem[0,1],xmm0[2,3]
361
334
; X86-SSE1-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
362
- ; X86-SSE1-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
363
- ; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
364
- ; X86-SSE1-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
365
- ; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm2[0]
335
+ ; X86-SSE1-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
366
336
; X86-SSE1-NEXT: retl
367
337
;
368
338
; X86-SSE41-LABEL: merge_4f32_f32_019u:
369
339
; X86-SSE41: # %bb.0:
370
340
; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax
371
- ; X86-SSE41-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
372
- ; X86-SSE41-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
373
- ; X86-SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
374
- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
375
- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
376
- ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1,2,0]
341
+ ; X86-SSE41-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
342
+ ; X86-SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
377
343
; X86-SSE41-NEXT: retl
378
344
%ptr1 = getelementptr inbounds float , ptr %ptr , i64 1
379
345
%ptr2 = getelementptr inbounds float , ptr %ptr , i64 9
0 commit comments