@@ -1017,23 +1017,11 @@ define <8 x i16> @shuffle_v8i16_0c1d2e3f(<8 x i16> %a, <8 x i16> %b) {
 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; SSE-NEXT: retq
 ;
-; AVX1OR2-LABEL: shuffle_v8i16_0c1d2e3f:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_0c1d2e3f:
-; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-SLOW-NEXT: retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_0c1d2e3f:
-; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,12,1,13,2,14,3,15]
-; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-FAST-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_0c1d2e3f:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT: retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 12, i32 1, i32 13, i32 2, i32 14, i32 3, i32 15>
   ret <8 x i16> %shuffle
 }
@@ -1059,23 +1047,11 @@ define <8 x i16> @shuffle_v8i16_48596a7b(<8 x i16> %a, <8 x i16> %b) {
 ; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
 ; SSE-NEXT: retq
 ;
-; AVX1OR2-LABEL: shuffle_v8i16_48596a7b:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; AVX1OR2-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_48596a7b:
-; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
-; AVX512VL-SLOW-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
-; AVX512VL-SLOW-NEXT: retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_48596a7b:
-; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,8,5,9,6,10,7,11]
-; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-FAST-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_48596a7b:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; AVX-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
+; AVX-NEXT: retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 8, i32 5, i32 9, i32 6, i32 10, i32 7, i32 11>
   ret <8 x i16> %shuffle
 }
@@ -1424,23 +1400,11 @@ define <8 x i16> @shuffle_v8i16_012dXXXX(<8 x i16> %a, <8 x i16> %b) {
 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
 ; SSE41-NEXT: retq
 ;
-; AVX1OR2-LABEL: shuffle_v8i16_012dXXXX:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_012dXXXX:
-; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
-; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
-; AVX512VL-SLOW-NEXT: retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_012dXXXX:
-; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,13,4,5,6,7]
-; AVX512VL-FAST-NEXT: vpermt2w %xmm1, %xmm2, %xmm0
-; AVX512VL-FAST-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_012dXXXX:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,2,3,3]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2],xmm1[3],xmm0[4,5,6,7]
+; AVX-NEXT: retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 13, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }
@@ -1475,24 +1439,11 @@ define <8 x i16> @shuffle_v8i16_XXXXcde3(<8 x i16> %a, <8 x i16> %b) {
 ; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
 ; AVX1-NEXT: retq
 ;
-; AVX2-LABEL: shuffle_v8i16_XXXXcde3:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0
-; AVX2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
-; AVX2-NEXT: retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_XXXXcde3:
-; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpbroadcastq %xmm0, %xmm0
-; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
-; AVX512VL-SLOW-NEXT: retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_XXXXcde3:
-; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [0,1,2,3,4,5,6,11]
-; AVX512VL-FAST-NEXT: vpermi2w %xmm0, %xmm1, %xmm2
-; AVX512VL-FAST-NEXT: vmovdqa %xmm2, %xmm0
-; AVX512VL-FAST-NEXT: retq
+; AVX2OR512VL-LABEL: shuffle_v8i16_XXXXcde3:
+; AVX2OR512VL: # %bb.0:
+; AVX2OR512VL-NEXT: vpbroadcastq %xmm0, %xmm0
+; AVX2OR512VL-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2,3,4,5,6],xmm0[7]
+; AVX2OR512VL-NEXT: retq
 ;
 ; XOPAVX1-LABEL: shuffle_v8i16_XXXXcde3:
 ; XOPAVX1: # %bb.0:
@@ -1533,24 +1484,11 @@ define <8 x i16> @shuffle_v8i16_cde3XXXX(<8 x i16> %a, <8 x i16> %b) {
 ; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
 ; SSE41-NEXT: retq
 ;
-; AVX1OR2-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX1OR2: # %bb.0:
-; AVX1OR2-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX1OR2-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
-; AVX1OR2-NEXT: retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX512VL-SLOW: # %bb.0:
-; AVX512VL-SLOW-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
-; AVX512VL-SLOW-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
-; AVX512VL-SLOW-NEXT: retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i16_cde3XXXX:
-; AVX512VL-FAST: # %bb.0:
-; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} xmm2 = [4,5,6,11,4,5,6,7]
-; AVX512VL-FAST-NEXT: vpermi2w %xmm0, %xmm1, %xmm2
-; AVX512VL-FAST-NEXT: vmovdqa %xmm2, %xmm0
-; AVX512VL-FAST-NEXT: retq
+; AVX-LABEL: shuffle_v8i16_cde3XXXX:
+; AVX: # %bb.0:
+; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[3],xmm1[4,5,6,7]
+; AVX-NEXT: retq
   %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 12, i32 13, i32 14, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   ret <8 x i16> %shuffle
 }