@@ -2028,6 +2028,57 @@ define <32 x i8> @shuffle_v32i8_32_01_34_03_36_05_38_07_40_09_42_11_44_13_46_15_
20282028 ret <32 x i8 > %shuffle
20292029}
20302030
2031+ ; PR159670
; Two-input v32i8 shuffle whose mask is spelled out in the function name.
; Mask indices 0-7 and 16-23 select bytes from %a; indices 40-47 and 56-63
; select bytes 8-15 and 24-31 of %b (shufflevector numbers the second operand
; from 32). Each 128-bit half of the result therefore interleaves the low
; 8 bytes of that half of %a with the high 8 bytes of the same half of %b.
; The check blocks below pin three lowerings: vpblendd followed by one vpshufb,
; a single vpermt2b driven by a constant index vector, and a per-128-bit-half
; vpshufd + vpunpcklbw sequence recombined with vinsertf128.
2032+ define <32 x i8 > @shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63 (<32 x i8 > %a , <32 x i8 > %b ) {
2033+ ; AVX1-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2034+ ; AVX1: # %bb.0:
2035+ ; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2036+ ; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2037+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
2038+ ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
2039+ ; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
2040+ ; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2041+ ; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2042+ ; AVX1-NEXT: retq
2043+ ;
2044+ ; AVX2-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2045+ ; AVX2: # %bb.0:
2046+ ; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2047+ ; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15,16,24,17,25,18,26,19,27,20,28,21,29,22,30,23,31]
2048+ ; AVX2-NEXT: retq
2049+ ;
2050+ ; AVX512VLBW-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2051+ ; AVX512VLBW: # %bb.0:
2052+ ; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2053+ ; AVX512VLBW-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15,16,24,17,25,18,26,19,27,20,28,21,29,22,30,23,31]
2054+ ; AVX512VLBW-NEXT: retq
2055+ ;
2056+ ; AVX512VLVBMI-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2057+ ; AVX512VLVBMI: # %bb.0:
2058+ ; AVX512VLVBMI-NEXT: vmovdqa {{.*#+}} ymm2 = [0,40,1,41,2,42,3,43,4,44,5,45,6,46,7,47,16,56,17,57,18,58,19,59,20,60,21,61,22,62,23,63]
2059+ ; AVX512VLVBMI-NEXT: vpermt2b %ymm1, %ymm2, %ymm0
2060+ ; AVX512VLVBMI-NEXT: retq
2061+ ;
2062+ ; XOPAVX1-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2063+ ; XOPAVX1: # %bb.0:
2064+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm0, %xmm2
2065+ ; XOPAVX1-NEXT: vextractf128 $1, %ymm1, %xmm3
2066+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm3 = xmm3[2,3,2,3]
2067+ ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
2068+ ; XOPAVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
2069+ ; XOPAVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
2070+ ; XOPAVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
2071+ ; XOPAVX1-NEXT: retq
2072+ ;
2073+ ; XOPAVX2-LABEL: shuffle_v32i8_00_40_01_41_02_42_03_43_04_44_05_45_06_46_07_47_16_56_17_57_18_58_19_59_20_60_21_61_22_62_23_63:
2074+ ; XOPAVX2: # %bb.0:
2075+ ; XOPAVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
2076+ ; XOPAVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8,1,9,2,10,3,11,4,12,5,13,6,14,7,15,16,24,17,25,18,26,19,27,20,28,21,29,22,30,23,31]
2077+ ; XOPAVX2-NEXT: retq
2078+ %shuffle = shufflevector <32 x i8 > %a , <32 x i8 > %b , <32 x i32 > <i32 0 , i32 40 , i32 1 , i32 41 , i32 2 , i32 42 , i32 3 , i32 43 , i32 4 , i32 44 , i32 5 , i32 45 , i32 6 , i32 46 , i32 7 , i32 47 , i32 16 , i32 56 , i32 17 , i32 57 , i32 18 , i32 58 , i32 19 , i32 59 , i32 20 , i32 60 , i32 21 , i32 61 , i32 22 , i32 62 , i32 23 , i32 63 >
2079+ ret <32 x i8 > %shuffle
2080+ }
2081+
20312082; PR27780 - https://bugs.llvm.org/show_bug.cgi?id=27780
20322083
20332084define <32 x i8 > @load_fold_pblendvb (ptr %px , <32 x i8 > %y ) {
0 commit comments