@@ -227,3 +227,117 @@ define void @trunc_v8i64_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
227227 store <8 x i8 > %strided.vec , <8 x i8 >* %S
228228 ret void
229229}
230+
; Shuffle test: build a <16 x i8> result from a single <64 x i8> input %x
; (passed as both shufflevector operands). The first fifteen mask indices are
; 1, 5, 9, ..., 57, i.e. 1 + 4*k, but the last index is 62 rather than 61, so
; the mask is not a uniform stride-4 selection. The function name spells out
; the exact mask.
; NOTE(review): presumably a negative test showing that this almost-regular
; shuffle is not lowered as a vector truncate -- confirm against the commit
; that introduced it.
;
; Expected code differs by check prefix:
;  * AVX512F / AVX512VL expect the input split across ymm0 and ymm1 and a
;    vpshufb + vpunpckldq + vpblendd sequence; the irregular element shows up
;    as byte 14 in the shuffle of the upper half of ymm1.
;  * AVX512BW / AVX512BWVL expect the input in zmm0 and a byte-by-byte
;    vpextrb / vpinsrb chain over the four 128-bit lanes; the irregular
;    element is the final extract of byte 14 from lane 3.
; NOTE(review): the assertions below look tool-generated
; (update_llc_test_checks.py style); regenerate them rather than editing by
; hand -- TODO confirm from the file header, which is not visible here.
231+ define <16 x i8 > @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62 (<64 x i8 > %x ) {
232+ ; AVX512F-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
233+ ; AVX512F: # BB#0:
234+ ; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
235+ ; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
236+ ; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2
237+ ; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm0
238+ ; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
239+ ; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
240+ ; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
241+ ; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
242+ ; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
243+ ; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
244+ ; AVX512F-NEXT: vzeroupper
245+ ; AVX512F-NEXT: retq
246+ ;
247+ ; AVX512VL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
248+ ; AVX512VL: # BB#0:
249+ ; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2
250+ ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
251+ ; AVX512VL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
252+ ; AVX512VL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
253+ ; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
254+ ; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
255+ ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
256+ ; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
257+ ; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
258+ ; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
259+ ; AVX512VL-NEXT: vzeroupper
260+ ; AVX512VL-NEXT: retq
261+ ;
262+ ; AVX512BW-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
263+ ; AVX512BW: # BB#0:
264+ ; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
265+ ; AVX512BW-NEXT: vpextrb $1, %xmm0, %ecx
266+ ; AVX512BW-NEXT: vmovd %ecx, %xmm1
267+ ; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
268+ ; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
269+ ; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
270+ ; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
271+ ; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
272+ ; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
273+ ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
274+ ; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
275+ ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
276+ ; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
277+ ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
278+ ; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
279+ ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
280+ ; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
281+ ; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
282+ ; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
283+ ; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
284+ ; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
285+ ; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
286+ ; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
287+ ; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
288+ ; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
289+ ; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
290+ ; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm0
291+ ; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
292+ ; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
293+ ; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
294+ ; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
295+ ; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
296+ ; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
297+ ; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
298+ ; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
299+ ; AVX512BW-NEXT: vzeroupper
300+ ; AVX512BW-NEXT: retq
301+ ;
302+ ; AVX512BWVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
303+ ; AVX512BWVL: # BB#0:
304+ ; AVX512BWVL-NEXT: vpextrb $5, %xmm0, %eax
305+ ; AVX512BWVL-NEXT: vpextrb $1, %xmm0, %ecx
306+ ; AVX512BWVL-NEXT: vmovd %ecx, %xmm1
307+ ; AVX512BWVL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
308+ ; AVX512BWVL-NEXT: vpextrb $9, %xmm0, %eax
309+ ; AVX512BWVL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
310+ ; AVX512BWVL-NEXT: vpextrb $13, %xmm0, %eax
311+ ; AVX512BWVL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
312+ ; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
313+ ; AVX512BWVL-NEXT: vpextrb $1, %xmm2, %eax
314+ ; AVX512BWVL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
315+ ; AVX512BWVL-NEXT: vpextrb $5, %xmm2, %eax
316+ ; AVX512BWVL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
317+ ; AVX512BWVL-NEXT: vpextrb $9, %xmm2, %eax
318+ ; AVX512BWVL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
319+ ; AVX512BWVL-NEXT: vpextrb $13, %xmm2, %eax
320+ ; AVX512BWVL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
321+ ; AVX512BWVL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
322+ ; AVX512BWVL-NEXT: vpextrb $1, %xmm2, %eax
323+ ; AVX512BWVL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
324+ ; AVX512BWVL-NEXT: vpextrb $5, %xmm2, %eax
325+ ; AVX512BWVL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
326+ ; AVX512BWVL-NEXT: vpextrb $9, %xmm2, %eax
327+ ; AVX512BWVL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
328+ ; AVX512BWVL-NEXT: vpextrb $13, %xmm2, %eax
329+ ; AVX512BWVL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
330+ ; AVX512BWVL-NEXT: vextracti32x4 $3, %zmm0, %xmm0
331+ ; AVX512BWVL-NEXT: vpextrb $1, %xmm0, %eax
332+ ; AVX512BWVL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
333+ ; AVX512BWVL-NEXT: vpextrb $5, %xmm0, %eax
334+ ; AVX512BWVL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
335+ ; AVX512BWVL-NEXT: vpextrb $9, %xmm0, %eax
336+ ; AVX512BWVL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
337+ ; AVX512BWVL-NEXT: vpextrb $14, %xmm0, %eax
338+ ; AVX512BWVL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
339+ ; AVX512BWVL-NEXT: vzeroupper
340+ ; AVX512BWVL-NEXT: retq
341+ %res = shufflevector <64 x i8 > %x , <64 x i8 > %x , <16 x i32 > <i32 1 , i32 5 , i32 9 , i32 13 , i32 17 , i32 21 , i32 25 , i32 29 , i32 33 , i32 37 , i32 41 , i32 45 , i32 49 , i32 53 , i32 57 , i32 62 >
342+ ret <16 x i8 > %res
343+ }
0 commit comments