Skip to content

Commit 99d0ab3

Browse files
committed
[X86] Adding a test for vector shuffle extractions.
When both vector inputs of the shufflevector are the same vector, or the shuffle mask accesses elements from only one operand vector (as in the PR33758 test already present). Committed on behalf of @jbhateja (Jatin Bhateja). Differential Revision: https://reviews.llvm.org/D36271 llvm-svn: 309963
1 parent 869fa74 commit 99d0ab3

File tree

1 file changed

+114
-0
lines changed

1 file changed

+114
-0
lines changed

llvm/test/CodeGen/X86/shuffle-vs-trunc-512.ll

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -227,3 +227,117 @@ define void @trunc_v8i64_to_v8i8(<64 x i8>* %L, <8 x i8>* %S) nounwind {
227227
store <8 x i8> %strided.vec, <8 x i8>* %S
228228
ret void
229229
}
230+
231+
define <16 x i8> @trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62(<64 x i8> %x) {
232+
; AVX512F-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
233+
; AVX512F: # BB#0:
234+
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm2
235+
; AVX512F-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
236+
; AVX512F-NEXT: vpshufb %xmm3, %xmm2, %xmm2
237+
; AVX512F-NEXT: vpshufb %xmm3, %xmm0, %xmm0
238+
; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
239+
; AVX512F-NEXT: vextracti128 $1, %ymm1, %xmm2
240+
; AVX512F-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
241+
; AVX512F-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
242+
; AVX512F-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
243+
; AVX512F-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
244+
; AVX512F-NEXT: vzeroupper
245+
; AVX512F-NEXT: retq
246+
;
247+
; AVX512VL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
248+
; AVX512VL: # BB#0:
249+
; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm2
250+
; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = <1,5,9,13,u,u,u,u,u,u,u,u,u,u,u,u>
251+
; AVX512VL-NEXT: vpshufb %xmm3, %xmm2, %xmm2
252+
; AVX512VL-NEXT: vpshufb %xmm3, %xmm0, %xmm0
253+
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
254+
; AVX512VL-NEXT: vextracti128 $1, %ymm1, %xmm2
255+
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm2 = xmm2[u,u,u,u,1,5,9,14,u,u,u,u,u,u,u,u]
256+
; AVX512VL-NEXT: vpshufb {{.*#+}} xmm1 = xmm1[u,u,u,u,1,5,9,13,u,u,u,u,u,u,u,u]
257+
; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
258+
; AVX512VL-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
259+
; AVX512VL-NEXT: vzeroupper
260+
; AVX512VL-NEXT: retq
261+
;
262+
; AVX512BW-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
263+
; AVX512BW: # BB#0:
264+
; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
265+
; AVX512BW-NEXT: vpextrb $1, %xmm0, %ecx
266+
; AVX512BW-NEXT: vmovd %ecx, %xmm1
267+
; AVX512BW-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
268+
; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
269+
; AVX512BW-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
270+
; AVX512BW-NEXT: vpextrb $13, %xmm0, %eax
271+
; AVX512BW-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
272+
; AVX512BW-NEXT: vextracti32x4 $1, %zmm0, %xmm2
273+
; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
274+
; AVX512BW-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
275+
; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
276+
; AVX512BW-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
277+
; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
278+
; AVX512BW-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
279+
; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
280+
; AVX512BW-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
281+
; AVX512BW-NEXT: vextracti32x4 $2, %zmm0, %xmm2
282+
; AVX512BW-NEXT: vpextrb $1, %xmm2, %eax
283+
; AVX512BW-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
284+
; AVX512BW-NEXT: vpextrb $5, %xmm2, %eax
285+
; AVX512BW-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
286+
; AVX512BW-NEXT: vpextrb $9, %xmm2, %eax
287+
; AVX512BW-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
288+
; AVX512BW-NEXT: vpextrb $13, %xmm2, %eax
289+
; AVX512BW-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
290+
; AVX512BW-NEXT: vextracti32x4 $3, %zmm0, %xmm0
291+
; AVX512BW-NEXT: vpextrb $1, %xmm0, %eax
292+
; AVX512BW-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
293+
; AVX512BW-NEXT: vpextrb $5, %xmm0, %eax
294+
; AVX512BW-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
295+
; AVX512BW-NEXT: vpextrb $9, %xmm0, %eax
296+
; AVX512BW-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
297+
; AVX512BW-NEXT: vpextrb $14, %xmm0, %eax
298+
; AVX512BW-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
299+
; AVX512BW-NEXT: vzeroupper
300+
; AVX512BW-NEXT: retq
301+
;
302+
; AVX512BWVL-LABEL: trunc_shuffle_v64i8_01_05_09_13_17_21_25_29_33_37_41_45_49_53_57_62:
303+
; AVX512BWVL: # BB#0:
304+
; AVX512BWVL-NEXT: vpextrb $5, %xmm0, %eax
305+
; AVX512BWVL-NEXT: vpextrb $1, %xmm0, %ecx
306+
; AVX512BWVL-NEXT: vmovd %ecx, %xmm1
307+
; AVX512BWVL-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
308+
; AVX512BWVL-NEXT: vpextrb $9, %xmm0, %eax
309+
; AVX512BWVL-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
310+
; AVX512BWVL-NEXT: vpextrb $13, %xmm0, %eax
311+
; AVX512BWVL-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
312+
; AVX512BWVL-NEXT: vextracti32x4 $1, %zmm0, %xmm2
313+
; AVX512BWVL-NEXT: vpextrb $1, %xmm2, %eax
314+
; AVX512BWVL-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
315+
; AVX512BWVL-NEXT: vpextrb $5, %xmm2, %eax
316+
; AVX512BWVL-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
317+
; AVX512BWVL-NEXT: vpextrb $9, %xmm2, %eax
318+
; AVX512BWVL-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
319+
; AVX512BWVL-NEXT: vpextrb $13, %xmm2, %eax
320+
; AVX512BWVL-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1
321+
; AVX512BWVL-NEXT: vextracti32x4 $2, %zmm0, %xmm2
322+
; AVX512BWVL-NEXT: vpextrb $1, %xmm2, %eax
323+
; AVX512BWVL-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
324+
; AVX512BWVL-NEXT: vpextrb $5, %xmm2, %eax
325+
; AVX512BWVL-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1
326+
; AVX512BWVL-NEXT: vpextrb $9, %xmm2, %eax
327+
; AVX512BWVL-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1
328+
; AVX512BWVL-NEXT: vpextrb $13, %xmm2, %eax
329+
; AVX512BWVL-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1
330+
; AVX512BWVL-NEXT: vextracti32x4 $3, %zmm0, %xmm0
331+
; AVX512BWVL-NEXT: vpextrb $1, %xmm0, %eax
332+
; AVX512BWVL-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1
333+
; AVX512BWVL-NEXT: vpextrb $5, %xmm0, %eax
334+
; AVX512BWVL-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1
335+
; AVX512BWVL-NEXT: vpextrb $9, %xmm0, %eax
336+
; AVX512BWVL-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1
337+
; AVX512BWVL-NEXT: vpextrb $14, %xmm0, %eax
338+
; AVX512BWVL-NEXT: vpinsrb $15, %eax, %xmm1, %xmm0
339+
; AVX512BWVL-NEXT: vzeroupper
340+
; AVX512BWVL-NEXT: retq
341+
%res = shufflevector <64 x i8> %x, <64 x i8> %x, <16 x i32> <i32 1, i32 5, i32 9, i32 13, i32 17, i32 21, i32 25, i32 29, i32 33, i32 37, i32 41, i32 45, i32 49, i32 53, i32 57, i32 62>
342+
ret <16 x i8> %res
343+
}

0 commit comments

Comments (0)