Skip to content

Commit 1b18ce5

Browse files
committed
[X86] vector-interleaved-load-i16-stride-2.ll - regenerate with AVX512 common prefix
1 parent 57bbdbd commit 1b18ce5

File tree

1 file changed: +41 -286 lines changed

1 file changed: +41 -286 lines changed

llvm/test/CodeGen/X86/vector-interleaved-load-i16-stride-2.ll

Lines changed: 41 additions & 286 deletions
Original file line number | Diff line number | Diff line change
@@ -4,14 +4,14 @@
44
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX2
55
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2-FP
66
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX2-FCP
7-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512
8-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512-FCP
9-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX512DQ
10-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512DQ-FCP
11-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512BW
12-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512BW-FCP
13-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512DQ-BW
14-
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512DQ-BW-FCP
7+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl | FileCheck %s --check-prefixes=AVX512,AVX512-VL
8+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512-FCP
9+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefixes=AVX512,AVX512DQ
10+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512DQ-FCP
11+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512BW
12+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW-FCP
13+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefixes=AVX512,AVX512DQ-BW
14+
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512vl,+avx512dq,+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512DQ-BW-FCP
1515

1616
; These patterns are produced by LoopVectorizer for interleaved loads.
1717

@@ -69,69 +69,6 @@ define void @load_i16_stride2_vf2(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou
6969
; AVX512-NEXT: vmovd %xmm1, (%rsi)
7070
; AVX512-NEXT: vmovd %xmm0, (%rdx)
7171
; AVX512-NEXT: retq
72-
;
73-
; AVX512-FCP-LABEL: load_i16_stride2_vf2:
74-
; AVX512-FCP: # %bb.0:
75-
; AVX512-FCP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
76-
; AVX512-FCP-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
77-
; AVX512-FCP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
78-
; AVX512-FCP-NEXT: vmovd %xmm1, (%rsi)
79-
; AVX512-FCP-NEXT: vmovd %xmm0, (%rdx)
80-
; AVX512-FCP-NEXT: retq
81-
;
82-
; AVX512DQ-LABEL: load_i16_stride2_vf2:
83-
; AVX512DQ: # %bb.0:
84-
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
85-
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
86-
; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
87-
; AVX512DQ-NEXT: vmovd %xmm1, (%rsi)
88-
; AVX512DQ-NEXT: vmovd %xmm0, (%rdx)
89-
; AVX512DQ-NEXT: retq
90-
;
91-
; AVX512DQ-FCP-LABEL: load_i16_stride2_vf2:
92-
; AVX512DQ-FCP: # %bb.0:
93-
; AVX512DQ-FCP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
94-
; AVX512DQ-FCP-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
95-
; AVX512DQ-FCP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
96-
; AVX512DQ-FCP-NEXT: vmovd %xmm1, (%rsi)
97-
; AVX512DQ-FCP-NEXT: vmovd %xmm0, (%rdx)
98-
; AVX512DQ-FCP-NEXT: retq
99-
;
100-
; AVX512BW-LABEL: load_i16_stride2_vf2:
101-
; AVX512BW: # %bb.0:
102-
; AVX512BW-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
103-
; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
104-
; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
105-
; AVX512BW-NEXT: vmovd %xmm1, (%rsi)
106-
; AVX512BW-NEXT: vmovd %xmm0, (%rdx)
107-
; AVX512BW-NEXT: retq
108-
;
109-
; AVX512BW-FCP-LABEL: load_i16_stride2_vf2:
110-
; AVX512BW-FCP: # %bb.0:
111-
; AVX512BW-FCP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
112-
; AVX512BW-FCP-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
113-
; AVX512BW-FCP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
114-
; AVX512BW-FCP-NEXT: vmovd %xmm1, (%rsi)
115-
; AVX512BW-FCP-NEXT: vmovd %xmm0, (%rdx)
116-
; AVX512BW-FCP-NEXT: retq
117-
;
118-
; AVX512DQ-BW-LABEL: load_i16_stride2_vf2:
119-
; AVX512DQ-BW: # %bb.0:
120-
; AVX512DQ-BW-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
121-
; AVX512DQ-BW-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
122-
; AVX512DQ-BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
123-
; AVX512DQ-BW-NEXT: vmovd %xmm1, (%rsi)
124-
; AVX512DQ-BW-NEXT: vmovd %xmm0, (%rdx)
125-
; AVX512DQ-BW-NEXT: retq
126-
;
127-
; AVX512DQ-BW-FCP-LABEL: load_i16_stride2_vf2:
128-
; AVX512DQ-BW-FCP: # %bb.0:
129-
; AVX512DQ-BW-FCP-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
130-
; AVX512DQ-BW-FCP-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,2,2,3,4,5,6,7]
131-
; AVX512DQ-BW-FCP-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[1,3,2,3,4,5,6,7]
132-
; AVX512DQ-BW-FCP-NEXT: vmovd %xmm1, (%rsi)
133-
; AVX512DQ-BW-FCP-NEXT: vmovd %xmm0, (%rdx)
134-
; AVX512DQ-BW-FCP-NEXT: retq
13572
%wide.vec = load <4 x i16>, ptr %in.vec, align 64
13673
%strided.vec0 = shufflevector <4 x i16> %wide.vec, <4 x i16> poison, <2 x i32> <i32 0, i32 2>
13774
%strided.vec1 = shufflevector <4 x i16> %wide.vec, <4 x i16> poison, <2 x i32> <i32 1, i32 3>
@@ -198,62 +135,6 @@ define void @load_i16_stride2_vf4(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou
198135
; AVX512-NEXT: vpmovdw %xmm0, (%rsi)
199136
; AVX512-NEXT: vmovq %xmm1, (%rdx)
200137
; AVX512-NEXT: retq
201-
;
202-
; AVX512-FCP-LABEL: load_i16_stride2_vf4:
203-
; AVX512-FCP: # %bb.0:
204-
; AVX512-FCP-NEXT: vmovdqa (%rdi), %xmm0
205-
; AVX512-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
206-
; AVX512-FCP-NEXT: vpmovdw %xmm0, (%rsi)
207-
; AVX512-FCP-NEXT: vmovq %xmm1, (%rdx)
208-
; AVX512-FCP-NEXT: retq
209-
;
210-
; AVX512DQ-LABEL: load_i16_stride2_vf4:
211-
; AVX512DQ: # %bb.0:
212-
; AVX512DQ-NEXT: vmovdqa (%rdi), %xmm0
213-
; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
214-
; AVX512DQ-NEXT: vpmovdw %xmm0, (%rsi)
215-
; AVX512DQ-NEXT: vmovq %xmm1, (%rdx)
216-
; AVX512DQ-NEXT: retq
217-
;
218-
; AVX512DQ-FCP-LABEL: load_i16_stride2_vf4:
219-
; AVX512DQ-FCP: # %bb.0:
220-
; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %xmm0
221-
; AVX512DQ-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
222-
; AVX512DQ-FCP-NEXT: vpmovdw %xmm0, (%rsi)
223-
; AVX512DQ-FCP-NEXT: vmovq %xmm1, (%rdx)
224-
; AVX512DQ-FCP-NEXT: retq
225-
;
226-
; AVX512BW-LABEL: load_i16_stride2_vf4:
227-
; AVX512BW: # %bb.0:
228-
; AVX512BW-NEXT: vmovdqa (%rdi), %xmm0
229-
; AVX512BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
230-
; AVX512BW-NEXT: vpmovdw %xmm0, (%rsi)
231-
; AVX512BW-NEXT: vmovq %xmm1, (%rdx)
232-
; AVX512BW-NEXT: retq
233-
;
234-
; AVX512BW-FCP-LABEL: load_i16_stride2_vf4:
235-
; AVX512BW-FCP: # %bb.0:
236-
; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
237-
; AVX512BW-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
238-
; AVX512BW-FCP-NEXT: vpmovdw %xmm0, (%rsi)
239-
; AVX512BW-FCP-NEXT: vmovq %xmm1, (%rdx)
240-
; AVX512BW-FCP-NEXT: retq
241-
;
242-
; AVX512DQ-BW-LABEL: load_i16_stride2_vf4:
243-
; AVX512DQ-BW: # %bb.0:
244-
; AVX512DQ-BW-NEXT: vmovdqa (%rdi), %xmm0
245-
; AVX512DQ-BW-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
246-
; AVX512DQ-BW-NEXT: vpmovdw %xmm0, (%rsi)
247-
; AVX512DQ-BW-NEXT: vmovq %xmm1, (%rdx)
248-
; AVX512DQ-BW-NEXT: retq
249-
;
250-
; AVX512DQ-BW-FCP-LABEL: load_i16_stride2_vf4:
251-
; AVX512DQ-BW-FCP: # %bb.0:
252-
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %xmm0
253-
; AVX512DQ-BW-FCP-NEXT: vpshufb {{.*#+}} xmm1 = xmm0[2,3,6,7,10,11,14,15,u,u,u,u,u,u,u,u]
254-
; AVX512DQ-BW-FCP-NEXT: vpmovdw %xmm0, (%rsi)
255-
; AVX512DQ-BW-FCP-NEXT: vmovq %xmm1, (%rdx)
256-
; AVX512DQ-BW-FCP-NEXT: retq
257138
%wide.vec = load <8 x i16>, ptr %in.vec, align 64
258139
%strided.vec0 = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
259140
%strided.vec1 = shufflevector <8 x i16> %wide.vec, <8 x i16> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
@@ -349,69 +230,6 @@ define void @load_i16_stride2_vf8(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) nou
349230
; AVX512-NEXT: vpmovdw %ymm1, (%rdx)
350231
; AVX512-NEXT: vzeroupper
351232
; AVX512-NEXT: retq
352-
;
353-
; AVX512-FCP-LABEL: load_i16_stride2_vf8:
354-
; AVX512-FCP: # %bb.0:
355-
; AVX512-FCP-NEXT: vmovdqa (%rdi), %ymm0
356-
; AVX512-FCP-NEXT: vpsrld $16, %ymm0, %ymm1
357-
; AVX512-FCP-NEXT: vpmovdw %ymm0, (%rsi)
358-
; AVX512-FCP-NEXT: vpmovdw %ymm1, (%rdx)
359-
; AVX512-FCP-NEXT: vzeroupper
360-
; AVX512-FCP-NEXT: retq
361-
;
362-
; AVX512DQ-LABEL: load_i16_stride2_vf8:
363-
; AVX512DQ: # %bb.0:
364-
; AVX512DQ-NEXT: vmovdqa (%rdi), %ymm0
365-
; AVX512DQ-NEXT: vpsrld $16, %ymm0, %ymm1
366-
; AVX512DQ-NEXT: vpmovdw %ymm0, (%rsi)
367-
; AVX512DQ-NEXT: vpmovdw %ymm1, (%rdx)
368-
; AVX512DQ-NEXT: vzeroupper
369-
; AVX512DQ-NEXT: retq
370-
;
371-
; AVX512DQ-FCP-LABEL: load_i16_stride2_vf8:
372-
; AVX512DQ-FCP: # %bb.0:
373-
; AVX512DQ-FCP-NEXT: vmovdqa (%rdi), %ymm0
374-
; AVX512DQ-FCP-NEXT: vpsrld $16, %ymm0, %ymm1
375-
; AVX512DQ-FCP-NEXT: vpmovdw %ymm0, (%rsi)
376-
; AVX512DQ-FCP-NEXT: vpmovdw %ymm1, (%rdx)
377-
; AVX512DQ-FCP-NEXT: vzeroupper
378-
; AVX512DQ-FCP-NEXT: retq
379-
;
380-
; AVX512BW-LABEL: load_i16_stride2_vf8:
381-
; AVX512BW: # %bb.0:
382-
; AVX512BW-NEXT: vmovdqa (%rdi), %ymm0
383-
; AVX512BW-NEXT: vpsrld $16, %ymm0, %ymm1
384-
; AVX512BW-NEXT: vpmovdw %ymm0, (%rsi)
385-
; AVX512BW-NEXT: vpmovdw %ymm1, (%rdx)
386-
; AVX512BW-NEXT: vzeroupper
387-
; AVX512BW-NEXT: retq
388-
;
389-
; AVX512BW-FCP-LABEL: load_i16_stride2_vf8:
390-
; AVX512BW-FCP: # %bb.0:
391-
; AVX512BW-FCP-NEXT: vmovdqa (%rdi), %ymm0
392-
; AVX512BW-FCP-NEXT: vpsrld $16, %ymm0, %ymm1
393-
; AVX512BW-FCP-NEXT: vpmovdw %ymm0, (%rsi)
394-
; AVX512BW-FCP-NEXT: vpmovdw %ymm1, (%rdx)
395-
; AVX512BW-FCP-NEXT: vzeroupper
396-
; AVX512BW-FCP-NEXT: retq
397-
;
398-
; AVX512DQ-BW-LABEL: load_i16_stride2_vf8:
399-
; AVX512DQ-BW: # %bb.0:
400-
; AVX512DQ-BW-NEXT: vmovdqa (%rdi), %ymm0
401-
; AVX512DQ-BW-NEXT: vpsrld $16, %ymm0, %ymm1
402-
; AVX512DQ-BW-NEXT: vpmovdw %ymm0, (%rsi)
403-
; AVX512DQ-BW-NEXT: vpmovdw %ymm1, (%rdx)
404-
; AVX512DQ-BW-NEXT: vzeroupper
405-
; AVX512DQ-BW-NEXT: retq
406-
;
407-
; AVX512DQ-BW-FCP-LABEL: load_i16_stride2_vf8:
408-
; AVX512DQ-BW-FCP: # %bb.0:
409-
; AVX512DQ-BW-FCP-NEXT: vmovdqa (%rdi), %ymm0
410-
; AVX512DQ-BW-FCP-NEXT: vpsrld $16, %ymm0, %ymm1
411-
; AVX512DQ-BW-FCP-NEXT: vpmovdw %ymm0, (%rsi)
412-
; AVX512DQ-BW-FCP-NEXT: vpmovdw %ymm1, (%rdx)
413-
; AVX512DQ-BW-FCP-NEXT: vzeroupper
414-
; AVX512DQ-BW-FCP-NEXT: retq
415233
%wide.vec = load <16 x i16>, ptr %in.vec, align 64
416234
%strided.vec0 = shufflevector <16 x i16> %wide.vec, <16 x i16> poison, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
417235
%strided.vec1 = shufflevector <16 x i16> %wide.vec, <16 x i16> poison, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
@@ -544,69 +362,6 @@ define void @load_i16_stride2_vf16(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
544362
; AVX512-NEXT: vpmovdw %zmm1, (%rdx)
545363
; AVX512-NEXT: vzeroupper
546364
; AVX512-NEXT: retq
547-
;
548-
; AVX512-FCP-LABEL: load_i16_stride2_vf16:
549-
; AVX512-FCP: # %bb.0:
550-
; AVX512-FCP-NEXT: vmovdqa64 (%rdi), %zmm0
551-
; AVX512-FCP-NEXT: vpsrld $16, %zmm0, %zmm1
552-
; AVX512-FCP-NEXT: vpmovdw %zmm0, (%rsi)
553-
; AVX512-FCP-NEXT: vpmovdw %zmm1, (%rdx)
554-
; AVX512-FCP-NEXT: vzeroupper
555-
; AVX512-FCP-NEXT: retq
556-
;
557-
; AVX512DQ-LABEL: load_i16_stride2_vf16:
558-
; AVX512DQ: # %bb.0:
559-
; AVX512DQ-NEXT: vmovdqa64 (%rdi), %zmm0
560-
; AVX512DQ-NEXT: vpsrld $16, %zmm0, %zmm1
561-
; AVX512DQ-NEXT: vpmovdw %zmm0, (%rsi)
562-
; AVX512DQ-NEXT: vpmovdw %zmm1, (%rdx)
563-
; AVX512DQ-NEXT: vzeroupper
564-
; AVX512DQ-NEXT: retq
565-
;
566-
; AVX512DQ-FCP-LABEL: load_i16_stride2_vf16:
567-
; AVX512DQ-FCP: # %bb.0:
568-
; AVX512DQ-FCP-NEXT: vmovdqa64 (%rdi), %zmm0
569-
; AVX512DQ-FCP-NEXT: vpsrld $16, %zmm0, %zmm1
570-
; AVX512DQ-FCP-NEXT: vpmovdw %zmm0, (%rsi)
571-
; AVX512DQ-FCP-NEXT: vpmovdw %zmm1, (%rdx)
572-
; AVX512DQ-FCP-NEXT: vzeroupper
573-
; AVX512DQ-FCP-NEXT: retq
574-
;
575-
; AVX512BW-LABEL: load_i16_stride2_vf16:
576-
; AVX512BW: # %bb.0:
577-
; AVX512BW-NEXT: vmovdqa64 (%rdi), %zmm0
578-
; AVX512BW-NEXT: vpsrld $16, %zmm0, %zmm1
579-
; AVX512BW-NEXT: vpmovdw %zmm0, (%rsi)
580-
; AVX512BW-NEXT: vpmovdw %zmm1, (%rdx)
581-
; AVX512BW-NEXT: vzeroupper
582-
; AVX512BW-NEXT: retq
583-
;
584-
; AVX512BW-FCP-LABEL: load_i16_stride2_vf16:
585-
; AVX512BW-FCP: # %bb.0:
586-
; AVX512BW-FCP-NEXT: vmovdqa64 (%rdi), %zmm0
587-
; AVX512BW-FCP-NEXT: vpsrld $16, %zmm0, %zmm1
588-
; AVX512BW-FCP-NEXT: vpmovdw %zmm0, (%rsi)
589-
; AVX512BW-FCP-NEXT: vpmovdw %zmm1, (%rdx)
590-
; AVX512BW-FCP-NEXT: vzeroupper
591-
; AVX512BW-FCP-NEXT: retq
592-
;
593-
; AVX512DQ-BW-LABEL: load_i16_stride2_vf16:
594-
; AVX512DQ-BW: # %bb.0:
595-
; AVX512DQ-BW-NEXT: vmovdqa64 (%rdi), %zmm0
596-
; AVX512DQ-BW-NEXT: vpsrld $16, %zmm0, %zmm1
597-
; AVX512DQ-BW-NEXT: vpmovdw %zmm0, (%rsi)
598-
; AVX512DQ-BW-NEXT: vpmovdw %zmm1, (%rdx)
599-
; AVX512DQ-BW-NEXT: vzeroupper
600-
; AVX512DQ-BW-NEXT: retq
601-
;
602-
; AVX512DQ-BW-FCP-LABEL: load_i16_stride2_vf16:
603-
; AVX512DQ-BW-FCP: # %bb.0:
604-
; AVX512DQ-BW-FCP-NEXT: vmovdqa64 (%rdi), %zmm0
605-
; AVX512DQ-BW-FCP-NEXT: vpsrld $16, %zmm0, %zmm1
606-
; AVX512DQ-BW-FCP-NEXT: vpmovdw %zmm0, (%rsi)
607-
; AVX512DQ-BW-FCP-NEXT: vpmovdw %zmm1, (%rdx)
608-
; AVX512DQ-BW-FCP-NEXT: vzeroupper
609-
; AVX512DQ-BW-FCP-NEXT: retq
610365
%wide.vec = load <32 x i16>, ptr %in.vec, align 64
611366
%strided.vec0 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 16, i32 18, i32 20, i32 22, i32 24, i32 26, i32 28, i32 30>
612367
%strided.vec1 = shufflevector <32 x i16> %wide.vec, <32 x i16> poison, <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15, i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
@@ -817,18 +572,18 @@ define void @load_i16_stride2_vf32(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
817572
; AVX2-FCP-NEXT: vzeroupper
818573
; AVX2-FCP-NEXT: retq
819574
;
820-
; AVX512-LABEL: load_i16_stride2_vf32:
821-
; AVX512: # %bb.0:
822-
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
823-
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1
824-
; AVX512-NEXT: vpsrld $16, %zmm0, %zmm2
825-
; AVX512-NEXT: vpsrld $16, %zmm1, %zmm3
826-
; AVX512-NEXT: vpmovdw %zmm1, 32(%rsi)
827-
; AVX512-NEXT: vpmovdw %zmm0, (%rsi)
828-
; AVX512-NEXT: vpmovdw %zmm3, 32(%rdx)
829-
; AVX512-NEXT: vpmovdw %zmm2, (%rdx)
830-
; AVX512-NEXT: vzeroupper
831-
; AVX512-NEXT: retq
575+
; AVX512-VL-LABEL: load_i16_stride2_vf32:
576+
; AVX512-VL: # %bb.0:
577+
; AVX512-VL-NEXT: vmovdqa64 (%rdi), %zmm0
578+
; AVX512-VL-NEXT: vmovdqa64 64(%rdi), %zmm1
579+
; AVX512-VL-NEXT: vpsrld $16, %zmm0, %zmm2
580+
; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm3
581+
; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rsi)
582+
; AVX512-VL-NEXT: vpmovdw %zmm0, (%rsi)
583+
; AVX512-VL-NEXT: vpmovdw %zmm3, 32(%rdx)
584+
; AVX512-VL-NEXT: vpmovdw %zmm2, (%rdx)
585+
; AVX512-VL-NEXT: vzeroupper
586+
; AVX512-VL-NEXT: retq
832587
;
833588
; AVX512-FCP-LABEL: load_i16_stride2_vf32:
834589
; AVX512-FCP: # %bb.0:
@@ -1344,27 +1099,27 @@ define void @load_i16_stride2_vf64(ptr %in.vec, ptr %out.vec0, ptr %out.vec1) no
13441099
; AVX2-FCP-NEXT: vzeroupper
13451100
; AVX2-FCP-NEXT: retq
13461101
;
1347-
; AVX512-LABEL: load_i16_stride2_vf64:
1348-
; AVX512: # %bb.0:
1349-
; AVX512-NEXT: vmovdqa64 (%rdi), %zmm0
1350-
; AVX512-NEXT: vmovdqa64 64(%rdi), %zmm1
1351-
; AVX512-NEXT: vmovdqa64 128(%rdi), %zmm2
1352-
; AVX512-NEXT: vmovdqa64 192(%rdi), %zmm3
1353-
; AVX512-NEXT: vpmovdw %zmm1, %ymm4
1354-
; AVX512-NEXT: vpsrld $16, %zmm1, %zmm1
1355-
; AVX512-NEXT: vpsrld $16, %zmm0, %zmm5
1356-
; AVX512-NEXT: vpsrld $16, %zmm3, %zmm6
1357-
; AVX512-NEXT: vpsrld $16, %zmm2, %zmm7
1358-
; AVX512-NEXT: vpmovdw %zmm0, (%rsi)
1359-
; AVX512-NEXT: vmovdqa %ymm4, 32(%rsi)
1360-
; AVX512-NEXT: vpmovdw %zmm2, 64(%rsi)
1361-
; AVX512-NEXT: vpmovdw %zmm3, 96(%rsi)
1362-
; AVX512-NEXT: vpmovdw %zmm7, 64(%rdx)
1363-
; AVX512-NEXT: vpmovdw %zmm6, 96(%rdx)
1364-
; AVX512-NEXT: vpmovdw %zmm5, (%rdx)
1365-
; AVX512-NEXT: vpmovdw %zmm1, 32(%rdx)
1366-
; AVX512-NEXT: vzeroupper
1367-
; AVX512-NEXT: retq
1102+
; AVX512-VL-LABEL: load_i16_stride2_vf64:
1103+
; AVX512-VL: # %bb.0:
1104+
; AVX512-VL-NEXT: vmovdqa64 (%rdi), %zmm0
1105+
; AVX512-VL-NEXT: vmovdqa64 64(%rdi), %zmm1
1106+
; AVX512-VL-NEXT: vmovdqa64 128(%rdi), %zmm2
1107+
; AVX512-VL-NEXT: vmovdqa64 192(%rdi), %zmm3
1108+
; AVX512-VL-NEXT: vpmovdw %zmm1, %ymm4
1109+
; AVX512-VL-NEXT: vpsrld $16, %zmm1, %zmm1
1110+
; AVX512-VL-NEXT: vpsrld $16, %zmm0, %zmm5
1111+
; AVX512-VL-NEXT: vpsrld $16, %zmm3, %zmm6
1112+
; AVX512-VL-NEXT: vpsrld $16, %zmm2, %zmm7
1113+
; AVX512-VL-NEXT: vpmovdw %zmm0, (%rsi)
1114+
; AVX512-VL-NEXT: vmovdqa %ymm4, 32(%rsi)
1115+
; AVX512-VL-NEXT: vpmovdw %zmm2, 64(%rsi)
1116+
; AVX512-VL-NEXT: vpmovdw %zmm3, 96(%rsi)
1117+
; AVX512-VL-NEXT: vpmovdw %zmm7, 64(%rdx)
1118+
; AVX512-VL-NEXT: vpmovdw %zmm6, 96(%rdx)
1119+
; AVX512-VL-NEXT: vpmovdw %zmm5, (%rdx)
1120+
; AVX512-VL-NEXT: vpmovdw %zmm1, 32(%rdx)
1121+
; AVX512-VL-NEXT: vzeroupper
1122+
; AVX512-VL-NEXT: retq
13681123
;
13691124
; AVX512-FCP-LABEL: load_i16_stride2_vf64:
13701125
; AVX512-FCP: # %bb.0:

0 commit comments

Comments
 (0)