@@ -159,8 +159,8 @@ define <8 x half> @fcopysign_v8f16(ptr %p0, ptr %p1) nounwind {
159159; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
160160; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
161161; X86-AVX512-NEXT: vmovdqa (%ecx), %xmm1
162- ; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN ]
163- ; X86-AVX512-NEXT: vpternlogq $202, (%eax), %xmm1, %xmm0
162+ ; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879 ]
163+ ; X86-AVX512-NEXT: vpternlogd $202, (%eax), %xmm1, %xmm0
164164; X86-AVX512-NEXT: retl
165165;
166166; X64-SSE-LABEL: fcopysign_v8f16:
@@ -193,8 +193,8 @@ define <8 x half> @fcopysign_v8f16(ptr %p0, ptr %p1) nounwind {
193193; X64-AVX512-LABEL: fcopysign_v8f16:
194194; X64-AVX512: # %bb.0:
195195; X64-AVX512-NEXT: vmovdqa (%rdi), %xmm1
196- ; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} xmm0 = [9223231297218904063,9223231297218904063 ]
197- ; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %xmm1, %xmm0
196+ ; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2147450879,2147450879,2147450879,2147450879 ]
197+ ; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %xmm1, %xmm0
198198; X64-AVX512-NEXT: retq
199199 %a0 = load <8 x half >, ptr %p0 , align 16
200200 %a1 = load <8 x half >, ptr %p1 , align 16
@@ -405,8 +405,8 @@ define <16 x half> @fcopysign_v16f16(ptr %p0, ptr %p1) nounwind {
405405; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
406406; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
407407; X86-AVX512-NEXT: vmovdqu (%ecx), %ymm1
408- ; X86-AVX512-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN ]
409- ; X86-AVX512-NEXT: vpternlogq $202, (%eax), %ymm1, %ymm0
408+ ; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879 ]
409+ ; X86-AVX512-NEXT: vpternlogd $202, (%eax), %ymm1, %ymm0
410410; X86-AVX512-NEXT: retl
411411;
412412; X64-SSE-LABEL: fcopysign_v16f16:
@@ -444,8 +444,8 @@ define <16 x half> @fcopysign_v16f16(ptr %p0, ptr %p1) nounwind {
444444; X64-AVX512-LABEL: fcopysign_v16f16:
445445; X64-AVX512: # %bb.0:
446446; X64-AVX512-NEXT: vmovdqu (%rdi), %ymm1
447- ; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} ymm0 = [9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063 ]
448- ; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %ymm1, %ymm0
447+ ; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879 ]
448+ ; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %ymm1, %ymm0
449449; X64-AVX512-NEXT: retq
450450 %a0 = load <16 x half >, ptr %p0 , align 16
451451 %a1 = load <16 x half >, ptr %p1 , align 16
@@ -691,34 +691,14 @@ define <32 x half> @fcopysign_v32f16(ptr %p0, ptr %p1) nounwind {
691691; X86-AVX2-NEXT: vpor %ymm2, %ymm1, %ymm1
692692; X86-AVX2-NEXT: retl
693693;
694- ; X86-AVX512VL-LABEL: fcopysign_v32f16:
695- ; X86-AVX512VL: # %bb.0:
696- ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %eax
697- ; X86-AVX512VL-NEXT: movl {{[0-9]+}}(%esp), %ecx
698- ; X86-AVX512VL-NEXT: vmovdqu64 (%ecx), %zmm1
699- ; X86-AVX512VL-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
700- ; X86-AVX512VL-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
701- ; X86-AVX512VL-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
702- ; X86-AVX512VL-NEXT: retl
703- ;
704- ; X86-AVX512FP16-LABEL: fcopysign_v32f16:
705- ; X86-AVX512FP16: # %bb.0:
706- ; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %eax
707- ; X86-AVX512FP16-NEXT: movl {{[0-9]+}}(%esp), %ecx
708- ; X86-AVX512FP16-NEXT: vmovdqu64 (%ecx), %zmm1
709- ; X86-AVX512FP16-NEXT: vpbroadcastw {{.*#+}} zmm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
710- ; X86-AVX512FP16-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
711- ; X86-AVX512FP16-NEXT: retl
712- ;
713- ; X86-AVX512VLDQ-LABEL: fcopysign_v32f16:
714- ; X86-AVX512VLDQ: # %bb.0:
715- ; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %eax
716- ; X86-AVX512VLDQ-NEXT: movl {{[0-9]+}}(%esp), %ecx
717- ; X86-AVX512VLDQ-NEXT: vmovdqu64 (%ecx), %zmm1
718- ; X86-AVX512VLDQ-NEXT: vpbroadcastw {{.*#+}} ymm0 = [NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN,NaN]
719- ; X86-AVX512VLDQ-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
720- ; X86-AVX512VLDQ-NEXT: vpternlogq $202, (%eax), %zmm1, %zmm0
721- ; X86-AVX512VLDQ-NEXT: retl
694+ ; X86-AVX512-LABEL: fcopysign_v32f16:
695+ ; X86-AVX512: # %bb.0:
696+ ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
697+ ; X86-AVX512-NEXT: movl {{[0-9]+}}(%esp), %ecx
698+ ; X86-AVX512-NEXT: vmovdqu64 (%ecx), %zmm1
699+ ; X86-AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879]
700+ ; X86-AVX512-NEXT: vpternlogd $202, (%eax), %zmm1, %zmm0
701+ ; X86-AVX512-NEXT: retl
722702;
723703; X64-SSE-LABEL: fcopysign_v32f16:
724704; X64-SSE: # %bb.0:
@@ -769,8 +749,8 @@ define <32 x half> @fcopysign_v32f16(ptr %p0, ptr %p1) nounwind {
769749; X64-AVX512-LABEL: fcopysign_v32f16:
770750; X64-AVX512: # %bb.0:
771751; X64-AVX512-NEXT: vmovdqu64 (%rdi), %zmm1
772- ; X64-AVX512-NEXT: vpbroadcastq {{.*#+}} zmm0 = [9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063,9223231297218904063 ]
773- ; X64-AVX512-NEXT: vpternlogq $202, (%rsi), %zmm1, %zmm0
752+ ; X64-AVX512-NEXT: vpbroadcastd {{.*#+}} zmm0 = [2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879,2147450879 ]
753+ ; X64-AVX512-NEXT: vpternlogd $202, (%rsi), %zmm1, %zmm0
774754; X64-AVX512-NEXT: retq
775755 %a0 = load <32 x half >, ptr %p0 , align 16
776756 %a1 = load <32 x half >, ptr %p1 , align 16
@@ -786,3 +766,6 @@ declare <32 x half> @llvm.copysign.v32f16(<32 x half>, <32 x half>)
786766; X64-AVX512VLDQ: {{.*}}
787767; X86: {{.*}}
788768; X86-AVX: {{.*}}
769+ ; X86-AVX512FP16: {{.*}}
770+ ; X86-AVX512VL: {{.*}}
771+ ; X86-AVX512VLDQ: {{.*}}
0 commit comments