diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index df305637b1e4b..2d52e65aca1fb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -43450,6 +43450,28 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( KnownZero = LHSZero; break; } + case X86ISD::CMPM: + case X86ISD::CMPP: { + // Scalarize packed fp comparison if we only require element 0. + if (DemandedElts == 1) { + SDLoc dl(Op); + MVT VT = Op.getSimpleValueType(); + MVT OpSVT = Op.getOperand(0).getSimpleValueType().getScalarType(); + SDValue LHS = TLO.DAG.getExtractVectorElt(dl, OpSVT, Op.getOperand(0), 0); + SDValue RHS = TLO.DAG.getExtractVectorElt(dl, OpSVT, Op.getOperand(1), 0); + SDValue CC = Op.getOperand(2); + if (Opc == X86ISD::CMPM) { + SDValue Cmp = + TLO.DAG.getNode(X86ISD::FSETCCM, dl, MVT::v1i1, LHS, RHS, CC); + return TLO.CombineTo( + Op, TLO.DAG.getInsertSubvector(dl, TLO.DAG.getUNDEF(VT), Cmp, 0)); + } + SDValue Cmp = TLO.DAG.getNode(X86ISD::FSETCC, dl, OpSVT, LHS, RHS, CC); + return TLO.CombineTo(Op, + TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Cmp)); + } + break; + } case X86ISD::PCMPEQ: case X86ISD::PCMPGT: { APInt LHSUndef, LHSZero; diff --git a/llvm/test/CodeGen/X86/and-or-setcc.ll b/llvm/test/CodeGen/X86/and-or-setcc.ll index a6a9362908811..4484f23bbda36 100644 --- a/llvm/test/CodeGen/X86/and-or-setcc.ll +++ b/llvm/test/CodeGen/X86/and-or-setcc.ll @@ -17,8 +17,8 @@ define i1 @and_ord(float %a, float %b) { ; X64-LABEL: and_ord: ; X64: # %bb.0: ; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: cmpordps %xmm2, %xmm1 -; X64-NEXT: cmpordps %xmm2, %xmm0 +; X64-NEXT: cmpordss %xmm2, %xmm1 +; X64-NEXT: cmpordss %xmm2, %xmm0 ; X64-NEXT: andps %xmm1, %xmm0 ; X64-NEXT: movd %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax @@ -44,8 +44,8 @@ define i1 @or_uno(float %a, float %b) { ; X64-LABEL: or_uno: ; X64: # %bb.0: ; X64-NEXT: xorps %xmm2, %xmm2 -; X64-NEXT: cmpunordps %xmm2, %xmm1 -; X64-NEXT: cmpunordps %xmm2, %xmm0 +; X64-NEXT: cmpunordss %xmm2, %xmm1 +; X64-NEXT: cmpunordss %xmm2, %xmm0 ; X64-NEXT: orps %xmm1, %xmm0 ; X64-NEXT: movd %xmm0, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax diff --git a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll index 81ab104cab283..96c8e773d5edd 100644 --- a/llvm/test/CodeGen/X86/extract-vselect-setcc.ll +++ b/llvm/test/CodeGen/X86/extract-vselect-setcc.ll @@ -5,10 +5,9 @@ define void @PR117684(i1 %cond, <8 x float> %vec, ptr %ptr1, ptr %ptr2) #0 { ; CHECK-LABEL: PR117684: ; CHECK: # %bb.0: ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; CHECK-NEXT: vcmpnltps %ymm1, %ymm0, %k1 +; CHECK-NEXT: vcmpnltss %xmm1, %xmm0, %k1 ; CHECK-NEXT: vbroadcastss {{.*#+}} xmm0 = [NaN,NaN,NaN,NaN] ; CHECK-NEXT: vinsertf32x4 $0, %xmm0, %ymm0, %ymm0 {%k1} {z} -; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vmulss %xmm1, %xmm0, %xmm0 ; CHECK-NEXT: vmulss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm2 ; CHECK-NEXT: vbroadcastss %xmm2, %ymm2 diff --git a/llvm/test/CodeGen/X86/extractelement-fp.ll b/llvm/test/CodeGen/X86/extractelement-fp.ll index 944f6bbfd0bfb..1706f17eac165 100644 --- a/llvm/test/CodeGen/X86/extractelement-fp.ll +++ b/llvm/test/CodeGen/X86/extractelement-fp.ll @@ -319,7 +319,7 @@ define void @extsetcc(<4 x float> %x) { define <3 x double> @extvselectsetcc_crash(<2 x double> %x) { ; X64-LABEL: extvselectsetcc_crash: ; X64: # %bb.0: -; X64-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 +; X64-NEXT: vcmpeqsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 ; X64-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0] ; X64-NEXT: vandpd %xmm2, %xmm1, %xmm1 ; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 @@ -328,7 +328,7 @@ define <3 x double> @extvselectsetcc_crash(<2 x double> %x) { ; ; X86-LABEL: extvselectsetcc_crash: ; X86: # %bb.0: -; X86-NEXT: vcmpeqpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 +; X86-NEXT: vcmpeqsd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1 ; X86-NEXT: vmovsd {{.*#+}} xmm2 = [1.0E+0,0.0E+0] ; X86-NEXT: vandpd %xmm2, %xmm1, %xmm1 ; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 diff --git a/llvm/test/CodeGen/X86/fcmp-logic.ll b/llvm/test/CodeGen/X86/fcmp-logic.ll index 794b0ad92aef6..7b806bca43c2e 100644 --- a/llvm/test/CodeGen/X86/fcmp-logic.ll +++ b/llvm/test/CodeGen/X86/fcmp-logic.ll @@ -6,8 +6,8 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) { ; SSE2-LABEL: olt_ole_and_f32: ; SSE2: # %bb.0: -; SSE2-NEXT: cmpleps %xmm3, %xmm2 -; SSE2-NEXT: cmpltps %xmm1, %xmm0 +; SSE2-NEXT: cmpless %xmm3, %xmm2 +; SSE2-NEXT: cmpltss %xmm1, %xmm0 ; SSE2-NEXT: andps %xmm2, %xmm0 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax @@ -15,8 +15,8 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) { ; ; AVX1-LABEL: olt_ole_and_f32: ; AVX1: # %bb.0: -; AVX1-NEXT: vcmpleps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcmpltps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcmpless %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcmpltss %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vandps %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax @@ -24,15 +24,11 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) { ; ; AVX512-LABEL: olt_ole_and_f32: ; AVX512: # %bb.0: -; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 -; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vcmpltps %zmm1, %zmm0, %k1 -; AVX512-NEXT: vcmpleps %zmm3, %zmm2, %k0 {%k1} +; AVX512-NEXT: vcmpless %xmm3, %xmm2, %k0 +; AVX512-NEXT: vcmpltss %xmm1, %xmm0, %k1 +; AVX512-NEXT: kandw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %f1 = fcmp olt float %w, %x %f2 = fcmp ole float %y, %z @@ -43,8 +39,8 @@ define i1 @olt_ole_and_f32(float %w, float %x, float %y, float %z) { define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) { ; SSE2-LABEL: oge_oeq_or_f32: ; SSE2: # %bb.0: -; SSE2-NEXT: cmpeqps %xmm3, %xmm2 -; SSE2-NEXT: cmpleps %xmm0, %xmm1 +; SSE2-NEXT: cmpeqss %xmm3, %xmm2 +; SSE2-NEXT: cmpless %xmm0, %xmm1 ; SSE2-NEXT: orps %xmm2, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax @@ -52,8 +48,8 @@ define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) { ; ; AVX1-LABEL: oge_oeq_or_f32: ; AVX1: # %bb.0: -; AVX1-NEXT: vcmpeqps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcmpleps %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vcmpeqss %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcmpless %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vorps %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax @@ -61,16 +57,11 @@ define i1 @oge_oeq_or_f32(float %w, float %x, float %y, float %z) { ; ; AVX512-LABEL: oge_oeq_or_f32: ; AVX512: # %bb.0: -; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 -; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vcmpeqps %zmm3, %zmm2, %k0 -; AVX512-NEXT: vcmpleps %zmm0, %zmm1, %k1 +; AVX512-NEXT: vcmpeqss %xmm3, %xmm2, %k0 +; AVX512-NEXT: vcmpless %xmm0, %xmm1, %k1 ; AVX512-NEXT: korw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %f1 = fcmp oge float %w, %x %f2 = fcmp oeq float %y, %z @@ -90,8 +81,8 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) { ; ; AVX1-LABEL: ord_one_xor_f32: ; AVX1: # %bb.0: -; AVX1-NEXT: vcmpneq_oqps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcmpordps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcmpneq_oqss %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcmpordss %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax @@ -99,16 +90,11 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) { ; ; AVX512-LABEL: ord_one_xor_f32: ; AVX512: # %bb.0: -; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 -; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vcmpneq_oqps %zmm3, %zmm2, %k0 -; AVX512-NEXT: vcmpordps %zmm1, %zmm0, %k1 +; AVX512-NEXT: vcmpneq_oqss %xmm3, %xmm2, %k0 +; AVX512-NEXT: vcmpordss %xmm1, %xmm0, %k1 ; AVX512-NEXT: kxorw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %f1 = fcmp ord float %w, %x %f2 = fcmp one float %y, %z @@ -120,8 +106,8 @@ define i1 @ord_one_xor_f32(float %w, float %x, float %y, float %z) { define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) { ; SSE2-LABEL: une_oeq_xor_f32: ; SSE2: # %bb.0: -; SSE2-NEXT: cmpeqps %xmm3, %xmm2 -; SSE2-NEXT: cmpneqps %xmm1, %xmm0 +; SSE2-NEXT: cmpeqss %xmm3, %xmm2 +; SSE2-NEXT: cmpneqss %xmm1, %xmm0 ; SSE2-NEXT: xorps %xmm2, %xmm0 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax @@ -129,8 +115,8 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) { ; ; AVX1-LABEL: une_oeq_xor_f32: ; AVX1: # %bb.0: -; AVX1-NEXT: vcmpeqps %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcmpneqps %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcmpeqss %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcmpneqss %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vxorps %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax @@ -138,16 +124,11 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) { ; ; AVX512-LABEL: une_oeq_xor_f32: ; AVX512: # %bb.0: -; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 -; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vcmpeqps %zmm3, %zmm2, %k0 -; AVX512-NEXT: vcmpneqps %zmm1, %zmm0, %k1 +; AVX512-NEXT: vcmpeqss %xmm3, %xmm2, %k0 +; AVX512-NEXT: vcmpneqss %xmm1, %xmm0, %k1 ; AVX512-NEXT: kxorw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %f1 = fcmp une float %w, %x %f2 = fcmp oeq float %y, %z @@ -158,8 +139,8 @@ define i1 @une_oeq_xor_f32(float %w, float %x, float %y, float %z) { define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) { ; SSE2-LABEL: une_ugt_and_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: cmpnlepd %xmm3, %xmm2 -; SSE2-NEXT: cmpneqpd %xmm1, %xmm0 +; SSE2-NEXT: cmpnlesd %xmm3, %xmm2 +; SSE2-NEXT: cmpneqsd %xmm1, %xmm0 ; SSE2-NEXT: andpd %xmm2, %xmm0 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax @@ -167,8 +148,8 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) { ; ; AVX1-LABEL: une_ugt_and_f64: ; AVX1: # %bb.0: -; AVX1-NEXT: vcmpnlepd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcmpnlesd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vandpd %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax @@ -176,15 +157,11 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) { ; ; AVX512-LABEL: une_ugt_and_f64: ; AVX512: # %bb.0: -; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 -; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 -; AVX512-NEXT: vcmpnlepd %zmm3, %zmm2, %k0 {%k1} +; AVX512-NEXT: vcmpnlesd %xmm3, %xmm2, %k0 +; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1 +; AVX512-NEXT: kandw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %f1 = fcmp une double %w, %x %f2 = fcmp ugt double %y, %z @@ -195,8 +172,8 @@ define i1 @une_ugt_and_f64(double %w, double %x, double %y, double %z) { define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) { ; SSE2-LABEL: ult_uge_or_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: cmpnltpd %xmm3, %xmm2 -; SSE2-NEXT: cmpnlepd %xmm0, %xmm1 +; SSE2-NEXT: cmpnltsd %xmm3, %xmm2 +; SSE2-NEXT: cmpnlesd %xmm0, %xmm1 ; SSE2-NEXT: orpd %xmm2, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax @@ -204,8 +181,8 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) { ; ; AVX1-LABEL: ult_uge_or_f64: ; AVX1: # %bb.0: -; AVX1-NEXT: vcmpnltpd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcmpnlepd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vcmpnltsd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcmpnlesd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax @@ -213,16 +190,11 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) { ; ; AVX512-LABEL: ult_uge_or_f64: ; AVX512: # %bb.0: -; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 -; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vcmpnltpd %zmm3, %zmm2, %k0 -; AVX512-NEXT: vcmpnlepd %zmm0, %zmm1, %k1 +; AVX512-NEXT: vcmpnltsd %xmm3, %xmm2, %k0 +; AVX512-NEXT: vcmpnlesd %xmm0, %xmm1, %k1 ; AVX512-NEXT: korw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %f1 = fcmp ult double %w, %x %f2 = fcmp uge double %y, %z @@ -233,8 +205,8 @@ define i1 @ult_uge_or_f64(double %w, double %x, double %y, double %z) { define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) { ; SSE2-LABEL: une_uno_xor_f64: ; SSE2: # %bb.0: -; SSE2-NEXT: cmpunordpd %xmm3, %xmm2 -; SSE2-NEXT: cmpneqpd %xmm1, %xmm0 +; SSE2-NEXT: cmpunordsd %xmm3, %xmm2 +; SSE2-NEXT: cmpneqsd %xmm1, %xmm0 ; SSE2-NEXT: xorpd %xmm2, %xmm0 ; SSE2-NEXT: movd %xmm0, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax @@ -242,8 +214,8 @@ define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) { ; ; AVX1-LABEL: une_uno_xor_f64: ; AVX1: # %bb.0: -; AVX1-NEXT: vcmpunordpd %xmm3, %xmm2, %xmm2 -; AVX1-NEXT: vcmpneqpd %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vcmpunordsd %xmm3, %xmm2, %xmm2 +; AVX1-NEXT: vcmpneqsd %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vxorpd %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax @@ -251,16 +223,11 @@ define i1 @une_uno_xor_f64(double %w, double %x, double %y, double %z) { ; ; AVX512-LABEL: une_uno_xor_f64: ; AVX512: # %bb.0: -; AVX512-NEXT: # kill: def $xmm3 killed $xmm3 def $zmm3 -; AVX512-NEXT: # kill: def $xmm2 killed $xmm2 def $zmm2 -; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vcmpunordpd %zmm3, %zmm2, %k0 -; AVX512-NEXT: vcmpneqpd %zmm1, %zmm0, %k1 +; AVX512-NEXT: vcmpunordsd %xmm3, %xmm2, %k0 +; AVX512-NEXT: vcmpneqsd %xmm1, %xmm0, %k1 ; AVX512-NEXT: kxorw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %f1 = fcmp une double %w, %x %f2 = fcmp uno double %y, %z @@ -371,8 +338,8 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) { ; SSE2: # %bb.0: ; SSE2-NEXT: xorps %xmm4, %xmm4 ; SSE2-NEXT: xorps %xmm5, %xmm5 -; SSE2-NEXT: cmpltps %xmm1, %xmm5 -; SSE2-NEXT: cmpltps %xmm0, %xmm4 +; SSE2-NEXT: cmpltss %xmm1, %xmm5 +; SSE2-NEXT: cmpltss %xmm0, %xmm4 ; SSE2-NEXT: orps %xmm5, %xmm4 ; SSE2-NEXT: movd %xmm4, %ecx ; SSE2-NEXT: ucomiss %xmm2, %xmm3 @@ -383,8 +350,8 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) { ; AVX1-LABEL: f32cmp3: ; AVX1: # %bb.0: ; AVX1-NEXT: vxorps %xmm4, %xmm4, %xmm4 -; AVX1-NEXT: vcmpltps %xmm1, %xmm4, %xmm1 -; AVX1-NEXT: vcmpltps %xmm0, %xmm4, %xmm0 +; AVX1-NEXT: vcmpltss %xmm1, %xmm4, %xmm1 +; AVX1-NEXT: vcmpltss %xmm0, %xmm4, %xmm0 ; AVX1-NEXT: vorps %xmm1, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %ecx ; AVX1-NEXT: vucomiss %xmm2, %xmm3 @@ -394,17 +361,14 @@ define i1 @f32cmp3(float %x, float %y, float %z, float %w) { ; ; AVX512-LABEL: f32cmp3: ; AVX512: # %bb.0: -; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 -; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512-NEXT: vxorps %xmm4, %xmm4, %xmm4 -; AVX512-NEXT: vcmpltps %zmm1, %zmm4, %k0 -; AVX512-NEXT: vcmpltps %zmm0, %zmm4, %k1 +; AVX512-NEXT: vcmpltss %xmm1, %xmm4, %k0 +; AVX512-NEXT: vcmpltss %xmm0, %xmm4, %k1 ; AVX512-NEXT: korw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %ecx ; AVX512-NEXT: vucomiss %xmm2, %xmm3 ; AVX512-NEXT: seta %al ; AVX512-NEXT: xorb %cl, %al -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %cmpx = fcmp ogt float %x, 0.0 %cmpy = fcmp ogt float %y, 0.0 @@ -425,8 +389,8 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) { ; SSE2-NEXT: cvtsi2sd %rax, %xmm2 ; SSE2-NEXT: mulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm3 -; SSE2-NEXT: cmpltpd %xmm2, %xmm3 -; SSE2-NEXT: cmpltpd %xmm0, %xmm1 +; SSE2-NEXT: cmpltsd %xmm2, %xmm3 +; SSE2-NEXT: cmpltsd %xmm0, %xmm1 ; SSE2-NEXT: orpd %xmm3, %xmm1 ; SSE2-NEXT: movd %xmm1, %eax ; SSE2-NEXT: # kill: def $al killed $al killed $eax @@ -441,8 +405,8 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) { ; AVX1-NEXT: movl %edx, %eax ; AVX1-NEXT: vcvtsi2sd %rax, %xmm2, %xmm2 ; AVX1-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX1-NEXT: vcmpltpd %xmm2, %xmm1, %xmm2 -; AVX1-NEXT: vcmpltpd %xmm0, %xmm1, %xmm0 +; AVX1-NEXT: vcmpltsd %xmm2, %xmm1, %xmm2 +; AVX1-NEXT: vcmpltsd %xmm0, %xmm1, %xmm0 ; AVX1-NEXT: vorpd %xmm2, %xmm0, %xmm0 ; AVX1-NEXT: vmovd %xmm0, %eax ; AVX1-NEXT: # kill: def $al killed $al killed $eax @@ -454,12 +418,11 @@ define i1 @PR140534(i32 %a0, i32 %a1, i32 %a2) { ; AVX512-NEXT: vcvtusi2sd %esi, %xmm1, %xmm1 ; AVX512-NEXT: vcvtusi2sd %edx, %xmm2, %xmm2 ; AVX512-NEXT: vmulsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1 -; AVX512-NEXT: vcmpltpd %zmm2, %zmm1, %k0 -; AVX512-NEXT: vcmpltpd %zmm0, %zmm1, %k1 +; AVX512-NEXT: vcmpltsd %xmm2, %xmm1, %k0 +; AVX512-NEXT: vcmpltsd %xmm0, %xmm1, %k1 ; AVX512-NEXT: korw %k0, %k1, %k0 ; AVX512-NEXT: kmovw %k0, %eax ; AVX512-NEXT: # kill: def $al killed $al killed $eax -; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq %conv0 = uitofp i32 %a0 to double %conv1 = uitofp i32 %a1 to double diff --git a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll index d9d5e2846ed0f..43bac05988e29 100644 --- a/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll +++ b/llvm/test/CodeGen/X86/lzcnt-zext-cmp.ll @@ -322,8 +322,8 @@ define i32 @test_zext_cmp11(double %a, double %b) "no-nans-fp-math"="true" { ; ALL-LABEL: test_zext_cmp11: ; ALL: # %bb.0: # %entry ; ALL-NEXT: vxorpd %xmm2, %xmm2, %xmm2 -; ALL-NEXT: vcmpeqpd %xmm2, %xmm1, %xmm1 -; ALL-NEXT: vcmpeqpd %xmm2, %xmm0, %xmm0 +; ALL-NEXT: vcmpeqsd %xmm2, %xmm1, %xmm1 +; ALL-NEXT: vcmpeqsd %xmm2, %xmm0, %xmm0 ; ALL-NEXT: vorpd %xmm1, %xmm0, %xmm0 ; ALL-NEXT: vmovd %xmm0, %eax ; ALL-NEXT: andl $1, %eax diff --git a/llvm/test/CodeGen/X86/pr40539.ll b/llvm/test/CodeGen/X86/pr40539.ll index 56d80a025fa08..a920efbec59ea 100644 --- a/llvm/test/CodeGen/X86/pr40539.ll +++ b/llvm/test/CodeGen/X86/pr40539.ll @@ -40,20 +40,19 @@ define zeroext i1 @_Z8test_cosv() { ; CHECK-NEXT: subl $8, %esp ; CHECK-NEXT: .cfi_def_cfa_offset 12 ; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; CHECK-NEXT: movss {{.*#+}} xmm2 = [8.70000004E-1,0.0E+0,0.0E+0,0.0E+0] +; CHECK-NEXT: movss {{.*#+}} xmm1 = [8.60000014E-1,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: divss {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 ; CHECK-NEXT: movss %xmm0, {{[0-9]+}}(%esp) -; CHECK-NEXT: movss {{.*#+}} xmm0 = [8.60000014E-1,0.0E+0,0.0E+0,0.0E+0] ; CHECK-NEXT: flds {{[0-9]+}}(%esp) ; CHECK-NEXT: #APP ; CHECK-NEXT: fcos ; CHECK-NEXT: #NO_APP ; CHECK-NEXT: fstps (%esp) -; CHECK-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero -; CHECK-NEXT: cmpleps %xmm1, %xmm0 -; CHECK-NEXT: cmpleps %xmm2, %xmm1 -; CHECK-NEXT: andps %xmm0, %xmm1 -; CHECK-NEXT: movd %xmm1, %eax +; CHECK-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero +; CHECK-NEXT: cmpless %xmm0, %xmm1 +; CHECK-NEXT: cmpless {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0 +; CHECK-NEXT: andps %xmm1, %xmm0 +; CHECK-NEXT: movd %xmm0, %eax ; CHECK-NEXT: andb $1, %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: addl $8, %esp diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index 060bd1764d3c4..179790c46f33c 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -426,18 +426,17 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512VL-LABEL: test_v2f16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm2 -; AVX512VL-NEXT: vcvtph2ps %xmm1, %ymm3 -; AVX512VL-NEXT: vcmpltps %ymm2, %ymm3, %k1 +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2 +; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3 +; AVX512VL-NEXT: vcmpltss %xmm2, %xmm3, %k1 ; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} ; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 -; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512FP16-LABEL: test_v2f16: ; AVX512FP16: # %bb.0: ; AVX512FP16-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512FP16-NEXT: vcmpltph %xmm0, %xmm1, %k1 +; AVX512FP16-NEXT: vcmpltsh %xmm0, %xmm1, %k1 ; AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1} ; AVX512FP16-NEXT: vmovaps %xmm1, %xmm0 ; AVX512FP16-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll index 4d6daf3fb77f0..465988760d44a 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll @@ -426,18 +426,17 @@ define half @test_v2f16(<2 x half> %a0) nounwind { ; AVX512VL-LABEL: test_v2f16: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512VL-NEXT: vcvtph2ps %xmm0, %ymm2 -; AVX512VL-NEXT: vcvtph2ps %xmm1, %ymm3 -; AVX512VL-NEXT: vcmpltps %ymm3, %ymm2, %k1 +; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm2 +; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm3 +; AVX512VL-NEXT: vcmpltss %xmm3, %xmm2, %k1 ; AVX512VL-NEXT: vmovdqu16 %xmm0, %xmm1 {%k1} ; AVX512VL-NEXT: vmovdqa %xmm1, %xmm0 -; AVX512VL-NEXT: vzeroupper ; AVX512VL-NEXT: retq ; ; AVX512FP16-LABEL: test_v2f16: ; AVX512FP16: # %bb.0: ; AVX512FP16-NEXT: vpsrld $16, %xmm0, %xmm1 -; AVX512FP16-NEXT: vcmpltph %xmm1, %xmm0, %k1 +; AVX512FP16-NEXT: vcmpltsh %xmm1, %xmm0, %k1 ; AVX512FP16-NEXT: vmovsh %xmm0, %xmm0, %xmm1 {%k1} ; AVX512FP16-NEXT: vmovaps %xmm1, %xmm0 ; AVX512FP16-NEXT: retq