diff --git a/llvm/include/llvm/CodeGen/SDPatternMatch.h b/llvm/include/llvm/CodeGen/SDPatternMatch.h
index 04135ee7e1c02..0af4f73b869c3 100644
--- a/llvm/include/llvm/CodeGen/SDPatternMatch.h
+++ b/llvm/include/llvm/CodeGen/SDPatternMatch.h
@@ -542,6 +542,81 @@ struct BinaryOpc_match {
   }
 };
 
+template <typename LHS_P, typename RHS_P, typename Pred_t,
+          bool Commutable = false>
+struct MaxMin_match {
+  using PredType = Pred_t;
+  LHS_P LHS;
+  RHS_P RHS;
+
+  MaxMin_match(const LHS_P &L, const RHS_P &R) : LHS(L), RHS(R) {}
+
+  template <typename MatchContext>
+  bool match(const MatchContext &Ctx, SDValue N) {
+    if (sd_context_match(N, Ctx, m_Opc(ISD::SELECT)) ||
+        sd_context_match(N, Ctx, m_Opc(ISD::VSELECT))) {
+      EffectiveOperands<false> EO_SELECT(N, Ctx);
+      assert(EO_SELECT.Size == 3);
+      SDValue Cond = N->getOperand(EO_SELECT.FirstIndex);
+      SDValue TrueValue = N->getOperand(EO_SELECT.FirstIndex + 1);
+      SDValue FalseValue = N->getOperand(EO_SELECT.FirstIndex + 2);
+
+      if (sd_context_match(Cond, Ctx, m_Opc(ISD::SETCC))) {
+        EffectiveOperands<false> EO_SETCC(Cond, Ctx);
+        assert(EO_SETCC.Size == 3);
+        SDValue L = Cond->getOperand(EO_SETCC.FirstIndex);
+        SDValue R = Cond->getOperand(EO_SETCC.FirstIndex + 1);
+        auto *CondNode =
+            cast<CondCodeSDNode>(Cond->getOperand(EO_SETCC.FirstIndex + 2));
+
+        if ((TrueValue != L || FalseValue != R) &&
+            (TrueValue != R || FalseValue != L)) {
+          return false;
+        }
+
+        ISD::CondCode Cond =
+            TrueValue == L ? CondNode->get()
+                           : getSetCCInverse(CondNode->get(), L.getValueType());
+        if (!Pred_t::match(Cond)) {
+          return false;
+        }
+        return (LHS.match(Ctx, L) && RHS.match(Ctx, R)) ||
+               (Commutable && LHS.match(Ctx, R) && RHS.match(Ctx, L));
+      }
+    }
+
+    return false;
+  }
+};
+
+// Helper class for identifying signed max predicates.
+struct smax_pred_ty {
+  static bool match(ISD::CondCode Cond) {
+    return Cond == ISD::CondCode::SETGT || Cond == ISD::CondCode::SETGE;
+  }
+};
+
+// Helper class for identifying unsigned max predicates.
+struct umax_pred_ty {
+  static bool match(ISD::CondCode Cond) {
+    return Cond == ISD::CondCode::SETUGT || Cond == ISD::CondCode::SETUGE;
+  }
+};
+
+// Helper class for identifying signed min predicates.
+struct smin_pred_ty {
+  static bool match(ISD::CondCode Cond) {
+    return Cond == ISD::CondCode::SETLT || Cond == ISD::CondCode::SETLE;
+  }
+};
+
+// Helper class for identifying unsigned min predicates.
+struct umin_pred_ty {
+  static bool match(ISD::CondCode Cond) {
+    return Cond == ISD::CondCode::SETULT || Cond == ISD::CondCode::SETULE;
+  }
+};
+
 template <typename LHS, typename RHS>
 inline BinaryOpc_match<LHS, RHS> m_BinOp(unsigned Opc, const LHS &L,
                                          const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(Opc, L, R);
 }
@@ -613,21 +688,45 @@ inline BinaryOpc_match<LHS, RHS> m_SMin(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(ISD::SMIN, L, R);
 }
 
+template <typename LHS, typename RHS>
+inline auto m_SMinLike(const LHS &L, const RHS &R) {
+  return m_AnyOf(BinaryOpc_match<LHS, RHS>(ISD::SMIN, L, R),
+                 MaxMin_match<LHS, RHS, smin_pred_ty, true>(L, R));
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOpc_match<LHS, RHS> m_SMax(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(ISD::SMAX, L, R);
 }
 
+template <typename LHS, typename RHS>
+inline auto m_SMaxLike(const LHS &L, const RHS &R) {
+  return m_AnyOf(BinaryOpc_match<LHS, RHS>(ISD::SMAX, L, R),
+                 MaxMin_match<LHS, RHS, smax_pred_ty, true>(L, R));
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOpc_match<LHS, RHS> m_UMin(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(ISD::UMIN, L, R);
 }
 
+template <typename LHS, typename RHS>
+inline auto m_UMinLike(const LHS &L, const RHS &R) {
+  return m_AnyOf(BinaryOpc_match<LHS, RHS>(ISD::UMIN, L, R),
+                 MaxMin_match<LHS, RHS, umin_pred_ty, true>(L, R));
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOpc_match<LHS, RHS> m_UMax(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(ISD::UMAX, L, R);
 }
 
+template <typename LHS, typename RHS>
+inline auto m_UMaxLike(const LHS &L, const RHS &R) {
+  return m_AnyOf(BinaryOpc_match<LHS, RHS>(ISD::UMAX, L, R),
+                 MaxMin_match<LHS, RHS, umax_pred_ty, true>(L, R));
+}
+
 template <typename LHS, typename RHS>
 inline BinaryOpc_match<LHS, RHS> m_UDiv(const LHS &L, const RHS &R) {
   return BinaryOpc_match<LHS, RHS>(ISD::UDIV, L, R);
 }
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 810ca458bc878..924558a298fd8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ 
b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4190,26 +4190,26 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // smax(a,b) - smin(a,b) --> abds(a,b) if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) && - sd_match(N0, m_SMax(m_Value(A), m_Value(B))) && - sd_match(N1, m_SMin(m_Specific(A), m_Specific(B)))) + sd_match(N0, m_SMaxLike(m_Value(A), m_Value(B))) && + sd_match(N1, m_SMinLike(m_Specific(A), m_Specific(B)))) return DAG.getNode(ISD::ABDS, DL, VT, A, B); // smin(a,b) - smax(a,b) --> neg(abds(a,b)) if (hasOperation(ISD::ABDS, VT) && - sd_match(N0, m_SMin(m_Value(A), m_Value(B))) && - sd_match(N1, m_SMax(m_Specific(A), m_Specific(B)))) + sd_match(N0, m_SMinLike(m_Value(A), m_Value(B))) && + sd_match(N1, m_SMaxLike(m_Specific(A), m_Specific(B)))) return DAG.getNegative(DAG.getNode(ISD::ABDS, DL, VT, A, B), DL, VT); // umax(a,b) - umin(a,b) --> abdu(a,b) if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) && - sd_match(N0, m_UMax(m_Value(A), m_Value(B))) && - sd_match(N1, m_UMin(m_Specific(A), m_Specific(B)))) + sd_match(N0, m_UMaxLike(m_Value(A), m_Value(B))) && + sd_match(N1, m_UMinLike(m_Specific(A), m_Specific(B)))) return DAG.getNode(ISD::ABDU, DL, VT, A, B); // umin(a,b) - umax(a,b) --> neg(abdu(a,b)) if (hasOperation(ISD::ABDU, VT) && - sd_match(N0, m_UMin(m_Value(A), m_Value(B))) && - sd_match(N1, m_UMax(m_Specific(A), m_Specific(B)))) + sd_match(N0, m_UMinLike(m_Value(A), m_Value(B))) && + sd_match(N1, m_UMaxLike(m_Specific(A), m_Specific(B)))) return DAG.getNegative(DAG.getNode(ISD::ABDU, DL, VT, A, B), DL, VT); return SDValue(); diff --git a/llvm/test/CodeGen/AArch64/abds.ll b/llvm/test/CodeGen/AArch64/abds.ll index e5cc04f9be1a1..62db30f17747c 100644 --- a/llvm/test/CodeGen/AArch64/abds.ll +++ b/llvm/test/CodeGen/AArch64/abds.ll @@ -547,10 +547,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: sxtb w8, w0 -; CHECK-NEXT: cmp w8, w1, sxtb -; CHECK-NEXT: csel w8, w0, w1, lt -; 
CHECK-NEXT: csel w9, w1, w0, lt -; CHECK-NEXT: sub w0, w9, w8 +; CHECK-NEXT: sub w8, w8, w1, sxtb +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp slt i8 %a, %b %ab = select i1 %cmp, i8 %a, i8 %b @@ -563,10 +562,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: sxth w8, w0 -; CHECK-NEXT: cmp w8, w1, sxth -; CHECK-NEXT: csel w8, w0, w1, le -; CHECK-NEXT: csel w9, w1, w0, le -; CHECK-NEXT: sub w0, w9, w8 +; CHECK-NEXT: sub w8, w8, w1, sxth +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp sle i16 %a, %b %ab = select i1 %cmp, i16 %a, i16 %b @@ -578,10 +576,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_select_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: csel w8, w0, w1, gt -; CHECK-NEXT: csel w9, w1, w0, gt -; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: sub w8, w1, w0 +; CHECK-NEXT: subs w9, w0, w1 +; CHECK-NEXT: csel w0, w9, w8, gt ; CHECK-NEXT: ret %cmp = icmp sgt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -593,10 +590,9 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_select_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: csel x8, x0, x1, ge -; CHECK-NEXT: csel x9, x1, x0, ge -; CHECK-NEXT: sub x0, x8, x9 +; CHECK-NEXT: sub x8, x1, x0 +; CHECK-NEXT: subs x9, x0, x1 +; CHECK-NEXT: csel x0, x9, x8, gt ; CHECK-NEXT: ret %cmp = icmp sge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b @@ -608,14 +604,13 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: abd_select_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: sbcs xzr, x1, x3 -; CHECK-NEXT: csel x8, x0, x2, lt -; CHECK-NEXT: csel x9, x2, x0, lt -; CHECK-NEXT: csel x10, x1, 
x3, lt -; CHECK-NEXT: csel x11, x3, x1, lt -; CHECK-NEXT: subs x0, x9, x8 -; CHECK-NEXT: sbc x1, x11, x10 +; CHECK-NEXT: subs x8, x0, x2 +; CHECK-NEXT: sbc x9, x1, x3 +; CHECK-NEXT: subs x10, x2, x0 +; CHECK-NEXT: sbc x11, x3, x1 +; CHECK-NEXT: sbcs xzr, x3, x1 +; CHECK-NEXT: csel x0, x8, x10, lt +; CHECK-NEXT: csel x1, x9, x11, lt ; CHECK-NEXT: ret %cmp = icmp slt i128 %a, %b %ab = select i1 %cmp, i128 %a, i128 %b diff --git a/llvm/test/CodeGen/AArch64/abdu.ll b/llvm/test/CodeGen/AArch64/abdu.ll index 0a44ae1688458..4585de96c848f 100644 --- a/llvm/test/CodeGen/AArch64/abdu.ll +++ b/llvm/test/CodeGen/AArch64/abdu.ll @@ -408,10 +408,9 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; CHECK-LABEL: abd_select_i8: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xff -; CHECK-NEXT: cmp w8, w1, uxtb -; CHECK-NEXT: csel w8, w0, w1, lo -; CHECK-NEXT: csel w9, w1, w0, lo -; CHECK-NEXT: sub w0, w9, w8 +; CHECK-NEXT: sub w8, w8, w1, uxtb +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ult i8 %a, %b %ab = select i1 %cmp, i8 %a, i8 %b @@ -424,10 +423,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; CHECK-LABEL: abd_select_i16: ; CHECK: // %bb.0: ; CHECK-NEXT: and w8, w0, #0xffff -; CHECK-NEXT: cmp w8, w1, uxth -; CHECK-NEXT: csel w8, w0, w1, ls -; CHECK-NEXT: csel w9, w1, w0, ls -; CHECK-NEXT: sub w0, w9, w8 +; CHECK-NEXT: sub w8, w8, w1, uxth +; CHECK-NEXT: cmp w8, #0 +; CHECK-NEXT: cneg w0, w8, mi ; CHECK-NEXT: ret %cmp = icmp ule i16 %a, %b %ab = select i1 %cmp, i16 %a, i16 %b @@ -439,10 +437,9 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: abd_select_i32: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 -; CHECK-NEXT: csel w8, w0, w1, hi -; CHECK-NEXT: csel w9, w1, w0, hi -; CHECK-NEXT: sub w0, w8, w9 +; CHECK-NEXT: sub w8, w1, w0 +; CHECK-NEXT: subs w9, w0, w1 +; CHECK-NEXT: csel w0, w9, w8, hi ; CHECK-NEXT: ret %cmp = icmp ugt i32 %a, %b %ab = 
select i1 %cmp, i32 %a, i32 %b @@ -454,10 +451,9 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: abd_select_i64: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x1 -; CHECK-NEXT: csel x8, x0, x1, hs -; CHECK-NEXT: csel x9, x1, x0, hs -; CHECK-NEXT: sub x0, x8, x9 +; CHECK-NEXT: sub x8, x1, x0 +; CHECK-NEXT: subs x9, x0, x1 +; CHECK-NEXT: csel x0, x9, x8, hi ; CHECK-NEXT: ret %cmp = icmp uge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b @@ -469,14 +465,14 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; CHECK-LABEL: abd_select_i128: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x2 -; CHECK-NEXT: sbcs xzr, x1, x3 -; CHECK-NEXT: csel x8, x0, x2, lo -; CHECK-NEXT: csel x9, x2, x0, lo -; CHECK-NEXT: csel x10, x1, x3, lo -; CHECK-NEXT: csel x11, x3, x1, lo -; CHECK-NEXT: subs x0, x9, x8 -; CHECK-NEXT: sbc x1, x11, x10 +; CHECK-NEXT: subs x8, x0, x2 +; CHECK-NEXT: sbcs x9, x1, x3 +; CHECK-NEXT: cset w10, lo +; CHECK-NEXT: sbfx x10, x10, #0, #1 +; CHECK-NEXT: eor x8, x8, x10 +; CHECK-NEXT: eor x9, x9, x10 +; CHECK-NEXT: subs x0, x8, x10 +; CHECK-NEXT: sbc x1, x9, x10 ; CHECK-NEXT: ret %cmp = icmp ult i128 %a, %b %ab = select i1 %cmp, i128 %a, i128 %b diff --git a/llvm/test/CodeGen/AArch64/midpoint-int.ll b/llvm/test/CodeGen/AArch64/midpoint-int.ll index 1043fa5c4565e..bbdce7c6e933b 100644 --- a/llvm/test/CodeGen/AArch64/midpoint-int.ll +++ b/llvm/test/CodeGen/AArch64/midpoint-int.ll @@ -13,12 +13,11 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: scalar_i32_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: sub w9, w1, w0 +; CHECK-NEXT: subs w10, w0, w1 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: csel w9, w1, w0, gt -; CHECK-NEXT: csel w10, w0, w1, gt +; CHECK-NEXT: csel w9, w10, w9, gt ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w9, w10, w9 ; CHECK-NEXT: lsr 
w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -36,12 +35,11 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind { ; CHECK-LABEL: scalar_i32_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp w0, w1 +; CHECK-NEXT: sub w9, w1, w0 +; CHECK-NEXT: subs w10, w0, w1 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: csel w9, w1, w0, hi -; CHECK-NEXT: csel w10, w0, w1, hi +; CHECK-NEXT: csel w9, w10, w9, hi ; CHECK-NEXT: cneg w8, w8, ls -; CHECK-NEXT: sub w9, w10, w9 ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -64,10 +62,10 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind { ; CHECK-NEXT: ldr w9, [x0] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: cmp w9, w1 -; CHECK-NEXT: csel w10, w1, w9, gt -; CHECK-NEXT: csel w11, w9, w1, gt +; CHECK-NEXT: sub w10, w1, w9 ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w10, w11, w10 +; CHECK-NEXT: subs w11, w9, w1 +; CHECK-NEXT: csel w10, w11, w10, gt ; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 ; CHECK-NEXT: ret @@ -89,10 +87,10 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr w9, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: cmp w0, w9 -; CHECK-NEXT: csel w10, w9, w0, gt -; CHECK-NEXT: csel w9, w0, w9, gt +; CHECK-NEXT: sub w10, w9, w0 ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w9, w9, w10 +; CHECK-NEXT: subs w9, w0, w9 +; CHECK-NEXT: csel w9, w9, w10, gt ; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret @@ -115,10 +113,10 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr w10, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff ; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w11, w10, w9, gt -; CHECK-NEXT: csel w10, w9, w10, gt +; CHECK-NEXT: sub w11, w10, w9 ; CHECK-NEXT: cneg w8, w8, le -; 
CHECK-NEXT: sub w10, w10, w11 +; CHECK-NEXT: subs w10, w9, w10 +; CHECK-NEXT: csel w10, w10, w11, gt ; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 ; CHECK-NEXT: ret @@ -144,12 +142,11 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; CHECK-LABEL: scalar_i64_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x1 +; CHECK-NEXT: sub x9, x1, x0 +; CHECK-NEXT: subs x10, x0, x1 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: csel x9, x1, x0, gt -; CHECK-NEXT: csel x10, x0, x1, gt +; CHECK-NEXT: csel x9, x10, x9, gt ; CHECK-NEXT: cneg x8, x8, le -; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 ; CHECK-NEXT: ret @@ -167,12 +164,11 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; CHECK-LABEL: scalar_i64_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: cmp x0, x1 +; CHECK-NEXT: sub x9, x1, x0 +; CHECK-NEXT: subs x10, x0, x1 ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff -; CHECK-NEXT: csel x9, x1, x0, hi -; CHECK-NEXT: csel x10, x0, x1, hi +; CHECK-NEXT: csel x9, x10, x9, hi ; CHECK-NEXT: cneg x8, x8, ls -; CHECK-NEXT: sub x9, x10, x9 ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 ; CHECK-NEXT: ret @@ -195,10 +191,10 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind { ; CHECK-NEXT: ldr x9, [x0] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: cmp x9, x1 -; CHECK-NEXT: csel x10, x1, x9, gt -; CHECK-NEXT: csel x11, x9, x1, gt +; CHECK-NEXT: sub x10, x1, x9 ; CHECK-NEXT: cneg x8, x8, le -; CHECK-NEXT: sub x10, x11, x10 +; CHECK-NEXT: subs x11, x9, x1 +; CHECK-NEXT: csel x10, x11, x10, gt ; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: madd x0, x10, x8, x9 ; CHECK-NEXT: ret @@ -220,10 +216,10 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr 
%a2_addr) nounwind { ; CHECK-NEXT: ldr x9, [x1] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: cmp x0, x9 -; CHECK-NEXT: csel x10, x9, x0, gt -; CHECK-NEXT: csel x9, x0, x9, gt +; CHECK-NEXT: sub x10, x9, x0 ; CHECK-NEXT: cneg x8, x8, le -; CHECK-NEXT: sub x9, x9, x10 +; CHECK-NEXT: subs x9, x0, x9 +; CHECK-NEXT: csel x9, x9, x10, gt ; CHECK-NEXT: lsr x9, x9, #1 ; CHECK-NEXT: madd x0, x9, x8, x0 ; CHECK-NEXT: ret @@ -246,10 +242,10 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldr x10, [x1] ; CHECK-NEXT: mov x8, #-1 // =0xffffffffffffffff ; CHECK-NEXT: cmp x9, x10 -; CHECK-NEXT: csel x11, x10, x9, gt -; CHECK-NEXT: csel x10, x9, x10, gt +; CHECK-NEXT: sub x11, x10, x9 ; CHECK-NEXT: cneg x8, x8, le -; CHECK-NEXT: sub x10, x10, x11 +; CHECK-NEXT: subs x10, x9, x10 +; CHECK-NEXT: csel x10, x10, x11, gt ; CHECK-NEXT: lsr x10, x10, #1 ; CHECK-NEXT: madd x0, x10, x8, x9 ; CHECK-NEXT: ret @@ -275,14 +271,13 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxth w9, w0 +; CHECK-NEXT: sxth w9, w1 +; CHECK-NEXT: sxth w10, w0 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w1, sxth -; CHECK-NEXT: csel w9, w1, w0, gt -; CHECK-NEXT: csel w10, w0, w1, gt +; CHECK-NEXT: subs w9, w10, w9 +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: ubfx w9, w9, #1, #15 +; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i16 %a1, %a2 ; signed @@ -299,14 +294,13 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w0, #0xffff +; CHECK-NEXT: and w9, w1, #0xffff +; CHECK-NEXT: 
and w10, w0, #0xffff ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w1, uxth -; CHECK-NEXT: csel w9, w1, w0, hi -; CHECK-NEXT: csel w10, w0, w1, hi +; CHECK-NEXT: subs w9, w10, w9 +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, ls -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: ubfx w9, w9, #1, #15 +; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i16 %a1, %a2 @@ -325,15 +319,14 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind { ; CHECK-LABEL: scalar_i16_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsh w9, [x0] +; CHECK-NEXT: sxth w9, w1 +; CHECK-NEXT: ldrsh w10, [x0] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w1, sxth -; CHECK-NEXT: csel w10, w1, w9, gt -; CHECK-NEXT: csel w11, w9, w1, gt +; CHECK-NEXT: subs w9, w10, w9 +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w10, w11, w10 -; CHECK-NEXT: ubfx w10, w10, #1, #15 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: madd w0, w9, w8, w10 ; CHECK-NEXT: ret %a1 = load i16, ptr %a1_addr %t3 = icmp sgt i16 %a1, %a2 ; signed @@ -353,12 +346,10 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind { ; CHECK-NEXT: sxth w9, w0 ; CHECK-NEXT: ldrsh w10, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w9, w10, w0, gt -; CHECK-NEXT: csel w10, w0, w10, gt +; CHECK-NEXT: subs w9, w9, w10 +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: ubfx w9, w9, #1, #15 +; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %a2 = load i16, ptr %a2_addr @@ -379,12 +370,10 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldrsh w9, [x0] ; CHECK-NEXT: ldrsh w10, [x1] ; CHECK-NEXT: mov w8, #-1 // 
=0xffffffff -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w11, w10, w9, gt -; CHECK-NEXT: csel w10, w9, w10, gt +; CHECK-NEXT: subs w10, w9, w10 +; CHECK-NEXT: cneg w10, w10, mi ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w10, w10, w11 -; CHECK-NEXT: ubfx w10, w10, #1, #15 +; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 ; CHECK-NEXT: ret %a1 = load i16, ptr %a1_addr @@ -409,14 +398,13 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_signed_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: sxtb w9, w0 +; CHECK-NEXT: sxtb w9, w1 +; CHECK-NEXT: sxtb w10, w0 ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w1, sxtb -; CHECK-NEXT: csel w9, w1, w0, gt -; CHECK-NEXT: csel w10, w0, w1, gt +; CHECK-NEXT: subs w9, w10, w9 +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: ubfx w9, w9, #1, #7 +; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %t3 = icmp sgt i8 %a1, %a2 ; signed @@ -433,14 +421,13 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; CHECK-LABEL: scalar_i8_unsigned_reg_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: and w9, w0, #0xff +; CHECK-NEXT: and w9, w1, #0xff +; CHECK-NEXT: and w10, w0, #0xff ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w1, uxtb -; CHECK-NEXT: csel w9, w1, w0, hi -; CHECK-NEXT: csel w10, w0, w1, hi +; CHECK-NEXT: subs w9, w10, w9 +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, ls -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: ubfx w9, w9, #1, #7 +; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %t3 = icmp ugt i8 %a1, %a2 @@ -459,15 +446,14 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { 
; CHECK-LABEL: scalar_i8_signed_mem_reg: ; CHECK: // %bb.0: -; CHECK-NEXT: ldrsb w9, [x0] +; CHECK-NEXT: sxtb w9, w1 +; CHECK-NEXT: ldrsb w10, [x0] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w1, sxtb -; CHECK-NEXT: csel w10, w1, w9, gt -; CHECK-NEXT: csel w11, w9, w1, gt +; CHECK-NEXT: subs w9, w10, w9 +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w10, w11, w10 -; CHECK-NEXT: ubfx w10, w10, #1, #7 -; CHECK-NEXT: madd w0, w10, w8, w9 +; CHECK-NEXT: lsr w9, w9, #1 +; CHECK-NEXT: madd w0, w9, w8, w10 ; CHECK-NEXT: ret %a1 = load i8, ptr %a1_addr %t3 = icmp sgt i8 %a1, %a2 ; signed @@ -487,12 +473,10 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind { ; CHECK-NEXT: sxtb w9, w0 ; CHECK-NEXT: ldrsb w10, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w9, w10, w0, gt -; CHECK-NEXT: csel w10, w0, w10, gt +; CHECK-NEXT: subs w9, w9, w10 +; CHECK-NEXT: cneg w9, w9, mi ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w9, w10, w9 -; CHECK-NEXT: ubfx w9, w9, #1, #7 +; CHECK-NEXT: lsr w9, w9, #1 ; CHECK-NEXT: madd w0, w9, w8, w0 ; CHECK-NEXT: ret %a2 = load i8, ptr %a2_addr @@ -513,12 +497,10 @@ define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; CHECK-NEXT: ldrsb w9, [x0] ; CHECK-NEXT: ldrsb w10, [x1] ; CHECK-NEXT: mov w8, #-1 // =0xffffffff -; CHECK-NEXT: cmp w9, w10 -; CHECK-NEXT: csel w11, w10, w9, gt -; CHECK-NEXT: csel w10, w9, w10, gt +; CHECK-NEXT: subs w10, w9, w10 +; CHECK-NEXT: cneg w10, w10, mi ; CHECK-NEXT: cneg w8, w8, le -; CHECK-NEXT: sub w10, w10, w11 -; CHECK-NEXT: ubfx w10, w10, #1, #7 +; CHECK-NEXT: lsr w10, w10, #1 ; CHECK-NEXT: madd w0, w10, w8, w9 ; CHECK-NEXT: ret %a1 = load i8, ptr %a1_addr diff --git a/llvm/test/CodeGen/RISCV/abds.ll b/llvm/test/CodeGen/RISCV/abds.ll index e639d4b30d4c9..8208eafbc205c 100644 --- a/llvm/test/CodeGen/RISCV/abds.ll +++ b/llvm/test/CodeGen/RISCV/abds.ll @@ -2348,30 +2348,26 @@ 
define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind { define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; RV32I-LABEL: abd_select_i8: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a1, 24 -; RV32I-NEXT: srai a2, a2, 24 -; RV32I-NEXT: slli a3, a0, 24 -; RV32I-NEXT: srai a3, a3, 24 -; RV32I-NEXT: blt a3, a2, .LBB34_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slli a1, a1, 24 +; RV32I-NEXT: srai a1, a1, 24 +; RV32I-NEXT: slli a0, a0, 24 +; RV32I-NEXT: srai a0, a0, 24 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB34_2: -; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i8: ; RV64I: # %bb.0: -; RV64I-NEXT: slli a2, a1, 56 -; RV64I-NEXT: srai a2, a2, 56 -; RV64I-NEXT: slli a3, a0, 56 -; RV64I-NEXT: srai a3, a3, 56 -; RV64I-NEXT: blt a3, a2, .LBB34_2 -; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: slli a1, a1, 56 +; RV64I-NEXT: srai a1, a1, 56 +; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: srai a0, a0, 56 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB34_2: -; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_select_i8: @@ -2392,30 +2388,26 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; RV32I-LABEL: abd_select_i16: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a0, 16 -; RV32I-NEXT: srai a2, a2, 16 -; RV32I-NEXT: slli a3, a1, 16 -; RV32I-NEXT: srai a3, a3, 16 -; RV32I-NEXT: bge a3, a2, .LBB35_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: srai a1, a1, 16 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB35_2: -; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i16: 
; RV64I: # %bb.0: -; RV64I-NEXT: slli a2, a0, 48 -; RV64I-NEXT: srai a2, a2, 48 -; RV64I-NEXT: slli a3, a1, 48 -; RV64I-NEXT: srai a3, a3, 48 -; RV64I-NEXT: bge a3, a2, .LBB35_2 -; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: slli a1, a1, 48 +; RV64I-NEXT: srai a1, a1, 48 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB35_2: -; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_select_i16: @@ -2446,14 +2438,12 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; ; RV64I-LABEL: abd_select_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a2, a0 -; RV64I-NEXT: sext.w a3, a1 -; RV64I-NEXT: blt a3, a2, .LBB36_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: subw a0, a1, a0 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB36_2: -; RV64I-NEXT: subw a0, a0, a1 +; RV64I-NEXT: sext.w a1, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_select_i32: @@ -2481,32 +2471,28 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: abd_select_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: beq a1, a3, .LBB37_3 +; RV32I-NEXT: sltu a4, a2, a0 +; RV32I-NEXT: mv a5, a4 +; RV32I-NEXT: beq a1, a3, .LBB37_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt a4, a1, a3 -; RV32I-NEXT: bnez a4, .LBB37_4 +; RV32I-NEXT: slt a5, a3, a1 ; RV32I-NEXT: .LBB37_2: -; RV32I-NEXT: mv a4, a1 -; RV32I-NEXT: mv a5, a0 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: j .LBB37_5 -; RV32I-NEXT: .LBB37_3: -; RV32I-NEXT: sltu a4, a0, a2 -; RV32I-NEXT: beqz a4, .LBB37_2 +; RV32I-NEXT: bnez a5, .LBB37_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: sub a1, a3, a1 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a0, a2, a0 +; RV32I-NEXT: ret ; RV32I-NEXT: 
.LBB37_4: -; RV32I-NEXT: mv a4, a3 -; RV32I-NEXT: mv a5, a2 -; RV32I-NEXT: .LBB37_5: -; RV32I-NEXT: sltu a2, a5, a0 -; RV32I-NEXT: sub a1, a4, a1 -; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: sub a0, a5, a0 +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: sub a1, a1, a3 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a0, a0, a2 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: bge a0, a1, .LBB37_2 +; RV64I-NEXT: blt a1, a0, .LBB37_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret @@ -2551,97 +2537,98 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_select_i128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a7, 4(a2) -; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw t0, 12(a2) -; RV32I-NEXT: lw a5, 12(a1) -; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: beq a5, t0, .LBB38_2 +; RV32I-NEXT: lw a3, 0(a1) +; RV32I-NEXT: lw a4, 4(a1) +; RV32I-NEXT: lw a6, 8(a1) +; RV32I-NEXT: lw t0, 12(a1) +; RV32I-NEXT: lw a7, 8(a2) +; RV32I-NEXT: lw t1, 12(a2) +; RV32I-NEXT: lw a5, 0(a2) +; RV32I-NEXT: lw a1, 4(a2) +; RV32I-NEXT: sltu a2, a7, a6 +; RV32I-NEXT: mv t4, a2 +; RV32I-NEXT: beq t0, t1, .LBB38_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: slt t1, a5, t0 -; RV32I-NEXT: j .LBB38_3 +; RV32I-NEXT: slt t4, t1, t0 ; RV32I-NEXT: .LBB38_2: -; RV32I-NEXT: sltu t1, a4, a6 -; RV32I-NEXT: .LBB38_3: -; RV32I-NEXT: lw t3, 0(a2) -; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: beq a3, a7, .LBB38_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sltu a2, a3, a7 -; RV32I-NEXT: j .LBB38_6 -; RV32I-NEXT: .LBB38_5: -; RV32I-NEXT: sltu a2, a1, t3 +; RV32I-NEXT: sltu t2, a5, a3 +; RV32I-NEXT: sltu t5, a1, a4 +; RV32I-NEXT: mv t3, t2 +; RV32I-NEXT: beq a4, a1, .LBB38_4 +; RV32I-NEXT: # %bb.3: +; RV32I-NEXT: mv t3, t5 +; RV32I-NEXT: .LBB38_4: +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: xor t6, t0, t1 +; RV32I-NEXT: xor s0, 
a6, a7 +; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: beqz t6, .LBB38_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: mv t3, t4 ; RV32I-NEXT: .LBB38_6: -; RV32I-NEXT: xor t2, a5, t0 -; RV32I-NEXT: xor t4, a4, a6 -; RV32I-NEXT: or t2, t4, t2 -; RV32I-NEXT: beqz t2, .LBB38_8 +; RV32I-NEXT: mv t4, t2 +; RV32I-NEXT: beq a1, a4, .LBB38_8 ; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv a2, t1 +; RV32I-NEXT: mv t4, t5 ; RV32I-NEXT: .LBB38_8: -; RV32I-NEXT: bnez a2, .LBB38_10 +; RV32I-NEXT: sltu t5, a3, a5 +; RV32I-NEXT: mv t6, t5 +; RV32I-NEXT: beq a4, a1, .LBB38_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a2, t3 -; RV32I-NEXT: mv t1, a7 -; RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: mv t2, a6 -; RV32I-NEXT: j .LBB38_11 +; RV32I-NEXT: sltu t6, a4, a1 ; RV32I-NEXT: .LBB38_10: -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: mv t1, a3 -; RV32I-NEXT: mv t4, a5 -; RV32I-NEXT: mv t2, a4 -; RV32I-NEXT: mv a1, t3 -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: mv a4, a6 -; RV32I-NEXT: .LBB38_11: -; RV32I-NEXT: sltu a6, a4, t2 -; RV32I-NEXT: sub a7, a5, t4 -; RV32I-NEXT: sltu a5, a1, a2 +; RV32I-NEXT: bnez t3, .LBB38_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sub t0, t1, t0 ; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: beq a3, t1, .LBB38_13 -; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: sltu a7, a3, t1 -; RV32I-NEXT: .LBB38_13: -; RV32I-NEXT: sub a4, a4, t2 -; RV32I-NEXT: sltu t0, a4, a7 -; RV32I-NEXT: sub a6, a6, t0 -; RV32I-NEXT: sub a4, a4, a7 -; RV32I-NEXT: sub a3, a3, t1 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sltu a7, a6, t4 +; RV32I-NEXT: sub a2, a2, a7 +; RV32I-NEXT: sub a3, a5, a3 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: sub a1, a1, t2 +; RV32I-NEXT: sub a4, a6, t4 +; RV32I-NEXT: j .LBB38_13 +; RV32I-NEXT: .LBB38_12: +; RV32I-NEXT: sltu a2, a6, a7 +; RV32I-NEXT: sub t0, t0, t1 +; RV32I-NEXT: sub a2, t0, a2 +; RV32I-NEXT: sub a6, a6, a7 +; RV32I-NEXT: sltu a7, a6, t6 +; RV32I-NEXT: sub a2, a2, a7 ; RV32I-NEXT: sub a3, a3, a5 -; RV32I-NEXT: sub a1, a1, a2 -; 
RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sub a4, a4, a1 +; RV32I-NEXT: sub a1, a4, t5 +; RV32I-NEXT: sub a4, a6, t6 +; RV32I-NEXT: .LBB38_13: +; RV32I-NEXT: sw a3, 0(a0) +; RV32I-NEXT: sw a1, 4(a0) ; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a6, 12(a0) +; RV32I-NEXT: sw a2, 12(a0) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i128: ; RV64I: # %bb.0: -; RV64I-NEXT: beq a1, a3, .LBB38_3 +; RV64I-NEXT: sltu a4, a2, a0 +; RV64I-NEXT: mv a5, a4 +; RV64I-NEXT: beq a1, a3, .LBB38_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: slt a4, a1, a3 -; RV64I-NEXT: beqz a4, .LBB38_4 +; RV64I-NEXT: slt a5, a3, a1 ; RV64I-NEXT: .LBB38_2: -; RV64I-NEXT: mv a4, a1 -; RV64I-NEXT: mv a5, a0 -; RV64I-NEXT: mv a1, a3 -; RV64I-NEXT: mv a0, a2 -; RV64I-NEXT: j .LBB38_5 -; RV64I-NEXT: .LBB38_3: -; RV64I-NEXT: sltu a4, a0, a2 -; RV64I-NEXT: bnez a4, .LBB38_2 +; RV64I-NEXT: bnez a5, .LBB38_4 +; RV64I-NEXT: # %bb.3: +; RV64I-NEXT: sub a1, a3, a1 +; RV64I-NEXT: sub a1, a1, a4 +; RV64I-NEXT: sub a0, a2, a0 +; RV64I-NEXT: ret ; RV64I-NEXT: .LBB38_4: -; RV64I-NEXT: mv a4, a3 -; RV64I-NEXT: mv a5, a2 -; RV64I-NEXT: .LBB38_5: -; RV64I-NEXT: sltu a2, a0, a5 +; RV64I-NEXT: sltu a4, a0, a2 +; RV64I-NEXT: sub a1, a1, a3 ; RV64I-NEXT: sub a1, a1, a4 -; RV64I-NEXT: sub a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a5 +; RV64I-NEXT: sub a0, a0, a2 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_select_i128: diff --git a/llvm/test/CodeGen/RISCV/abdu.ll b/llvm/test/CodeGen/RISCV/abdu.ll index a04a800157dbb..814bca98523ce 100644 --- a/llvm/test/CodeGen/RISCV/abdu.ll +++ b/llvm/test/CodeGen/RISCV/abdu.ll @@ -1725,17 +1725,25 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { ; define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { -; NOZBB-LABEL: abd_select_i8: -; NOZBB: # %bb.0: -; NOZBB-NEXT: andi a2, a1, 255 -; NOZBB-NEXT: andi a3, a0, 255 -; NOZBB-NEXT: bltu a3, a2, .LBB23_2 -; NOZBB-NEXT: # %bb.1: -; NOZBB-NEXT: sub 
a0, a0, a1 -; NOZBB-NEXT: ret -; NOZBB-NEXT: .LBB23_2: -; NOZBB-NEXT: sub a0, a1, a0 -; NOZBB-NEXT: ret +; RV32I-LABEL: abd_select_i8: +; RV32I: # %bb.0: +; RV32I-NEXT: andi a1, a1, 255 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: abd_select_i8: +; RV64I: # %bb.0: +; RV64I-NEXT: andi a1, a1, 255 +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_select_i8: ; ZBB: # %bb.0: @@ -1757,28 +1765,24 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; RV32I: # %bb.0: ; RV32I-NEXT: lui a2, 16 ; RV32I-NEXT: addi a2, a2, -1 -; RV32I-NEXT: and a3, a0, a2 -; RV32I-NEXT: and a2, a1, a2 -; RV32I-NEXT: bgeu a2, a3, .LBB24_2 -; RV32I-NEXT: # %bb.1: +; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: sub a0, a0, a1 +; RV32I-NEXT: srai a1, a0, 31 +; RV32I-NEXT: xor a0, a0, a1 ; RV32I-NEXT: sub a0, a0, a1 -; RV32I-NEXT: ret -; RV32I-NEXT: .LBB24_2: -; RV32I-NEXT: sub a0, a1, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i16: ; RV64I: # %bb.0: ; RV64I-NEXT: lui a2, 16 ; RV64I-NEXT: addiw a2, a2, -1 -; RV64I-NEXT: and a3, a0, a2 -; RV64I-NEXT: and a2, a1, a2 -; RV64I-NEXT: bgeu a2, a3, .LBB24_2 -; RV64I-NEXT: # %bb.1: +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 ; RV64I-NEXT: sub a0, a0, a1 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB24_2: -; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret ; ; ZBB-LABEL: abd_select_i16: @@ -1809,14 +1813,14 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; ; RV64I-LABEL: abd_select_i32: ; RV64I: # %bb.0: -; RV64I-NEXT: sext.w a2, a0 -; RV64I-NEXT: sext.w a3, a1 -; RV64I-NEXT: bltu a3, a2, .LBB25_2 -; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: 
subw a0, a1, a0 -; RV64I-NEXT: ret -; RV64I-NEXT: .LBB25_2: -; RV64I-NEXT: subw a0, a0, a1 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: sub a0, a0, a1 +; RV64I-NEXT: srai a1, a0, 63 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sub a0, a0, a1 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_select_i32: @@ -1846,32 +1850,29 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; RV32I-LABEL: abd_select_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: beq a1, a3, .LBB26_3 +; RV32I-NEXT: sltu a4, a0, a2 +; RV32I-NEXT: sub a3, a1, a3 +; RV32I-NEXT: sub a3, a3, a4 +; RV32I-NEXT: sub a2, a0, a2 +; RV32I-NEXT: beq a3, a1, .LBB26_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu a4, a1, a3 -; RV32I-NEXT: bnez a4, .LBB26_4 +; RV32I-NEXT: sltu a0, a1, a3 +; RV32I-NEXT: j .LBB26_3 ; RV32I-NEXT: .LBB26_2: -; RV32I-NEXT: mv a4, a1 -; RV32I-NEXT: mv a5, a0 -; RV32I-NEXT: mv a1, a3 -; RV32I-NEXT: mv a0, a2 -; RV32I-NEXT: j .LBB26_5 +; RV32I-NEXT: sltu a0, a0, a2 ; RV32I-NEXT: .LBB26_3: -; RV32I-NEXT: sltu a4, a0, a2 -; RV32I-NEXT: beqz a4, .LBB26_2 -; RV32I-NEXT: .LBB26_4: -; RV32I-NEXT: mv a4, a3 -; RV32I-NEXT: mv a5, a2 -; RV32I-NEXT: .LBB26_5: -; RV32I-NEXT: sltu a2, a5, a0 -; RV32I-NEXT: sub a1, a4, a1 -; RV32I-NEXT: sub a1, a1, a2 -; RV32I-NEXT: sub a0, a5, a0 +; RV32I-NEXT: neg a1, a0 +; RV32I-NEXT: xor a2, a2, a1 +; RV32I-NEXT: sltu a4, a2, a1 +; RV32I-NEXT: xor a1, a3, a1 +; RV32I-NEXT: add a1, a1, a0 +; RV32I-NEXT: sub a1, a1, a4 +; RV32I-NEXT: add a0, a2, a0 ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: bgeu a0, a1, .LBB26_2 +; RV64I-NEXT: bltu a1, a0, .LBB26_2 ; RV64I-NEXT: # %bb.1: ; RV64I-NEXT: sub a0, a1, a0 ; RV64I-NEXT: ret @@ -1917,97 +1918,98 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: abd_select_i128: ; RV32I: # 
%bb.0: -; RV32I-NEXT: lw a7, 4(a2) +; RV32I-NEXT: lw a3, 0(a2) +; RV32I-NEXT: lw a5, 4(a2) ; RV32I-NEXT: lw a6, 8(a2) -; RV32I-NEXT: lw t0, 12(a2) -; RV32I-NEXT: lw a5, 12(a1) -; RV32I-NEXT: lw a3, 4(a1) -; RV32I-NEXT: lw a4, 8(a1) -; RV32I-NEXT: beq a5, t0, .LBB27_2 +; RV32I-NEXT: lw a7, 12(a2) +; RV32I-NEXT: lw a2, 8(a1) +; RV32I-NEXT: lw a4, 12(a1) +; RV32I-NEXT: lw t0, 0(a1) +; RV32I-NEXT: lw a1, 4(a1) +; RV32I-NEXT: sltu t1, a2, a6 +; RV32I-NEXT: sub a7, a4, a7 +; RV32I-NEXT: sltu t2, t0, a3 +; RV32I-NEXT: sub a7, a7, t1 +; RV32I-NEXT: mv t1, t2 +; RV32I-NEXT: beq a1, a5, .LBB27_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: sltu t1, a5, t0 -; RV32I-NEXT: j .LBB27_3 +; RV32I-NEXT: sltu t1, a1, a5 ; RV32I-NEXT: .LBB27_2: +; RV32I-NEXT: sub t3, a2, a6 +; RV32I-NEXT: sltu a6, t3, t1 +; RV32I-NEXT: sub a6, a7, a6 +; RV32I-NEXT: sub a7, t3, t1 +; RV32I-NEXT: beq a6, a4, .LBB27_4 +; RV32I-NEXT: # %bb.3: ; RV32I-NEXT: sltu t1, a4, a6 -; RV32I-NEXT: .LBB27_3: -; RV32I-NEXT: lw t3, 0(a2) -; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: beq a3, a7, .LBB27_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sltu a2, a3, a7 -; RV32I-NEXT: j .LBB27_6 +; RV32I-NEXT: j .LBB27_5 +; RV32I-NEXT: .LBB27_4: +; RV32I-NEXT: sltu t1, a2, a7 ; RV32I-NEXT: .LBB27_5: -; RV32I-NEXT: sltu a2, a1, t3 -; RV32I-NEXT: .LBB27_6: -; RV32I-NEXT: xor t2, a5, t0 -; RV32I-NEXT: xor t4, a4, a6 -; RV32I-NEXT: or t2, t4, t2 -; RV32I-NEXT: beqz t2, .LBB27_8 -; RV32I-NEXT: # %bb.7: -; RV32I-NEXT: mv a2, t1 +; RV32I-NEXT: sub a5, a1, a5 +; RV32I-NEXT: sub a5, a5, t2 +; RV32I-NEXT: sub a3, t0, a3 +; RV32I-NEXT: beq a5, a1, .LBB27_7 +; RV32I-NEXT: # %bb.6: +; RV32I-NEXT: sltu a1, a1, a5 +; RV32I-NEXT: j .LBB27_8 +; RV32I-NEXT: .LBB27_7: +; RV32I-NEXT: sltu a1, t0, a3 ; RV32I-NEXT: .LBB27_8: -; RV32I-NEXT: bnez a2, .LBB27_10 +; RV32I-NEXT: xor a4, a6, a4 +; RV32I-NEXT: xor a2, a7, a2 +; RV32I-NEXT: or a2, a2, a4 +; RV32I-NEXT: beqz a2, .LBB27_10 ; RV32I-NEXT: # %bb.9: -; RV32I-NEXT: mv a2, t3 -; RV32I-NEXT: mv t1, a7 -; 
RV32I-NEXT: mv t4, t0 -; RV32I-NEXT: mv t2, a6 -; RV32I-NEXT: j .LBB27_11 +; RV32I-NEXT: mv a1, t1 ; RV32I-NEXT: .LBB27_10: -; RV32I-NEXT: mv a2, a1 -; RV32I-NEXT: mv t1, a3 -; RV32I-NEXT: mv t4, a5 -; RV32I-NEXT: mv t2, a4 -; RV32I-NEXT: mv a1, t3 -; RV32I-NEXT: mv a3, a7 -; RV32I-NEXT: mv a5, t0 -; RV32I-NEXT: mv a4, a6 -; RV32I-NEXT: .LBB27_11: -; RV32I-NEXT: sltu a6, a4, t2 -; RV32I-NEXT: sub a7, a5, t4 -; RV32I-NEXT: sltu a5, a1, a2 -; RV32I-NEXT: sub a6, a7, a6 -; RV32I-NEXT: mv a7, a5 -; RV32I-NEXT: beq a3, t1, .LBB27_13 -; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: sltu a7, a3, t1 -; RV32I-NEXT: .LBB27_13: -; RV32I-NEXT: sub a4, a4, t2 -; RV32I-NEXT: sltu t0, a4, a7 -; RV32I-NEXT: sub a6, a6, t0 -; RV32I-NEXT: sub a4, a4, a7 -; RV32I-NEXT: sub a3, a3, t1 -; RV32I-NEXT: sub a3, a3, a5 -; RV32I-NEXT: sub a1, a1, a2 +; RV32I-NEXT: neg t0, a1 +; RV32I-NEXT: xor a2, a7, t0 +; RV32I-NEXT: sltu a4, a2, t0 +; RV32I-NEXT: xor a6, a6, t0 +; RV32I-NEXT: add a6, a6, a1 +; RV32I-NEXT: sub a4, a6, a4 +; RV32I-NEXT: xor a3, a3, t0 +; RV32I-NEXT: sltu a6, a3, t0 +; RV32I-NEXT: xor a7, a5, t0 +; RV32I-NEXT: mv t1, a6 +; RV32I-NEXT: beqz a5, .LBB27_12 +; RV32I-NEXT: # %bb.11: +; RV32I-NEXT: sltu t1, a7, t0 +; RV32I-NEXT: .LBB27_12: +; RV32I-NEXT: add a2, a2, a1 +; RV32I-NEXT: sltu a5, a2, t1 +; RV32I-NEXT: sub a4, a4, a5 +; RV32I-NEXT: sub a2, a2, t1 +; RV32I-NEXT: add a7, a7, a1 +; RV32I-NEXT: sub a5, a7, a6 +; RV32I-NEXT: add a1, a3, a1 ; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 8(a0) -; RV32I-NEXT: sw a6, 12(a0) +; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a2, 8(a0) +; RV32I-NEXT: sw a4, 12(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: abd_select_i128: ; RV64I: # %bb.0: -; RV64I-NEXT: beq a1, a3, .LBB27_3 +; RV64I-NEXT: sltu a4, a0, a2 +; RV64I-NEXT: sub a3, a1, a3 +; RV64I-NEXT: sub a3, a3, a4 +; RV64I-NEXT: sub a2, a0, a2 +; RV64I-NEXT: beq a3, a1, .LBB27_2 ; RV64I-NEXT: # %bb.1: -; RV64I-NEXT: sltu a4, a1, a3 -; RV64I-NEXT: beqz a4, .LBB27_4 +; 
RV64I-NEXT: sltu a0, a1, a3 +; RV64I-NEXT: j .LBB27_3 ; RV64I-NEXT: .LBB27_2: -; RV64I-NEXT: mv a4, a1 -; RV64I-NEXT: mv a5, a0 -; RV64I-NEXT: mv a1, a3 -; RV64I-NEXT: mv a0, a2 -; RV64I-NEXT: j .LBB27_5 +; RV64I-NEXT: sltu a0, a0, a2 ; RV64I-NEXT: .LBB27_3: -; RV64I-NEXT: sltu a4, a0, a2 -; RV64I-NEXT: bnez a4, .LBB27_2 -; RV64I-NEXT: .LBB27_4: -; RV64I-NEXT: mv a4, a3 -; RV64I-NEXT: mv a5, a2 -; RV64I-NEXT: .LBB27_5: -; RV64I-NEXT: sltu a2, a0, a5 +; RV64I-NEXT: neg a1, a0 +; RV64I-NEXT: xor a2, a2, a1 +; RV64I-NEXT: sltu a4, a2, a1 +; RV64I-NEXT: xor a1, a3, a1 +; RV64I-NEXT: add a1, a1, a0 ; RV64I-NEXT: sub a1, a1, a4 -; RV64I-NEXT: sub a1, a1, a2 -; RV64I-NEXT: sub a0, a0, a5 +; RV64I-NEXT: add a0, a2, a0 ; RV64I-NEXT: ret ; ; RV32ZBB-LABEL: abd_select_i128: @@ -2129,3 +2131,4 @@ declare i32 @llvm.umin.i32(i32, i32) declare i64 @llvm.umin.i64(i64, i64) ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: ; CHECK: {{.*}} +; NOZBB: {{.*}} diff --git a/llvm/test/CodeGen/X86/abds.ll b/llvm/test/CodeGen/X86/abds.ll index 4c524c28b160a..0356c2702a419 100644 --- a/llvm/test/CodeGen/X86/abds.ll +++ b/llvm/test/CodeGen/X86/abds.ll @@ -1161,24 +1161,23 @@ define i32 @abd_sub_i32(i32 %a, i32 %b) nounwind { define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_select_i8: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpb %cl, %al +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: cmovll %eax, %edx -; X86-NEXT: cmovll %ecx, %eax -; X86-NEXT: subb %dl, %al +; X86-NEXT: subl %eax, %edx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: cmovll %edx, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i8: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: cmpb %sil, %al -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: cmovll %edi, %ecx -; 
X64-NEXT: cmovll %esi, %eax -; X64-NEXT: subb %cl, %al +; X64-NEXT: movsbl %sil, %eax +; X64-NEXT: movsbl %dil, %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: subl %eax, %edx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: cmovll %edx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %cmp = icmp slt i8 %a, %b @@ -1191,24 +1190,23 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_select_i16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpw %cx, %ax +; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: cmovlel %eax, %edx -; X86-NEXT: cmovlel %ecx, %eax -; X86-NEXT: subl %edx, %eax +; X86-NEXT: subl %eax, %edx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: cmovll %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i16: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: cmpw %si, %ax -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: cmovlel %edi, %ecx -; X64-NEXT: cmovlel %esi, %eax +; X64-NEXT: movswl %si, %eax +; X64-NEXT: movswl %di, %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: subl %eax, %edx ; X64-NEXT: subl %ecx, %eax +; X64-NEXT: cmovll %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %cmp = icmp sle i16 %a, %b @@ -1221,22 +1219,20 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; X86-LABEL: abd_select_i32: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmpl %ecx, %edx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cmovgl %edx, %eax -; X86-NEXT: cmovgl %ecx, %edx -; X86-NEXT: subl %edx, %eax +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: subl %eax, %edx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: cmovll 
%edx, %eax ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i32: ; X64: # %bb.0: -; X64-NEXT: cmpl %esi, %edi -; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmovgl %edi, %eax -; X64-NEXT: cmovgl %esi, %edi -; X64-NEXT: subl %edi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: subl %edi, %esi +; X64-NEXT: cmovgel %esi, %eax ; X64-NEXT: retq %cmp = icmp sgt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -1251,21 +1247,18 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpl %esi, %ebx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl %edi, %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: cmovgel %ecx, %edx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: cmovgel %ebx, %eax -; X86-NEXT: cmovgel %edi, %ecx -; X86-NEXT: cmovgel %esi, %ebx -; X86-NEXT: subl %ebx, %eax -; X86-NEXT: sbbl %ecx, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: movl %ecx, %edi +; X86-NEXT: subl %eax, %edi +; X86-NEXT: movl %esi, %ebx +; X86-NEXT: sbbl %edx, %ebx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: sbbl %esi, %edx +; X86-NEXT: cmovll %edi, %eax +; X86-NEXT: cmovll %ebx, %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -1273,11 +1266,10 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; ; X64-LABEL: abd_select_i64: ; X64: # %bb.0: -; X64-NEXT: cmpq %rsi, %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: cmovgeq %rdi, %rax -; X64-NEXT: cmovgeq %rsi, %rdi -; X64-NEXT: subq %rdi, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rsi, %rax +; X64-NEXT: subq %rdi, %rsi +; X64-NEXT: cmovgeq %rsi, %rax ; X64-NEXT: retq %cmp = icmp sge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b @@ -1294,42 +1286,32 @@ define 
i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl %ebp, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: sbbl %ebx, %eax -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: cmovll %edi, %eax +; X86-NEXT: subl %edx, %eax ; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: cmovll %ebx, %edi -; X86-NEXT: movl %ebp, %ebx -; X86-NEXT: cmovll %ecx, %ebx -; X86-NEXT: cmovll %ebp, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: cmovll %esi, %ebp -; X86-NEXT: cmovll %eax, %esi +; X86-NEXT: sbbl %esi, %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: sbbl %ecx, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmovll %edx, %eax -; X86-NEXT: cmovll {{[0-9]+}}(%esp), %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: sbbl %ebp, %esi -; X86-NEXT: sbbl %ebx, %ecx -; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload +; X86-NEXT: sbbl %edi, %eax +; X86-NEXT: subl {{[0-9]+}}(%esp), %edx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edi +; X86-NEXT: cmovll (%esp), %edx # 4-byte Folded Reload +; X86-NEXT: cmovll %ebx, %esi +; X86-NEXT: cmovll %ebp, %ecx +; X86-NEXT: cmovll %eax, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edx, (%eax) -; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %ecx, 8(%eax) ; X86-NEXT: movl %edi, 12(%eax) +; X86-NEXT: movl %ecx, 8(%eax) +; X86-NEXT: movl %esi, 4(%eax) +; X86-NEXT: 
movl %edx, (%eax) ; X86-NEXT: addl $4, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi @@ -1340,18 +1322,14 @@ define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { ; X64-LABEL: abd_select_i128: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: cmpq %rdx, %rdi -; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: sbbq %rcx, %rdi -; X64-NEXT: movq %rcx, %rdi -; X64-NEXT: cmovlq %rsi, %rdi -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: cmovlq %rax, %r8 -; X64-NEXT: cmovlq %rcx, %rsi -; X64-NEXT: cmovlq %rdx, %rax -; X64-NEXT: subq %r8, %rax -; X64-NEXT: sbbq %rdi, %rsi -; X64-NEXT: movq %rsi, %rdx +; X64-NEXT: subq %rdx, %rax +; X64-NEXT: movq %rsi, %r8 +; X64-NEXT: sbbq %rcx, %r8 +; X64-NEXT: subq %rdi, %rdx +; X64-NEXT: sbbq %rsi, %rcx +; X64-NEXT: cmovgeq %rdx, %rax +; X64-NEXT: cmovgeq %rcx, %r8 +; X64-NEXT: movq %r8, %rdx ; X64-NEXT: retq %cmp = icmp slt i128 %a, %b %ab = select i1 %cmp, i128 %a, i128 %b diff --git a/llvm/test/CodeGen/X86/abdu.ll b/llvm/test/CodeGen/X86/abdu.ll index fe9006a8aec23..27acec32fd348 100644 --- a/llvm/test/CodeGen/X86/abdu.ll +++ b/llvm/test/CodeGen/X86/abdu.ll @@ -775,24 +775,23 @@ define i128 @abd_cmp_i128(i128 %a, i128 %b) nounwind { define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { ; X86-LABEL: abd_select_i8: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpb %cl, %al +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: cmovbl %eax, %edx -; X86-NEXT: cmovbl %ecx, %eax -; X86-NEXT: subb %dl, %al +; X86-NEXT: subl %eax, %edx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: # kill: def $al killed $al killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i8: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: cmpb %sil, %al -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: cmovbl %edi, %ecx -; X64-NEXT: cmovbl %esi, %eax -; X64-NEXT: subb %cl, %al +; X64-NEXT: movzbl %sil, %eax +; 
X64-NEXT: movzbl %dil, %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: subl %eax, %edx +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: cmovbl %edx, %eax ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %cmp = icmp ult i8 %a, %b @@ -805,24 +804,23 @@ define i8 @abd_select_i8(i8 %a, i8 %b) nounwind { define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { ; X86-LABEL: abd_select_i16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmpw %cx, %ax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: cmovbel %eax, %edx -; X86-NEXT: cmovbel %ecx, %eax -; X86-NEXT: subl %edx, %eax +; X86-NEXT: subl %eax, %edx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i16: ; X64: # %bb.0: -; X64-NEXT: movl %edi, %eax -; X64-NEXT: cmpw %si, %ax -; X64-NEXT: movl %esi, %ecx -; X64-NEXT: cmovbel %edi, %ecx -; X64-NEXT: cmovbel %esi, %eax +; X64-NEXT: movzwl %si, %eax +; X64-NEXT: movzwl %di, %ecx +; X64-NEXT: movl %ecx, %edx +; X64-NEXT: subl %eax, %edx ; X64-NEXT: subl %ecx, %eax +; X64-NEXT: cmovbl %edx, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %cmp = icmp ule i16 %a, %b @@ -835,22 +833,20 @@ define i16 @abd_select_i16(i16 %a, i16 %b) nounwind { define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { ; X86-LABEL: abd_select_i32: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: cmpl %ecx, %edx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: cmoval %edx, %eax -; X86-NEXT: cmoval %ecx, %edx -; X86-NEXT: subl %edx, %eax +; X86-NEXT: movl %ecx, %edx +; X86-NEXT: subl %eax, %edx +; X86-NEXT: subl %ecx, %eax +; X86-NEXT: cmovbl %edx, %eax ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i32: ; X64: # %bb.0: -; X64-NEXT: cmpl 
%esi, %edi -; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmoval %edi, %eax -; X64-NEXT: cmoval %esi, %edi -; X64-NEXT: subl %edi, %eax +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: subl %edi, %esi +; X64-NEXT: cmovael %esi, %eax ; X64-NEXT: retq %cmp = icmp ugt i32 %a, %b %ab = select i1 %cmp, i32 %a, i32 %b @@ -862,36 +858,24 @@ define i32 @abd_select_i32(i32 %a, i32 %b) nounwind { define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { ; X86-LABEL: abd_select_i64: ; X86: # %bb.0: -; X86-NEXT: pushl %ebx -; X86-NEXT: pushl %edi -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpl %esi, %ebx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl %edi, %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: cmovael %ecx, %edx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: cmovael %ebx, %eax -; X86-NEXT: cmovael %edi, %ecx -; X86-NEXT: cmovael %esi, %ebx -; X86-NEXT: subl %ebx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: xorl %ecx, %ecx +; X86-NEXT: subl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: sbbl %ecx, %ecx +; X86-NEXT: xorl %ecx, %edx +; X86-NEXT: xorl %ecx, %eax +; X86-NEXT: subl %ecx, %eax ; X86-NEXT: sbbl %ecx, %edx -; X86-NEXT: popl %esi -; X86-NEXT: popl %edi -; X86-NEXT: popl %ebx ; X86-NEXT: retl ; ; X64-LABEL: abd_select_i64: ; X64: # %bb.0: -; X64-NEXT: cmpq %rsi, %rdi -; X64-NEXT: movq %rsi, %rax -; X64-NEXT: cmovaeq %rdi, %rax -; X64-NEXT: cmovaeq %rsi, %rdi -; X64-NEXT: subq %rdi, %rax +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rsi, %rax +; X64-NEXT: subq %rdi, %rsi +; X64-NEXT: cmovaeq %rsi, %rax ; X64-NEXT: retq %cmp = icmp uge i64 %a, %b %ab = select i1 %cmp, i64 %a, i64 %b @@ -903,67 +887,47 @@ define i64 @abd_select_i64(i64 %a, i64 %b) nounwind { define i128 @abd_select_i128(i128 %a, i128 %b) nounwind { 
; X86-LABEL: abd_select_i128: ; X86: # %bb.0: -; X86-NEXT: pushl %ebp ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl %esi, %eax -; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: sbbl %ebp, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: movl %edi, %eax -; X86-NEXT: sbbl %ebx, %eax -; X86-NEXT: movl %ebx, %eax -; X86-NEXT: cmovbl %edi, %eax -; X86-NEXT: movl %eax, (%esp) # 4-byte Spill -; X86-NEXT: cmovbl %ebx, %edi -; X86-NEXT: movl %ebp, %ebx -; X86-NEXT: cmovbl %ecx, %ebx -; X86-NEXT: cmovbl %ebp, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ebp -; X86-NEXT: cmovbl %esi, %ebp -; X86-NEXT: cmovbl %eax, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: cmovbl %edx, %eax -; X86-NEXT: cmovbl {{[0-9]+}}(%esp), %edx -; X86-NEXT: subl %eax, %edx -; X86-NEXT: sbbl %ebp, %esi +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: subl {{[0-9]+}}(%esp), %edi +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %esi +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %edx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: sbbl %ebx, %ebx +; X86-NEXT: xorl %ebx, %ecx +; X86-NEXT: xorl %ebx, %edx +; X86-NEXT: xorl %ebx, %esi +; X86-NEXT: xorl %ebx, %edi +; X86-NEXT: subl %ebx, %edi +; X86-NEXT: sbbl %ebx, %esi +; X86-NEXT: sbbl %ebx, %edx ; X86-NEXT: sbbl %ebx, %ecx -; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edx, (%eax) +; X86-NEXT: movl %edi, (%eax) ; X86-NEXT: movl %esi, 4(%eax) -; X86-NEXT: movl %ecx, 8(%eax) -; X86-NEXT: movl %edi, 12(%eax) -; X86-NEXT: addl $4, %esp +; X86-NEXT: movl 
%edx, 8(%eax) +; X86-NEXT: movl %ecx, 12(%eax) ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx -; X86-NEXT: popl %ebp ; X86-NEXT: retl $4 ; ; X64-LABEL: abd_select_i128: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: cmpq %rdx, %rdi -; X64-NEXT: movq %rsi, %rdi -; X64-NEXT: sbbq %rcx, %rdi -; X64-NEXT: movq %rcx, %rdi -; X64-NEXT: cmovbq %rsi, %rdi -; X64-NEXT: movq %rdx, %r8 -; X64-NEXT: cmovbq %rax, %r8 -; X64-NEXT: cmovbq %rcx, %rsi -; X64-NEXT: cmovbq %rdx, %rax -; X64-NEXT: subq %r8, %rax +; X64-NEXT: xorl %edi, %edi +; X64-NEXT: subq %rdx, %rax +; X64-NEXT: sbbq %rcx, %rsi +; X64-NEXT: sbbq %rdi, %rdi +; X64-NEXT: xorq %rdi, %rsi +; X64-NEXT: xorq %rdi, %rax +; X64-NEXT: subq %rdi, %rax ; X64-NEXT: sbbq %rdi, %rsi ; X64-NEXT: movq %rsi, %rdx ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/midpoint-int.ll b/llvm/test/CodeGen/X86/midpoint-int.ll index 601166d67f6f2..5ffee3fa6bda4 100644 --- a/llvm/test/CodeGen/X86/midpoint-int.ll +++ b/llvm/test/CodeGen/X86/midpoint-int.ll @@ -14,37 +14,34 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { ; X64-LABEL: scalar_i32_signed_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %esi, %edi -; X64-NEXT: setle %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: cmovgl %esi, %ecx -; X64-NEXT: cmovgl %edi, %esi -; X64-NEXT: subl %ecx, %esi -; X64-NEXT: shrl %esi -; X64-NEXT: imull %esi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: setle %cl +; X64-NEXT: leal -1(%rcx,%rcx), %ecx +; X64-NEXT: subl %edi, %esi +; X64-NEXT: cmovgel %esi, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: imull %ecx, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i32_signed_reg_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: 
setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB0_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB0_3 -; X86-NEXT: .LBB0_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %ecx +; X86-NEXT: setle %al +; X86-NEXT: leal -1(%eax,%eax), %edx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB0_3: ; X86-NEXT: subl %esi, %eax +; X86-NEXT: jg .LBB0_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: subl %ecx, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: .LBB0_2: ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax @@ -64,41 +61,41 @@ define i32 @scalar_i32_signed_reg_reg(i32 %a1, i32 %a2) nounwind { define i32 @scalar_i32_unsigned_reg_reg(i32 %a1, i32 %a2) nounwind { ; X64-LABEL: scalar_i32_unsigned_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %esi, %edi -; X64-NEXT: setbe %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: cmoval %esi, %ecx -; X64-NEXT: cmoval %edi, %esi -; X64-NEXT: subl %ecx, %esi -; X64-NEXT: shrl %esi -; X64-NEXT: imull %esi, %eax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: cmpl %edi, %esi +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: movl %edi, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: subl %edi, %esi +; X64-NEXT: cmovael %esi, %eax +; X64-NEXT: orl $1, %ecx +; X64-NEXT: shrl %eax +; X64-NEXT: imull %ecx, %eax ; X64-NEXT: addl %edi, %eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i32_unsigned_reg_reg: ; X86: # %bb.0: +; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edi ; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setbe %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: ja .LBB1_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB1_3 -; X86-NEXT: .LBB1_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl %edi, 
%esi +; X86-NEXT: subl %ecx, %esi +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: orl $1, %edx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB1_3: -; X86-NEXT: subl %esi, %eax +; X86-NEXT: subl %edi, %eax +; X86-NEXT: ja .LBB1_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %esi, %eax +; X86-NEXT: .LBB1_2: ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi +; X86-NEXT: popl %edi ; X86-NEXT: retl %t3 = icmp ugt i32 %a1, %a2 %t4 = select i1 %t3, i32 -1, i32 1 @@ -117,40 +114,37 @@ define i32 @scalar_i32_signed_mem_reg(ptr %a1_addr, i32 %a2) nounwind { ; X64-LABEL: scalar_i32_signed_mem_reg: ; X64: # %bb.0: ; X64-NEXT: movl (%rdi), %ecx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpl %esi, %ecx -; X64-NEXT: setle %al -; X64-NEXT: leal -1(%rax,%rax), %eax -; X64-NEXT: movl %ecx, %edx -; X64-NEXT: cmovgl %esi, %edx -; X64-NEXT: cmovgl %ecx, %esi -; X64-NEXT: subl %edx, %esi -; X64-NEXT: shrl %esi -; X64-NEXT: imull %esi, %eax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: subl %esi, %eax +; X64-NEXT: setle %dl +; X64-NEXT: leal -1(%rdx,%rdx), %edx +; X64-NEXT: subl %ecx, %esi +; X64-NEXT: cmovgel %esi, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: imull %edx, %eax ; X64-NEXT: addl %ecx, %eax ; X64-NEXT: retq ; ; X86-LABEL: scalar_i32_signed_mem_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB2_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB2_3 -; X86-NEXT: .LBB2_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl (%eax), %ecx +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %edx, %ecx +; X86-NEXT: setle %al +; X86-NEXT: leal -1(%eax,%eax), %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB2_3: -; X86-NEXT: subl 
%esi, %eax +; X86-NEXT: subl %edx, %eax +; X86-NEXT: jg .LBB2_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: subl %ecx, %edx +; X86-NEXT: movl %edx, %eax +; X86-NEXT: .LBB2_2: ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: imull %esi, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: popl %esi ; X86-NEXT: retl @@ -171,13 +165,12 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind { ; X64: # %bb.0: ; X64-NEXT: movl (%rsi), %eax ; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpl %eax, %edi +; X64-NEXT: movl %edi, %edx +; X64-NEXT: subl %eax, %edx ; X64-NEXT: setle %cl ; X64-NEXT: leal -1(%rcx,%rcx), %ecx -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmovgl %eax, %edx -; X64-NEXT: cmovgl %edi, %eax -; X64-NEXT: subl %edx, %eax +; X64-NEXT: subl %edi, %eax +; X64-NEXT: cmovll %edx, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: imull %ecx, %eax ; X64-NEXT: addl %edi, %eax @@ -188,20 +181,18 @@ define i32 @scalar_i32_signed_reg_mem(i32 %a1, ptr %a2_addr) nounwind { ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB3_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB3_3 -; X86-NEXT: .LBB3_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl (%eax), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %ecx +; X86-NEXT: setle %al +; X86-NEXT: leal -1(%eax,%eax), %edx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB3_3: ; X86-NEXT: subl %esi, %eax +; X86-NEXT: jg .LBB3_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: subl %ecx, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: .LBB3_2: ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax @@ -225,13 +216,12 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X64-NEXT: movl (%rdi), %ecx ; X64-NEXT: movl (%rsi), %eax ; X64-NEXT: xorl 
%edx, %edx -; X64-NEXT: cmpl %eax, %ecx +; X64-NEXT: movl %ecx, %esi +; X64-NEXT: subl %eax, %esi ; X64-NEXT: setle %dl ; X64-NEXT: leal -1(%rdx,%rdx), %edx -; X64-NEXT: movl %ecx, %esi -; X64-NEXT: cmovgl %eax, %esi -; X64-NEXT: cmovgl %ecx, %eax -; X64-NEXT: subl %esi, %eax +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: cmovll %esi, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: imull %edx, %eax ; X64-NEXT: addl %ecx, %eax @@ -243,20 +233,18 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl (%ecx), %ecx -; X86-NEXT: movl (%eax), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpl %eax, %ecx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB4_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB4_3 -; X86-NEXT: .LBB4_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movl (%eax), %esi +; X86-NEXT: xorl %eax, %eax +; X86-NEXT: cmpl %esi, %ecx +; X86-NEXT: setle %al +; X86-NEXT: leal -1(%eax,%eax), %edx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB4_3: ; X86-NEXT: subl %esi, %eax +; X86-NEXT: jg .LBB4_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: subl %ecx, %esi +; X86-NEXT: movl %esi, %eax +; X86-NEXT: .LBB4_2: ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax @@ -284,16 +272,15 @@ define i32 @scalar_i32_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; X64-LABEL: scalar_i64_signed_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rsi, %rdi -; X64-NEXT: setle %al -; X64-NEXT: leaq -1(%rax,%rax), %rax -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: cmovgq %rsi, %rcx -; X64-NEXT: cmovgq %rdi, %rsi -; X64-NEXT: subq %rcx, %rsi -; X64-NEXT: shrq %rsi -; X64-NEXT: imulq %rsi, %rax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rsi, %rax +; X64-NEXT: setle %cl +; X64-NEXT: 
leaq -1(%rcx,%rcx), %rcx +; X64-NEXT: subq %rdi, %rsi +; X64-NEXT: cmovgeq %rsi, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: imulq %rcx, %rax ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: retq ; @@ -303,41 +290,37 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx -; X86-NEXT: jl .LBB5_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jmp .LBB5_3 -; X86-NEXT: .LBB5_1: -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB5_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx -; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setl %al +; X86-NEXT: movzbl %al, %edi +; X86-NEXT: negl %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: orl $1, %ebx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: subl %edx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: sbbl %ecx, %ebp +; X86-NEXT: subl %esi, %edx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: jl .LBB5_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: .LBB5_2: +; X86-NEXT: shrdl $1, %ebp, %eax +; X86-NEXT: shrl %ebp +; X86-NEXT: 
imull %eax, %edi +; X86-NEXT: mull %ebx ; X86-NEXT: addl %edi, %edx +; X86-NEXT: imull %ebx, %ebp +; X86-NEXT: addl %ebp, %edx ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -357,16 +340,16 @@ define i64 @scalar_i64_signed_reg_reg(i64 %a1, i64 %a2) nounwind { define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; X64-LABEL: scalar_i64_unsigned_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rsi, %rdi -; X64-NEXT: setbe %al -; X64-NEXT: leaq -1(%rax,%rax), %rax -; X64-NEXT: movq %rdi, %rcx -; X64-NEXT: cmovaq %rsi, %rcx -; X64-NEXT: cmovaq %rdi, %rsi -; X64-NEXT: subq %rcx, %rsi -; X64-NEXT: shrq %rsi -; X64-NEXT: imulq %rsi, %rax +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: cmpq %rdi, %rsi +; X64-NEXT: sbbq %rcx, %rcx +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: subq %rsi, %rax +; X64-NEXT: subq %rdi, %rsi +; X64-NEXT: cmovaeq %rsi, %rax +; X64-NEXT: orq $1, %rcx +; X64-NEXT: shrq %rax +; X64-NEXT: imulq %rcx, %rax ; X64-NEXT: addq %rdi, %rax ; X64-NEXT: retq ; @@ -377,39 +360,34 @@ define i64 @scalar_i64_unsigned_reg_reg(i64 %a1, i64 %a2) nounwind { ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %esi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edi -; X86-NEXT: xorl %ebx, %ebx -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setb %dl +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: cmpl %eax, %ebp +; X86-NEXT: sbbl %edi, %esi +; X86-NEXT: movl %edi, %ecx +; X86-NEXT: movl $0, %ebx ; X86-NEXT: sbbl %ebx, %ebx -; X86-NEXT: testb %dl, %dl -; X86-NEXT: jne .LBB6_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx -; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jmp .LBB6_3 -; X86-NEXT: .LBB6_1: -; X86-NEXT: 
movl %edi, %edx +; X86-NEXT: movl %ebx, %edi +; X86-NEXT: orl $1, %edi ; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB6_3: -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: shrdl $1, %edi, %eax +; X86-NEXT: subl %ebp, %esi +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: sbbl %edx, %edx +; X86-NEXT: xorl %edx, %eax +; X86-NEXT: xorl %edx, %esi +; X86-NEXT: subl %edx, %esi +; X86-NEXT: sbbl %edx, %eax +; X86-NEXT: movl %eax, %ebp +; X86-NEXT: shldl $31, %esi, %eax ; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp +; X86-NEXT: mull %edi ; X86-NEXT: addl %ebx, %edx -; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi -; X86-NEXT: addl %edi, %edx +; X86-NEXT: shrl %ebp +; X86-NEXT: imull %edi, %ebp +; X86-NEXT: addl %ebp, %edx ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax ; X86-NEXT: adcl %ecx, %edx ; X86-NEXT: popl %esi @@ -434,16 +412,15 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind { ; X64-LABEL: scalar_i64_signed_mem_reg: ; X64: # %bb.0: ; X64-NEXT: movq (%rdi), %rcx -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpq %rsi, %rcx -; X64-NEXT: setle %al -; X64-NEXT: leaq -1(%rax,%rax), %rax -; X64-NEXT: movq %rcx, %rdx -; X64-NEXT: cmovgq %rsi, %rdx -; X64-NEXT: cmovgq %rcx, %rsi -; X64-NEXT: subq %rdx, %rsi -; X64-NEXT: shrq %rsi -; X64-NEXT: imulq %rsi, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: movq %rcx, %rax +; X64-NEXT: subq %rsi, %rax +; X64-NEXT: setle %dl +; X64-NEXT: leaq -1(%rdx,%rdx), %rdx +; X64-NEXT: subq %rcx, %rsi +; X64-NEXT: cmovgeq %rsi, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: imulq %rdx, %rax ; X64-NEXT: addq %rcx, %rax ; X64-NEXT: retq ; @@ -453,43 +430,45 @@ define i64 @scalar_i64_signed_mem_reg(ptr %a1_addr, i64 %a2) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax 
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edi +; X86-NEXT: subl $12, %esp +; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl (%ecx), %esi -; X86-NEXT: movl 4(%ecx), %ecx -; X86-NEXT: cmpl %esi, %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ecx, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx -; X86-NEXT: jl .LBB7_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: movl %esi, %edx -; X86-NEXT: jmp .LBB7_3 -; X86-NEXT: .LBB7_1: -; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: movl %eax, %edx -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: .LBB7_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl (%eax), %ebx +; X86-NEXT: movl 4(%eax), %esi +; X86-NEXT: cmpl %ebx, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sbbl %esi, %eax +; X86-NEXT: setl %al +; X86-NEXT: movzbl %al, %edi +; X86-NEXT: negl %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: orl $1, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: subl %edx, %eax -; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx -; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: sbbl %ecx, %ebp +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl %ebx, %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: jl .LBB7_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: .LBB7_2: +; X86-NEXT: shrdl $1, %ebp, %eax +; X86-NEXT: shrl %ebp +; X86-NEXT: imull %eax, %edi +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: mull %ecx ; X86-NEXT: addl %edi, %edx -; 
X86-NEXT: addl %esi, %eax -; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl $4, %esp +; X86-NEXT: imull %ecx, %ebp +; X86-NEXT: addl %ebp, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: addl $12, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -512,13 +491,12 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind { ; X64: # %bb.0: ; X64-NEXT: movq (%rsi), %rax ; X64-NEXT: xorl %ecx, %ecx -; X64-NEXT: cmpq %rax, %rdi +; X64-NEXT: movq %rdi, %rdx +; X64-NEXT: subq %rax, %rdx ; X64-NEXT: setle %cl ; X64-NEXT: leaq -1(%rcx,%rcx), %rcx -; X64-NEXT: movq %rdi, %rdx -; X64-NEXT: cmovgq %rax, %rdx -; X64-NEXT: cmovgq %rdi, %rax -; X64-NEXT: subq %rdx, %rax +; X64-NEXT: subq %rdi, %rax +; X64-NEXT: cmovlq %rdx, %rax ; X64-NEXT: shrq %rax ; X64-NEXT: imulq %rcx, %rax ; X64-NEXT: addq %rdi, %rax @@ -530,42 +508,38 @@ define i64 @scalar_i64_signed_reg_mem(i64 %a1, ptr %a2_addr) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: movl (%edx), %eax -; X86-NEXT: movl 4(%edx), %edi -; X86-NEXT: cmpl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ebp, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx -; X86-NEXT: jl .LBB8_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jmp .LBB8_3 -; X86-NEXT: .LBB8_1: -; X86-NEXT: movl %edi, %edx -; X86-NEXT: movl %eax, %esi -; X86-NEXT: movl %ebp, %ecx -; X86-NEXT: movl %ebp, %edi ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB8_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp -; X86-NEXT: subl %esi, %eax -; X86-NEXT: sbbl %edx, %edi -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull 
%ebp -; X86-NEXT: addl %ebx, %edx -; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi +; X86-NEXT: movl (%eax), %edx +; X86-NEXT: movl 4(%eax), %ecx +; X86-NEXT: cmpl %esi, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: setl %al +; X86-NEXT: movzbl %al, %edi +; X86-NEXT: negl %edi +; X86-NEXT: movl %edi, %ebx +; X86-NEXT: orl $1, %ebx +; X86-NEXT: movl %esi, %eax +; X86-NEXT: subl %edx, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp +; X86-NEXT: sbbl %ecx, %ebp +; X86-NEXT: subl %esi, %edx +; X86-NEXT: sbbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: jl .LBB8_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: .LBB8_2: +; X86-NEXT: shrdl $1, %ebp, %eax +; X86-NEXT: shrl %ebp +; X86-NEXT: imull %eax, %edi +; X86-NEXT: mull %ebx ; X86-NEXT: addl %edi, %edx +; X86-NEXT: imull %ebx, %ebp +; X86-NEXT: addl %ebp, %edx ; X86-NEXT: addl {{[0-9]+}}(%esp), %eax -; X86-NEXT: adcl %ecx, %edx +; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -589,13 +563,12 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X64-NEXT: movq (%rdi), %rcx ; X64-NEXT: movq (%rsi), %rax ; X64-NEXT: xorl %edx, %edx -; X64-NEXT: cmpq %rax, %rcx +; X64-NEXT: movq %rcx, %rsi +; X64-NEXT: subq %rax, %rsi ; X64-NEXT: setle %dl ; X64-NEXT: leaq -1(%rdx,%rdx), %rdx -; X64-NEXT: movq %rcx, %rsi -; X64-NEXT: cmovgq %rax, %rsi -; X64-NEXT: cmovgq %rcx, %rax -; X64-NEXT: subq %rsi, %rax +; X64-NEXT: subq %rcx, %rax +; X64-NEXT: cmovlq %rsi, %rax ; X64-NEXT: shrq %rax ; X64-NEXT: imulq %rdx, %rax ; X64-NEXT: addq %rcx, %rax @@ -607,44 +580,46 @@ define i64 @scalar_i64_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X86-NEXT: pushl %ebx ; X86-NEXT: pushl %edi ; X86-NEXT: pushl %esi -; X86-NEXT: pushl %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: subl $12, %esp ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl (%eax), %esi 
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl (%ecx), %ebx +; X86-NEXT: movl 4(%ecx), %esi +; X86-NEXT: movl (%eax), %edx ; X86-NEXT: movl 4(%eax), %ecx -; X86-NEXT: movl (%edx), %eax -; X86-NEXT: movl 4(%edx), %edi -; X86-NEXT: cmpl %esi, %eax -; X86-NEXT: movl %edi, %edx -; X86-NEXT: sbbl %ecx, %edx -; X86-NEXT: setl %dl -; X86-NEXT: movzbl %dl, %ebx -; X86-NEXT: jl .LBB9_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, (%esp) # 4-byte Spill -; X86-NEXT: movl %esi, %edx -; X86-NEXT: jmp .LBB9_3 -; X86-NEXT: .LBB9_1: -; X86-NEXT: movl %edi, (%esp) # 4-byte Spill -; X86-NEXT: movl %eax, %edx -; X86-NEXT: movl %ecx, %edi -; X86-NEXT: movl %esi, %eax -; X86-NEXT: .LBB9_3: -; X86-NEXT: negl %ebx -; X86-NEXT: movl %ebx, %ebp -; X86-NEXT: orl $1, %ebp +; X86-NEXT: cmpl %ebx, %edx +; X86-NEXT: movl %ecx, %eax +; X86-NEXT: sbbl %esi, %eax +; X86-NEXT: setl %al +; X86-NEXT: movzbl %al, %edi +; X86-NEXT: negl %edi +; X86-NEXT: movl %edi, %eax +; X86-NEXT: orl $1, %eax +; X86-NEXT: movl %eax, (%esp) # 4-byte Spill +; X86-NEXT: movl %ebx, %eax ; X86-NEXT: subl %edx, %eax -; X86-NEXT: sbbl (%esp), %edi # 4-byte Folded Reload -; X86-NEXT: shrdl $1, %edi, %eax -; X86-NEXT: imull %eax, %ebx -; X86-NEXT: mull %ebp -; X86-NEXT: addl %ebx, %edx -; X86-NEXT: shrl %edi -; X86-NEXT: imull %ebp, %edi +; X86-NEXT: movl %esi, %ebp +; X86-NEXT: sbbl %ecx, %ebp +; X86-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: subl %ebx, %edx +; X86-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; X86-NEXT: sbbl %esi, %ecx +; X86-NEXT: jl .LBB9_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movl %edx, %eax +; X86-NEXT: movl %ecx, %ebp +; X86-NEXT: .LBB9_2: +; X86-NEXT: shrdl $1, %ebp, %eax +; X86-NEXT: shrl %ebp +; X86-NEXT: imull %eax, %edi +; X86-NEXT: movl (%esp), %ecx # 4-byte Reload +; X86-NEXT: mull %ecx ; X86-NEXT: addl %edi, %edx -; X86-NEXT: addl %esi, %eax -; X86-NEXT: adcl %ecx, %edx -; X86-NEXT: addl $4, %esp +; X86-NEXT: imull %ecx, %ebp +; X86-NEXT: addl 
%ebp, %edx +; X86-NEXT: addl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; X86-NEXT: adcl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; X86-NEXT: addl $12, %esp ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: popl %ebx @@ -677,9 +652,11 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { ; X64-NEXT: setle %al ; X64-NEXT: leal -1(%rax,%rax), %ecx ; X64-NEXT: movl %edi, %eax -; X64-NEXT: cmovgl %esi, %eax -; X64-NEXT: cmovgl %edi, %esi -; X64-NEXT: subl %eax, %esi +; X64-NEXT: subl %esi, %eax +; X64-NEXT: movswl %di, %edx +; X64-NEXT: movswl %si, %esi +; X64-NEXT: subl %edx, %esi +; X64-NEXT: cmovll %eax, %esi ; X64-NEXT: movzwl %si, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: imull %ecx, %eax @@ -689,28 +666,25 @@ define i16 @scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { ; ; X86-LABEL: scalar_i16_signed_reg_reg: ; X86: # %bb.0: -; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: pushl %ebx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB10_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB10_3 -; X86-NEXT: .LBB10_1: -; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB10_3: -; X86-NEXT: subl %esi, %eax +; X86-NEXT: subw %dx, %ax +; X86-NEXT: jg .LBB10_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB10_2: +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: setle %bl +; X86-NEXT: leal -1(%ebx,%ebx), %edx ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %t3 = icmp sgt i16 %a1, %a2 ; signed %t4 = select i1 %t3, i16 -1, i16 1 @@ -726,14 +700,16 @@ define i16 
@scalar_i16_signed_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; X64-LABEL: scalar_i16_unsigned_reg_reg: ; X64: # %bb.0: -; X64-NEXT: xorl %eax, %eax -; X64-NEXT: cmpw %si, %di -; X64-NEXT: setbe %al -; X64-NEXT: leal -1(%rax,%rax), %ecx +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: cmpw %di, %si +; X64-NEXT: sbbl %ecx, %ecx +; X64-NEXT: orl $1, %ecx ; X64-NEXT: movl %edi, %eax -; X64-NEXT: cmoval %esi, %eax -; X64-NEXT: cmoval %edi, %esi -; X64-NEXT: subl %eax, %esi +; X64-NEXT: subl %esi, %eax +; X64-NEXT: movzwl %di, %edx +; X64-NEXT: movzwl %si, %esi +; X64-NEXT: subl %edx, %esi +; X64-NEXT: cmovbl %eax, %esi ; X64-NEXT: movzwl %si, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: imull %ecx, %eax @@ -744,24 +720,21 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { ; X86-LABEL: scalar_i16_unsigned_reg_reg: ; X86: # %bb.0: ; X86-NEXT: pushl %esi -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setbe %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: ja .LBB11_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB11_3 -; X86-NEXT: .LBB11_1: -; X86-NEXT: movl %eax, %esi ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB11_3: -; X86-NEXT: subl %esi, %eax +; X86-NEXT: subw %dx, %ax +; X86-NEXT: ja .LBB11_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB11_2: +; X86-NEXT: xorl %esi, %esi +; X86-NEXT: cmpw %cx, %dx +; X86-NEXT: sbbl %esi, %esi +; X86-NEXT: orl $1, %esi ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: shrl %eax -; X86-NEXT: imull %edx, %eax +; X86-NEXT: imull %esi, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: popl %esi @@ -782,15 +755,16 @@ define i16 @scalar_i16_unsigned_reg_reg(i16 %a1, i16 %a2) nounwind { define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 
%a2) nounwind { ; X64-LABEL: scalar_i16_signed_mem_reg: ; X64: # %bb.0: -; X64-NEXT: movzwl (%rdi), %ecx +; X64-NEXT: movswl (%rdi), %ecx ; X64-NEXT: xorl %eax, %eax ; X64-NEXT: cmpw %si, %cx ; X64-NEXT: setle %al ; X64-NEXT: leal -1(%rax,%rax), %edx ; X64-NEXT: movl %ecx, %eax -; X64-NEXT: cmovgl %esi, %eax -; X64-NEXT: cmovgl %ecx, %esi -; X64-NEXT: subl %eax, %esi +; X64-NEXT: subl %esi, %eax +; X64-NEXT: movswl %si, %esi +; X64-NEXT: subl %ecx, %esi +; X64-NEXT: cmovll %eax, %esi ; X64-NEXT: movzwl %si, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: imull %edx, %eax @@ -800,29 +774,26 @@ define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind { ; ; X86-LABEL: scalar_i16_signed_mem_reg: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ebx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB12_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB12_3 -; X86-NEXT: .LBB12_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movzwl (%eax), %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB12_3: -; X86-NEXT: subl %esi, %eax +; X86-NEXT: subw %dx, %ax +; X86-NEXT: jg .LBB12_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB12_2: +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: setle %bl +; X86-NEXT: leal -1(%ebx,%ebx), %edx ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %a1 = load i16, ptr %a1_addr %t3 = icmp sgt i16 %a1, %a2 ; signed @@ -839,15 +810,16 @@ define i16 @scalar_i16_signed_mem_reg(ptr %a1_addr, i16 %a2) nounwind { define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind 
{ ; X64-LABEL: scalar_i16_signed_reg_mem: ; X64: # %bb.0: -; X64-NEXT: movzwl (%rsi), %eax +; X64-NEXT: movswl (%rsi), %eax ; X64-NEXT: xorl %ecx, %ecx ; X64-NEXT: cmpw %ax, %di ; X64-NEXT: setle %cl ; X64-NEXT: leal -1(%rcx,%rcx), %ecx ; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmovgl %eax, %edx -; X64-NEXT: cmovgl %edi, %eax -; X64-NEXT: subl %edx, %eax +; X64-NEXT: subl %eax, %edx +; X64-NEXT: movswl %di, %esi +; X64-NEXT: subl %esi, %eax +; X64-NEXT: cmovll %edx, %eax ; X64-NEXT: movzwl %ax, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: imull %ecx, %eax @@ -857,29 +829,26 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind { ; ; X86-LABEL: scalar_i16_signed_reg_mem: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB13_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB13_3 -; X86-NEXT: .LBB13_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB13_3: -; X86-NEXT: subl %esi, %eax +; X86-NEXT: subw %dx, %ax +; X86-NEXT: jg .LBB13_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB13_2: +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: setle %bl +; X86-NEXT: leal -1(%ebx,%ebx), %edx ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %a2 = load i16, ptr %a2_addr %t3 = icmp sgt i16 %a1, %a2 ; signed @@ -896,16 +865,16 @@ define i16 @scalar_i16_signed_reg_mem(i16 %a1, ptr %a2_addr) nounwind { define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X64-LABEL: scalar_i16_signed_mem_mem: ; X64: # 
%bb.0: -; X64-NEXT: movzwl (%rdi), %ecx -; X64-NEXT: movzwl (%rsi), %eax +; X64-NEXT: movswl (%rdi), %ecx +; X64-NEXT: movswl (%rsi), %eax ; X64-NEXT: xorl %edx, %edx ; X64-NEXT: cmpw %ax, %cx ; X64-NEXT: setle %dl ; X64-NEXT: leal -1(%rdx,%rdx), %edx ; X64-NEXT: movl %ecx, %esi -; X64-NEXT: cmovgl %eax, %esi -; X64-NEXT: cmovgl %ecx, %eax -; X64-NEXT: subl %esi, %eax +; X64-NEXT: subl %eax, %esi +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: cmovll %esi, %eax ; X64-NEXT: movzwl %ax, %eax ; X64-NEXT: shrl %eax ; X64-NEXT: imull %edx, %eax @@ -915,30 +884,27 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; ; X86-LABEL: scalar_i16_signed_mem_mem: ; X86: # %bb.0: -; X86-NEXT: pushl %esi +; X86-NEXT: pushl %ebx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzwl (%ecx), %ecx -; X86-NEXT: movzwl (%eax), %eax -; X86-NEXT: xorl %edx, %edx -; X86-NEXT: cmpw %ax, %cx -; X86-NEXT: setle %dl -; X86-NEXT: leal -1(%edx,%edx), %edx -; X86-NEXT: jg .LBB14_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movl %ecx, %esi -; X86-NEXT: jmp .LBB14_3 -; X86-NEXT: .LBB14_1: -; X86-NEXT: movl %eax, %esi +; X86-NEXT: movzwl (%eax), %edx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: .LBB14_3: -; X86-NEXT: subl %esi, %eax +; X86-NEXT: subw %dx, %ax +; X86-NEXT: jg .LBB14_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: negl %eax +; X86-NEXT: .LBB14_2: +; X86-NEXT: xorl %ebx, %ebx +; X86-NEXT: cmpw %dx, %cx +; X86-NEXT: setle %bl +; X86-NEXT: leal -1(%ebx,%ebx), %edx ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: shrl %eax ; X86-NEXT: imull %edx, %eax ; X86-NEXT: addl %ecx, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax -; X86-NEXT: popl %esi +; X86-NEXT: popl %ebx ; X86-NEXT: retl %a1 = load i16, ptr %a1_addr %a2 = load i16, ptr %a2_addr @@ -962,38 +928,36 @@ define i16 @scalar_i16_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { ; X64-LABEL: scalar_i8_signed_reg_reg: ; X64: 
# %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil +; X64-NEXT: cmpb %sil, %dil ; X64-NEXT: setg %cl -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmovgl %esi, %edx -; X64-NEXT: cmovgl %edi, %eax ; X64-NEXT: negb %cl ; X64-NEXT: orb $1, %cl -; X64-NEXT: subb %dl, %al +; X64-NEXT: movsbl %dil, %edx +; X64-NEXT: subl %esi, %edi +; X64-NEXT: movsbl %sil, %eax +; X64-NEXT: subl %edx, %eax +; X64-NEXT: cmovll %edi, %eax ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: mulb %cl -; X64-NEXT: addb %dil, %al +; X64-NEXT: addb %dl, %al ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_signed_reg_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpb %al, %cl +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah +; X86-NEXT: cmpb %ah, %cl ; X86-NEXT: setg %dl -; X86-NEXT: jg .LBB15_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB15_3 -; X86-NEXT: .LBB15_1: -; X86-NEXT: movb %al, %ah -; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB15_3: -; X86-NEXT: subb %ah, %al ; X86-NEXT: negb %dl ; X86-NEXT: orb $1, %dl +; X86-NEXT: movb %cl, %al +; X86-NEXT: subb %ah, %al +; X86-NEXT: jg .LBB15_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: subb %cl, %ah +; X86-NEXT: movb %ah, %al +; X86-NEXT: .LBB15_2: ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al @@ -1012,38 +976,36 @@ define i8 @scalar_i8_signed_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { ; X64-LABEL: scalar_i8_unsigned_reg_reg: ; X64: # %bb.0: -; X64-NEXT: movl %esi, %eax -; X64-NEXT: cmpb %al, %dil -; X64-NEXT: seta %cl -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmoval %esi, %edx -; X64-NEXT: cmoval %edi, %eax -; X64-NEXT: negb %cl +; X64-NEXT: xorl %ecx, %ecx +; X64-NEXT: cmpb %dil, %sil +; X64-NEXT: sbbl %ecx, %ecx ; X64-NEXT: orb $1, %cl -; X64-NEXT: subb %dl, %al +; X64-NEXT: movzbl %dil, %edx +; X64-NEXT: subl %esi, %edi +; X64-NEXT: movzbl %sil, %eax 
+; X64-NEXT: subl %edx, %eax +; X64-NEXT: cmovbl %edi, %eax ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: mulb %cl -; X64-NEXT: addb %dil, %al +; X64-NEXT: addb %dl, %al ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_unsigned_reg_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: cmpb %al, %cl -; X86-NEXT: seta %dl -; X86-NEXT: ja .LBB16_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB16_3 -; X86-NEXT: .LBB16_1: -; X86-NEXT: movb %al, %ah -; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB16_3: -; X86-NEXT: subb %ah, %al -; X86-NEXT: negb %dl +; X86-NEXT: movb {{[0-9]+}}(%esp), %ch +; X86-NEXT: xorl %edx, %edx +; X86-NEXT: movb %ch, %ah +; X86-NEXT: subb %cl, %ah +; X86-NEXT: sbbl %edx, %edx ; X86-NEXT: orb $1, %dl +; X86-NEXT: movb %cl, %al +; X86-NEXT: subb %ch, %al +; X86-NEXT: ja .LBB16_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: movb %ah, %al +; X86-NEXT: .LBB16_2: ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al @@ -1064,16 +1026,16 @@ define i8 @scalar_i8_unsigned_reg_reg(i8 %a1, i8 %a2) nounwind { define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { ; X64-LABEL: scalar_i8_signed_mem_reg: ; X64: # %bb.0: -; X64-NEXT: movzbl (%rdi), %ecx +; X64-NEXT: movsbl (%rdi), %ecx ; X64-NEXT: cmpb %sil, %cl ; X64-NEXT: setg %dl -; X64-NEXT: movl %ecx, %edi -; X64-NEXT: cmovgl %esi, %edi -; X64-NEXT: movl %ecx, %eax -; X64-NEXT: cmovlel %esi, %eax ; X64-NEXT: negb %dl ; X64-NEXT: orb $1, %dl -; X64-NEXT: subb %dil, %al +; X64-NEXT: movl %ecx, %edi +; X64-NEXT: subl %esi, %edi +; X64-NEXT: movsbl %sil, %eax +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: cmovll %edi, %eax ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: mulb %dl @@ -1082,22 +1044,20 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { ; ; X86-LABEL: scalar_i8_signed_mem_reg: ; X86: # %bb.0: -; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax +; X86-NEXT: movb {{[0-9]+}}(%esp), %ah ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl (%ecx), %ecx -; X86-NEXT: cmpb %al, %cl +; X86-NEXT: cmpb %ah, %cl ; X86-NEXT: setg %dl -; X86-NEXT: jg .LBB17_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB17_3 -; X86-NEXT: .LBB17_1: -; X86-NEXT: movb %al, %ah -; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB17_3: -; X86-NEXT: subb %ah, %al ; X86-NEXT: negb %dl ; X86-NEXT: orb $1, %dl +; X86-NEXT: movb %cl, %al +; X86-NEXT: subb %ah, %al +; X86-NEXT: jg .LBB17_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: subb %cl, %ah +; X86-NEXT: movb %ah, %al +; X86-NEXT: .LBB17_2: ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al @@ -1117,39 +1077,37 @@ define i8 @scalar_i8_signed_mem_reg(ptr %a1_addr, i8 %a2) nounwind { define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind { ; X64-LABEL: scalar_i8_signed_reg_mem: ; X64: # %bb.0: -; X64-NEXT: movzbl (%rsi), %eax +; X64-NEXT: movsbl (%rsi), %eax ; X64-NEXT: cmpb %al, %dil ; X64-NEXT: setg %cl -; X64-NEXT: movl %edi, %edx -; X64-NEXT: cmovgl %eax, %edx -; X64-NEXT: cmovgl %edi, %eax ; X64-NEXT: negb %cl ; X64-NEXT: orb $1, %cl -; X64-NEXT: subb %dl, %al +; X64-NEXT: movsbl %dil, %edx +; X64-NEXT: subl %eax, %edi +; X64-NEXT: subl %edx, %eax +; X64-NEXT: cmovll %edi, %eax ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: mulb %cl -; X64-NEXT: addb %dil, %al +; X64-NEXT: addb %dl, %al ; X64-NEXT: retq ; ; X86-LABEL: scalar_i8_signed_reg_mem: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzbl (%eax), %eax -; X86-NEXT: cmpb %al, %cl +; X86-NEXT: movb (%eax), %ah +; X86-NEXT: cmpb %ah, %cl ; X86-NEXT: setg %dl -; X86-NEXT: jg .LBB18_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB18_3 -; X86-NEXT: .LBB18_1: -; X86-NEXT: movb %al, %ah -; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB18_3: -; X86-NEXT: 
subb %ah, %al ; X86-NEXT: negb %dl ; X86-NEXT: orb $1, %dl +; X86-NEXT: movb %cl, %al +; X86-NEXT: subb %ah, %al +; X86-NEXT: jg .LBB18_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: subb %cl, %ah +; X86-NEXT: movb %ah, %al +; X86-NEXT: .LBB18_2: ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al @@ -1169,16 +1127,16 @@ define i8 @scalar_i8_signed_reg_mem(i8 %a1, ptr %a2_addr) nounwind { define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X64-LABEL: scalar_i8_signed_mem_mem: ; X64: # %bb.0: -; X64-NEXT: movzbl (%rdi), %ecx -; X64-NEXT: movzbl (%rsi), %eax +; X64-NEXT: movsbl (%rdi), %ecx +; X64-NEXT: movsbl (%rsi), %eax ; X64-NEXT: cmpb %al, %cl ; X64-NEXT: setg %dl -; X64-NEXT: movl %ecx, %esi -; X64-NEXT: cmovgl %eax, %esi -; X64-NEXT: cmovgl %ecx, %eax ; X64-NEXT: negb %dl ; X64-NEXT: orb $1, %dl -; X64-NEXT: subb %sil, %al +; X64-NEXT: movl %ecx, %esi +; X64-NEXT: subl %eax, %esi +; X64-NEXT: subl %ecx, %eax +; X64-NEXT: cmovll %esi, %eax ; X64-NEXT: shrb %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: mulb %dl @@ -1190,20 +1148,18 @@ define i8 @scalar_i8_signed_mem_mem(ptr %a1_addr, ptr %a2_addr) nounwind { ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movzbl (%ecx), %ecx -; X86-NEXT: movzbl (%eax), %eax -; X86-NEXT: cmpb %al, %cl +; X86-NEXT: movb (%eax), %ah +; X86-NEXT: cmpb %ah, %cl ; X86-NEXT: setg %dl -; X86-NEXT: jg .LBB19_1 -; X86-NEXT: # %bb.2: -; X86-NEXT: movb %cl, %ah -; X86-NEXT: jmp .LBB19_3 -; X86-NEXT: .LBB19_1: -; X86-NEXT: movb %al, %ah -; X86-NEXT: movb %cl, %al -; X86-NEXT: .LBB19_3: -; X86-NEXT: subb %ah, %al ; X86-NEXT: negb %dl ; X86-NEXT: orb $1, %dl +; X86-NEXT: movb %cl, %al +; X86-NEXT: subb %ah, %al +; X86-NEXT: jg .LBB19_2 +; X86-NEXT: # %bb.1: +; X86-NEXT: subb %cl, %ah +; X86-NEXT: movb %ah, %al +; X86-NEXT: .LBB19_2: ; X86-NEXT: shrb %al ; X86-NEXT: mulb %dl ; X86-NEXT: addb %cl, %al diff --git 
a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp index e404dd08b5f1e..7400b6c1984f7 100644 --- a/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp +++ b/llvm/unittests/CodeGen/SelectionDAGPatternMatchTest.cpp @@ -201,6 +201,23 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) { SDValue UMax = DAG->getNode(ISD::UMAX, DL, Int32VT, Op0, Op1); SDValue UMin = DAG->getNode(ISD::UMIN, DL, Int32VT, Op1, Op0); + SDValue ICMP_GT = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETGT); + SDValue ICMP_GE = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETGE); + SDValue ICMP_UGT = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETUGT); + SDValue ICMP_UGE = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETUGE); + SDValue ICMP_LT = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETLT); + SDValue ICMP_LE = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETLE); + SDValue ICMP_ULT = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETULT); + SDValue ICMP_ULE = DAG->getSetCC(DL, MVT::i1, Op0, Op1, ISD::SETULE); + SDValue SMaxLikeGT = DAG->getSelect(DL, MVT::i32, ICMP_GT, Op0, Op1); + SDValue SMaxLikeGE = DAG->getSelect(DL, MVT::i32, ICMP_GE, Op0, Op1); + SDValue UMaxLikeUGT = DAG->getSelect(DL, MVT::i32, ICMP_UGT, Op0, Op1); + SDValue UMaxLikeUGE = DAG->getSelect(DL, MVT::i32, ICMP_UGE, Op0, Op1); + SDValue SMinLikeLT = DAG->getSelect(DL, MVT::i32, ICMP_LT, Op0, Op1); + SDValue SMinLikeLE = DAG->getSelect(DL, MVT::i32, ICMP_LE, Op0, Op1); + SDValue UMinLikeULT = DAG->getSelect(DL, MVT::i32, ICMP_ULT, Op0, Op1); + SDValue UMinLikeULE = DAG->getSelect(DL, MVT::i32, ICMP_ULE, Op0, Op1); + SDValue SFAdd = DAG->getNode(ISD::STRICT_FADD, DL, {Float32VT, MVT::Other}, {DAG->getEntryNode(), Op2, Op2}); @@ -231,12 +248,24 @@ TEST_F(SelectionDAGPatternMatchTest, matchBinaryOp) { EXPECT_TRUE(sd_match(SMax, m_c_BinOp(ISD::SMAX, m_Value(), m_Value()))); EXPECT_TRUE(sd_match(SMax, m_SMax(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(SMax, 
m_SMaxLike(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(SMaxLikeGT, m_SMaxLike(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(SMaxLikeGE, m_SMaxLike(m_Value(), m_Value()))); EXPECT_TRUE(sd_match(SMin, m_c_BinOp(ISD::SMIN, m_Value(), m_Value()))); EXPECT_TRUE(sd_match(SMin, m_SMin(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(SMin, m_SMinLike(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(SMinLikeLT, m_SMinLike(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(SMinLikeLE, m_SMinLike(m_Value(), m_Value()))); EXPECT_TRUE(sd_match(UMax, m_c_BinOp(ISD::UMAX, m_Value(), m_Value()))); EXPECT_TRUE(sd_match(UMax, m_UMax(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(UMax, m_UMaxLike(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(UMaxLikeUGT, m_UMaxLike(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(UMaxLikeUGE, m_UMaxLike(m_Value(), m_Value()))); EXPECT_TRUE(sd_match(UMin, m_c_BinOp(ISD::UMIN, m_Value(), m_Value()))); EXPECT_TRUE(sd_match(UMin, m_UMin(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(UMin, m_UMinLike(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(UMinLikeULT, m_UMinLike(m_Value(), m_Value()))); + EXPECT_TRUE(sd_match(UMinLikeULE, m_UMinLike(m_Value(), m_Value()))); SDValue BindVal; EXPECT_TRUE(sd_match(SFAdd, m_ChainedBinOp(ISD::STRICT_FADD, m_Value(BindVal),