
Conversation

@mshockwave
Member

This is the GISel version of #120666. It also supports both the unsigned and signed versions of min & max.
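
For illustration, the signed i32 case from the added test file reduces to this pattern (the rewrite described below is a sketch of what the combine produces, not the exact generic MIR):

  %n = sub i32 0, %x                             ; (neg x)
  %t = call i32 @llvm.smax.i32(i32 %n, i32 %x)   ; (smax (neg x), x)
  %r = sub i32 0, %t                             ; (neg (smax (neg x), x))

After the combine, the outer negation is folded away by flipping smax to smin, so %r is computed as smin(%n, %x), i.e. (smin (neg x), x); with Zbb this lowers to just a neg followed by a min, as the RV32ZBB/RV64ZBB check lines show.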

@llvmbot
Member

llvmbot commented Dec 23, 2024

@llvm/pr-subscribers-backend-risc-v

Author: Min-Yih Hsu (mshockwave)

Changes

This is the GISel version of #120666. It also supports both the unsigned and signed versions of min & max.


Full diff: https://github.com/llvm/llvm-project/pull/120998.diff

7 Files Affected:

  • (modified) llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h (+3)
  • (modified) llvm/include/llvm/CodeGen/GlobalISel/Utils.h (+4)
  • (modified) llvm/include/llvm/Target/GlobalISel/Combine.td (+7-1)
  • (modified) llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp (+36)
  • (modified) llvm/lib/CodeGen/GlobalISel/Utils.cpp (+15)
  • (modified) llvm/lib/Target/RISCV/RISCVCombine.td (+1-1)
  • (added) llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll (+457)
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 871456d2a55b5e..94e36e412b0cf7 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -864,6 +864,9 @@ class CombinerHelper {
   /// Combine select to integer min/max.
   bool matchSelectIMinMax(const MachineOperand &MO, BuildFnTy &MatchInfo) const;
 
+  /// Transform (neg (min/max x, (neg x))) into (max/min x, (neg x)).
+  bool matchSimplifyNegMinMax(MachineInstr &MI, BuildFnTy &MatchInfo) const;
+
   /// Combine selects.
   bool matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const;
 
diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
index 37653631cc2388..072febd09c63e2 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/Utils.h
@@ -171,6 +171,10 @@ void reportGISelWarning(MachineFunction &MF, const TargetPassConfig &TPC,
                         MachineOptimizationRemarkEmitter &MORE,
                         MachineOptimizationRemarkMissed &R);
 
+/// Returns the inverse opcode of \p MinMaxOpc , which is a generic min/max
+/// opcode like G_SMIN.
+unsigned getInverseGMinMaxOpcode(unsigned MinMaxOpc);
+
 /// If \p VReg is defined by a G_CONSTANT, return the corresponding value.
 std::optional<APInt> getIConstantVRegVal(Register VReg,
                                          const MachineRegisterInfo &MRI);
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 013c3a6ed83d8b..8641eabbdd84c6 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -1372,6 +1372,12 @@ def select_to_iminmax: GICombineRule<
          [{ return Helper.matchSelectIMinMax(${root}, ${info}); }]),
   (apply [{ Helper.applyBuildFnMO(${root}, ${info}); }])>;
 
+def simplify_neg_minmax : GICombineRule<
+  (defs root:$root, build_fn_matchinfo:$matchinfo),
+  (match (wip_match_opcode G_SUB):$root,
+         [{ return Helper.matchSimplifyNegMinMax(*${root}, ${matchinfo}); }]),
+  (apply [{ Helper.applyBuildFn(*${root}, ${matchinfo}); }])>;
+
 def match_selects : GICombineRule<
   (defs root:$root, build_fn_matchinfo:$matchinfo),
   (match (wip_match_opcode G_SELECT):$root,
@@ -2008,7 +2014,7 @@ def all_combines : GICombineGroup<[integer_reassoc_combines, trivial_combines,
     and_or_disjoint_mask, fma_combines, fold_binop_into_select,
     sub_add_reg, select_to_minmax,
     fsub_to_fneg, commute_constant_to_rhs, match_ands, match_ors,
-    combine_concat_vector,
+    simplify_neg_minmax, combine_concat_vector,
     sext_trunc, zext_trunc, prefer_sign_combines, shuffle_combines,
     combine_use_vector_truncate, merge_combines, overflow_combines]>;
 
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index c20e9d0c6876ee..2ddf3f617bd92f 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -7045,6 +7045,42 @@ bool CombinerHelper::matchSelectIMinMax(const MachineOperand &MO,
   }
 }
 
+// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
+bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
+                                            BuildFnTy &MatchInfo) const {
+  assert(MI.getOpcode() == TargetOpcode::G_SUB);
+  Register DestReg = MI.getOperand(0).getReg();
+  LLT DestTy = MRI.getType(DestReg);
+  if (!isLegal({TargetOpcode::G_SUB, {DestTy}}))
+    return false;
+
+  // GISel doesn't have m_Deferred at this moment, so we have to
+  // match this pattern in two phases.
+  Register X, Y;
+  Register Sub0;
+  if (mi_match(DestReg, MRI,
+               m_Neg(m_OneUse(m_any_of(
+                   m_GSMin(m_Reg(X), m_Reg(Y)), m_GSMax(m_Reg(X), m_Reg(Y)),
+                   m_CommutativeBinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
+                   m_CommutativeBinOp(TargetOpcode::G_UMAX, m_Reg(X),
+                                      m_Reg(Y)))))) &&
+      (mi_match(Y, MRI, m_all_of(m_Neg(m_SpecificReg(X)), m_Reg(Sub0))) ||
+       mi_match(X, MRI, m_all_of(m_Neg(m_SpecificReg(Y)), m_Reg(Sub0))))) {
+    MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
+    MachineInstr *Sub0MI = MRI.getVRegDef(Sub0);
+    X = Sub0MI->getOperand(2).getReg();
+    unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
+    if (isLegal({NewOpc, {DestTy}})) {
+      MatchInfo = [=](MachineIRBuilder &B) {
+        B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
+      };
+      return true;
+    }
+  }
+
+  return false;
+}
+
 bool CombinerHelper::matchSelect(MachineInstr &MI, BuildFnTy &MatchInfo) const {
   GSelect *Select = cast<GSelect>(&MI);
 
diff --git a/llvm/lib/CodeGen/GlobalISel/Utils.cpp b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
index 8c1e41ea106eca..0ce08c3ea3dcbb 100644
--- a/llvm/lib/CodeGen/GlobalISel/Utils.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/Utils.cpp
@@ -276,6 +276,21 @@ void llvm::reportGISelFailure(MachineFunction &MF, const TargetPassConfig &TPC,
   reportGISelFailure(MF, TPC, MORE, R);
 }
 
+unsigned llvm::getInverseGMinMaxOpcode(unsigned MinMaxOpc) {
+  switch (MinMaxOpc) {
+  case TargetOpcode::G_SMIN:
+    return TargetOpcode::G_SMAX;
+  case TargetOpcode::G_SMAX:
+    return TargetOpcode::G_SMIN;
+  case TargetOpcode::G_UMIN:
+    return TargetOpcode::G_UMAX;
+  case TargetOpcode::G_UMAX:
+    return TargetOpcode::G_UMIN;
+  default:
+    llvm_unreachable("unrecognized opcode");
+  }
+}
+
 std::optional<APInt> llvm::getIConstantVRegVal(Register VReg,
                                                const MachineRegisterInfo &MRI) {
   std::optional<ValueAndVReg> ValAndVReg = getIConstantVRegValWithLookThrough(
diff --git a/llvm/lib/Target/RISCV/RISCVCombine.td b/llvm/lib/Target/RISCV/RISCVCombine.td
index 030613a7d89040..995dd0c5d82eba 100644
--- a/llvm/lib/Target/RISCV/RISCVCombine.td
+++ b/llvm/lib/Target/RISCV/RISCVCombine.td
@@ -25,5 +25,5 @@ def RISCVPostLegalizerCombiner
     : GICombiner<"RISCVPostLegalizerCombinerImpl",
                  [sub_to_add, combines_for_extload, redundant_and,
                   identity_combines, shift_immed_chain,
-                  commute_constant_to_rhs]> {
+                  commute_constant_to_rhs, simplify_neg_minmax]> {
 }
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll
new file mode 100644
index 00000000000000..6c848ecf0fffdb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/combine-neg-abs.ll
@@ -0,0 +1,457 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=riscv32 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV32I
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV32ZBB
+; RUN: llc -mtriple=riscv64 -global-isel -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV64I
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+zbb -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s --check-prefixes=RV64ZBB
+
+define i32 @expanded_neg_abs32(i32 %x) {
+; RV32I-LABEL: expanded_neg_abs32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    blt a0, a1, .LBB0_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:  .LBB0_2:
+; RV32I-NEXT:    neg a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    neg a1, a0
+; RV32ZBB-NEXT:    min a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_abs32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    sext.w a2, a1
+; RV64I-NEXT:    sext.w a3, a0
+; RV64I-NEXT:    blt a3, a2, .LBB0_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB0_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    negw a1, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    max a0, a1, a0
+; RV64ZBB-NEXT:    neg a0, a0
+; RV64ZBB-NEXT:    ret
+  %n = sub i32 0, %x
+  %t = call i32 @llvm.smax.i32(i32 %n, i32 %x)
+  %r = sub i32 0, %t
+  ret i32 %r
+}
+
+define i32 @expanded_neg_abs32_unsigned(i32 %x) {
+; RV32I-LABEL: expanded_neg_abs32_unsigned:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    bltu a0, a1, .LBB1_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:  .LBB1_2:
+; RV32I-NEXT:    neg a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs32_unsigned:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    neg a1, a0
+; RV32ZBB-NEXT:    minu a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_abs32_unsigned:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    sext.w a2, a1
+; RV64I-NEXT:    sext.w a3, a0
+; RV64I-NEXT:    bltu a3, a2, .LBB1_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB1_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs32_unsigned:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    negw a1, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    maxu a0, a1, a0
+; RV64ZBB-NEXT:    neg a0, a0
+; RV64ZBB-NEXT:    ret
+  %n = sub i32 0, %x
+  %t = call i32 @llvm.umax.i32(i32 %n, i32 %x)
+  %r = sub i32 0, %t
+  ret i32 %r
+}
+
+define i64 @expanded_neg_abs64(i64 %x) {
+; RV32I-LABEL: expanded_neg_abs64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    snez a2, a0
+; RV32I-NEXT:    neg a3, a1
+; RV32I-NEXT:    sub a2, a3, a2
+; RV32I-NEXT:    neg a3, a0
+; RV32I-NEXT:    beq a2, a1, .LBB2_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    slt a4, a1, a2
+; RV32I-NEXT:    beqz a4, .LBB2_3
+; RV32I-NEXT:    j .LBB2_4
+; RV32I-NEXT:  .LBB2_2:
+; RV32I-NEXT:    sltu a4, a0, a3
+; RV32I-NEXT:    bnez a4, .LBB2_4
+; RV32I-NEXT:  .LBB2_3:
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:  .LBB2_4:
+; RV32I-NEXT:    neg a0, a3
+; RV32I-NEXT:    snez a1, a3
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    sub a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    snez a2, a0
+; RV32ZBB-NEXT:    neg a3, a1
+; RV32ZBB-NEXT:    sub a2, a3, a2
+; RV32ZBB-NEXT:    neg a3, a0
+; RV32ZBB-NEXT:    beq a2, a1, .LBB2_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    slt a4, a1, a2
+; RV32ZBB-NEXT:    beqz a4, .LBB2_3
+; RV32ZBB-NEXT:    j .LBB2_4
+; RV32ZBB-NEXT:  .LBB2_2:
+; RV32ZBB-NEXT:    sltu a4, a0, a3
+; RV32ZBB-NEXT:    bnez a4, .LBB2_4
+; RV32ZBB-NEXT:  .LBB2_3:
+; RV32ZBB-NEXT:    mv a3, a0
+; RV32ZBB-NEXT:    mv a2, a1
+; RV32ZBB-NEXT:  .LBB2_4:
+; RV32ZBB-NEXT:    neg a0, a3
+; RV32ZBB-NEXT:    snez a1, a3
+; RV32ZBB-NEXT:    neg a2, a2
+; RV32ZBB-NEXT:    sub a1, a2, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_abs64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    blt a0, a1, .LBB2_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB2_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    neg a1, a0
+; RV64ZBB-NEXT:    min a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %n = sub i64 0, %x
+  %t = call i64 @llvm.smax.i64(i64 %n, i64 %x)
+  %r = sub i64 0, %t
+  ret i64 %r
+}
+
+define i64 @expanded_neg_abs64_unsigned(i64 %x) {
+; RV32I-LABEL: expanded_neg_abs64_unsigned:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    snez a2, a0
+; RV32I-NEXT:    neg a3, a1
+; RV32I-NEXT:    sub a2, a3, a2
+; RV32I-NEXT:    neg a3, a0
+; RV32I-NEXT:    beq a2, a1, .LBB3_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sltu a4, a1, a2
+; RV32I-NEXT:    beqz a4, .LBB3_3
+; RV32I-NEXT:    j .LBB3_4
+; RV32I-NEXT:  .LBB3_2:
+; RV32I-NEXT:    sltu a4, a0, a3
+; RV32I-NEXT:    bnez a4, .LBB3_4
+; RV32I-NEXT:  .LBB3_3:
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:  .LBB3_4:
+; RV32I-NEXT:    neg a0, a3
+; RV32I-NEXT:    snez a1, a3
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    sub a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_abs64_unsigned:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    snez a2, a0
+; RV32ZBB-NEXT:    neg a3, a1
+; RV32ZBB-NEXT:    sub a2, a3, a2
+; RV32ZBB-NEXT:    neg a3, a0
+; RV32ZBB-NEXT:    beq a2, a1, .LBB3_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    sltu a4, a1, a2
+; RV32ZBB-NEXT:    beqz a4, .LBB3_3
+; RV32ZBB-NEXT:    j .LBB3_4
+; RV32ZBB-NEXT:  .LBB3_2:
+; RV32ZBB-NEXT:    sltu a4, a0, a3
+; RV32ZBB-NEXT:    bnez a4, .LBB3_4
+; RV32ZBB-NEXT:  .LBB3_3:
+; RV32ZBB-NEXT:    mv a3, a0
+; RV32ZBB-NEXT:    mv a2, a1
+; RV32ZBB-NEXT:  .LBB3_4:
+; RV32ZBB-NEXT:    neg a0, a3
+; RV32ZBB-NEXT:    snez a1, a3
+; RV32ZBB-NEXT:    neg a2, a2
+; RV32ZBB-NEXT:    sub a1, a2, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_abs64_unsigned:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    bltu a0, a1, .LBB3_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB3_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_abs64_unsigned:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    neg a1, a0
+; RV64ZBB-NEXT:    minu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %n = sub i64 0, %x
+  %t = call i64 @llvm.umax.i64(i64 %n, i64 %x)
+  %r = sub i64 0, %t
+  ret i64 %r
+}
+
+define i32 @expanded_neg_inv_abs32(i32 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs32:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    blt a1, a0, .LBB4_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:  .LBB4_2:
+; RV32I-NEXT:    neg a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs32:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    neg a1, a0
+; RV32ZBB-NEXT:    max a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs32:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    sext.w a2, a1
+; RV64I-NEXT:    sext.w a3, a0
+; RV64I-NEXT:    blt a2, a3, .LBB4_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB4_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs32:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    negw a1, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    min a0, a1, a0
+; RV64ZBB-NEXT:    neg a0, a0
+; RV64ZBB-NEXT:    ret
+  %n = sub i32 0, %x
+  %t = call i32 @llvm.smin.i32(i32 %n, i32 %x)
+  %r = sub i32 0, %t
+  ret i32 %r
+}
+
+define i32 @expanded_neg_inv_abs32_unsigned(i32 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    neg a1, a0
+; RV32I-NEXT:    bltu a1, a0, .LBB5_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    mv a1, a0
+; RV32I-NEXT:  .LBB5_2:
+; RV32I-NEXT:    neg a0, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    neg a1, a0
+; RV32ZBB-NEXT:    maxu a0, a0, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    sext.w a2, a1
+; RV64I-NEXT:    sext.w a3, a0
+; RV64I-NEXT:    bltu a2, a3, .LBB5_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB5_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs32_unsigned:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    negw a1, a0
+; RV64ZBB-NEXT:    sext.w a0, a0
+; RV64ZBB-NEXT:    minu a0, a1, a0
+; RV64ZBB-NEXT:    neg a0, a0
+; RV64ZBB-NEXT:    ret
+  %n = sub i32 0, %x
+  %t = call i32 @llvm.umin.i32(i32 %n, i32 %x)
+  %r = sub i32 0, %t
+  ret i32 %r
+}
+
+define i64 @expanded_neg_inv_abs64(i64 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs64:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    snez a2, a0
+; RV32I-NEXT:    neg a3, a1
+; RV32I-NEXT:    sub a2, a3, a2
+; RV32I-NEXT:    neg a3, a0
+; RV32I-NEXT:    beq a2, a1, .LBB6_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    slt a4, a2, a1
+; RV32I-NEXT:    beqz a4, .LBB6_3
+; RV32I-NEXT:    j .LBB6_4
+; RV32I-NEXT:  .LBB6_2:
+; RV32I-NEXT:    sltu a4, a3, a0
+; RV32I-NEXT:    bnez a4, .LBB6_4
+; RV32I-NEXT:  .LBB6_3:
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:  .LBB6_4:
+; RV32I-NEXT:    neg a0, a3
+; RV32I-NEXT:    snez a1, a3
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    sub a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs64:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    snez a2, a0
+; RV32ZBB-NEXT:    neg a3, a1
+; RV32ZBB-NEXT:    sub a2, a3, a2
+; RV32ZBB-NEXT:    neg a3, a0
+; RV32ZBB-NEXT:    beq a2, a1, .LBB6_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    slt a4, a2, a1
+; RV32ZBB-NEXT:    beqz a4, .LBB6_3
+; RV32ZBB-NEXT:    j .LBB6_4
+; RV32ZBB-NEXT:  .LBB6_2:
+; RV32ZBB-NEXT:    sltu a4, a3, a0
+; RV32ZBB-NEXT:    bnez a4, .LBB6_4
+; RV32ZBB-NEXT:  .LBB6_3:
+; RV32ZBB-NEXT:    mv a3, a0
+; RV32ZBB-NEXT:    mv a2, a1
+; RV32ZBB-NEXT:  .LBB6_4:
+; RV32ZBB-NEXT:    neg a0, a3
+; RV32ZBB-NEXT:    snez a1, a3
+; RV32ZBB-NEXT:    neg a2, a2
+; RV32ZBB-NEXT:    sub a1, a2, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs64:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    blt a1, a0, .LBB6_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB6_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs64:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    neg a1, a0
+; RV64ZBB-NEXT:    max a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %n = sub i64 0, %x
+  %t = call i64 @llvm.smin.i64(i64 %n, i64 %x)
+  %r = sub i64 0, %t
+  ret i64 %r
+}
+
+define i64 @expanded_neg_inv_abs64_unsigned(i64 %x) {
+; RV32I-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV32I:       # %bb.0:
+; RV32I-NEXT:    snez a2, a0
+; RV32I-NEXT:    neg a3, a1
+; RV32I-NEXT:    sub a2, a3, a2
+; RV32I-NEXT:    neg a3, a0
+; RV32I-NEXT:    beq a2, a1, .LBB7_2
+; RV32I-NEXT:  # %bb.1:
+; RV32I-NEXT:    sltu a4, a2, a1
+; RV32I-NEXT:    beqz a4, .LBB7_3
+; RV32I-NEXT:    j .LBB7_4
+; RV32I-NEXT:  .LBB7_2:
+; RV32I-NEXT:    sltu a4, a3, a0
+; RV32I-NEXT:    bnez a4, .LBB7_4
+; RV32I-NEXT:  .LBB7_3:
+; RV32I-NEXT:    mv a3, a0
+; RV32I-NEXT:    mv a2, a1
+; RV32I-NEXT:  .LBB7_4:
+; RV32I-NEXT:    neg a0, a3
+; RV32I-NEXT:    snez a1, a3
+; RV32I-NEXT:    neg a2, a2
+; RV32I-NEXT:    sub a1, a2, a1
+; RV32I-NEXT:    ret
+;
+; RV32ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV32ZBB:       # %bb.0:
+; RV32ZBB-NEXT:    snez a2, a0
+; RV32ZBB-NEXT:    neg a3, a1
+; RV32ZBB-NEXT:    sub a2, a3, a2
+; RV32ZBB-NEXT:    neg a3, a0
+; RV32ZBB-NEXT:    beq a2, a1, .LBB7_2
+; RV32ZBB-NEXT:  # %bb.1:
+; RV32ZBB-NEXT:    sltu a4, a2, a1
+; RV32ZBB-NEXT:    beqz a4, .LBB7_3
+; RV32ZBB-NEXT:    j .LBB7_4
+; RV32ZBB-NEXT:  .LBB7_2:
+; RV32ZBB-NEXT:    sltu a4, a3, a0
+; RV32ZBB-NEXT:    bnez a4, .LBB7_4
+; RV32ZBB-NEXT:  .LBB7_3:
+; RV32ZBB-NEXT:    mv a3, a0
+; RV32ZBB-NEXT:    mv a2, a1
+; RV32ZBB-NEXT:  .LBB7_4:
+; RV32ZBB-NEXT:    neg a0, a3
+; RV32ZBB-NEXT:    snez a1, a3
+; RV32ZBB-NEXT:    neg a2, a2
+; RV32ZBB-NEXT:    sub a1, a2, a1
+; RV32ZBB-NEXT:    ret
+;
+; RV64I-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV64I:       # %bb.0:
+; RV64I-NEXT:    neg a1, a0
+; RV64I-NEXT:    bltu a1, a0, .LBB7_2
+; RV64I-NEXT:  # %bb.1:
+; RV64I-NEXT:    mv a1, a0
+; RV64I-NEXT:  .LBB7_2:
+; RV64I-NEXT:    neg a0, a1
+; RV64I-NEXT:    ret
+;
+; RV64ZBB-LABEL: expanded_neg_inv_abs64_unsigned:
+; RV64ZBB:       # %bb.0:
+; RV64ZBB-NEXT:    neg a1, a0
+; RV64ZBB-NEXT:    maxu a0, a0, a1
+; RV64ZBB-NEXT:    ret
+  %n = sub i64 0, %x
+  %t = call i64 @llvm.umin.i64(i64 %n, i64 %x)
+  %r = sub i64 0, %t
+  ret i64 %r
+}

@llvmbot
Member

llvmbot commented Dec 23, 2024

@llvm/pr-subscribers-llvm-globalisel


Collaborator

Drop the extra space before the comma

Member Author
Fixed.

@mshockwave force-pushed the patch/gisel-neg-abs branch from 080ddcd to e1f5da5 on January 2, 2025, 21:49
Comment on lines 7071 to 7072
if (!isLegal({TargetOpcode::G_SUB, {DestTy}}))
return false;
Contributor

Why is this legality check here?

Member Author

Oops, it must have been left there by accident. It's removed now.
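
For reference, a minimal sketch of the matcher with that stray check dropped — this mirrors the diff above under the assumption that nothing else changed before the merge:

// (neg (min/max x, (neg x))) --> (max/min x, (neg x))
bool CombinerHelper::matchSimplifyNegMinMax(MachineInstr &MI,
                                            BuildFnTy &MatchInfo) const {
  assert(MI.getOpcode() == TargetOpcode::G_SUB);
  Register DestReg = MI.getOperand(0).getReg();
  LLT DestTy = MRI.getType(DestReg);

  // GISel doesn't have m_Deferred yet, so match in two phases: first the
  // (neg (min/max x, y)) shell, then verify that one min/max operand is
  // the negation of the other.
  Register X, Y, Sub0;
  if (mi_match(DestReg, MRI,
               m_Neg(m_OneUse(m_any_of(
                   m_GSMin(m_Reg(X), m_Reg(Y)), m_GSMax(m_Reg(X), m_Reg(Y)),
                   m_CommutativeBinOp(TargetOpcode::G_UMIN, m_Reg(X), m_Reg(Y)),
                   m_CommutativeBinOp(TargetOpcode::G_UMAX, m_Reg(X),
                                      m_Reg(Y)))))) &&
      (mi_match(Y, MRI, m_all_of(m_Neg(m_SpecificReg(X)), m_Reg(Sub0))) ||
       mi_match(X, MRI, m_all_of(m_Neg(m_SpecificReg(Y)), m_Reg(Sub0))))) {
    MachineInstr *MinMaxMI = MRI.getVRegDef(MI.getOperand(2).getReg());
    MachineInstr *Sub0MI = MRI.getVRegDef(Sub0);
    // Re-read X from the inner sub so it names the non-negated operand.
    X = Sub0MI->getOperand(2).getReg();
    unsigned NewOpc = getInverseGMinMaxOpcode(MinMaxMI->getOpcode());
    // Only the legality of the flipped min/max matters here.
    if (isLegal({NewOpc, {DestTy}})) {
      MatchInfo = [=](MachineIRBuilder &B) {
        B.buildInstr(NewOpc, {DestReg}, {X, Sub0});
      };
      return true;
    }
  }

  return false;
}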

@mshockwave merged commit 3cac26f into llvm:main on Jan 3, 2025
8 checks passed
@mshockwave deleted the patch/gisel-neg-abs branch on January 3, 2025, 00:29