Skip to content

Conversation

@heiher
Copy link
Member

@heiher heiher commented Jul 21, 2024

recommit of #99272

@llvmbot
Copy link
Member

llvmbot commented Jul 21, 2024

@llvm/pr-subscribers-backend-loongarch

Author: hev (heiher)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/99798.diff

2 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+166)
  • (modified) llvm/test/CodeGen/LoongArch/andn-icmp.ll (+16-41)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index ba6be85c7f2e8..496f126b7173d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -335,6 +335,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setTargetDAGCombine(ISD::AND);
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::SRL);
+  setTargetDAGCombine(ISD::SETCC);
 
   // Set DAG combine for 'LSX' feature.
 
@@ -2528,6 +2529,169 @@ static SDValue performORCombine(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+static bool checkValueWidth(SDValue V, ISD::LoadExtType &ExtType) {
+  ExtType = ISD::NON_EXTLOAD;
+
+  switch (V.getNode()->getOpcode()) {
+  case ISD::LOAD: {
+    LoadSDNode *LoadNode = cast<LoadSDNode>(V.getNode());
+    if ((LoadNode->getMemoryVT() == MVT::i8) ||
+        (LoadNode->getMemoryVT() == MVT::i16)) {
+      ExtType = LoadNode->getExtensionType();
+      return true;
+    }
+    return false;
+  }
+  case ISD::AssertSext: {
+    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
+      ExtType = ISD::SEXTLOAD;
+      return true;
+    }
+    return false;
+  }
+  case ISD::AssertZext: {
+    VTSDNode *TypeNode = cast<VTSDNode>(V.getNode()->getOperand(1));
+    if ((TypeNode->getVT() == MVT::i8) || (TypeNode->getVT() == MVT::i16)) {
+      ExtType = ISD::ZEXTLOAD;
+      return true;
+    }
+    return false;
+  }
+  default:
+    return false;
+  }
+
+  return false;
+}
+
+// Eliminate redundant truncation and zero-extension nodes.
+// * Case 1:
+//  +------------+ +------------+ +------------+
+//  |   Input1   | |   Input2   | |     CC     |
+//  +------------+ +------------+ +------------+
+//         |              |              |
+//         V              V              +----+
+//  +------------+ +------------+             |
+//  |  TRUNCATE  | |  TRUNCATE  |             |
+//  +------------+ +------------+             |
+//         |              |                   |
+//         V              V                   |
+//  +------------+ +------------+             |
+//  |  ZERO_EXT  | |  ZERO_EXT  |             |
+//  +------------+ +------------+             |
+//         |              |                   |
+//         |              +-------------+     |
+//         V              V             |     |
+//        +----------------+            |     |
+//        |      AND       |            |     |
+//        +----------------+            |     |
+//                |                     |     |
+//                +---------------+     |     |
+//                                |     |     |
+//                                V     V     V
+//                               +-------------+
+//                               |     CMP     |
+//                               +-------------+
+// * Case 2:
+//  +------------+ +------------+ +-------------+ +------------+ +------------+
+//  |   Input1   | |   Input2   | | Constant -1 | | Constant 0 | |     CC     |
+//  +------------+ +------------+ +-------------+ +------------+ +------------+
+//         |              |             |               |               |
+//         V              |             |               |               |
+//  +------------+        |             |               |               |
+//  |     XOR    |<---------------------+               |               |
+//  +------------+        |                             |               |
+//         |              |                             |               |
+//         V              V             +---------------+               |
+//  +------------+ +------------+       |                               |
+//  |  TRUNCATE  | |  TRUNCATE  |       |     +-------------------------+
+//  +------------+ +------------+       |     |
+//         |              |             |     |
+//         V              V             |     |
+//  +------------+ +------------+       |     |
+//  |  ZERO_EXT  | |  ZERO_EXT  |       |     |
+//  +------------+ +------------+       |     |
+//         |              |             |     |
+//         V              V             |     |
+//        +----------------+            |     |
+//        |      AND       |            |     |
+//        +----------------+            |     |
+//                |                     |     |
+//                +---------------+     |     |
+//                                |     |     |
+//                                V     V     V
+//                               +-------------+
+//                               |     CMP     |
+//                               +-------------+
+static SDValue performSETCCCombine(SDNode *N, SelectionDAG &DAG,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const LoongArchSubtarget &Subtarget) {
+  ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(2))->get();
+
+  SDNode *AndNode = N->getOperand(0).getNode();
+  if (AndNode->getOpcode() != ISD::AND)
+    return SDValue();
+
+  SDValue AndInputValue2 = AndNode->getOperand(1);
+  if (AndInputValue2.getOpcode() != ISD::ZERO_EXTEND)
+    return SDValue();
+
+  SDValue CmpInputValue = N->getOperand(1);
+  SDValue AndInputValue1 = AndNode->getOperand(0);
+  if (AndInputValue1.getOpcode() == ISD::XOR) {
+    if (CC != ISD::SETEQ && CC != ISD::SETNE)
+      return SDValue();
+    ConstantSDNode *CN = dyn_cast<ConstantSDNode>(AndInputValue1.getOperand(1));
+    if (!CN || CN->getSExtValue() != -1)
+      return SDValue();
+    CN = dyn_cast<ConstantSDNode>(CmpInputValue);
+    if (!CN || CN->getSExtValue() != 0)
+      return SDValue();
+    AndInputValue1 = AndInputValue1.getOperand(0);
+    if (AndInputValue1.getOpcode() != ISD::ZERO_EXTEND)
+      return SDValue();
+  } else if (AndInputValue1.getOpcode() == ISD::ZERO_EXTEND) {
+    if (AndInputValue2 != CmpInputValue)
+      return SDValue();
+  } else {
+    return SDValue();
+  }
+
+  SDValue TruncValue1 = AndInputValue1.getNode()->getOperand(0);
+  if (TruncValue1.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  SDValue TruncValue2 = AndInputValue2.getNode()->getOperand(0);
+  if (TruncValue2.getOpcode() != ISD::TRUNCATE)
+    return SDValue();
+
+  SDValue TruncInputValue1 = TruncValue1.getNode()->getOperand(0);
+  SDValue TruncInputValue2 = TruncValue2.getNode()->getOperand(0);
+  ISD::LoadExtType ExtType1;
+  ISD::LoadExtType ExtType2;
+
+  if (!checkValueWidth(TruncInputValue1, ExtType1) ||
+      !checkValueWidth(TruncInputValue2, ExtType2))
+    return SDValue();
+
+  if (TruncInputValue1->getValueType(0) != TruncInputValue2->getValueType(0) ||
+      AndNode->getValueType(0) != TruncInputValue1->getValueType(0))
+    return SDValue();
+
+  if ((ExtType2 != ISD::ZEXTLOAD) &&
+      ((ExtType2 != ISD::SEXTLOAD) && (ExtType1 != ISD::SEXTLOAD)))
+    return SDValue();
+
+  // These truncation and zero-extension nodes are not necessary, remove them.
+  SDValue NewAnd = DAG.getNode(ISD::AND, SDLoc(N), AndNode->getValueType(0),
+                               TruncInputValue1, TruncInputValue2);
+  SDValue NewSetCC =
+      DAG.getSetCC(SDLoc(N), N->getValueType(0), NewAnd, TruncInputValue2, CC);
+  DAG.ReplaceAllUsesWith(N, NewSetCC.getNode());
+  return SDValue(N, 0);
+}
+
 // Combine (loongarch_bitrev_w (loongarch_revb_2w X)) to loongarch_bitrev_4b.
 static SDValue performBITREV_WCombine(SDNode *N, SelectionDAG &DAG,
                                       TargetLowering::DAGCombinerInfo &DCI,
@@ -3155,6 +3319,8 @@ SDValue LoongArchTargetLowering::PerformDAGCombine(SDNode *N,
     return performANDCombine(N, DAG, DCI, Subtarget);
   case ISD::OR:
     return performORCombine(N, DAG, DCI, Subtarget);
+  case ISD::SETCC:
+    return performSETCCCombine(N, DAG, DCI, Subtarget);
   case ISD::SRL:
     return performSRLCombine(N, DAG, DCI, Subtarget);
   case LoongArchISD::BITREV_W:
diff --git a/llvm/test/CodeGen/LoongArch/andn-icmp.ll b/llvm/test/CodeGen/LoongArch/andn-icmp.ll
index 447f3ac5c34fd..67d62eca914e9 100644
--- a/llvm/test/CodeGen/LoongArch/andn-icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/andn-icmp.ll
@@ -6,14 +6,12 @@ define i1 @andn_icmp_eq_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_eq_i8:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    andi $a0, $a0, 255
 ; LA32-NEXT:    sltui $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_eq_i8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
-; LA64-NEXT:    andi $a0, $a0, 255
 ; LA64-NEXT:    sltui $a0, $a0, 1
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -25,14 +23,12 @@ define i1 @andn_icmp_eq_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_eq_i16:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    sltui $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_eq_i16:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    sltui $a0, $a0, 1
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -80,14 +76,12 @@ define i1 @andn_icmp_ne_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ne_i8:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    andi $a0, $a0, 255
 ; LA32-NEXT:    sltu $a0, $zero, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ne_i8:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
-; LA64-NEXT:    andi $a0, $a0, 255
 ; LA64-NEXT:    sltu $a0, $zero, $a0
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -99,14 +93,12 @@ define i1 @andn_icmp_ne_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ne_i16:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    bstrpick.w $a0, $a0, 15, 0
 ; LA32-NEXT:    sltu $a0, $zero, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ne_i16:
 ; LA64:       # %bb.0:
 ; LA64-NEXT:    andn $a0, $a1, $a0
-; LA64-NEXT:    bstrpick.d $a0, $a0, 15, 0
 ; LA64-NEXT:    sltu $a0, $zero, $a0
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -153,15 +145,13 @@ define i1 @andn_icmp_ne_i64(i64 %a, i64 %b) nounwind {
 define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ult_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ult_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -172,15 +162,13 @@ define i1 @andn_icmp_ult_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ult_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ult_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -191,16 +179,14 @@ define i1 @andn_icmp_ult_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_uge_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_uge_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -212,16 +198,14 @@ define i1 @andn_icmp_uge_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_uge_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a0, $a1
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_uge_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a0, $a1
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -233,15 +217,13 @@ define i1 @andn_icmp_uge_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ugt_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ugt_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %and = and i8 %a, %b
@@ -252,15 +234,13 @@ define i1 @andn_icmp_ugt_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ugt_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ugt_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    ret
   %and = and i16 %a, %b
@@ -271,16 +251,14 @@ define i1 @andn_icmp_ugt_i16(i16 signext %a, i16 signext %b) nounwind {
 define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ule_i8:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    andi $a1, $a1, 255
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ule_i8:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    andi $a1, $a1, 255
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -292,16 +270,14 @@ define i1 @andn_icmp_ule_i8(i8 signext %a, i8 signext %b) nounwind {
 define i1 @andn_icmp_ule_i16(i16 signext %a, i16 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_ule_i16:
 ; LA32:       # %bb.0:
-; LA32-NEXT:    bstrpick.w $a1, $a1, 15, 0
-; LA32-NEXT:    and $a0, $a1, $a0
+; LA32-NEXT:    and $a0, $a0, $a1
 ; LA32-NEXT:    sltu $a0, $a1, $a0
 ; LA32-NEXT:    xori $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: andn_icmp_ule_i16:
 ; LA64:       # %bb.0:
-; LA64-NEXT:    bstrpick.d $a1, $a1, 15, 0
-; LA64-NEXT:    and $a0, $a1, $a0
+; LA64-NEXT:    and $a0, $a0, $a1
 ; LA64-NEXT:    sltu $a0, $a1, $a0
 ; LA64-NEXT:    xori $a0, $a0, 1
 ; LA64-NEXT:    ret
@@ -606,7 +582,6 @@ define i1 @andn_icmp_eq_i8_i32(i8 signext %a, i8 signext %b) nounwind {
 ; LA32-LABEL: andn_icmp_eq_i8_i32:
 ; LA32:       # %bb.0:
 ; LA32-NEXT:    andn $a0, $a1, $a0
-; LA32-NEXT:    andi $a0, $a0, 255
 ; LA32-NEXT:    sltui $a0, $a0, 1
 ; LA32-NEXT:    ret
 ;

@heiher heiher merged commit 4c73b1a into llvm:main Jul 22, 2024
@heiher heiher deleted the spurious-mask-removal branch July 22, 2024 07:10
yuxuanchen1997 pushed a commit that referenced this pull request Jul 25, 2024
… on 16 and 8 bit values" (#99798)

Summary: recommit of #99272

Test Plan: 

Reviewers: 

Subscribers: 

Tasks: 

Tags: 


Differential Revision: https://phabricator.intern.facebook.com/D60251358
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants