Skip to content

Conversation

@goldsteinn
Copy link
Contributor

  • [ValueTracking] Add tests for cmpExcludesZero for non-splat vecs; NFC
  • [ValueTracking] Add support for non-splat vecs in cmpExcludesZero

@goldsteinn goldsteinn requested a review from nikic as a code owner October 5, 2023 16:23
@goldsteinn goldsteinn changed the title from "goldsteinn/cmp excludes zero vecs" to "[ValueTracking] Add support for non-splat vecs in cmpExcludesZero" Oct 5, 2023
@goldsteinn goldsteinn requested a review from dtcxzyw October 5, 2023 16:23
@llvmbot llvmbot added the llvm:analysis Includes value tracking, cost tables and constant folding label Oct 5, 2023
@llvmbot
Copy link
Member

llvmbot commented Oct 5, 2023

@llvm/pr-subscribers-llvm-analysis

Changes
  • [ValueTracking] Add tests for cmpExcludesZero for non-splat vecs; NFC
  • [ValueTracking] Add support for non-splat vecs in cmpExcludesZero

Full diff: https://github.com/llvm/llvm-project/pull/68331.diff

2 Files Affected:

  • (modified) llvm/lib/Analysis/ValueTracking.cpp (+26-5)
  • (modified) llvm/test/Analysis/ValueTracking/known-non-zero.ll (+58)
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index 0736ef65b306519..b9a6729ab3f3076 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -562,7 +562,7 @@ bool llvm::isValidAssumeForContext(const Instruction *Inv,
 // example Pred=EQ, RHS=isKnownNonZero. cmpExcludesZero is called in loops
 // so the extra compile time may not be worth it, but possibly a second API
 // should be created for use outside of loops.
-static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
+static bool cmpExcludesZero(CmpInst::Predicate Pred, Value *RHS) {
   // v u> y implies v != 0.
   if (Pred == ICmpInst::ICMP_UGT)
     return true;
@@ -573,11 +573,31 @@ static bool cmpExcludesZero(CmpInst::Predicate Pred, const Value *RHS) {
 
   // All other predicates - rely on generic ConstantRange handling.
   const APInt *C;
-  if (!match(RHS, m_APInt(C)))
-    return false;
+  if (match(RHS, m_APInt(C))) {
+    ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
+    return !TrueValues.contains(APInt::getZero(C->getBitWidth()));
+  }
 
-  ConstantRange TrueValues = ConstantRange::makeExactICmpRegion(Pred, *C);
-  return !TrueValues.contains(APInt::getZero(C->getBitWidth()));
+  auto *FVTy = dyn_cast<FixedVectorType>(RHS->getType());
+  if (FVTy == nullptr)
+    return false;
+  Constant *VC = nullptr;
+  if (!match(RHS, m_ImmConstant(VC)))
+    return false;
+  for (unsigned EleIdx = 0, NEle = FVTy->getNumElements(); EleIdx < NEle;
+       ++EleIdx) {
+    Constant *EleC = VC->getAggregateElement(EleIdx);
+    if (EleC == nullptr)
+      return false;
+    ConstantInt *EleCI = dyn_cast<ConstantInt>(EleC);
+    if (EleCI == nullptr)
+      return false;
+    ConstantRange TrueValues =
+        ConstantRange::makeExactICmpRegion(Pred, EleCI->getValue());
+    if (TrueValues.contains(APInt::getZero(EleCI->getBitWidth())))
+      return false;
+  }
+  return true;
 }
 
 static bool isKnownNonZeroFromAssume(const Value *V, const SimplifyQuery &Q) {
@@ -8809,6 +8829,7 @@ static void setLimitsForSelectPattern(const SelectInst &SI, APInt &Lower,
     default:
       break;
   }
+
 }
 
 static void setLimitForFPToI(const Instruction *I, APInt &Lower, APInt &Upper) {
diff --git a/llvm/test/Analysis/ValueTracking/known-non-zero.ll b/llvm/test/Analysis/ValueTracking/known-non-zero.ll
index 6dce6e528165ea6..dbec47ea0ae261a 100644
--- a/llvm/test/Analysis/ValueTracking/known-non-zero.ll
+++ b/llvm/test/Analysis/ValueTracking/known-non-zero.ll
@@ -1160,3 +1160,61 @@ define i1 @sdiv_known_non_zero_fail(i8 %x, i8 %y) {
   %nz = icmp ne i8 %xy, 0
   ret i1 %nz
 }
+
+define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec(
+; CHECK-NEXT:    ret <2 x i1> zeroinitializer
+;
+  %c = icmp sge <2 x i8> %a, <i8 1, i8 4>
+  %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> <i8 4, i8 5>
+  %and = or <2 x i8> %s, %b
+  %r = icmp eq <2 x i8> %and, zeroinitializer
+  ret <2 x i1> %r
+}
+
+define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec_wundef(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec_wundef(
+; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i8> [[A:%.*]], <i8 1, i8 undef>
+; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[A]], <2 x i8> <i8 4, i8 5>
+; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[S]], [[B:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[AND]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %c = icmp sge <2 x i8> %a, <i8 1, i8 undef>
+  %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> <i8 4, i8 5>
+  %and = or <2 x i8> %s, %b
+  %r = icmp eq <2 x i8> %and, zeroinitializer
+  ret <2 x i1> %r
+}
+
+define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec_wpoison(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec_wpoison(
+; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i8> [[A:%.*]], <i8 1, i8 poison>
+; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[A]], <2 x i8> <i8 4, i8 5>
+; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[S]], [[B:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[AND]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %c = icmp sge <2 x i8> %a, <i8 1, i8 poison>
+  %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> <i8 4, i8 5>
+  %and = or <2 x i8> %s, %b
+  %r = icmp eq <2 x i8> %and, zeroinitializer
+  ret <2 x i1> %r
+}
+
+
+define <2 x i1> @cmp_excludes_zero_with_nonsplat_vec_fail(<2 x i8> %a, <2 x i8> %b) {
+; CHECK-LABEL: @cmp_excludes_zero_with_nonsplat_vec_fail(
+; CHECK-NEXT:    [[C:%.*]] = icmp sge <2 x i8> [[A:%.*]], <i8 0, i8 4>
+; CHECK-NEXT:    [[S:%.*]] = select <2 x i1> [[C]], <2 x i8> [[A]], <2 x i8> <i8 4, i8 5>
+; CHECK-NEXT:    [[AND:%.*]] = or <2 x i8> [[S]], [[B:%.*]]
+; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i8> [[AND]], zeroinitializer
+; CHECK-NEXT:    ret <2 x i1> [[R]]
+;
+  %c = icmp sge <2 x i8> %a, <i8 0, i8 4>
+  %s = select <2 x i1> %c, <2 x i8> %a, <2 x i8> <i8 4, i8 5>
+  %and = or <2 x i8> %s, %b
+  %r = icmp eq <2 x i8> %and, zeroinitializer
+  ret <2 x i1> %r
+}
+

@github-actions
Copy link

github-actions bot commented Oct 5, 2023

✅ With the latest revision this PR passed the C/C++ code formatter.

@goldsteinn goldsteinn force-pushed the goldsteinn/cmp-excludes-zero-vecs branch from ec954c5 to ab67777 Compare October 6, 2023 02:17
@goldsteinn goldsteinn force-pushed the goldsteinn/cmp-excludes-zero-vecs branch from ab67777 to 3011996 Compare October 6, 2023 16:57
@goldsteinn goldsteinn force-pushed the goldsteinn/cmp-excludes-zero-vecs branch from 3011996 to bf56cd2 Compare October 6, 2023 17:24
Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM.

@goldsteinn
Copy link
Contributor Author

Pushed.

@goldsteinn goldsteinn closed this Oct 12, 2023
dtcxzyw added a commit that referenced this pull request Nov 15, 2023
#72365)

Related patch: #68331
This missed optimization was discovered with the help of
AliveToolkit/alive2#962.
zahiraam pushed a commit to zahiraam/llvm-project that referenced this pull request Nov 20, 2023
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

llvm:analysis Includes value tracking, cost tables and constant folding

Projects

None yet

Development

Successfully merging this pull request may close these issues.

4 participants