Skip to content

Conversation

dtcxzyw
Copy link
Member

@dtcxzyw dtcxzyw commented Mar 5, 2024

This patch recognizes never-overflow assumptions generated by rustc to improve the codegen.
Please refer to rust-lang/hashbrown#509 for more details.

Closes rust-lang/hashbrown#509
Closes #80637

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Mar 5, 2024
@dtcxzyw
Copy link
Member Author

dtcxzyw commented Mar 12, 2024

Closed in favor of rust-lang/rust#122282.

@dtcxzyw dtcxzyw closed this Mar 12, 2024
@dianqk
Copy link
Member

dianqk commented Mar 12, 2024

Hmm, this is the first step, and we still need subsequent PRs to complete this simplification.
(In fact, there's a problem here that cannot be solved for now. We cannot do this in std or core library.

@dtcxzyw dtcxzyw reopened this Mar 12, 2024
@dtcxzyw
Copy link
Member Author

dtcxzyw commented Apr 11, 2024

@dianqk Should I close this PR since rust-lang/rust#122975 landed?

@dianqk
Copy link
Member

dianqk commented Apr 11, 2024

@dianqk Should I close this PR since rust-lang/rust#122975 landed?

I believe it will take at least two more PRs to resolve this issue. The first one is rust-lang/rust#123256 (comment).

Given the differences in inlining rules between Rust and LLVM, there are likely some edge cases that still cannot be resolved.

@antoniofrighetto
Copy link
Contributor

This looks ready and rust-lang/rust#122975 looks orthogonal to me, should we undraft it?

@dtcxzyw dtcxzyw force-pushed the perf/overflow-assume branch from fc350f0 to 83b42c3 Compare January 5, 2025 11:02
@dtcxzyw dtcxzyw marked this pull request as ready for review January 5, 2025 11:02
@dtcxzyw dtcxzyw requested a review from nikic as a code owner January 5, 2025 11:02
@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Jan 5, 2025
@llvmbot
Copy link
Member

llvmbot commented Jan 5, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Yingwei Zheng (dtcxzyw)

Changes

Related issue: rust-lang/hashbrown#509 #80637


Full diff: https://github.com/llvm/llvm-project/pull/84016.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp (+29)
  • (modified) llvm/test/Transforms/InstCombine/overflow.ll (+93-5)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index fd38738e3be80b..cdb2c11ef046ad 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -839,6 +839,35 @@ InstCombinerImpl::foldIntrinsicWithOverflowCommon(IntrinsicInst *II) {
   if (OptimizeOverflowCheck(WO->getBinaryOp(), WO->isSigned(), WO->getLHS(),
                             WO->getRHS(), *WO, OperationResult, OverflowResult))
     return createOverflowTuple(WO, OperationResult, OverflowResult);
+
+  // See whether we can optimize the overflow check with assumption information.
+  for (User *U : WO->users()) {
+    if (!match(U, m_ExtractValue<1>(m_Value())))
+      continue;
+
+    for (auto &AssumeVH : AC.assumptionsFor(U)) {
+      if (!AssumeVH)
+        continue;
+      CallInst *I = cast<CallInst>(AssumeVH);
+      if (!match(I->getArgOperand(0), m_Not(m_Specific(U))))
+        continue;
+      if (!isValidAssumeForContext(I, II, /*DT=*/nullptr,
+                                   /*AllowEphemerals=*/true))
+        continue;
+      Value *Result =
+          Builder.CreateBinOp(WO->getBinaryOp(), WO->getLHS(), WO->getRHS());
+      Result->takeName(WO);
+      if (auto *Inst = dyn_cast<Instruction>(Result)) {
+        if (WO->isSigned())
+          Inst->setHasNoSignedWrap();
+        else
+          Inst->setHasNoUnsignedWrap();
+      }
+      return createOverflowTuple(WO, Result,
+                                 ConstantInt::getFalse(U->getType()));
+    }
+  }
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/overflow.ll b/llvm/test/Transforms/InstCombine/overflow.ll
index a8969a5ed02c36..22e1631f78ee91 100644
--- a/llvm/test/Transforms/InstCombine/overflow.ll
+++ b/llvm/test/Transforms/InstCombine/overflow.ll
@@ -11,7 +11,7 @@ define i32 @test1(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    [[TMP0:%.*]] = extractvalue { i32, i1 } [[SADD]], 1
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR2:[0-9]+]]
+; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR3:[0-9]+]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[SADD_RESULT:%.*]] = extractvalue { i32, i1 } [[SADD]], 0
@@ -49,7 +49,7 @@ define i32 @test2(i32 %a, i32 %b, ptr %P) nounwind ssp {
 ; CHECK-NEXT:    [[TMP0:%.*]] = icmp ugt i64 [[ADD_OFF]], 4294967295
 ; CHECK-NEXT:    br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR2]]
+; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR3]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[CONV9:%.*]] = trunc i64 [[ADD]] to i32
@@ -86,7 +86,7 @@ define i64 @test3(i32 %a, i32 %b) nounwind ssp {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], -4294967296
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR2]]
+; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR3]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    ret i64 [[ADD]]
@@ -116,7 +116,7 @@ define zeroext i8 @test4(i8 signext %a, i8 signext %b) nounwind ssp {
 ; CHECK-NEXT:    [[CMP:%.*]] = extractvalue { i8, i1 } [[SADD]], 1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR2]]
+; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR3]]
 ; CHECK-NEXT:    unreachable
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[SADD_RESULT:%.*]] = extractvalue { i8, i1 } [[SADD]], 0
@@ -150,7 +150,7 @@ define i32 @test8(i64 %a, i64 %b) nounwind ssp {
 ; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], -4294967296
 ; CHECK-NEXT:    br i1 [[TMP1]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
-; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR2]]
+; CHECK-NEXT:    tail call void @throwAnExceptionOrWhatever() #[[ATTR3]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[CONV9:%.*]] = trunc i64 [[ADD]] to i32
@@ -171,3 +171,91 @@ if.end:
   ret i32 %conv9
 }
 
+define i32 @uadd_no_overflow(i32 %a, i32 %b) {
+; CHECK-LABEL: @uadd_no_overflow(
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %val = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %ov = extractvalue { i32, i1 } %val, 1
+  %nowrap = xor i1 %ov, true
+  tail call void @llvm.assume(i1 %nowrap)
+  %res = extractvalue { i32, i1 } %val, 0
+  ret i32 %res
+}
+
+define i32 @smul_no_overflow(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_no_overflow(
+; CHECK-NEXT:    [[TMP1:%.*]] = mul nsw i32 [[A:%.*]], [[B:%.*]]
+; CHECK-NEXT:    ret i32 [[TMP1]]
+;
+  %val = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+  %ov = extractvalue { i32, i1 } %val, 1
+  %nowrap = xor i1 %ov, true
+  tail call void @llvm.assume(i1 %nowrap)
+  %res = extractvalue { i32, i1 } %val, 0
+  ret i32 %res
+}
+
+define i32 @smul_overflow(i32 %a, i32 %b) {
+; CHECK-LABEL: @smul_overflow(
+; CHECK-NEXT:    [[VAL:%.*]] = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i32, i1 } [[VAL]], 1
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[OV]])
+; CHECK-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %val = tail call { i32, i1 } @llvm.smul.with.overflow.i32(i32 %a, i32 %b)
+  %ov = extractvalue { i32, i1 } %val, 1
+  tail call void @llvm.assume(i1 %ov)
+  %res = extractvalue { i32, i1 } %val, 0
+  ret i32 %res
+}
+
+define i32 @uadd_no_overflow_invalid1(i32 %a, i32 %b, i1 %cond) {
+; CHECK-LABEL: @uadd_no_overflow_invalid1(
+; CHECK-NEXT:    [[VAL:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
+; CHECK-NEXT:    call void @use(i32 [[RES]])
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i32, i1 } [[VAL]], 1
+; CHECK-NEXT:    [[NOWRAP:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[NOWRAP]])
+; CHECK-NEXT:    ret i32 [[RES]]
+; CHECK:       if.else:
+; CHECK-NEXT:    ret i32 0
+;
+  %val = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %res = extractvalue { i32, i1 } %val, 0
+  call void @use(i32 %res)
+  br i1 %cond, label %if.then, label %if.else
+if.then:
+  %ov = extractvalue { i32, i1 } %val, 1
+  %nowrap = xor i1 %ov, true
+  tail call void @llvm.assume(i1 %nowrap)
+  ret i32 %res
+if.else:
+  ret i32 0
+}
+
+define i32 @uadd_no_overflow_invalid2(i32 %a, i32 %b, i1 %cond) {
+; CHECK-LABEL: @uadd_no_overflow_invalid2(
+; CHECK-NEXT:    [[VAL:%.*]] = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 [[A:%.*]], i32 [[B:%.*]])
+; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i32, i1 } [[VAL]], 1
+; CHECK-NEXT:    [[NOWRAP:%.*]] = xor i1 [[OV]], true
+; CHECK-NEXT:    call void @use(i32 0)
+; CHECK-NEXT:    tail call void @llvm.assume(i1 [[NOWRAP]])
+; CHECK-NEXT:    [[RES:%.*]] = extractvalue { i32, i1 } [[VAL]], 0
+; CHECK-NEXT:    ret i32 [[RES]]
+;
+  %val = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
+  %ov = extractvalue { i32, i1 } %val, 1
+  %nowrap = xor i1 %ov, true
+  call void @use(i32 0) ; It is not guaranteed to transfer execution to its successors
+  tail call void @llvm.assume(i1 %nowrap)
+  %res = extractvalue { i32, i1 } %val, 0
+  ret i32 %res
+}
+
+declare void @use(i32)

@andjo403
Copy link
Contributor

andjo403 commented Jan 5, 2025

Looks good to me

Copy link
Contributor

@nikic nikic left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM

@dtcxzyw dtcxzyw merged commit 2adcec7 into llvm:main Jan 5, 2025
12 checks passed
@dtcxzyw dtcxzyw deleted the perf/overflow-assume branch January 5, 2025 13:15
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

LLVM failed to use the knowledge from a never-overflow assumption Simplify sadd_overflow+assume to add nsw [InstCombine]
6 participants