-
Notifications
You must be signed in to change notification settings - Fork 14.8k
[InstCombine] Fold (x == A) || (x & -Pow2) == A + 1
into range check
#153842
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
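@llvm/pr-subscribers-llvm-transforms Author: Pedro Lobo (pedroclobo) Changes: Extends `foldAndOrOfICmpsUsingRanges` to recognize and fold idioms of the form `(x == A) || (x & -Pow2) == A + 1`, where A + 1 is aligned to the mask and A + 1 >= |mask|. These patterns represent a special case of contiguous range checks and can be optimized into an addition and comparison.
define i1 @src(i32 %x) {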
%icmp1 = icmp eq i32 %x, 127
%and = and i32 %x, -32
%icmp2 = icmp eq i32 %and, 128
%ret = or i1 %icmp1, %icmp2
ret i1 %ret
}
define i1 @tgt(i32 %x) {
%1 = add i32 %x, -127
%2 = icmp ult i32 %1, 33
ret i1 %2
} Alive2 Proof: https://alive2.llvm.org/ce/z/gwELqs Closes #152948. Full diff: https://github.com/llvm/llvm-project/pull/153842.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index d7971e8e3caea..c77bfe93190f5 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1338,16 +1338,44 @@ Value *InstCombinerImpl::foldAndOrOfICmpsUsingRanges(ICmpInst *ICmp1,
V2 = X;
}
+ // Look through and with a negative power of 2 mask on V1 or V2. This
+ // detects idioms of the form `(x == A) || ((x & Mask) == A + 1)` where A + 1
+ // is aligned to the mask and A + 1 >= |Mask|. This pattern corresponds to a
+ // contiguous range check, which can be folded into an addition and compare.
+ const APInt *Mask1 = nullptr, *Mask2 = nullptr;
+ bool matchedAnd1 = false, matchedAnd2 = false;
+ if (V1 != V2) {
+ Value *X;
+ if (match(V1, m_OneUse(m_And(m_Value(X), m_NegatedPower2(Mask1)))) &&
+ *C1 - *C2 == 1 && C1->uge(Mask1->abs()) && C1->isPowerOf2() &&
+ Pred1 == ICmpInst::ICMP_EQ) {
+ matchedAnd1 = true;
+ V1 = X;
+ }
+ if (match(V2, m_OneUse(m_And(m_Value(X), m_NegatedPower2(Mask2)))) &&
+ *C2 - *C1 == 1 && C2->uge(Mask2->abs()) && C2->isPowerOf2() &&
+ Pred2 == ICmpInst::ICMP_EQ) {
+ matchedAnd2 = true;
+ V2 = X;
+ }
+ }
+
if (V1 != V2)
return nullptr;
- ConstantRange CR1 = ConstantRange::makeExactICmpRegion(
- IsAnd ? ICmpInst::getInverseCmpPredicate(Pred1) : Pred1, *C1);
+ ConstantRange CR1 =
+ matchedAnd1
+ ? ConstantRange(*C1, *C1 - *Mask1)
+ : ConstantRange::makeExactICmpRegion(
+ IsAnd ? ICmpInst::getInverseCmpPredicate(Pred1) : Pred1, *C1);
if (Offset1)
CR1 = CR1.subtract(*Offset1);
- ConstantRange CR2 = ConstantRange::makeExactICmpRegion(
- IsAnd ? ICmpInst::getInverseCmpPredicate(Pred2) : Pred2, *C2);
+ ConstantRange CR2 =
+ matchedAnd2
+ ? ConstantRange(*C2, *C2 - *Mask2)
+ : ConstantRange::makeExactICmpRegion(
+ IsAnd ? ICmpInst::getInverseCmpPredicate(Pred2) : Pred2, *C2);
if (Offset2)
CR2 = CR2.subtract(*Offset2);
diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
index 42e5020748129..aa82525d0dd1c 100644
--- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll
+++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll
@@ -3491,3 +3491,184 @@ define i1 @and_icmp_eq_with_binary_range_operands(i8 range(i8 0, 2) %x, i8 range
%ret = and i1 %icmp1, %icmp2
ret i1 %ret
}
+
+define i1 @or_icmp_eq_and_pow2_1(i32 %x) {
+; CHECK-LABEL: @or_icmp_eq_and_pow2_1(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -127
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP1]], 33
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %icmp1 = icmp eq i32 %x, 127
+ %and = and i32 %x, -32
+ %icmp2 = icmp eq i32 %and, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @or_icmp_eq_and_pow2_2(i32 %x) {
+; CHECK-LABEL: @or_icmp_eq_and_pow2_2(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -31
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP1]], 33
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %icmp1 = icmp eq i32 %x, 31
+ %and = and i32 %x, -32
+ %icmp2 = icmp eq i32 %and, 32
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @or_icmp_eq_and_pow2_3(i32 %x) {
+; CHECK-LABEL: @or_icmp_eq_and_pow2_3(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -127
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP1]], 65
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %icmp1 = icmp eq i32 %x, 127
+ %and = and i32 %x, -64
+ %icmp2 = icmp eq i32 %and, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @or_icmp_eq_and_pow2_commute(i32 %x) {
+; CHECK-LABEL: @or_icmp_eq_and_pow2_commute(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[X:%.*]], -127
+; CHECK-NEXT: [[TMP4:%.*]] = icmp ult i32 [[TMP1]], 33
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %icmp1 = and i32 %x, -32
+ %and = icmp eq i32 %icmp1, 128
+ %icmp2 = icmp eq i32 %x, 127
+ %ret = or i1 %and, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_or_icmp_eq_and_pow2_multi_use(i32 %x) {
+; CHECK-LABEL: @neg_or_icmp_eq_and_pow2_multi_use(
+; CHECK-NEXT: [[ICMP1:%.*]] = icmp eq i32 [[X:%.*]], 127
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], -32
+; CHECK-NEXT: call void @use32(i32 [[AND]])
+; CHECK-NEXT: [[ICMP2:%.*]] = icmp eq i32 [[AND]], 128
+; CHECK-NEXT: [[RET:%.*]] = or i1 [[ICMP1]], [[ICMP2]]
+; CHECK-NEXT: ret i1 [[RET]]
+;
+ %icmp1 = icmp eq i32 %x, 127
+ %and = and i32 %x, -32
+ call void @use32(i32 %and)
+ %icmp2 = icmp eq i32 %and, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_and_icmp_eq_and_pow2(i32 %x) {
+; CHECK-LABEL: @neg_and_icmp_eq_and_pow2(
+; CHECK-NEXT: ret i1 false
+;
+ %icmp1 = icmp eq i32 %x, 127
+ %and = and i32 %x, -32
+ %icmp2 = icmp eq i32 %and, 128
+ %ret = and i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_or_icmp_eq_and_non_pow2_mask(i32 %x) {
+; CHECK-LABEL: @neg_or_icmp_eq_and_non_pow2_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 127
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], -33
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 128
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %icmp1 = icmp eq i32 %x, 127
+ %and = and i32 %x, -33
+ %icmp2 = icmp eq i32 %and, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_or_icmp_eq_and_non_pow2_icmp(i32 %x) {
+; CHECK-LABEL: @neg_or_icmp_eq_and_non_pow2_icmp(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 127
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], -33
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 128
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %icmp1 = icmp eq i32 %x, 127
+ %and = and i32 %x, -33
+ %icmp2 = icmp eq i32 %and, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_or_icmp_eq_and_const_less_than_mask(i32 %x) {
+; CHECK-LABEL: @neg_or_icmp_eq_and_const_less_than_mask(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 15
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %icmp1 = icmp eq i32 %x, 15
+ %and = and i32 %x, -32
+ %icmp2 = icmp eq i32 %and, 16
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_or_icmp_eq_and_pow2_disjoint(i32 %x) {
+; CHECK-LABEL: @neg_or_icmp_eq_and_pow2_disjoint(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 126
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], -32
+; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i32 [[AND]], 128
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %icmp1 = icmp eq i32 %x, 126
+ %and = and i32 %x, -32
+ %icmp2 = icmp eq i32 %and, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_or_icmp_eq_double_and_pow2(i32 %x) {
+; CHECK-LABEL: @neg_or_icmp_eq_double_and_pow2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -16
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 64
+; CHECK-NEXT: [[AND2:%.*]] = and i32 [[X]], -32
+; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[AND2]], 128
+; CHECK-NEXT: [[TMP5:%.*]] = or i1 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i1 [[TMP5]]
+;
+ %and1 = and i32 %x, -16
+ %icmp1 = icmp eq i32 %and1, 64
+ %and2 = and i32 %x, -32
+ %icmp2 = icmp eq i32 %and2, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_or_icmp_eq_and_pow2_pred_non_eq_1(i32 %x) {
+; CHECK-LABEL: @neg_or_icmp_eq_and_pow2_pred_non_eq_1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 127
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[X]], -32
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ugt i32 [[TMP2]], 128
+; CHECK-NEXT: [[TMP4:%.*]] = or i1 [[TMP1]], [[TMP3]]
+; CHECK-NEXT: ret i1 [[TMP4]]
+;
+ %icmp1 = icmp eq i32 %x, 127
+ %and = and i32 %x, -32
+ %icmp2 = icmp ugt i32 %and, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
+
+define i1 @neg_or_icmp_eq_and_pow2_pred_non_eq_2(i32 %x) {
+; CHECK-LABEL: @neg_or_icmp_eq_and_pow2_pred_non_eq_2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 [[X:%.*]], 127
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %icmp1 = icmp ne i32 %x, 127
+ %and = and i32 %x, -32
+ %icmp2 = icmp eq i32 %and, 128
+ %ret = or i1 %icmp1, %icmp2
+ ret i1 %ret
+}
|
Not sure if some of the negative tests are needed, as they trigger other transforms. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Miscompilation reproducer: https://alive2.llvm.org/ce/z/vxo2cJ
define i1 @src(i32 %x) {
%icmp1 = icmp sgt i32 %x, 127
%and = and i32 %x, -32
%icmp2 = icmp eq i32 %and, 128
%1 = select i1 %icmp1, i1 %icmp2, i1 false
ret i1 %1
}
define i1 @tgt(i32 %x) {
%1 = icmp sgt i32 %x, 159
ret i1 %1
}
ERROR: Value mismatch
Example:
i32 %x = #x00000080 (128)
Source:
i1 %icmp1 = #x1 (1)
i32 %and = #x00000080 (128)
i1 %icmp2 = #x1 (1)
i1 %#1 = #x1 (1)
Target:
i1 %#1 = #x0 (0)
Source value: #x1 (1)
Target value: #x0 (0)
Extends `foldAndOrOfICmpsUsingRanges` to recognize and fold idioms of the form `(x == A) || (x & -Pow2) == A + 1`, where A + 1 is aligned to the mask and A + 1 >= |mask|. These patterns represent a special case of contiguous range checks and can be optimized into an addition and comparison.
```llvm
define i1 @src(i32 %x) {
  %icmp1 = icmp eq i32 %x, 127
  %and = and i32 %x, -32
  %icmp2 = icmp eq i32 %and, 128
  %ret = or i1 %icmp1, %icmp2
  ret i1 %ret
}
define i1 @tgt(i32 %x) {
  %1 = add i32 %x, -127
  %2 = icmp ult i32 %1, 33
  ret i1 %2
}
```
Alive2 Proof: https://alive2.llvm.org/ce/z/gwELqs
24a5cb4
to
4cf5747
Compare
Thanks. I was inadvertently handling ands. |
// is aligned to the mask and A + 1 >= |Mask|. This pattern corresponds to a | ||
// contiguous range check, which can be folded into an addition and compare. | ||
const APInt *Mask1 = nullptr, *Mask2 = nullptr; | ||
bool matchedAnd1 = false, matchedAnd2 = false; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
bool matchedAnd1 = false, matchedAnd2 = false; | |
bool MatchedAnd1 = false, MatchedAnd2 = false; |
// contiguous range check, which can be folded into an addition and compare. | ||
const APInt *Mask1 = nullptr, *Mask2 = nullptr; | ||
bool matchedAnd1 = false, matchedAnd2 = false; | ||
if (V1 != V2 && !IsAnd) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We can also fold (x != A) && (x & -Pow2) != A + 1
into a range check.
Extends `foldAndOrOfICmpsUsingRanges` to recognize and fold idioms of the form `(x == A) || (x & -Pow2) == A + 1`, where A + 1 is aligned to the mask and A + 1 >= |mask|.
These patterns represent a special case of contiguous range checks and can be optimized into an addition and comparison.
Alive2 Proof: https://alive2.llvm.org/ce/z/gwELqs
Closes #152948.