Skip to content

Conversation

andjo403
Copy link
Contributor

Disable fold when it will result in more instructions.

@llvmbot llvmbot added llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms labels Feb 16, 2025
@llvmbot
Copy link
Member

llvmbot commented Feb 16, 2025

@llvm/pr-subscribers-llvm-transforms

Author: Andreas Jonson (andjo403)

Changes

Disable fold when it will result in more instructions.


Full diff: https://github.com/llvm/llvm-project/pull/127398.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp (+9-5)
  • (modified) llvm/test/Transforms/InstCombine/select-icmp-and.ll (+7-12)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 2e14145aef884..3989577ff2667 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -205,11 +205,15 @@ static Value *foldSelectICmpAnd(SelectInst &Sel, ICmpInst *Cmp,
   unsigned ValZeros = ValC.logBase2();
   unsigned AndZeros = AndMask.logBase2();
   bool ShouldNotVal = !TC.isZero();
-
-  // If we would need to create an 'and' + 'shift' + 'xor' to replace a 'select'
-  // + 'icmp', then this transformation would result in more instructions and
-  // potentially interfere with other folding.
-  if (CreateAnd && ShouldNotVal && ValZeros != AndZeros)
+  bool NeedShift = ValZeros != AndZeros;
+  bool NeedZExtTrunc =
+      SelType->getScalarSizeInBits() != V->getType()->getScalarSizeInBits();
+
+  // If we would need to create an 'and' + 'shift' + 'xor' + cast to replace
+  // a 'select' + 'icmp', then this transformation would result in more
+  // instructions and potentially interfere with other folding.
+  if (CreateAnd + ShouldNotVal + NeedShift + NeedZExtTrunc >
+      1 + Cmp->hasOneUse())
     return nullptr;
 
   // Insert the 'and' instruction on the input to the truncate.
diff --git a/llvm/test/Transforms/InstCombine/select-icmp-and.ll b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
index 516a1e8496b43..dbe305a2d336a 100644
--- a/llvm/test/Transforms/InstCombine/select-icmp-and.ll
+++ b/llvm/test/Transforms/InstCombine/select-icmp-and.ll
@@ -391,9 +391,8 @@ define i32 @test15e_extra_use(i32 %X) {
 ;; (a & 128) ? 256 : 0
 define i32 @test15e_zext(i8 %X) {
 ; CHECK-LABEL: @test15e_zext(
-; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X:%.*]], -128
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw i32 [[TMP2]], 1
+; CHECK-NEXT:    [[T2_NOT:%.*]] = icmp sgt i8 [[X:%.*]], -1
+; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T2_NOT]], i32 0, i32 256
 ; CHECK-NEXT:    ret i32 [[T3]]
 ;
   %t1 = and i8 %X, 128
@@ -406,9 +405,7 @@ define i32 @test15e_zext(i8 %X) {
 define i32 @test15e_zext_extra_use(i8 %X) {
 ; CHECK-LABEL: @test15e_zext_extra_use(
 ; CHECK-NEXT:    [[T2:%.*]] = icmp slt i8 [[X:%.*]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[X]], -128
-; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
-; CHECK-NEXT:    [[T3:%.*]] = shl nuw nsw i32 [[TMP2]], 1
+; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T2]], i32 256, i32 0
 ; CHECK-NEXT:    call void @use1(i1 [[T2]])
 ; CHECK-NEXT:    ret i32 [[T3]]
 ;
@@ -438,8 +435,7 @@ define i32 @test15f_extra_use(i32 %X) {
 ; CHECK-LABEL: @test15f_extra_use(
 ; CHECK-NEXT:    [[T1:%.*]] = and i32 [[X:%.*]], 128
 ; CHECK-NEXT:    [[T2:%.*]] = icmp ne i32 [[T1]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = shl nuw nsw i32 [[T1]], 1
-; CHECK-NEXT:    [[T3:%.*]] = xor i32 [[TMP1]], 256
+; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T2]], i32 0, i32 256
 ; CHECK-NEXT:    call void @use1(i1 [[T2]])
 ; CHECK-NEXT:    ret i32 [[T3]]
 ;
@@ -453,10 +449,9 @@ define i32 @test15f_extra_use(i32 %X) {
 ;; (a & 128) ? 0 : 256
 define i16 @test15f_trunc(i32 %X) {
 ; CHECK-LABEL: @test15f_trunc(
-; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[X:%.*]] to i16
-; CHECK-NEXT:    [[TMP2:%.*]] = shl i16 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 256
-; CHECK-NEXT:    [[T3:%.*]] = xor i16 [[TMP3]], 256
+; CHECK-NEXT:    [[T1:%.*]] = and i32 [[X:%.*]], 128
+; CHECK-NEXT:    [[T2_NOT:%.*]] = icmp eq i32 [[T1]], 0
+; CHECK-NEXT:    [[T3:%.*]] = select i1 [[T2_NOT]], i16 256, i16 0
 ; CHECK-NEXT:    ret i16 [[T3]]
 ;
   %t1 = and i32 %X, 128

Copy link
Member

@dtcxzyw dtcxzyw left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LGTM. Thank you!

Can you pre-commit this test? https://alive2.llvm.org/ce/z/BWBK9_

define i32 @src(i32 %x) {
  %cmp = icmp sgt i32 %x, -1
  %ext = zext i1 %cmp to i32
  call void @use(i32 %ext)
  %and = and i32 %x, 2147483647
  %masksel = select i1 %cmp, i32 -2147483648, i32 0
  %res = or disjoint i32 %masksel, %and
  ret i32 %res
}

declare void @use(i32)

A possible solution: https://godbolt.org/z/TfdMKKb4E
convert

%and = and i32 %x, 2147483647
%masksel = select i1 %cmp, i32 -2147483648, i32 0
%res = or disjoint i32 %masksel, %and

into

  %and = and i32 %x, 2147483647 ; bset
  %or = or i32 %x, -2147483648 ; bclr
  %masksel = select i1 %.not9, i32 %or, i32 %and ; cmov

@andjo403 andjo403 force-pushed the avoidExtraInstructionsSelectIcmpAnd branch from 3741ad8 to 7495db0 Compare February 18, 2025 20:00
@andjo403 andjo403 force-pushed the avoidExtraInstructionsSelectIcmpAnd branch from 7495db0 to fe78e0f Compare February 18, 2025 20:02
@andjo403
Copy link
Contributor Author

andjo403 commented Feb 18, 2025

It is actually canonicalized the other way around see:

/// Canonicalize a set or clear of a masked set of constant bits to
/// select-of-constants form.
static Instruction *foldSetClearBits(SelectInst &Sel,
InstCombiner::BuilderTy &Builder) {
Value *Cond = Sel.getCondition();
Value *T = Sel.getTrueValue();
Value *F = Sel.getFalseValue();
Type *Ty = Sel.getType();
Value *X;
const APInt *NotC, *C;
// Cond ? (X & ~C) : (X | C) --> (X & ~C) | (Cond ? 0 : C)
if (match(T, m_And(m_Value(X), m_APInt(NotC))) &&
match(F, m_OneUse(m_Or(m_Specific(X), m_APInt(C)))) && *NotC == ~(*C)) {
Constant *Zero = ConstantInt::getNullValue(Ty);
Constant *OrC = ConstantInt::get(Ty, *C);
Value *NewSel = Builder.CreateSelect(Cond, Zero, OrC, "masksel", &Sel);
return BinaryOperator::CreateOr(T, NewSel);
}
// Cond ? (X | C) : (X & ~C) --> (X & ~C) | (Cond ? C : 0)
if (match(F, m_And(m_Value(X), m_APInt(NotC))) &&
match(T, m_OneUse(m_Or(m_Specific(X), m_APInt(C)))) && *NotC == ~(*C)) {
Constant *Zero = ConstantInt::getNullValue(Ty);
Constant *OrC = ConstantInt::get(Ty, *C);
Value *NewSel = Builder.CreateSelect(Cond, OrC, Zero, "masksel", &Sel);
return BinaryOperator::CreateOr(F, NewSel);
}
return nullptr;
}

possible to add special case there for:
(icmp eq (X & C), 0) ? (X & ~C) : (X | C) -> X
(icmp eq (X & C), 0) ? (X | C) : (X & ~C) -> X ^ C
IFF C is a power of 2.

https://alive2.llvm.org/ce/z/dD3XTi

but is it common enough that special handling is wanted?

have added a test for the regression

wldfngrs pushed a commit to wldfngrs/llvm-project that referenced this pull request Feb 19, 2025
@dtcxzyw
Copy link
Member

dtcxzyw commented Feb 19, 2025

It is actually canonicalized the other way around see:

/// Canonicalize a set or clear of a masked set of constant bits to
/// select-of-constants form.
static Instruction *foldSetClearBits(SelectInst &Sel,
InstCombiner::BuilderTy &Builder) {
Value *Cond = Sel.getCondition();
Value *T = Sel.getTrueValue();
Value *F = Sel.getFalseValue();
Type *Ty = Sel.getType();
Value *X;
const APInt *NotC, *C;
// Cond ? (X & ~C) : (X | C) --> (X & ~C) | (Cond ? 0 : C)
if (match(T, m_And(m_Value(X), m_APInt(NotC))) &&
match(F, m_OneUse(m_Or(m_Specific(X), m_APInt(C)))) && *NotC == ~(*C)) {
Constant *Zero = ConstantInt::getNullValue(Ty);
Constant *OrC = ConstantInt::get(Ty, *C);
Value *NewSel = Builder.CreateSelect(Cond, Zero, OrC, "masksel", &Sel);
return BinaryOperator::CreateOr(T, NewSel);
}
// Cond ? (X | C) : (X & ~C) --> (X & ~C) | (Cond ? C : 0)
if (match(F, m_And(m_Value(X), m_APInt(NotC))) &&
match(T, m_OneUse(m_Or(m_Specific(X), m_APInt(C)))) && *NotC == ~(*C)) {
Constant *Zero = ConstantInt::getNullValue(Ty);
Constant *OrC = ConstantInt::get(Ty, *C);
Value *NewSel = Builder.CreateSelect(Cond, OrC, Zero, "masksel", &Sel);
return BinaryOperator::CreateOr(F, NewSel);
}
return nullptr;
}

possible to add special case there for: (icmp eq (X & C), 0) ? (X & ~C) : (X | C) -> X (icmp eq (X & C), 0) ? (X | C) : (X & ~C) -> X ^ C IFF C is a power of 2.

https://alive2.llvm.org/ce/z/dD3XTi

but is it common enough that special handling is wanted?

have added a test for the regression

Original C code: https://godbolt.org/z/oMMao8dqa
Looks like this pattern is rare: https://github.com/dtcxzyw/llvm-tools/blob/main/pr127398.cpp

@andjo403 andjo403 merged commit 8fc03e4 into llvm:main Feb 19, 2025
8 checks passed
@andjo403 andjo403 deleted the avoidExtraInstructionsSelectIcmpAnd branch February 19, 2025 17:10
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
llvm:instcombine Covers the InstCombine, InstSimplify and AggressiveInstCombine passes llvm:transforms
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants