From 9449548d4170b879b1d66c21f84ad8ecc773238d Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 6 Oct 2024 16:41:24 +0800 Subject: [PATCH 1/5] [CodeGenPrepare] Add pre-commit tests. NFC. --- llvm/test/CodeGen/AArch64/arm64-popcnt.ll | 72 +++++++++++++++++-- llvm/test/CodeGen/RISCV/rv32zbb.ll | 41 +++++++++++ llvm/test/CodeGen/RISCV/rv64zbb.ll | 84 +++++++++++++++++++++++ llvm/test/CodeGen/X86/ispow2.ll | 31 +++++++++ 4 files changed, 221 insertions(+), 7 deletions(-) diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll index f5ce73a366125..e1d9f40a4ae34 100644 --- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll @@ -15,7 +15,7 @@ define i32 @cnt32_advsimd(i32 %x) nounwind readnone { ; CHECK-NONEON-LABEL: cnt32_advsimd: ; CHECK-NONEON: // %bb.0: ; CHECK-NONEON-NEXT: lsr w9, w0, #1 -; CHECK-NONEON-NEXT: mov w8, #16843009 +; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101 ; CHECK-NONEON-NEXT: and w9, w9, #0x55555555 ; CHECK-NONEON-NEXT: sub w9, w0, w9 ; CHECK-NONEON-NEXT: lsr w10, w9, #2 @@ -50,7 +50,7 @@ define i32 @cnt32_advsimd_2(<2 x i32> %x) { ; CHECK-NONEON-LABEL: cnt32_advsimd_2: ; CHECK-NONEON: // %bb.0: ; CHECK-NONEON-NEXT: lsr w9, w0, #1 -; CHECK-NONEON-NEXT: mov w8, #16843009 +; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101 ; CHECK-NONEON-NEXT: and w9, w9, #0x55555555 ; CHECK-NONEON-NEXT: sub w9, w0, w9 ; CHECK-NONEON-NEXT: lsr w10, w9, #2 @@ -86,7 +86,7 @@ define i64 @cnt64_advsimd(i64 %x) nounwind readnone { ; CHECK-NONEON-LABEL: cnt64_advsimd: ; CHECK-NONEON: // %bb.0: ; CHECK-NONEON-NEXT: lsr x9, x0, #1 -; CHECK-NONEON-NEXT: mov x8, #72340172838076673 +; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555 ; CHECK-NONEON-NEXT: sub x9, x0, x9 ; CHECK-NONEON-NEXT: lsr x10, x9, #2 @@ -114,7 +114,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat { ; CHECK-LABEL: cnt32: ; 
CHECK: // %bb.0: ; CHECK-NEXT: lsr w9, w0, #1 -; CHECK-NEXT: mov w8, #16843009 +; CHECK-NEXT: mov w8, #16843009 // =0x1010101 ; CHECK-NEXT: and w9, w9, #0x55555555 ; CHECK-NEXT: sub w9, w0, w9 ; CHECK-NEXT: lsr w10, w9, #2 @@ -130,7 +130,7 @@ define i32 @cnt32(i32 %x) nounwind readnone noimplicitfloat { ; CHECK-NONEON-LABEL: cnt32: ; CHECK-NONEON: // %bb.0: ; CHECK-NONEON-NEXT: lsr w9, w0, #1 -; CHECK-NONEON-NEXT: mov w8, #16843009 +; CHECK-NONEON-NEXT: mov w8, #16843009 // =0x1010101 ; CHECK-NONEON-NEXT: and w9, w9, #0x55555555 ; CHECK-NONEON-NEXT: sub w9, w0, w9 ; CHECK-NONEON-NEXT: lsr w10, w9, #2 @@ -155,7 +155,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat { ; CHECK-LABEL: cnt64: ; CHECK: // %bb.0: ; CHECK-NEXT: lsr x9, x0, #1 -; CHECK-NEXT: mov x8, #72340172838076673 +; CHECK-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; CHECK-NEXT: and x9, x9, #0x5555555555555555 ; CHECK-NEXT: sub x9, x0, x9 ; CHECK-NEXT: lsr x10, x9, #2 @@ -171,7 +171,7 @@ define i64 @cnt64(i64 %x) nounwind readnone noimplicitfloat { ; CHECK-NONEON-LABEL: cnt64: ; CHECK-NONEON: // %bb.0: ; CHECK-NONEON-NEXT: lsr x9, x0, #1 -; CHECK-NONEON-NEXT: mov x8, #72340172838076673 +; CHECK-NONEON-NEXT: mov x8, #72340172838076673 // =0x101010101010101 ; CHECK-NONEON-NEXT: and x9, x9, #0x5555555555555555 ; CHECK-NONEON-NEXT: sub x9, x0, x9 ; CHECK-NONEON-NEXT: lsr x10, x9, #2 @@ -278,5 +278,63 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone { ret i1 %cmp } +define i1 @ctpop32_eq_one_nonzero(i32 %x) { +; CHECK-LABEL: ctpop32_eq_one_nonzero: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub w8, w0, #1 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: ret +; +; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero: +; CHECK-NONEON: // %bb.0: // %entry +; CHECK-NONEON-NEXT: sub w8, w0, #1 +; CHECK-NONEON-NEXT: eor w9, w0, w8 +; CHECK-NONEON-NEXT: cmp w9, w8 +; CHECK-NONEON-NEXT: cset w0, hi +; CHECK-NONEON-NEXT: ret +; +; 
CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero: +; CHECK-CSSC: // %bb.0: // %entry +; CHECK-CSSC-NEXT: cnt w8, w0 +; CHECK-CSSC-NEXT: cmp w8, #1 +; CHECK-CSSC-NEXT: cset w0, eq +; CHECK-CSSC-NEXT: ret +entry: + %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp eq i32 %popcnt, 1 + ret i1 %cmp +} + +define i1 @ctpop32_ne_one_nonzero(i32 %x) { +; CHECK-LABEL: ctpop32_ne_one_nonzero: +; CHECK: // %bb.0: // %entry +; CHECK-NEXT: sub w8, w0, #1 +; CHECK-NEXT: eor w9, w0, w8 +; CHECK-NEXT: cmp w9, w8 +; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: ret +; +; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero: +; CHECK-NONEON: // %bb.0: // %entry +; CHECK-NONEON-NEXT: sub w8, w0, #1 +; CHECK-NONEON-NEXT: eor w9, w0, w8 +; CHECK-NONEON-NEXT: cmp w9, w8 +; CHECK-NONEON-NEXT: cset w0, ls +; CHECK-NONEON-NEXT: ret +; +; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero: +; CHECK-CSSC: // %bb.0: // %entry +; CHECK-CSSC-NEXT: cnt w8, w0 +; CHECK-CSSC-NEXT: cmp w8, #1 +; CHECK-CSSC-NEXT: cset w0, ne +; CHECK-CSSC-NEXT: ret +entry: + %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ne i32 %popcnt, 1 + ret i1 %cmp +} + declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare i64 @llvm.ctpop.i64(i64) nounwind readnone diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index e24b1b41645cd..edbe7d6518a33 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -1441,3 +1441,44 @@ define i32 @srai_slli2(i16 signext %0) { %3 = sext i16 %sext to i32 ret i32 %3 } + +define i1 @ctpop32_eq_one_nonzero(i32 %x) { +; RV32I-LABEL: ctpop32_eq_one_nonzero: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sltu a0, a1, a0 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop32_eq_one_nonzero: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -1 +; RV32ZBB-NEXT: seqz a0, a0 +; RV32ZBB-NEXT: ret +entry: + %popcnt = call 
range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp eq i32 %popcnt, 1 + ret i1 %cmp +} + +define i1 @ctpop32_ne_one_nonzero(i32 %x) { +; RV32I-LABEL: ctpop32_ne_one_nonzero: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a1, a0, -1 +; RV32I-NEXT: xor a0, a0, a1 +; RV32I-NEXT: sltu a0, a1, a0 +; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: ret +; +; RV32ZBB-LABEL: ctpop32_ne_one_nonzero: +; RV32ZBB: # %bb.0: # %entry +; RV32ZBB-NEXT: cpop a0, a0 +; RV32ZBB-NEXT: addi a0, a0, -1 +; RV32ZBB-NEXT: snez a0, a0 +; RV32ZBB-NEXT: ret +entry: + %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ne i32 %popcnt, 1 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 43a499806ab5a..4364db625745c 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -1618,3 +1618,87 @@ entry: %5 = add nsw i32 %4, %0 ret i32 %5 } + +define i1 @ctpop32_eq_one_nonzero(i32 %x) { +; RV64I-LABEL: ctpop32_eq_one_nonzero: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sltu a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop32_eq_one_nonzero: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: seqz a0, a0 +; RV64ZBB-NEXT: ret +entry: + %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp eq i32 %popcnt, 1 + ret i1 %cmp +} + +define i1 @ctpop32_ne_one_nonzero(i32 %x) { +; RV64I-LABEL: ctpop32_ne_one_nonzero: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sltu a0, a1, a0 +; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop32_ne_one_nonzero: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: snez a0, a0 +; RV64ZBB-NEXT: ret +entry: + %popcnt = tail 
call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ne i32 %popcnt, 1 + ret i1 %cmp +} + +define i1 @ctpop64_eq_one_nonzero(i64 %x) { +; RV64I-LABEL: ctpop64_eq_one_nonzero: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sltu a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop64_eq_one_nonzero: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: cpop a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: seqz a0, a0 +; RV64ZBB-NEXT: ret +entry: + %popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x) + %cmp = icmp eq i64 %popcnt, 1 + ret i1 %cmp +} + +define i1 @ctpop32_eq_one_maybezero(i32 %x) { +; RV64I-LABEL: ctpop32_eq_one_maybezero: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addiw a1, a0, -1 +; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: sext.w a0, a0 +; RV64I-NEXT: sltu a0, a1, a0 +; RV64I-NEXT: ret +; +; RV64ZBB-LABEL: ctpop32_eq_one_maybezero: +; RV64ZBB: # %bb.0: # %entry +; RV64ZBB-NEXT: cpopw a0, a0 +; RV64ZBB-NEXT: addi a0, a0, -1 +; RV64ZBB-NEXT: seqz a0, a0 +; RV64ZBB-NEXT: ret +entry: + %popcnt = call range(i32 0, 16) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp eq i32 %popcnt, 1 + ret i1 %cmp +} diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll index 8723432de8b6b..de7828640da7a 100644 --- a/llvm/test/CodeGen/X86/ispow2.ll +++ b/llvm/test/CodeGen/X86/ispow2.ll @@ -220,3 +220,34 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) { %r = icmp ne <4 x i64> %cnt, ret <4 x i1> %r } + + +define i1 @ctpop32_eq_one_nonzero(i32 %x) { +; CHECK-LABEL: ctpop32_eq_one_nonzero: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal -1(%rdi), %eax +; CHECK-NEXT: xorl %eax, %edi +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: seta %al +; CHECK-NEXT: retq +entry: + %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp eq i32 %popcnt, 1 + ret i1 %cmp +} + +define i1 
@ctpop32_ne_one_nonzero(i32 %x) { +; CHECK-LABEL: ctpop32_ne_one_nonzero: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NEXT: leal -1(%rdi), %eax +; CHECK-NEXT: xorl %eax, %edi +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: setbe %al +; CHECK-NEXT: retq +entry: + %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) + %cmp = icmp ne i32 %popcnt, 1 + ret i1 %cmp +} From bf11b6d37833fedc74e381e3d260f931af114f44 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Sun, 6 Oct 2024 16:49:25 +0800 Subject: [PATCH 2/5] [CodeGenPrepare] Convert `ctpop(X) ==/!= 1 -> ctpop(X) u</u> 2/1` --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 29 ++++++++++++++++ llvm/test/CodeGen/AArch64/arm64-popcnt.ll | 28 +++++++-------- llvm/test/CodeGen/RISCV/rv32zbb.ll | 16 ++++----- llvm/test/CodeGen/RISCV/rv64zbb.ll | 27 +++++++-------- llvm/test/CodeGen/X86/ispow2.ll | 42 ++++++++++++++--------- 5 files changed, 86 insertions(+), 56 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 631cc26d6022f..7953c0d09f2a8 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2111,6 +2111,32 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) { return false; } +/// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. +/// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the +/// result cannot be zero.
+static bool adjustIsPower2Test(CmpInst *Cmp) { + ICmpInst::Predicate Pred; + if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One()))) + return false; + if (!ICmpInst::isEquality(Pred)) + return false; + auto *II = cast<IntrinsicInst>(Cmp->getOperand(0)); + if (auto Range = II->getRange()) { + Type *Ty = II->getType(); + unsigned BitWidth = Ty->getScalarSizeInBits(); + if (Range->contains(APInt::getZero(BitWidth))) + return false; + + if (Pred == ICmpInst::ICMP_EQ) { + Cmp->setPredicate(ICmpInst::ICMP_ULT); + Cmp->setOperand(1, ConstantInt::get(Ty, 2)); + } else + Cmp->setPredicate(ICmpInst::ICMP_UGT); + return true; + } + return false; +} + bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { if (sinkCmpExpression(Cmp, *TLI)) return true; @@ -2130,6 +2156,9 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { if (foldFCmpToFPClassTest(Cmp, *TLI, *DL)) return true; + if (adjustIsPower2Test(Cmp)) + return true; + return false; } diff --git a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll index e1d9f40a4ae34..0030e9ce80abb 100644 --- a/llvm/test/CodeGen/AArch64/arm64-popcnt.ll +++ b/llvm/test/CodeGen/AArch64/arm64-popcnt.ll @@ -282,23 +282,21 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) { ; CHECK-LABEL: ctpop32_eq_one_nonzero: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub w8, w0, #1 -; CHECK-NEXT: eor w9, w0, w8 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: cset w0, hi +; CHECK-NEXT: tst w0, w8 +; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret ; ; CHECK-NONEON-LABEL: ctpop32_eq_one_nonzero: ; CHECK-NONEON: // %bb.0: // %entry ; CHECK-NONEON-NEXT: sub w8, w0, #1 -; CHECK-NONEON-NEXT: eor w9, w0, w8 -; CHECK-NONEON-NEXT: cmp w9, w8 -; CHECK-NONEON-NEXT: cset w0, hi +; CHECK-NONEON-NEXT: tst w0, w8 +; CHECK-NONEON-NEXT: cset w0, eq ; CHECK-NONEON-NEXT: ret ; ; CHECK-CSSC-LABEL: ctpop32_eq_one_nonzero: ; CHECK-CSSC: // %bb.0: // %entry -; CHECK-CSSC-NEXT: cnt w8, w0 -; CHECK-CSSC-NEXT: cmp w8, #1 +; CHECK-CSSC-NEXT: sub
w8, w0, #1 +; CHECK-CSSC-NEXT: tst w0, w8 ; CHECK-CSSC-NEXT: cset w0, eq ; CHECK-CSSC-NEXT: ret entry: @@ -311,23 +309,21 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) { ; CHECK-LABEL: ctpop32_ne_one_nonzero: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: sub w8, w0, #1 -; CHECK-NEXT: eor w9, w0, w8 -; CHECK-NEXT: cmp w9, w8 -; CHECK-NEXT: cset w0, ls +; CHECK-NEXT: tst w0, w8 +; CHECK-NEXT: cset w0, ne ; CHECK-NEXT: ret ; ; CHECK-NONEON-LABEL: ctpop32_ne_one_nonzero: ; CHECK-NONEON: // %bb.0: // %entry ; CHECK-NONEON-NEXT: sub w8, w0, #1 -; CHECK-NONEON-NEXT: eor w9, w0, w8 -; CHECK-NONEON-NEXT: cmp w9, w8 -; CHECK-NONEON-NEXT: cset w0, ls +; CHECK-NONEON-NEXT: tst w0, w8 +; CHECK-NONEON-NEXT: cset w0, ne ; CHECK-NONEON-NEXT: ret ; ; CHECK-CSSC-LABEL: ctpop32_ne_one_nonzero: ; CHECK-CSSC: // %bb.0: // %entry -; CHECK-CSSC-NEXT: cnt w8, w0 -; CHECK-CSSC-NEXT: cmp w8, #1 +; CHECK-CSSC-NEXT: sub w8, w0, #1 +; CHECK-CSSC-NEXT: tst w0, w8 ; CHECK-CSSC-NEXT: cset w0, ne ; CHECK-CSSC-NEXT: ret entry: diff --git a/llvm/test/CodeGen/RISCV/rv32zbb.ll b/llvm/test/CodeGen/RISCV/rv32zbb.ll index edbe7d6518a33..4c52047b928f4 100644 --- a/llvm/test/CodeGen/RISCV/rv32zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbb.ll @@ -1446,15 +1446,14 @@ define i1 @ctpop32_eq_one_nonzero(i32 %x) { ; RV32I-LABEL: ctpop32_eq_one_nonzero: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: addi a1, a0, -1 -; RV32I-NEXT: xor a0, a0, a1 -; RV32I-NEXT: sltu a0, a1, a0 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: seqz a0, a0 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop32_eq_one_nonzero: ; RV32ZBB: # %bb.0: # %entry ; RV32ZBB-NEXT: cpop a0, a0 -; RV32ZBB-NEXT: addi a0, a0, -1 -; RV32ZBB-NEXT: seqz a0, a0 +; RV32ZBB-NEXT: sltiu a0, a0, 2 ; RV32ZBB-NEXT: ret entry: %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) @@ -1466,16 +1465,15 @@ define i1 @ctpop32_ne_one_nonzero(i32 %x) { ; RV32I-LABEL: ctpop32_ne_one_nonzero: ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: addi a1, a0, -1 -; RV32I-NEXT: xor a0, a0, a1 -; 
RV32I-NEXT: sltu a0, a1, a0 -; RV32I-NEXT: xori a0, a0, 1 +; RV32I-NEXT: and a0, a0, a1 +; RV32I-NEXT: snez a0, a0 ; RV32I-NEXT: ret ; ; RV32ZBB-LABEL: ctpop32_ne_one_nonzero: ; RV32ZBB: # %bb.0: # %entry ; RV32ZBB-NEXT: cpop a0, a0 -; RV32ZBB-NEXT: addi a0, a0, -1 -; RV32ZBB-NEXT: snez a0, a0 +; RV32ZBB-NEXT: sltiu a0, a0, 2 +; RV32ZBB-NEXT: xori a0, a0, 1 ; RV32ZBB-NEXT: ret entry: %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) diff --git a/llvm/test/CodeGen/RISCV/rv64zbb.ll b/llvm/test/CodeGen/RISCV/rv64zbb.ll index 4364db625745c..1e7814d588e4c 100644 --- a/llvm/test/CodeGen/RISCV/rv64zbb.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbb.ll @@ -1622,17 +1622,16 @@ entry: define i1 @ctpop32_eq_one_nonzero(i32 %x) { ; RV64I-LABEL: ctpop32_eq_one_nonzero: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addiw a1, a0, -1 -; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: sltu a0, a1, a0 +; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop32_eq_one_nonzero: ; RV64ZBB: # %bb.0: # %entry ; RV64ZBB-NEXT: cpopw a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 -; RV64ZBB-NEXT: seqz a0, a0 +; RV64ZBB-NEXT: sltiu a0, a0, 2 ; RV64ZBB-NEXT: ret entry: %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) @@ -1643,18 +1642,17 @@ entry: define i1 @ctpop32_ne_one_nonzero(i32 %x) { ; RV64I-LABEL: ctpop32_ne_one_nonzero: ; RV64I: # %bb.0: # %entry -; RV64I-NEXT: addiw a1, a0, -1 -; RV64I-NEXT: xor a0, a0, a1 +; RV64I-NEXT: addi a1, a0, -1 +; RV64I-NEXT: and a0, a0, a1 ; RV64I-NEXT: sext.w a0, a0 -; RV64I-NEXT: sltu a0, a1, a0 -; RV64I-NEXT: xori a0, a0, 1 +; RV64I-NEXT: snez a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop32_ne_one_nonzero: ; RV64ZBB: # %bb.0: # %entry ; RV64ZBB-NEXT: cpopw a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 -; RV64ZBB-NEXT: snez a0, a0 +; RV64ZBB-NEXT: sltiu a0, a0, 2 +; RV64ZBB-NEXT: xori a0, a0, 1 ; RV64ZBB-NEXT: ret entry: %popcnt = tail call range(i32 1, 
33) i32 @llvm.ctpop.i32(i32 %x) @@ -1666,15 +1664,14 @@ define i1 @ctpop64_eq_one_nonzero(i64 %x) { ; RV64I-LABEL: ctpop64_eq_one_nonzero: ; RV64I: # %bb.0: # %entry ; RV64I-NEXT: addi a1, a0, -1 -; RV64I-NEXT: xor a0, a0, a1 -; RV64I-NEXT: sltu a0, a1, a0 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: seqz a0, a0 ; RV64I-NEXT: ret ; ; RV64ZBB-LABEL: ctpop64_eq_one_nonzero: ; RV64ZBB: # %bb.0: # %entry ; RV64ZBB-NEXT: cpop a0, a0 -; RV64ZBB-NEXT: addi a0, a0, -1 -; RV64ZBB-NEXT: seqz a0, a0 +; RV64ZBB-NEXT: sltiu a0, a0, 2 ; RV64ZBB-NEXT: ret entry: %popcnt = call range(i64 1, 65) i64 @llvm.ctpop.i64(i64 %x) diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll index de7828640da7a..96e33e1dafdc4 100644 --- a/llvm/test/CodeGen/X86/ispow2.ll +++ b/llvm/test/CodeGen/X86/ispow2.ll @@ -223,14 +223,19 @@ define <4 x i1> @neither_pow2_non_zero_4xv64_x_maybe_z(<4 x i64> %x) { define i1 @ctpop32_eq_one_nonzero(i32 %x) { -; CHECK-LABEL: ctpop32_eq_one_nonzero: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: xorl %eax, %edi -; CHECK-NEXT: cmpl %eax, %edi -; CHECK-NEXT: seta %al -; CHECK-NEXT: retq +; CHECK-NOBMI-LABEL: ctpop32_eq_one_nonzero: +; CHECK-NOBMI: # %bb.0: # %entry +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax +; CHECK-NOBMI-NEXT: testl %eax, %edi +; CHECK-NOBMI-NEXT: sete %al +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI2-LABEL: ctpop32_eq_one_nonzero: +; CHECK-BMI2: # %bb.0: # %entry +; CHECK-BMI2-NEXT: blsrl %edi, %eax +; CHECK-BMI2-NEXT: sete %al +; CHECK-BMI2-NEXT: retq entry: %popcnt = call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp eq i32 %popcnt, 1 @@ -238,14 +243,19 @@ entry: } define i1 @ctpop32_ne_one_nonzero(i32 %x) { -; CHECK-LABEL: ctpop32_ne_one_nonzero: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal -1(%rdi), %eax -; CHECK-NEXT: 
xorl %eax, %edi -; CHECK-NEXT: cmpl %eax, %edi -; CHECK-NEXT: setbe %al -; CHECK-NEXT: retq +; CHECK-NOBMI-LABEL: ctpop32_ne_one_nonzero: +; CHECK-NOBMI: # %bb.0: # %entry +; CHECK-NOBMI-NEXT: # kill: def $edi killed $edi def $rdi +; CHECK-NOBMI-NEXT: leal -1(%rdi), %eax +; CHECK-NOBMI-NEXT: testl %eax, %edi +; CHECK-NOBMI-NEXT: setne %al +; CHECK-NOBMI-NEXT: retq +; +; CHECK-BMI2-LABEL: ctpop32_ne_one_nonzero: +; CHECK-BMI2: # %bb.0: # %entry +; CHECK-BMI2-NEXT: blsrl %edi, %eax +; CHECK-BMI2-NEXT: setne %al +; CHECK-BMI2-NEXT: retq entry: %popcnt = tail call range(i32 1, 33) i32 @llvm.ctpop.i32(i32 %x) %cmp = icmp ne i32 %popcnt, 1 From 2e810b5103abc72b059245dea1f033ea0f69792c Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 7 Oct 2024 10:31:11 +0800 Subject: [PATCH 3/5] [CodeGenPrepare] Use `isKnownNonZero` --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 13 ++++--------- llvm/test/CodeGen/X86/ispow2.ll | 4 ++-- llvm/test/CodeGen/X86/known-never-zero.ll | 12 ++++++------ 3 files changed, 12 insertions(+), 17 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 7953c0d09f2a8..0bfc10ca8c63d 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2114,22 +2114,17 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) { /// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. /// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the /// result cannot be zero.
-static bool adjustIsPower2Test(CmpInst *Cmp) { +static bool adjustIsPower2Test(CmpInst *Cmp, const DataLayout &DL) { ICmpInst::Predicate Pred; if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One()))) return false; if (!ICmpInst::isEquality(Pred)) return false; auto *II = cast<IntrinsicInst>(Cmp->getOperand(0)); - if (auto Range = II->getRange()) { - Type *Ty = II->getType(); - unsigned BitWidth = Ty->getScalarSizeInBits(); - if (Range->contains(APInt::getZero(BitWidth))) - return false; - + if (isKnownNonZero(II, DL)) { if (Pred == ICmpInst::ICMP_EQ) { Cmp->setPredicate(ICmpInst::ICMP_ULT); - Cmp->setOperand(1, ConstantInt::get(Ty, 2)); + Cmp->setOperand(1, ConstantInt::get(II->getType(), 2)); } else Cmp->setPredicate(ICmpInst::ICMP_UGT); return true; @@ -2156,7 +2151,7 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { if (foldFCmpToFPClassTest(Cmp, *TLI, *DL)) return true; - if (adjustIsPower2Test(Cmp)) + if (adjustIsPower2Test(Cmp, *DL)) return true; return false; diff --git a/llvm/test/CodeGen/X86/ispow2.ll b/llvm/test/CodeGen/X86/ispow2.ll index 96e33e1dafdc4..649d257b28d76 100644 --- a/llvm/test/CodeGen/X86/ispow2.ll +++ b/llvm/test/CodeGen/X86/ispow2.ll @@ -102,7 +102,7 @@ define <4 x i1> @is_pow2_non_zero_4xv64(<4 x i64> %xin) { ; CHECK-AVX512: # %bb.0: ; CHECK-AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0 -; CHECK-AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 +; CHECK-AVX512-NEXT: vpcmpltq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 ; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; CHECK-AVX512-NEXT: vzeroupper @@ -155,7 +155,7 @@ define <4 x i1> @neither_pow2_non_zero_4xv64(<4 x i64> %xin) { ; CHECK-AVX512: # %bb.0: ; CHECK-AVX512-NEXT: vporq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 ; CHECK-AVX512-NEXT: vpopcntq %ymm0, %ymm0 -; CHECK-AVX512-NEXT: vpcmpneqq
{{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 +; CHECK-AVX512-NEXT: vpcmpgtq {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %k1 ; CHECK-AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0 ; CHECK-AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z} ; CHECK-AVX512-NEXT: vzeroupper diff --git a/llvm/test/CodeGen/X86/known-never-zero.ll b/llvm/test/CodeGen/X86/known-never-zero.ll index ac41a3fe6bb7e..6c0aaeb451e14 100644 --- a/llvm/test/CodeGen/X86/known-never-zero.ll +++ b/llvm/test/CodeGen/X86/known-never-zero.ll @@ -555,9 +555,9 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) { ; X86-NEXT: por %xmm2, %xmm0 ; X86-NEXT: pcmpeqd %xmm1, %xmm1 ; X86-NEXT: paddd %xmm0, %xmm1 -; X86-NEXT: pxor {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1 -; X86-NEXT: pxor %xmm1, %xmm0 -; X86-NEXT: pcmpgtd %xmm1, %xmm0 +; X86-NEXT: pand %xmm1, %xmm0 +; X86-NEXT: pxor %xmm1, %xmm1 +; X86-NEXT: pcmpeqd %xmm1, %xmm0 ; X86-NEXT: psrld $31, %xmm0 ; X86-NEXT: retl ; @@ -566,10 +566,10 @@ define <4 x i32> @smax_known_zero_vec(<4 x i32> %x, <4 x i32> %y) { ; X64-NEXT: vpmaxsd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; X64-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpaddd %xmm1, %xmm0, %xmm1 -; X64-NEXT: vpxor %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpminud %xmm1, %xmm0, %xmm1 +; X64-NEXT: vpand %xmm1, %xmm0, %xmm0 +; X64-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; X64-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 -; X64-NEXT: vpandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 +; X64-NEXT: vpsrld $31, %xmm0, %xmm0 ; X64-NEXT: retq %z = call <4 x i32> @llvm.smax.v4i32(<4 x i32> %x, <4 x i32> ) %r = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %z) From 7357e3a321eea41442834273ed6f1c05ec2251c8 Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Mon, 7 Oct 2024 17:51:52 +0800 Subject: [PATCH 4/5] [CodeGenPrepare] Check if it is profitable for the target --- llvm/lib/CodeGen/CodeGenPrepare.cpp | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp 
b/llvm/lib/CodeGen/CodeGenPrepare.cpp index 0bfc10ca8c63d..d070a466d16b2 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2114,19 +2114,30 @@ bool CodeGenPrepare::optimizeURem(Instruction *Rem) { /// Some targets have better codegen for `ctpop(X) u< 2` than `ctpop(X) == 1`. /// This function converts `ctpop(X) ==/!= 1` into `ctpop(X) u</u> 2/1` if the /// result cannot be zero. -static bool adjustIsPower2Test(CmpInst *Cmp, const DataLayout &DL) { +static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI, + const TargetTransformInfo &TTI, + const DataLayout &DL) { ICmpInst::Predicate Pred; if (!match(Cmp, m_ICmp(Pred, m_Intrinsic<Intrinsic::ctpop>(), m_One()))) return false; if (!ICmpInst::isEquality(Pred)) return false; auto *II = cast<IntrinsicInst>(Cmp->getOperand(0)); + + // Check if it is profitable for the target + ICmpInst::Predicate NewPred = + Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGT; + if (TLI.isCtpopFast(TLI.getValueType(DL, II->getType())) && + TTI.getCmpSelInstrCost(Instruction::ICmp, II->getType(), Cmp->getType(), + Pred) < + TTI.getCmpSelInstrCost(Instruction::ICmp, II->getType(), + Cmp->getType(), NewPred)) + return false; + if (isKnownNonZero(II, DL)) { - if (Pred == ICmpInst::ICMP_EQ) { - Cmp->setPredicate(ICmpInst::ICMP_ULT); + if (Pred == ICmpInst::ICMP_EQ) Cmp->setOperand(1, ConstantInt::get(II->getType(), 2)); - } else - Cmp->setPredicate(ICmpInst::ICMP_UGT); + Cmp->setPredicate(NewPred); return true; } return false; @@ -2151,7 +2162,7 @@ bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) { if (foldFCmpToFPClassTest(Cmp, *TLI, *DL)) return true; - if (adjustIsPower2Test(Cmp, *DL)) + if (adjustIsPower2Test(Cmp, *TLI, *TTI, *DL)) return true; return false; From fb33033a7044504010cf10b3b10bd3514b742edb Mon Sep 17 00:00:00 2001 From: Yingwei Zheng Date: Thu, 10 Oct 2024 22:58:21 +0800 Subject: [PATCH 5/5] [CodeGenPrepare] Remove target cost checks ---
llvm/lib/CodeGen/CodeGenPrepare.cpp | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp index d070a466d16b2..0671f2451ea12 100644 --- a/llvm/lib/CodeGen/CodeGenPrepare.cpp +++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp @@ -2124,20 +2124,13 @@ static bool adjustIsPower2Test(CmpInst *Cmp, const TargetLowering &TLI, return false; auto *II = cast<IntrinsicInst>(Cmp->getOperand(0)); - // Check if it is profitable for the target - ICmpInst::Predicate NewPred = - Pred == ICmpInst::ICMP_EQ ? ICmpInst::ICMP_ULT : ICmpInst::ICMP_UGT; - if (TLI.isCtpopFast(TLI.getValueType(DL, II->getType())) && - TTI.getCmpSelInstrCost(Instruction::ICmp, II->getType(), Cmp->getType(), - Pred) < - TTI.getCmpSelInstrCost(Instruction::ICmp, II->getType(), - Cmp->getType(), NewPred)) - return false; - if (isKnownNonZero(II, DL)) { - if (Pred == ICmpInst::ICMP_EQ) + if (Pred == ICmpInst::ICMP_EQ) { Cmp->setOperand(1, ConstantInt::get(II->getType(), 2)); - Cmp->setPredicate(NewPred); + Cmp->setPredicate(ICmpInst::ICMP_ULT); + } else { + Cmp->setPredicate(ICmpInst::ICMP_UGT); + } return true; } return false;