Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 15 additions & 11 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4237,9 +4237,7 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
}

// Expand a power-of-2 comparison based on ctpop:
// (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
// Expand a power-of-2 comparison based on ctpop
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
// Keep the CTPOP if it is cheap.
if (TLI.isCtpopFast(CTVT))
Expand All @@ -4248,17 +4246,23 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
SDValue Zero = DAG.getConstant(0, dl, CTVT);
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
assert(CTVT.isInteger());
ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);

// It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
// check before the emit a potentially unnecessary op.
if (DAG.isKnownNeverZero(CTOp))
// check before emitting a potentially unnecessary op.
if (DAG.isKnownNeverZero(CTOp)) {
// (ctpop x) == 1 --> (x & x-1) == 0
// (ctpop x) != 1 --> (x & x-1) != 0
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
return RHS;
SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
}

// (ctpop x) == 1 --> (x ^ x-1) > x-1
// (ctpop x) != 1 --> (x ^ x-1) <= x-1
SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
}

return SDValue();
Expand Down
36 changes: 18 additions & 18 deletions llvm/test/CodeGen/AArch64/arm64-popcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -196,17 +196,17 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop_eq_one:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #1
; CHECK-NEXT: tst x0, x8
; CHECK-NEXT: ccmp x0, #0, #4, eq
; CHECK-NEXT: cset w0, ne
; CHECK-NEXT: eor x9, x0, x8
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: cset w0, hi
; CHECK-NEXT: ret
;
; CHECK-NONEON-LABEL: ctpop_eq_one:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: sub x8, x0, #1
; CHECK-NONEON-NEXT: tst x0, x8
; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
; CHECK-NONEON-NEXT: cset w0, ne
; CHECK-NONEON-NEXT: eor x9, x0, x8
; CHECK-NONEON-NEXT: cmp x9, x8
; CHECK-NONEON-NEXT: cset w0, hi
; CHECK-NONEON-NEXT: ret
;
; CHECK-CSSC-LABEL: ctpop_eq_one:
Expand All @@ -225,17 +225,17 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
; CHECK-LABEL: ctpop_ne_one:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x0, #1
; CHECK-NEXT: tst x0, x8
; CHECK-NEXT: ccmp x0, #0, #4, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: eor x9, x0, x8
; CHECK-NEXT: cmp x9, x8
; CHECK-NEXT: cset w0, ls
; CHECK-NEXT: ret
;
; CHECK-NONEON-LABEL: ctpop_ne_one:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: sub x8, x0, #1
; CHECK-NONEON-NEXT: tst x0, x8
; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
; CHECK-NONEON-NEXT: cset w0, eq
; CHECK-NONEON-NEXT: eor x9, x0, x8
; CHECK-NONEON-NEXT: cmp x9, x8
; CHECK-NONEON-NEXT: cset w0, ls
; CHECK-NONEON-NEXT: ret
;
; CHECK-CSSC-LABEL: ctpop_ne_one:
Expand All @@ -254,17 +254,17 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
; CHECK-LABEL: ctpop32_ne_one:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w0, #1
; CHECK-NEXT: tst w0, w8
; CHECK-NEXT: ccmp w0, #0, #4, eq
; CHECK-NEXT: cset w0, eq
; CHECK-NEXT: eor w9, w0, w8
; CHECK-NEXT: cmp w9, w8
; CHECK-NEXT: cset w0, ls
; CHECK-NEXT: ret
;
; CHECK-NONEON-LABEL: ctpop32_ne_one:
; CHECK-NONEON: // %bb.0:
; CHECK-NONEON-NEXT: sub w8, w0, #1
; CHECK-NONEON-NEXT: tst w0, w8
; CHECK-NONEON-NEXT: ccmp w0, #0, #4, eq
; CHECK-NONEON-NEXT: cset w0, eq
; CHECK-NONEON-NEXT: eor w9, w0, w8
; CHECK-NONEON-NEXT: cmp w9, w8
; CHECK-NONEON-NEXT: cset w0, ls
; CHECK-NONEON-NEXT: ret
;
; CHECK-CSSC-LABEL: ctpop32_ne_one:
Expand Down
14 changes: 6 additions & 8 deletions llvm/test/CodeGen/ARM/popcnt.ll
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,12 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
; CHECK: @ %bb.0:
; CHECK-NEXT: subs r2, r0, #1
; CHECK-NEXT: sbc r3, r1, #0
; CHECK-NEXT: and r2, r0, r2
; CHECK-NEXT: and r3, r1, r3
; CHECK-NEXT: orr r2, r2, r3
; CHECK-NEXT: rsbs r3, r2, #0
; CHECK-NEXT: adc r2, r2, r3
; CHECK-NEXT: orrs r0, r0, r1
; CHECK-NEXT: movne r0, #1
; CHECK-NEXT: and r0, r0, r2
; CHECK-NEXT: eor r12, r1, r3
; CHECK-NEXT: eor r1, r0, r2
; CHECK-NEXT: subs r1, r2, r1
; CHECK-NEXT: mov r0, #0
; CHECK-NEXT: sbcs r1, r3, r12
; CHECK-NEXT: movlo r0, #1
; CHECK-NEXT: mov pc, lr
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
%cmp = icmp eq i64 %count, 1
Expand Down
177 changes: 91 additions & 86 deletions llvm/test/CodeGen/RISCV/rv32zbb.ll
Original file line number Diff line number Diff line change
Expand Up @@ -350,10 +350,8 @@ define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind {
; RV32I-LABEL: ctpop_i32_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a1, a0, a1
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: and a0, a0, a1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32_eq_one:
Expand All @@ -371,10 +369,9 @@ define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
; RV32I-LABEL: ctpop_i32_ne_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: and a1, a0, a1
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i32_ne_one:
Expand Down Expand Up @@ -508,15 +505,11 @@ define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: seqz a2, a2
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: and a2, a1, a2
; RV32I-NEXT: seqz a2, a2
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: and a1, a1, a2
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
Expand All @@ -537,15 +530,13 @@ define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
; RV32I-LABEL: ctpop_v2i32_ne_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: snez a2, a2
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: xor a0, a0, a2
; RV32I-NEXT: sltu a0, a2, a0
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: and a2, a1, a2
; RV32I-NEXT: snez a2, a2
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: or a1, a1, a2
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: xori a1, a1, 1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
Expand Down Expand Up @@ -685,16 +676,17 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: seqz a3, a0
; RV32I-NEXT: sub a3, a1, a3
; RV32I-NEXT: and a3, a1, a3
; RV32I-NEXT: or a2, a2, a3
; RV32I-NEXT: seqz a2, a2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: and a0, a0, a2
; RV32I-NEXT: beqz a1, .LBB17_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: xor a1, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB17_2:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64_eq_one:
Expand All @@ -713,16 +705,19 @@ define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
define i1 @ctpop_i64_ne_one(i64 %a) nounwind {
; RV32I-LABEL: ctpop_i64_ne_one:
; RV32I: # %bb.0:
; RV32I-NEXT: addi a2, a0, -1
; RV32I-NEXT: and a2, a0, a2
; RV32I-NEXT: seqz a3, a0
; RV32I-NEXT: sub a3, a1, a3
; RV32I-NEXT: and a3, a1, a3
; RV32I-NEXT: or a2, a2, a3
; RV32I-NEXT: snez a2, a2
; RV32I-NEXT: or a0, a0, a1
; RV32I-NEXT: beqz a1, .LBB18_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, a0, a2
; RV32I-NEXT: sub a0, a1, a0
; RV32I-NEXT: xor a1, a1, a0
; RV32I-NEXT: sltu a0, a0, a1
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB18_2:
; RV32I-NEXT: addi a1, a0, -1
; RV32I-NEXT: xor a0, a0, a1
; RV32I-NEXT: sltu a0, a1, a0
; RV32I-NEXT: xori a0, a0, 1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_i64_ne_one:
Expand Down Expand Up @@ -950,30 +945,34 @@ define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_eq_one:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a1, 0(a0)
; RV32I-NEXT: mv a1, a0
; RV32I-NEXT: lw a2, 12(a0)
; RV32I-NEXT: lw a3, 8(a0)
; RV32I-NEXT: lw a0, 4(a0)
; RV32I-NEXT: addi a4, a1, -1
; RV32I-NEXT: and a4, a1, a4
; RV32I-NEXT: seqz a5, a1
; RV32I-NEXT: sub a5, a0, a5
; RV32I-NEXT: and a5, a0, a5
; RV32I-NEXT: or a4, a4, a5
; RV32I-NEXT: seqz a4, a4
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: snez a0, a0
; RV32I-NEXT: and a0, a0, a4
; RV32I-NEXT: addi a1, a3, -1
; RV32I-NEXT: and a1, a3, a1
; RV32I-NEXT: seqz a4, a3
; RV32I-NEXT: sub a4, a2, a4
; RV32I-NEXT: and a4, a2, a4
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: lw a3, 0(a1)
; RV32I-NEXT: beqz a0, .LBB22_3
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a3, a3
; RV32I-NEXT: sub a3, a0, a3
; RV32I-NEXT: xor a0, a0, a3
; RV32I-NEXT: sltu a0, a3, a0
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: bnez a2, .LBB22_4
; RV32I-NEXT: .LBB22_2:
; RV32I-NEXT: addi a2, a1, -1
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB22_3:
; RV32I-NEXT: addi a0, a3, -1
; RV32I-NEXT: xor a3, a3, a0
; RV32I-NEXT: sltu a0, a0, a3
; RV32I-NEXT: lw a1, 8(a1)
; RV32I-NEXT: beqz a2, .LBB22_2
; RV32I-NEXT: .LBB22_4:
; RV32I-NEXT: seqz a1, a1
; RV32I-NEXT: or a2, a3, a2
; RV32I-NEXT: snez a2, a2
; RV32I-NEXT: and a1, a2, a1
; RV32I-NEXT: sub a1, a2, a1
; RV32I-NEXT: xor a2, a2, a1
; RV32I-NEXT: sltu a1, a1, a2
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
Expand Down Expand Up @@ -1001,30 +1000,36 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
; RV32I-LABEL: ctpop_v2i64_ne_one:
; RV32I: # %bb.0:
; RV32I-NEXT: lw a1, 0(a0)
; RV32I-NEXT: lw a2, 12(a0)
; RV32I-NEXT: lw a1, 12(a0)
; RV32I-NEXT: lw a2, 4(a0)
; RV32I-NEXT: lw a3, 0(a0)
; RV32I-NEXT: beqz a2, .LBB23_2
; RV32I-NEXT: # %bb.1:
; RV32I-NEXT: seqz a3, a3
; RV32I-NEXT: sub a3, a2, a3
; RV32I-NEXT: xor a2, a2, a3
; RV32I-NEXT: sltu a2, a3, a2
; RV32I-NEXT: j .LBB23_3
; RV32I-NEXT: .LBB23_2:
; RV32I-NEXT: addi a2, a3, -1
; RV32I-NEXT: xor a3, a3, a2
; RV32I-NEXT: sltu a2, a2, a3
; RV32I-NEXT: .LBB23_3:
; RV32I-NEXT: lw a3, 8(a0)
; RV32I-NEXT: lw a0, 4(a0)
; RV32I-NEXT: addi a4, a1, -1
; RV32I-NEXT: and a4, a1, a4
; RV32I-NEXT: seqz a5, a1
; RV32I-NEXT: sub a5, a0, a5
; RV32I-NEXT: and a5, a0, a5
; RV32I-NEXT: or a4, a4, a5
; RV32I-NEXT: snez a4, a4
; RV32I-NEXT: or a0, a1, a0
; RV32I-NEXT: seqz a0, a0
; RV32I-NEXT: or a0, a0, a4
; RV32I-NEXT: xori a0, a2, 1
; RV32I-NEXT: beqz a1, .LBB23_5
; RV32I-NEXT: # %bb.4:
; RV32I-NEXT: seqz a2, a3
; RV32I-NEXT: sub a2, a1, a2
; RV32I-NEXT: xor a1, a1, a2
; RV32I-NEXT: sltu a1, a2, a1
; RV32I-NEXT: xori a1, a1, 1
; RV32I-NEXT: ret
; RV32I-NEXT: .LBB23_5:
; RV32I-NEXT: addi a1, a3, -1
; RV32I-NEXT: and a1, a3, a1
; RV32I-NEXT: seqz a4, a3
; RV32I-NEXT: sub a4, a2, a4
; RV32I-NEXT: and a4, a2, a4
; RV32I-NEXT: or a1, a1, a4
; RV32I-NEXT: snez a1, a1
; RV32I-NEXT: or a2, a3, a2
; RV32I-NEXT: seqz a2, a2
; RV32I-NEXT: or a1, a2, a1
; RV32I-NEXT: xor a3, a3, a1
; RV32I-NEXT: sltu a1, a1, a3
; RV32I-NEXT: xori a1, a1, 1
; RV32I-NEXT: ret
;
; RV32ZBB-LABEL: ctpop_v2i64_ne_one:
Expand Down
Loading