Skip to content

Commit 75cf672

Browse files
authored
[SDAG] Simplify is-power-of-2 codegen (#72275)
When x is not known to be nonzero, `ctpop(x) == 1` is expanded to `x != 0 && (x & (x - 1)) == 0`, resulting in codegen like:

    leal -1(%rdi), %eax
    testl %eax, %edi
    sete %cl
    testl %edi, %edi
    setne %al
    andb %cl, %al

But another expression that works is `(x ^ (x - 1)) > x - 1` (an unsigned comparison), which has nicer codegen:

    leal -1(%rdi), %eax
    xorl %eax, %edi
    cmpl %eax, %edi
    seta %al
1 parent 6d858e2 commit 75cf672

File tree

12 files changed

+1045
-1356
lines changed

12 files changed

+1045
-1356
lines changed

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4237,9 +4237,7 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
42374237
return DAG.getSetCC(dl, VT, Result, DAG.getConstant(0, dl, CTVT), CC);
42384238
}
42394239

4240-
// Expand a power-of-2 comparison based on ctpop:
4241-
// (ctpop x) == 1 --> (x != 0) && ((x & x-1) == 0)
4242-
// (ctpop x) != 1 --> (x == 0) || ((x & x-1) != 0)
4240+
// Expand a power-of-2 comparison based on ctpop
42434241
if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) && C1 == 1) {
42444242
// Keep the CTPOP if it is cheap.
42454243
if (TLI.isCtpopFast(CTVT))
@@ -4248,17 +4246,23 @@ static SDValue simplifySetCCWithCTPOP(const TargetLowering &TLI, EVT VT,
42484246
SDValue Zero = DAG.getConstant(0, dl, CTVT);
42494247
SDValue NegOne = DAG.getAllOnesConstant(dl, CTVT);
42504248
assert(CTVT.isInteger());
4251-
ISD::CondCode InvCond = ISD::getSetCCInverse(Cond, CTVT);
42524249
SDValue Add = DAG.getNode(ISD::ADD, dl, CTVT, CTOp, NegOne);
4253-
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4254-
SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
4250+
42554251
// It's not uncommon for known-never-zero X to exist in (ctpop X) eq/ne 1, so
4256-
// check before the emit a potentially unnecessary op.
4257-
if (DAG.isKnownNeverZero(CTOp))
4252+
// check before emitting a potentially unnecessary op.
4253+
if (DAG.isKnownNeverZero(CTOp)) {
4254+
// (ctpop x) == 1 --> (x & x-1) == 0
4255+
// (ctpop x) != 1 --> (x & x-1) != 0
4256+
SDValue And = DAG.getNode(ISD::AND, dl, CTVT, CTOp, Add);
4257+
SDValue RHS = DAG.getSetCC(dl, VT, And, Zero, Cond);
42584258
return RHS;
4259-
SDValue LHS = DAG.getSetCC(dl, VT, CTOp, Zero, InvCond);
4260-
unsigned LogicOpcode = Cond == ISD::SETEQ ? ISD::AND : ISD::OR;
4261-
return DAG.getNode(LogicOpcode, dl, VT, LHS, RHS);
4259+
}
4260+
4261+
// (ctpop x) == 1 --> (x ^ x-1) > x-1
4262+
// (ctpop x) != 1 --> (x ^ x-1) <= x-1
4263+
SDValue Xor = DAG.getNode(ISD::XOR, dl, CTVT, CTOp, Add);
4264+
ISD::CondCode CmpCond = Cond == ISD::SETEQ ? ISD::SETUGT : ISD::SETULE;
4265+
return DAG.getSetCC(dl, VT, Xor, Add, CmpCond);
42624266
}
42634267

42644268
return SDValue();

llvm/test/CodeGen/AArch64/arm64-popcnt.ll

Lines changed: 18 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -196,17 +196,17 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
196196
; CHECK-LABEL: ctpop_eq_one:
197197
; CHECK: // %bb.0:
198198
; CHECK-NEXT: sub x8, x0, #1
199-
; CHECK-NEXT: tst x0, x8
200-
; CHECK-NEXT: ccmp x0, #0, #4, eq
201-
; CHECK-NEXT: cset w0, ne
199+
; CHECK-NEXT: eor x9, x0, x8
200+
; CHECK-NEXT: cmp x9, x8
201+
; CHECK-NEXT: cset w0, hi
202202
; CHECK-NEXT: ret
203203
;
204204
; CHECK-NONEON-LABEL: ctpop_eq_one:
205205
; CHECK-NONEON: // %bb.0:
206206
; CHECK-NONEON-NEXT: sub x8, x0, #1
207-
; CHECK-NONEON-NEXT: tst x0, x8
208-
; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
209-
; CHECK-NONEON-NEXT: cset w0, ne
207+
; CHECK-NONEON-NEXT: eor x9, x0, x8
208+
; CHECK-NONEON-NEXT: cmp x9, x8
209+
; CHECK-NONEON-NEXT: cset w0, hi
210210
; CHECK-NONEON-NEXT: ret
211211
;
212212
; CHECK-CSSC-LABEL: ctpop_eq_one:
@@ -225,17 +225,17 @@ define i32 @ctpop_ne_one(i64 %x) nounwind readnone {
225225
; CHECK-LABEL: ctpop_ne_one:
226226
; CHECK: // %bb.0:
227227
; CHECK-NEXT: sub x8, x0, #1
228-
; CHECK-NEXT: tst x0, x8
229-
; CHECK-NEXT: ccmp x0, #0, #4, eq
230-
; CHECK-NEXT: cset w0, eq
228+
; CHECK-NEXT: eor x9, x0, x8
229+
; CHECK-NEXT: cmp x9, x8
230+
; CHECK-NEXT: cset w0, ls
231231
; CHECK-NEXT: ret
232232
;
233233
; CHECK-NONEON-LABEL: ctpop_ne_one:
234234
; CHECK-NONEON: // %bb.0:
235235
; CHECK-NONEON-NEXT: sub x8, x0, #1
236-
; CHECK-NONEON-NEXT: tst x0, x8
237-
; CHECK-NONEON-NEXT: ccmp x0, #0, #4, eq
238-
; CHECK-NONEON-NEXT: cset w0, eq
236+
; CHECK-NONEON-NEXT: eor x9, x0, x8
237+
; CHECK-NONEON-NEXT: cmp x9, x8
238+
; CHECK-NONEON-NEXT: cset w0, ls
239239
; CHECK-NONEON-NEXT: ret
240240
;
241241
; CHECK-CSSC-LABEL: ctpop_ne_one:
@@ -254,17 +254,17 @@ define i1 @ctpop32_ne_one(i32 %x) nounwind readnone {
254254
; CHECK-LABEL: ctpop32_ne_one:
255255
; CHECK: // %bb.0:
256256
; CHECK-NEXT: sub w8, w0, #1
257-
; CHECK-NEXT: tst w0, w8
258-
; CHECK-NEXT: ccmp w0, #0, #4, eq
259-
; CHECK-NEXT: cset w0, eq
257+
; CHECK-NEXT: eor w9, w0, w8
258+
; CHECK-NEXT: cmp w9, w8
259+
; CHECK-NEXT: cset w0, ls
260260
; CHECK-NEXT: ret
261261
;
262262
; CHECK-NONEON-LABEL: ctpop32_ne_one:
263263
; CHECK-NONEON: // %bb.0:
264264
; CHECK-NONEON-NEXT: sub w8, w0, #1
265-
; CHECK-NONEON-NEXT: tst w0, w8
266-
; CHECK-NONEON-NEXT: ccmp w0, #0, #4, eq
267-
; CHECK-NONEON-NEXT: cset w0, eq
265+
; CHECK-NONEON-NEXT: eor w9, w0, w8
266+
; CHECK-NONEON-NEXT: cmp w9, w8
267+
; CHECK-NONEON-NEXT: cset w0, ls
268268
; CHECK-NONEON-NEXT: ret
269269
;
270270
; CHECK-CSSC-LABEL: ctpop32_ne_one:

llvm/test/CodeGen/ARM/popcnt.ll

Lines changed: 6 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -286,14 +286,12 @@ define i32 @ctpop_eq_one(i64 %x) nounwind readnone {
286286
; CHECK: @ %bb.0:
287287
; CHECK-NEXT: subs r2, r0, #1
288288
; CHECK-NEXT: sbc r3, r1, #0
289-
; CHECK-NEXT: and r2, r0, r2
290-
; CHECK-NEXT: and r3, r1, r3
291-
; CHECK-NEXT: orr r2, r2, r3
292-
; CHECK-NEXT: rsbs r3, r2, #0
293-
; CHECK-NEXT: adc r2, r2, r3
294-
; CHECK-NEXT: orrs r0, r0, r1
295-
; CHECK-NEXT: movne r0, #1
296-
; CHECK-NEXT: and r0, r0, r2
289+
; CHECK-NEXT: eor r12, r1, r3
290+
; CHECK-NEXT: eor r1, r0, r2
291+
; CHECK-NEXT: subs r1, r2, r1
292+
; CHECK-NEXT: mov r0, #0
293+
; CHECK-NEXT: sbcs r1, r3, r12
294+
; CHECK-NEXT: movlo r0, #1
297295
; CHECK-NEXT: mov pc, lr
298296
%count = tail call i64 @llvm.ctpop.i64(i64 %x)
299297
%cmp = icmp eq i64 %count, 1

llvm/test/CodeGen/RISCV/rv32zbb.ll

Lines changed: 91 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -350,10 +350,8 @@ define i1 @ctpop_i32_eq_one(i32 signext %a) nounwind {
350350
; RV32I-LABEL: ctpop_i32_eq_one:
351351
; RV32I: # %bb.0:
352352
; RV32I-NEXT: addi a1, a0, -1
353-
; RV32I-NEXT: and a1, a0, a1
354-
; RV32I-NEXT: seqz a1, a1
355-
; RV32I-NEXT: snez a0, a0
356-
; RV32I-NEXT: and a0, a0, a1
353+
; RV32I-NEXT: xor a0, a0, a1
354+
; RV32I-NEXT: sltu a0, a1, a0
357355
; RV32I-NEXT: ret
358356
;
359357
; RV32ZBB-LABEL: ctpop_i32_eq_one:
@@ -371,10 +369,9 @@ define i1 @ctpop_i32_ne_one(i32 signext %a) nounwind {
371369
; RV32I-LABEL: ctpop_i32_ne_one:
372370
; RV32I: # %bb.0:
373371
; RV32I-NEXT: addi a1, a0, -1
374-
; RV32I-NEXT: and a1, a0, a1
375-
; RV32I-NEXT: snez a1, a1
376-
; RV32I-NEXT: seqz a0, a0
377-
; RV32I-NEXT: or a0, a0, a1
372+
; RV32I-NEXT: xor a0, a0, a1
373+
; RV32I-NEXT: sltu a0, a1, a0
374+
; RV32I-NEXT: xori a0, a0, 1
378375
; RV32I-NEXT: ret
379376
;
380377
; RV32ZBB-LABEL: ctpop_i32_ne_one:
@@ -508,15 +505,11 @@ define <2 x i1> @ctpop_v2i32_eq_one(<2 x i32> %a) nounwind {
508505
; RV32I-LABEL: ctpop_v2i32_eq_one:
509506
; RV32I: # %bb.0:
510507
; RV32I-NEXT: addi a2, a0, -1
511-
; RV32I-NEXT: and a2, a0, a2
512-
; RV32I-NEXT: seqz a2, a2
513-
; RV32I-NEXT: snez a0, a0
514-
; RV32I-NEXT: and a0, a0, a2
508+
; RV32I-NEXT: xor a0, a0, a2
509+
; RV32I-NEXT: sltu a0, a2, a0
515510
; RV32I-NEXT: addi a2, a1, -1
516-
; RV32I-NEXT: and a2, a1, a2
517-
; RV32I-NEXT: seqz a2, a2
518-
; RV32I-NEXT: snez a1, a1
519-
; RV32I-NEXT: and a1, a1, a2
511+
; RV32I-NEXT: xor a1, a1, a2
512+
; RV32I-NEXT: sltu a1, a2, a1
520513
; RV32I-NEXT: ret
521514
;
522515
; RV32ZBB-LABEL: ctpop_v2i32_eq_one:
@@ -537,15 +530,13 @@ define <2 x i1> @ctpop_v2i32_ne_one(<2 x i32> %a) nounwind {
537530
; RV32I-LABEL: ctpop_v2i32_ne_one:
538531
; RV32I: # %bb.0:
539532
; RV32I-NEXT: addi a2, a0, -1
540-
; RV32I-NEXT: and a2, a0, a2
541-
; RV32I-NEXT: snez a2, a2
542-
; RV32I-NEXT: seqz a0, a0
543-
; RV32I-NEXT: or a0, a0, a2
533+
; RV32I-NEXT: xor a0, a0, a2
534+
; RV32I-NEXT: sltu a0, a2, a0
535+
; RV32I-NEXT: xori a0, a0, 1
544536
; RV32I-NEXT: addi a2, a1, -1
545-
; RV32I-NEXT: and a2, a1, a2
546-
; RV32I-NEXT: snez a2, a2
547-
; RV32I-NEXT: seqz a1, a1
548-
; RV32I-NEXT: or a1, a1, a2
537+
; RV32I-NEXT: xor a1, a1, a2
538+
; RV32I-NEXT: sltu a1, a2, a1
539+
; RV32I-NEXT: xori a1, a1, 1
549540
; RV32I-NEXT: ret
550541
;
551542
; RV32ZBB-LABEL: ctpop_v2i32_ne_one:
@@ -685,16 +676,17 @@ define i1 @ctpop_i64_ugt_one(i64 %a) nounwind {
685676
define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
686677
; RV32I-LABEL: ctpop_i64_eq_one:
687678
; RV32I: # %bb.0:
688-
; RV32I-NEXT: addi a2, a0, -1
689-
; RV32I-NEXT: and a2, a0, a2
690-
; RV32I-NEXT: seqz a3, a0
691-
; RV32I-NEXT: sub a3, a1, a3
692-
; RV32I-NEXT: and a3, a1, a3
693-
; RV32I-NEXT: or a2, a2, a3
694-
; RV32I-NEXT: seqz a2, a2
695-
; RV32I-NEXT: or a0, a0, a1
696-
; RV32I-NEXT: snez a0, a0
697-
; RV32I-NEXT: and a0, a0, a2
679+
; RV32I-NEXT: beqz a1, .LBB17_2
680+
; RV32I-NEXT: # %bb.1:
681+
; RV32I-NEXT: seqz a0, a0
682+
; RV32I-NEXT: sub a0, a1, a0
683+
; RV32I-NEXT: xor a1, a1, a0
684+
; RV32I-NEXT: sltu a0, a0, a1
685+
; RV32I-NEXT: ret
686+
; RV32I-NEXT: .LBB17_2:
687+
; RV32I-NEXT: addi a1, a0, -1
688+
; RV32I-NEXT: xor a0, a0, a1
689+
; RV32I-NEXT: sltu a0, a1, a0
698690
; RV32I-NEXT: ret
699691
;
700692
; RV32ZBB-LABEL: ctpop_i64_eq_one:
@@ -713,16 +705,19 @@ define i1 @ctpop_i64_eq_one(i64 %a) nounwind {
713705
define i1 @ctpop_i64_ne_one(i64 %a) nounwind {
714706
; RV32I-LABEL: ctpop_i64_ne_one:
715707
; RV32I: # %bb.0:
716-
; RV32I-NEXT: addi a2, a0, -1
717-
; RV32I-NEXT: and a2, a0, a2
718-
; RV32I-NEXT: seqz a3, a0
719-
; RV32I-NEXT: sub a3, a1, a3
720-
; RV32I-NEXT: and a3, a1, a3
721-
; RV32I-NEXT: or a2, a2, a3
722-
; RV32I-NEXT: snez a2, a2
723-
; RV32I-NEXT: or a0, a0, a1
708+
; RV32I-NEXT: beqz a1, .LBB18_2
709+
; RV32I-NEXT: # %bb.1:
724710
; RV32I-NEXT: seqz a0, a0
725-
; RV32I-NEXT: or a0, a0, a2
711+
; RV32I-NEXT: sub a0, a1, a0
712+
; RV32I-NEXT: xor a1, a1, a0
713+
; RV32I-NEXT: sltu a0, a0, a1
714+
; RV32I-NEXT: xori a0, a0, 1
715+
; RV32I-NEXT: ret
716+
; RV32I-NEXT: .LBB18_2:
717+
; RV32I-NEXT: addi a1, a0, -1
718+
; RV32I-NEXT: xor a0, a0, a1
719+
; RV32I-NEXT: sltu a0, a1, a0
720+
; RV32I-NEXT: xori a0, a0, 1
726721
; RV32I-NEXT: ret
727722
;
728723
; RV32ZBB-LABEL: ctpop_i64_ne_one:
@@ -950,30 +945,34 @@ define <2 x i1> @ctpop_v2i64_ugt_one(<2 x i64> %a) nounwind {
950945
define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
951946
; RV32I-LABEL: ctpop_v2i64_eq_one:
952947
; RV32I: # %bb.0:
953-
; RV32I-NEXT: lw a1, 0(a0)
948+
; RV32I-NEXT: mv a1, a0
954949
; RV32I-NEXT: lw a2, 12(a0)
955-
; RV32I-NEXT: lw a3, 8(a0)
956950
; RV32I-NEXT: lw a0, 4(a0)
957-
; RV32I-NEXT: addi a4, a1, -1
958-
; RV32I-NEXT: and a4, a1, a4
959-
; RV32I-NEXT: seqz a5, a1
960-
; RV32I-NEXT: sub a5, a0, a5
961-
; RV32I-NEXT: and a5, a0, a5
962-
; RV32I-NEXT: or a4, a4, a5
963-
; RV32I-NEXT: seqz a4, a4
964-
; RV32I-NEXT: or a0, a1, a0
965-
; RV32I-NEXT: snez a0, a0
966-
; RV32I-NEXT: and a0, a0, a4
967-
; RV32I-NEXT: addi a1, a3, -1
968-
; RV32I-NEXT: and a1, a3, a1
969-
; RV32I-NEXT: seqz a4, a3
970-
; RV32I-NEXT: sub a4, a2, a4
971-
; RV32I-NEXT: and a4, a2, a4
972-
; RV32I-NEXT: or a1, a1, a4
951+
; RV32I-NEXT: lw a3, 0(a1)
952+
; RV32I-NEXT: beqz a0, .LBB22_3
953+
; RV32I-NEXT: # %bb.1:
954+
; RV32I-NEXT: seqz a3, a3
955+
; RV32I-NEXT: sub a3, a0, a3
956+
; RV32I-NEXT: xor a0, a0, a3
957+
; RV32I-NEXT: sltu a0, a3, a0
958+
; RV32I-NEXT: lw a1, 8(a1)
959+
; RV32I-NEXT: bnez a2, .LBB22_4
960+
; RV32I-NEXT: .LBB22_2:
961+
; RV32I-NEXT: addi a2, a1, -1
962+
; RV32I-NEXT: xor a1, a1, a2
963+
; RV32I-NEXT: sltu a1, a2, a1
964+
; RV32I-NEXT: ret
965+
; RV32I-NEXT: .LBB22_3:
966+
; RV32I-NEXT: addi a0, a3, -1
967+
; RV32I-NEXT: xor a3, a3, a0
968+
; RV32I-NEXT: sltu a0, a0, a3
969+
; RV32I-NEXT: lw a1, 8(a1)
970+
; RV32I-NEXT: beqz a2, .LBB22_2
971+
; RV32I-NEXT: .LBB22_4:
973972
; RV32I-NEXT: seqz a1, a1
974-
; RV32I-NEXT: or a2, a3, a2
975-
; RV32I-NEXT: snez a2, a2
976-
; RV32I-NEXT: and a1, a2, a1
973+
; RV32I-NEXT: sub a1, a2, a1
974+
; RV32I-NEXT: xor a2, a2, a1
975+
; RV32I-NEXT: sltu a1, a1, a2
977976
; RV32I-NEXT: ret
978977
;
979978
; RV32ZBB-LABEL: ctpop_v2i64_eq_one:
@@ -1001,30 +1000,36 @@ define <2 x i1> @ctpop_v2i64_eq_one(<2 x i64> %a) nounwind {
10011000
define <2 x i1> @ctpop_v2i64_ne_one(<2 x i64> %a) nounwind {
10021001
; RV32I-LABEL: ctpop_v2i64_ne_one:
10031002
; RV32I: # %bb.0:
1004-
; RV32I-NEXT: lw a1, 0(a0)
1005-
; RV32I-NEXT: lw a2, 12(a0)
1003+
; RV32I-NEXT: lw a1, 12(a0)
1004+
; RV32I-NEXT: lw a2, 4(a0)
1005+
; RV32I-NEXT: lw a3, 0(a0)
1006+
; RV32I-NEXT: beqz a2, .LBB23_2
1007+
; RV32I-NEXT: # %bb.1:
1008+
; RV32I-NEXT: seqz a3, a3
1009+
; RV32I-NEXT: sub a3, a2, a3
1010+
; RV32I-NEXT: xor a2, a2, a3
1011+
; RV32I-NEXT: sltu a2, a3, a2
1012+
; RV32I-NEXT: j .LBB23_3
1013+
; RV32I-NEXT: .LBB23_2:
1014+
; RV32I-NEXT: addi a2, a3, -1
1015+
; RV32I-NEXT: xor a3, a3, a2
1016+
; RV32I-NEXT: sltu a2, a2, a3
1017+
; RV32I-NEXT: .LBB23_3:
10061018
; RV32I-NEXT: lw a3, 8(a0)
1007-
; RV32I-NEXT: lw a0, 4(a0)
1008-
; RV32I-NEXT: addi a4, a1, -1
1009-
; RV32I-NEXT: and a4, a1, a4
1010-
; RV32I-NEXT: seqz a5, a1
1011-
; RV32I-NEXT: sub a5, a0, a5
1012-
; RV32I-NEXT: and a5, a0, a5
1013-
; RV32I-NEXT: or a4, a4, a5
1014-
; RV32I-NEXT: snez a4, a4
1015-
; RV32I-NEXT: or a0, a1, a0
1016-
; RV32I-NEXT: seqz a0, a0
1017-
; RV32I-NEXT: or a0, a0, a4
1019+
; RV32I-NEXT: xori a0, a2, 1
1020+
; RV32I-NEXT: beqz a1, .LBB23_5
1021+
; RV32I-NEXT: # %bb.4:
1022+
; RV32I-NEXT: seqz a2, a3
1023+
; RV32I-NEXT: sub a2, a1, a2
1024+
; RV32I-NEXT: xor a1, a1, a2
1025+
; RV32I-NEXT: sltu a1, a2, a1
1026+
; RV32I-NEXT: xori a1, a1, 1
1027+
; RV32I-NEXT: ret
1028+
; RV32I-NEXT: .LBB23_5:
10181029
; RV32I-NEXT: addi a1, a3, -1
1019-
; RV32I-NEXT: and a1, a3, a1
1020-
; RV32I-NEXT: seqz a4, a3
1021-
; RV32I-NEXT: sub a4, a2, a4
1022-
; RV32I-NEXT: and a4, a2, a4
1023-
; RV32I-NEXT: or a1, a1, a4
1024-
; RV32I-NEXT: snez a1, a1
1025-
; RV32I-NEXT: or a2, a3, a2
1026-
; RV32I-NEXT: seqz a2, a2
1027-
; RV32I-NEXT: or a1, a2, a1
1030+
; RV32I-NEXT: xor a3, a3, a1
1031+
; RV32I-NEXT: sltu a1, a1, a3
1032+
; RV32I-NEXT: xori a1, a1, 1
10281033
; RV32I-NEXT: ret
10291034
;
10301035
; RV32ZBB-LABEL: ctpop_v2i64_ne_one:

0 commit comments

Comments
 (0)