Skip to content

Commit 43a0016

Browse files
committed
Extend performANDCSELCombine to performANDORCSELCombine
Differential Revision: https://reviews.llvm.org/D120422
1 parent 6467d1d commit 43a0016

File tree

7 files changed

+273
-129
lines changed

7 files changed

+273
-129
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 72 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -14034,15 +14034,85 @@ static SDValue tryCombineToBSL(SDNode *N,
1403414034
return SDValue();
1403514035
}
1403614036

14037+
// Given a tree of and/or(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
14038+
// convert to csel(ccmp(.., cc0)), depending on cc1:
14039+
14040+
// (AND (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
14041+
// =>
14042+
// (CSET cc1 (CCMP x1 y1 !cc1 cc0 cmp0))
14043+
//
14044+
// (OR (CSET cc0 cmp0) (CSET cc1 (CMP x1 y1)))
14045+
// =>
14046+
// (CSET cc1 (CCMP x1 y1 cc1 !cc0 cmp0))
14047+
static SDValue performANDORCSELCombine(SDNode *N, SelectionDAG &DAG) {
14048+
EVT VT = N->getValueType(0);
14049+
SDValue CSel0 = N->getOperand(0);
14050+
SDValue CSel1 = N->getOperand(1);
14051+
14052+
if (CSel0.getOpcode() != AArch64ISD::CSEL ||
14053+
CSel1.getOpcode() != AArch64ISD::CSEL)
14054+
return SDValue();
14055+
14056+
if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
14057+
return SDValue();
14058+
14059+
if (!isNullConstant(CSel0.getOperand(0)) ||
14060+
!isOneConstant(CSel0.getOperand(1)) ||
14061+
!isNullConstant(CSel1.getOperand(0)) ||
14062+
!isOneConstant(CSel1.getOperand(1)))
14063+
return SDValue();
14064+
14065+
SDValue Cmp0 = CSel0.getOperand(3);
14066+
SDValue Cmp1 = CSel1.getOperand(3);
14067+
AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
14068+
AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
14069+
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
14070+
return SDValue();
14071+
if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
14072+
Cmp0.getOpcode() == AArch64ISD::SUBS) {
14073+
std::swap(Cmp0, Cmp1);
14074+
std::swap(CC0, CC1);
14075+
}
14076+
14077+
if (Cmp1.getOpcode() != AArch64ISD::SUBS)
14078+
return SDValue();
14079+
14080+
SDLoc DL(N);
14081+
SDValue CCmp;
14082+
14083+
if (N->getOpcode() == ISD::AND) {
14084+
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
14085+
SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
14086+
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
14087+
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
14088+
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
14089+
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
14090+
} else {
14091+
SDLoc DL(N);
14092+
AArch64CC::CondCode InvCC1 = AArch64CC::getInvertedCondCode(CC1);
14093+
SDValue Condition = DAG.getConstant(CC0, DL, MVT_CC);
14094+
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(InvCC1);
14095+
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
14096+
CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
14097+
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
14098+
}
14099+
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
14100+
CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
14101+
CCmp);
14102+
}
14103+
1403714104
static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
1403814105
const AArch64Subtarget *Subtarget) {
14039-
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
1404014106
SelectionDAG &DAG = DCI.DAG;
1404114107
EVT VT = N->getValueType(0);
1404214108

14109+
if (SDValue R = performANDORCSELCombine(N, DAG))
14110+
return R;
14111+
1404314112
if (!DAG.getTargetLoweringInfo().isTypeLegal(VT))
1404414113
return SDValue();
1404514114

14115+
// Attempt to form an EXTR from (or (shl VAL1, #N), (srl VAL2, #RegWidth-N))
1404614116
if (SDValue Res = tryCombineToEXTR(N, DCI))
1404714117
return Res;
1404814118

@@ -14171,60 +14241,13 @@ static SDValue performSVEAndCombine(SDNode *N,
1417114241
return SDValue();
1417214242
}
1417314243

14174-
// Given a tree of and(csel(0, 1, cc0), csel(0, 1, cc1)), we may be able to
14175-
// convert to csel(ccmp(.., cc0)), depending on cc1.
14176-
static SDValue PerformANDCSELCombine(SDNode *N, SelectionDAG &DAG) {
14177-
EVT VT = N->getValueType(0);
14178-
SDValue CSel0 = N->getOperand(0);
14179-
SDValue CSel1 = N->getOperand(1);
14180-
14181-
if (CSel0.getOpcode() != AArch64ISD::CSEL ||
14182-
CSel1.getOpcode() != AArch64ISD::CSEL)
14183-
return SDValue();
14184-
14185-
if (!CSel0->hasOneUse() || !CSel1->hasOneUse())
14186-
return SDValue();
14187-
14188-
if (!isNullConstant(CSel0.getOperand(0)) ||
14189-
!isOneConstant(CSel0.getOperand(1)) ||
14190-
!isNullConstant(CSel1.getOperand(0)) ||
14191-
!isOneConstant(CSel1.getOperand(1)))
14192-
return SDValue();
14193-
14194-
SDValue Cmp0 = CSel0.getOperand(3);
14195-
SDValue Cmp1 = CSel1.getOperand(3);
14196-
AArch64CC::CondCode CC0 = (AArch64CC::CondCode)CSel0.getConstantOperandVal(2);
14197-
AArch64CC::CondCode CC1 = (AArch64CC::CondCode)CSel1.getConstantOperandVal(2);
14198-
if (!Cmp0->hasOneUse() || !Cmp1->hasOneUse())
14199-
return SDValue();
14200-
if (Cmp1.getOpcode() != AArch64ISD::SUBS &&
14201-
Cmp0.getOpcode() == AArch64ISD::SUBS) {
14202-
std::swap(Cmp0, Cmp1);
14203-
std::swap(CC0, CC1);
14204-
}
14205-
14206-
if (Cmp1.getOpcode() != AArch64ISD::SUBS)
14207-
return SDValue();
14208-
14209-
SDLoc DL(N);
14210-
AArch64CC::CondCode InvCC0 = AArch64CC::getInvertedCondCode(CC0);
14211-
SDValue Condition = DAG.getConstant(InvCC0, DL, MVT_CC);
14212-
unsigned NZCV = AArch64CC::getNZCVToSatisfyCondCode(CC1);
14213-
SDValue NZCVOp = DAG.getConstant(NZCV, DL, MVT::i32);
14214-
SDValue CCmp = DAG.getNode(AArch64ISD::CCMP, DL, MVT_CC, Cmp1.getOperand(0),
14215-
Cmp1.getOperand(1), NZCVOp, Condition, Cmp0);
14216-
return DAG.getNode(AArch64ISD::CSEL, DL, VT, CSel0.getOperand(0),
14217-
CSel0.getOperand(1), DAG.getConstant(CC1, DL, MVT::i32),
14218-
CCmp);
14219-
}
14220-
1422114244
static SDValue performANDCombine(SDNode *N,
1422214245
TargetLowering::DAGCombinerInfo &DCI) {
1422314246
SelectionDAG &DAG = DCI.DAG;
1422414247
SDValue LHS = N->getOperand(0);
1422514248
EVT VT = N->getValueType(0);
1422614249

14227-
if (SDValue R = PerformANDCSELCombine(N, DAG))
14250+
if (SDValue R = performANDORCSELCombine(N, DAG))
1422814251
return R;
1422914252

1423014253
if (!VT.isVector() || !DAG.getTargetLoweringInfo().isTypeLegal(VT))

llvm/test/CodeGen/AArch64/arm64-ccmp.ll

Lines changed: 10 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -754,16 +754,12 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
754754

755755
@g = global i32 0
756756

757-
; Should not use ccmp if we have to compute the or expression in an integer
758-
; register anyway because of other users.
759757
define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
760758
; CHECK-LABEL: select_noccmp2:
761759
; CHECK: ; %bb.0:
762760
; CHECK-NEXT: cmp x0, #0
763-
; CHECK-NEXT: cset w8, lt
764-
; CHECK-NEXT: cmp x0, #13
765-
; CHECK-NEXT: cset w9, gt
766-
; CHECK-NEXT: orr w8, w8, w9
761+
; CHECK-NEXT: ccmp x0, #13, #0, ge
762+
; CHECK-NEXT: cset w8, gt
767763
; CHECK-NEXT: cmp w8, #0
768764
; CHECK-NEXT: csel x0, xzr, x3, ne
769765
; CHECK-NEXT: sbfx w8, w8, #0, #1
@@ -799,21 +795,17 @@ define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
799795
; CHECK-LABEL: select_noccmp3:
800796
; CHECK: ; %bb.0:
801797
; CHECK-NEXT: cmp w0, #0
802-
; CHECK-NEXT: cset w8, lt
803-
; CHECK-NEXT: cmp w0, #13
804-
; CHECK-NEXT: cset w9, gt
798+
; CHECK-NEXT: ccmp w0, #13, #0, ge
799+
; CHECK-NEXT: cset w8, gt
805800
; CHECK-NEXT: cmp w0, #22
806-
; CHECK-NEXT: cset w10, lt
807-
; CHECK-NEXT: cmp w0, #44
808-
; CHECK-NEXT: cset w11, gt
801+
; CHECK-NEXT: mov w9, #44
802+
; CHECK-NEXT: ccmp w0, w9, #0, ge
803+
; CHECK-NEXT: cset w9, gt
809804
; CHECK-NEXT: cmp w0, #99
810-
; CHECK-NEXT: cset w12, eq
811-
; CHECK-NEXT: cmp w0, #77
812-
; CHECK-NEXT: cset w13, eq
813-
; CHECK-NEXT: orr w8, w8, w9
814-
; CHECK-NEXT: orr w9, w10, w11
815805
; CHECK-NEXT: and w8, w8, w9
816-
; CHECK-NEXT: orr w9, w12, w13
806+
; CHECK-NEXT: mov w9, #77
807+
; CHECK-NEXT: ccmp w0, w9, #4, ne
808+
; CHECK-NEXT: cset w9, eq
817809
; CHECK-NEXT: tst w8, w9
818810
; CHECK-NEXT: csel w0, w1, w2, ne
819811
; CHECK-NEXT: ret

llvm/test/CodeGen/AArch64/arm64-fp128.ll

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -257,13 +257,12 @@ define dso_local i1 @test_setcc3() {
257257
; CHECK-NEXT: ldr q1, [x8, :lo12:rhs]
258258
; CHECK-NEXT: stp q1, q0, [sp] // 32-byte Folded Spill
259259
; CHECK-NEXT: bl __eqtf2
260-
; CHECK-NEXT: cmp w0, #0
261-
; CHECK-NEXT: cset w19, eq
260+
; CHECK-NEXT: mov x19, x0
262261
; CHECK-NEXT: ldp q1, q0, [sp] // 32-byte Folded Reload
263262
; CHECK-NEXT: bl __unordtf2
264263
; CHECK-NEXT: cmp w0, #0
265-
; CHECK-NEXT: cset w8, ne
266-
; CHECK-NEXT: orr w0, w8, w19
264+
; CHECK-NEXT: ccmp w19, #0, #4, eq
265+
; CHECK-NEXT: cset w0, eq
267266
; CHECK-NEXT: ldp x30, x19, [sp, #32] // 16-byte Folded Reload
268267
; CHECK-NEXT: add sp, sp, #48
269268
; CHECK-NEXT: ret
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s
3+
4+
; Ensure chains of comparisons produce chains of `ccmp`
5+
6+
; (x0 < x1) && (x2 > x3)
7+
define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
8+
; CHECK-LABEL: cmp_and2:
9+
; CHECK: // %bb.0:
10+
; CHECK-NEXT: cmp w0, w1
11+
; CHECK-NEXT: ccmp w2, w3, #0, lo
12+
; CHECK-NEXT: cset w0, hi
13+
; CHECK-NEXT: ret
14+
%5 = icmp ult i32 %0, %1
15+
%6 = icmp ugt i32 %2, %3
16+
%7 = select i1 %5, i1 %6, i1 false
17+
%8 = zext i1 %7 to i32
18+
ret i32 %8
19+
}
20+
21+
; (x0 < x1) && (x2 > x3) && (x4 != x5)
22+
define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
23+
; CHECK-LABEL: cmp_and3:
24+
; CHECK: // %bb.0:
25+
; CHECK-NEXT: cmp w0, w1
26+
; CHECK-NEXT: ccmp w2, w3, #0, lo
27+
; CHECK-NEXT: ccmp w4, w5, #4, hi
28+
; CHECK-NEXT: cset w0, ne
29+
; CHECK-NEXT: ret
30+
%7 = icmp ult i32 %0, %1
31+
%8 = icmp ugt i32 %2, %3
32+
%9 = select i1 %7, i1 %8, i1 false
33+
%10 = icmp ne i32 %4, %5
34+
%11 = select i1 %9, i1 %10, i1 false
35+
%12 = zext i1 %11 to i32
36+
ret i32 %12
37+
}
38+
39+
; (x2 > x3) && (x0 < x1) && (x4 != x5) && (x6 == x7)
40+
define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
41+
; CHECK-LABEL: cmp_and4:
42+
; CHECK: // %bb.0:
43+
; CHECK-NEXT: cmp w2, w3
44+
; CHECK-NEXT: ccmp w0, w1, #2, hi
45+
; CHECK-NEXT: ccmp w4, w5, #4, lo
46+
; CHECK-NEXT: ccmp w6, w7, #0, ne
47+
; CHECK-NEXT: cset w0, eq
48+
; CHECK-NEXT: ret
49+
%9 = icmp ugt i32 %2, %3
50+
%10 = icmp ult i32 %0, %1
51+
%11 = select i1 %9, i1 %10, i1 false
52+
%12 = icmp ne i32 %4, %5
53+
%13 = select i1 %11, i1 %12, i1 false
54+
%14 = icmp eq i32 %6, %7
55+
%15 = select i1 %13, i1 %14, i1 false
56+
%16 = zext i1 %15 to i32
57+
ret i32 %16
58+
}
59+
60+
; (x0 < x1) || (x2 != x3)
61+
define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
62+
; CHECK-LABEL: cmp_or2:
63+
; CHECK: // %bb.0:
64+
; CHECK-NEXT: cmp w0, w1
65+
; CHECK-NEXT: ccmp w2, w3, #0, hs
66+
; CHECK-NEXT: cset w0, ne
67+
; CHECK-NEXT: ret
68+
%5 = icmp ult i32 %0, %1
69+
%6 = icmp ne i32 %2, %3
70+
%7 = select i1 %5, i1 true, i1 %6
71+
%8 = zext i1 %7 to i32
72+
ret i32 %8
73+
}
74+
75+
; (x0 < x1) || (x2 > x3) || (x4 != x5)
76+
define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
77+
; CHECK-LABEL: cmp_or3:
78+
; CHECK: // %bb.0:
79+
; CHECK-NEXT: cmp w0, w1
80+
; CHECK-NEXT: ccmp w2, w3, #2, hs
81+
; CHECK-NEXT: ccmp w4, w5, #0, ls
82+
; CHECK-NEXT: cset w0, ne
83+
; CHECK-NEXT: ret
84+
%7 = icmp ult i32 %0, %1
85+
%8 = icmp ugt i32 %2, %3
86+
%9 = select i1 %7, i1 true, i1 %8
87+
%10 = icmp ne i32 %4, %5
88+
%11 = select i1 %9, i1 true, i1 %10
89+
%12 = zext i1 %11 to i32
90+
ret i32 %12
91+
}
92+
93+
; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7)
94+
define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
95+
; CHECK-LABEL: cmp_or4:
96+
; CHECK: // %bb.0:
97+
; CHECK-NEXT: cmp w0, w1
98+
; CHECK-NEXT: ccmp w2, w3, #2, hs
99+
; CHECK-NEXT: ccmp w4, w5, #0, ls
100+
; CHECK-NEXT: ccmp w6, w7, #4, eq
101+
; CHECK-NEXT: cset w0, eq
102+
; CHECK-NEXT: ret
103+
%9 = icmp ult i32 %0, %1
104+
%10 = icmp ugt i32 %2, %3
105+
%11 = select i1 %9, i1 true, i1 %10
106+
%12 = icmp ne i32 %4, %5
107+
%13 = select i1 %11, i1 true, i1 %12
108+
%14 = icmp eq i32 %6, %7
109+
%15 = select i1 %13, i1 true, i1 %14
110+
%16 = zext i1 %15 to i32
111+
ret i32 %16
112+
}
113+
114+
; (x0 != 0) || (x1 != 0)
115+
define i32 @true_or2(i32 %0, i32 %1) {
116+
; CHECK-LABEL: true_or2:
117+
; CHECK: // %bb.0:
118+
; CHECK-NEXT: orr w8, w0, w1
119+
; CHECK-NEXT: cmp w8, #0
120+
; CHECK-NEXT: cset w0, ne
121+
; CHECK-NEXT: ret
122+
%3 = icmp ne i32 %0, 0
123+
%4 = icmp ne i32 %1, 0
124+
%5 = select i1 %3, i1 true, i1 %4
125+
%6 = zext i1 %5 to i32
126+
ret i32 %6
127+
}
128+
129+
; (x0 != 0) || (x1 != 0) || (x2 != 0)
130+
define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
131+
; CHECK-LABEL: true_or3:
132+
; CHECK: // %bb.0:
133+
; CHECK-NEXT: orr w8, w0, w1
134+
; CHECK-NEXT: orr w8, w8, w2
135+
; CHECK-NEXT: cmp w8, #0
136+
; CHECK-NEXT: cset w0, ne
137+
; CHECK-NEXT: ret
138+
%4 = icmp ne i32 %0, 0
139+
%5 = icmp ne i32 %1, 0
140+
%6 = select i1 %4, i1 true, i1 %5
141+
%7 = icmp ne i32 %2, 0
142+
%8 = select i1 %6, i1 true, i1 %7
143+
%9 = zext i1 %8 to i32
144+
ret i32 %9
145+
}

llvm/test/CodeGen/AArch64/select-with-and-or.ll

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,8 @@ define i1 @or(i32 %x, i32 %y, i32 %z, i32 %w) {
1818
; CHECK-LABEL: or:
1919
; CHECK: // %bb.0:
2020
; CHECK-NEXT: cmp w0, w1
21-
; CHECK-NEXT: cset w8, eq
22-
; CHECK-NEXT: cmp w2, w3
23-
; CHECK-NEXT: cset w9, gt
24-
; CHECK-NEXT: orr w0, w8, w9
21+
; CHECK-NEXT: ccmp w2, w3, #0, ne
22+
; CHECK-NEXT: cset w0, gt
2523
; CHECK-NEXT: ret
2624
%a = icmp eq i32 %x, %y
2725
%b = icmp sgt i32 %z, %w
@@ -46,10 +44,8 @@ define i1 @or_not(i32 %x, i32 %y, i32 %z, i32 %w) {
4644
; CHECK-LABEL: or_not:
4745
; CHECK: // %bb.0:
4846
; CHECK-NEXT: cmp w0, w1
49-
; CHECK-NEXT: cset w8, ne
50-
; CHECK-NEXT: cmp w2, w3
51-
; CHECK-NEXT: cset w9, gt
52-
; CHECK-NEXT: orr w0, w8, w9
47+
; CHECK-NEXT: ccmp w2, w3, #0, eq
48+
; CHECK-NEXT: cset w0, gt
5349
; CHECK-NEXT: ret
5450
%a = icmp eq i32 %x, %y
5551
%b = icmp sgt i32 %z, %w

0 commit comments

Comments
 (0)