Skip to content

Commit 0415253

Browse files
committed
[AArch64] Add support for unsigned comparisons
We have to be extra careful not to allow unsigned wraps, however. This also required adjusting the logic in adjustCmp, as well as comparing the true immediate values with the add (CMN) or sub (CMP) opcode taken into account. Because SIGNED_MIN and SIGNED_MAX cannot be encoded as immediates, we do not need to worry about those edge cases when dealing with unsigned comparisons.
1 parent 4845543 commit 0415253

File tree

2 files changed

+87
-48
lines changed

2 files changed

+87
-48
lines changed

llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp

Lines changed: 66 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -227,26 +227,43 @@ static int getComplementOpc(int Opc) {
227227
// Changes form of comparison inclusive <-> exclusive.
228228
static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
229229
switch (Cmp) {
230-
case AArch64CC::GT: return AArch64CC::GE;
231-
case AArch64CC::GE: return AArch64CC::GT;
232-
case AArch64CC::LT: return AArch64CC::LE;
233-
case AArch64CC::LE: return AArch64CC::LT;
230+
case AArch64CC::GT:
231+
return AArch64CC::GE;
232+
case AArch64CC::GE:
233+
return AArch64CC::GT;
234+
case AArch64CC::LT:
235+
return AArch64CC::LE;
236+
case AArch64CC::LE:
237+
return AArch64CC::LT;
238+
case AArch64CC::HI:
239+
return AArch64CC::HS;
240+
case AArch64CC::HS:
241+
return AArch64CC::HI;
242+
case AArch64CC::LO:
243+
return AArch64CC::LS;
244+
case AArch64CC::LS:
245+
return AArch64CC::LO;
234246
default:
235247
llvm_unreachable("Unexpected condition code");
236248
}
237249
}
238250

239251
// Transforms GT <-> GE, LT <-> LE, HI <-> HS, LO <-> LS by updating comparison
240252
// operator and condition code.
241-
AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
242-
MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
253+
AArch64ConditionOptimizer::CmpInfo
254+
AArch64ConditionOptimizer::adjustCmp(MachineInstr *CmpMI,
255+
AArch64CC::CondCode Cmp) {
243256
unsigned Opc = CmpMI->getOpcode();
257+
unsigned OldOpc = Opc;
258+
259+
bool isSigned = Cmp == AArch64CC::GT || Cmp == AArch64CC::GE ||
260+
Cmp == AArch64CC::LT || Cmp == AArch64CC::LE;
244261

245262
// CMN (compare with negative immediate) is an alias to ADDS (as
246263
// "operand - negative" == "operand + positive")
247264
bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
248265

249-
int Correction = (Cmp == AArch64CC::GT) ? 1 : -1;
266+
int Correction = (Cmp == AArch64CC::GT || Cmp == AArch64CC::HI) ? 1 : -1;
250267
// Negate Correction value for comparison with negative immediate (CMN).
251268
if (Negative) {
252269
Correction = -Correction;
@@ -255,13 +272,23 @@ AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
255272
const int OldImm = (int)CmpMI->getOperand(2).getImm();
256273
const int NewImm = std::abs(OldImm + Correction);
257274

258-
// Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
259-
// adjusting compare instruction opcode.
260-
if (OldImm == 0 && ((Negative && Correction == 1) ||
261-
(!Negative && Correction == -1))) {
275+
// Handle cmn 1 -> cmp 0, transitions by adjusting compare instruction opcode.
276+
if (OldImm == 1 && Negative && Correction == -1) {
277+
// If we are adjusting from -1 to 0, we need to change the opcode.
278+
Opc = getComplementOpc(Opc);
279+
}
280+
281+
// Handle +0 -> -1 transitions by adjusting compare instruction opcode.
282+
assert((OldImm != 0 || !Negative) && "Should not encounter cmn 0!");
283+
if (OldImm == 0 && Correction == -1) {
262284
Opc = getComplementOpc(Opc);
263285
}
264286

287+
// If we change opcodes, this means we did an unsigned wrap, so return the old
288+
// cmp.
289+
if (!isSigned && Opc != OldOpc)
290+
return CmpInfo(OldImm, OldOpc, Cmp);
291+
265292
return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));
266293
}
267294

@@ -323,6 +350,14 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
323350
return false;
324351
}
325352

353+
static bool isGreaterThan(AArch64CC::CondCode Cmp) {
354+
return Cmp == AArch64CC::GT || Cmp == AArch64CC::HI;
355+
}
356+
357+
static bool isLessThan(AArch64CC::CondCode Cmp) {
358+
return Cmp == AArch64CC::LT || Cmp == AArch64CC::LO;
359+
}
360+
326361
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
327362
LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
328363
<< "********** Function: " << MF.getName() << '\n');
@@ -383,6 +418,9 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
383418
const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
384419
const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
385420

421+
int HeadImmTrueValue = HeadImm;
422+
int TrueImmTrueValue = TrueImm;
423+
386424
LLVM_DEBUG(dbgs() << "Head branch:\n");
387425
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
388426
<< '\n');
@@ -393,9 +431,17 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
393431
<< '\n');
394432
LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
395433

396-
if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
397-
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
398-
std::abs(TrueImm - HeadImm) == 2) {
434+
unsigned Opc = HeadCmpMI->getOpcode();
435+
if (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri)
436+
HeadImmTrueValue = -HeadImmTrueValue;
437+
438+
Opc = TrueCmpMI->getOpcode();
439+
if (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri)
440+
TrueImmTrueValue = -TrueImmTrueValue;
441+
442+
if (((isGreaterThan(HeadCmp) && isLessThan(TrueCmp)) ||
443+
(isLessThan(HeadCmp) && isGreaterThan(TrueCmp))) &&
444+
std::abs(TrueImmTrueValue - HeadImmTrueValue) == 2) {
399445
// This branch transforms machine instructions that correspond to
400446
//
401447
// 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
@@ -414,9 +460,9 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
414460
modifyCmp(TrueCmpMI, TrueCmpInfo);
415461
Changed = true;
416462
}
417-
} else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
418-
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
419-
std::abs(TrueImm - HeadImm) == 1) {
463+
} else if (((isGreaterThan(HeadCmp) && isGreaterThan(TrueCmp)) ||
464+
(isLessThan(HeadCmp) && isLessThan(TrueCmp))) &&
465+
std::abs(TrueImmTrueValue - HeadImmTrueValue) == 1) {
420466
// This branch transforms machine instructions that correspond to
421467
//
422468
// 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
@@ -429,9 +475,9 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
429475

430476
// GT -> GE transformation increases immediate value, so picking the
431477
// smaller one; LT -> LE decreases immediate value so invert the choice.
432-
bool adjustHeadCond = (HeadImm < TrueImm);
433-
if (HeadCmp == AArch64CC::LT) {
434-
adjustHeadCond = !adjustHeadCond;
478+
bool adjustHeadCond = (HeadImmTrueValue < TrueImmTrueValue);
479+
if (isLessThan(HeadCmp)) {
480+
adjustHeadCond = !adjustHeadCond;
435481
}
436482

437483
if (adjustHeadCond) {

llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -851,11 +851,11 @@ define i32 @combine_ugt_uge_10() #0 {
851851
; CHECK: // %bb.0: // %entry
852852
; CHECK-NEXT: adrp x8, :got:a
853853
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
854-
; CHECK-NEXT: ldr w9, [x8]
854+
; CHECK-NEXT: ldr w8, [x8]
855+
; CHECK-NEXT: cmp w8, #10
855856
; CHECK-NEXT: adrp x8, :got:b
856857
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
857-
; CHECK-NEXT: cmp w9, #11
858-
; CHECK-NEXT: b.lo .LBB12_3
858+
; CHECK-NEXT: b.ls .LBB12_3
859859
; CHECK-NEXT: // %bb.1: // %land.lhs.true
860860
; CHECK-NEXT: adrp x9, :got:c
861861
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
@@ -867,7 +867,6 @@ define i32 @combine_ugt_uge_10() #0 {
867867
; CHECK-NEXT: mov w0, #1 // =0x1
868868
; CHECK-NEXT: ret
869869
; CHECK-NEXT: .LBB12_3: // %lor.lhs.false
870-
; CHECK-NEXT: cmp w9, #10
871870
; CHECK-NEXT: b.lo .LBB12_6
872871
; CHECK-NEXT: .LBB12_4: // %land.lhs.true3
873872
; CHECK-NEXT: adrp x9, :got:d
@@ -918,8 +917,8 @@ define i32 @combine_ugt_ult_5() #0 {
918917
; CHECK-NEXT: adrp x8, :got:a
919918
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
920919
; CHECK-NEXT: ldr w8, [x8]
921-
; CHECK-NEXT: cmp w8, #6
922-
; CHECK-NEXT: b.lo .LBB13_3
920+
; CHECK-NEXT: cmp w8, #5
921+
; CHECK-NEXT: b.ls .LBB13_3
923922
; CHECK-NEXT: // %bb.1: // %land.lhs.true
924923
; CHECK-NEXT: adrp x8, :got:b
925924
; CHECK-NEXT: adrp x9, :got:c
@@ -933,8 +932,7 @@ define i32 @combine_ugt_ult_5() #0 {
933932
; CHECK-NEXT: mov w0, #1 // =0x1
934933
; CHECK-NEXT: ret
935934
; CHECK-NEXT: .LBB13_3: // %lor.lhs.false
936-
; CHECK-NEXT: cmp w8, #4
937-
; CHECK-NEXT: b.hi .LBB13_6
935+
; CHECK-NEXT: b.hs .LBB13_6
938936
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
939937
; CHECK-NEXT: adrp x8, :got:b
940938
; CHECK-NEXT: adrp x9, :got:d
@@ -985,11 +983,11 @@ define i32 @combine_ult_uge_5() #0 {
985983
; CHECK: // %bb.0: // %entry
986984
; CHECK-NEXT: adrp x8, :got:a
987985
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
988-
; CHECK-NEXT: ldr w9, [x8]
986+
; CHECK-NEXT: ldr w8, [x8]
987+
; CHECK-NEXT: cmp w8, #5
989988
; CHECK-NEXT: adrp x8, :got:b
990989
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
991-
; CHECK-NEXT: cmp w9, #4
992-
; CHECK-NEXT: b.hi .LBB14_3
990+
; CHECK-NEXT: b.hs .LBB14_3
993991
; CHECK-NEXT: // %bb.1: // %land.lhs.true
994992
; CHECK-NEXT: adrp x9, :got:c
995993
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
@@ -1001,7 +999,6 @@ define i32 @combine_ult_uge_5() #0 {
1001999
; CHECK-NEXT: mov w0, #1 // =0x1
10021000
; CHECK-NEXT: ret
10031001
; CHECK-NEXT: .LBB14_3: // %lor.lhs.false
1004-
; CHECK-NEXT: cmp w9, #5
10051002
; CHECK-NEXT: b.hi .LBB14_6
10061003
; CHECK-NEXT: .LBB14_4: // %land.lhs.true3
10071004
; CHECK-NEXT: adrp x9, :got:d
@@ -1052,8 +1049,8 @@ define i32 @combine_ult_ugt_5() #0 {
10521049
; CHECK-NEXT: adrp x8, :got:a
10531050
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
10541051
; CHECK-NEXT: ldr w8, [x8]
1055-
; CHECK-NEXT: cmp w8, #4
1056-
; CHECK-NEXT: b.hi .LBB15_3
1052+
; CHECK-NEXT: cmp w8, #5
1053+
; CHECK-NEXT: b.hs .LBB15_3
10571054
; CHECK-NEXT: // %bb.1: // %land.lhs.true
10581055
; CHECK-NEXT: adrp x8, :got:b
10591056
; CHECK-NEXT: adrp x9, :got:c
@@ -1067,8 +1064,7 @@ define i32 @combine_ult_ugt_5() #0 {
10671064
; CHECK-NEXT: mov w0, #1 // =0x1
10681065
; CHECK-NEXT: ret
10691066
; CHECK-NEXT: .LBB15_3: // %lor.lhs.false
1070-
; CHECK-NEXT: cmp w8, #6
1071-
; CHECK-NEXT: b.lo .LBB15_6
1067+
; CHECK-NEXT: b.ls .LBB15_6
10721068
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
10731069
; CHECK-NEXT: adrp x8, :got:b
10741070
; CHECK-NEXT: adrp x9, :got:d
@@ -1120,8 +1116,8 @@ define i32 @combine_ugt_ult_n5() #0 {
11201116
; CHECK-NEXT: adrp x8, :got:a
11211117
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
11221118
; CHECK-NEXT: ldr w8, [x8]
1123-
; CHECK-NEXT: cmn w8, #4
1124-
; CHECK-NEXT: b.lo .LBB16_3
1119+
; CHECK-NEXT: cmn w8, #5
1120+
; CHECK-NEXT: b.ls .LBB16_3
11251121
; CHECK-NEXT: // %bb.1: // %land.lhs.true
11261122
; CHECK-NEXT: adrp x8, :got:b
11271123
; CHECK-NEXT: adrp x9, :got:c
@@ -1135,8 +1131,7 @@ define i32 @combine_ugt_ult_n5() #0 {
11351131
; CHECK-NEXT: mov w0, #1 // =0x1
11361132
; CHECK-NEXT: ret
11371133
; CHECK-NEXT: .LBB16_3: // %lor.lhs.false
1138-
; CHECK-NEXT: cmn w8, #6
1139-
; CHECK-NEXT: b.hi .LBB16_6
1134+
; CHECK-NEXT: b.hs .LBB16_6
11401135
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
11411136
; CHECK-NEXT: adrp x8, :got:b
11421137
; CHECK-NEXT: adrp x9, :got:d
@@ -1188,8 +1183,8 @@ define i32 @combine_ult_ugt_n5() #0 {
11881183
; CHECK-NEXT: adrp x8, :got:a
11891184
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
11901185
; CHECK-NEXT: ldr w8, [x8]
1191-
; CHECK-NEXT: cmn w8, #6
1192-
; CHECK-NEXT: b.hi .LBB17_3
1186+
; CHECK-NEXT: cmn w8, #5
1187+
; CHECK-NEXT: b.hs .LBB17_3
11931188
; CHECK-NEXT: // %bb.1: // %land.lhs.true
11941189
; CHECK-NEXT: adrp x8, :got:b
11951190
; CHECK-NEXT: adrp x9, :got:c
@@ -1203,8 +1198,7 @@ define i32 @combine_ult_ugt_n5() #0 {
12031198
; CHECK-NEXT: mov w0, #1 // =0x1
12041199
; CHECK-NEXT: ret
12051200
; CHECK-NEXT: .LBB17_3: // %lor.lhs.false
1206-
; CHECK-NEXT: cmn w8, #4
1207-
; CHECK-NEXT: b.lo .LBB17_6
1201+
; CHECK-NEXT: b.ls .LBB17_6
12081202
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
12091203
; CHECK-NEXT: adrp x8, :got:b
12101204
; CHECK-NEXT: adrp x9, :got:d
@@ -1257,8 +1251,8 @@ define i32 @combine_ult_gt_n5() #0 {
12571251
; CHECK-NEXT: adrp x8, :got:a
12581252
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
12591253
; CHECK-NEXT: ldr w8, [x8]
1260-
; CHECK-NEXT: cmn w8, #6
1261-
; CHECK-NEXT: b.hi .LBB18_3
1254+
; CHECK-NEXT: cmn w8, #5
1255+
; CHECK-NEXT: b.hs .LBB18_3
12621256
; CHECK-NEXT: // %bb.1: // %land.lhs.true
12631257
; CHECK-NEXT: adrp x8, :got:b
12641258
; CHECK-NEXT: adrp x9, :got:c
@@ -1272,8 +1266,7 @@ define i32 @combine_ult_gt_n5() #0 {
12721266
; CHECK-NEXT: mov w0, #1 // =0x1
12731267
; CHECK-NEXT: ret
12741268
; CHECK-NEXT: .LBB18_3: // %lor.lhs.false
1275-
; CHECK-NEXT: cmn w8, #4
1276-
; CHECK-NEXT: b.lt .LBB18_6
1269+
; CHECK-NEXT: b.le .LBB18_6
12771270
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
12781271
; CHECK-NEXT: adrp x8, :got:b
12791272
; CHECK-NEXT: adrp x9, :got:d

0 commit comments

Comments
 (0)