Skip to content

Commit 4ca6a8b

Browse files
committed
[AArch64] Add support for unsigned comparisons
We have to be extra careful not to allow unsigned wraps, however. This also required adjusting the logic in adjustCmp, as well as comparing the true immediate values with the add or sub opcode taken into account. Because SIGNED_MIN and SIGNED_MAX cannot be an immediate, we do not need to worry about those edge cases when dealing with unsigned comparisons.
1 parent 59088e7 commit 4ca6a8b

File tree

2 files changed

+75
-44
lines changed

2 files changed

+75
-44
lines changed

llvm/lib/Target/AArch64/AArch64ConditionOptimizer.cpp

Lines changed: 54 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -231,22 +231,31 @@ static AArch64CC::CondCode getAdjustedCmp(AArch64CC::CondCode Cmp) {
231231
case AArch64CC::GE: return AArch64CC::GT;
232232
case AArch64CC::LT: return AArch64CC::LE;
233233
case AArch64CC::LE: return AArch64CC::LT;
234+
case AArch64CC::HI: return AArch64CC::HS;
235+
case AArch64CC::HS: return AArch64CC::HI;
236+
case AArch64CC::LO: return AArch64CC::LS;
237+
case AArch64CC::LS: return AArch64CC::LO;
234238
default:
235239
llvm_unreachable("Unexpected condition code");
236240
}
237241
}
238242

239243
// Transforms GT -> GE, GE -> GT, LT -> LE, LE -> LT by updating comparison
240244
// operator and condition code; likewise HI <-> HS and LO <-> LS (unsigned).
241-
AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
242-
MachineInstr *CmpMI, AArch64CC::CondCode Cmp) {
245+
AArch64ConditionOptimizer::CmpInfo
246+
AArch64ConditionOptimizer::adjustCmp(MachineInstr *CmpMI,
247+
AArch64CC::CondCode Cmp) {
243248
unsigned Opc = CmpMI->getOpcode();
249+
unsigned OldOpc = Opc;
250+
251+
bool isSigned = Cmp == AArch64CC::GT || Cmp == AArch64CC::GE ||
252+
Cmp == AArch64CC::LT || Cmp == AArch64CC::LE;
244253

245254
// CMN (compare with negative immediate) is an alias to ADDS (as
246255
// "operand - negative" == "operand + positive")
247256
bool Negative = (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri);
248257

249-
int Correction = (Cmp == AArch64CC::GT) ? 1 : -1;
258+
int Correction = (Cmp == AArch64CC::GT || Cmp == AArch64CC::HI) ? 1 : -1;
250259
// Negate Correction value for comparison with negative immediate (CMN).
251260
if (Negative) {
252261
Correction = -Correction;
@@ -255,13 +264,23 @@ AArch64ConditionOptimizer::CmpInfo AArch64ConditionOptimizer::adjustCmp(
255264
const int OldImm = (int)CmpMI->getOperand(2).getImm();
256265
const int NewImm = std::abs(OldImm + Correction);
257266

258-
// Handle +0 -> -1 and -0 -> +1 (CMN with 0 immediate) transitions by
259-
// adjusting compare instruction opcode.
260-
if (OldImm == 0 && ((Negative && Correction == 1) ||
261-
(!Negative && Correction == -1))) {
267+
// Handle cmn 1 -> cmp 0 transitions by adjusting compare instruction opcode.
268+
if (OldImm == 1 && Negative && Correction == -1) {
269+
// If we are adjusting from -1 to 0, we need to change the opcode.
270+
Opc = getComplementOpc(Opc);
271+
}
272+
273+
// Handle +0 -> -1 transitions by adjusting compare instruction opcode.
274+
assert((OldImm != 0 || !Negative) && "Should not encounter cmn 0!");
275+
if (OldImm == 0 && Correction == -1) {
262276
Opc = getComplementOpc(Opc);
263277
}
264278

279+
// If we change opcodes, this means we did an unsigned wrap, so return the old
280+
// cmp.
281+
if (!isSigned && Opc != OldOpc)
282+
return CmpInfo(OldImm, OldOpc, Cmp);
283+
265284
return CmpInfo(NewImm, Opc, getAdjustedCmp(Cmp));
266285
}
267286

@@ -323,6 +342,14 @@ bool AArch64ConditionOptimizer::adjustTo(MachineInstr *CmpMI,
323342
return false;
324343
}
325344

345+
// Returns true for the strict "greater than" condition codes: signed GT and
// its unsigned counterpart HI.
static bool isGreaterThan(AArch64CC::CondCode Cmp) {
346+
return Cmp == AArch64CC::GT || Cmp == AArch64CC::HI;
347+
}
348+
349+
// Returns true for the strict "less than" condition codes: signed LT and its
// unsigned counterpart LO.
static bool isLessThan(AArch64CC::CondCode Cmp) {
350+
return Cmp == AArch64CC::LT || Cmp == AArch64CC::LO;
351+
}
352+
326353
bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
327354
LLVM_DEBUG(dbgs() << "********** AArch64 Conditional Compares **********\n"
328355
<< "********** Function: " << MF.getName() << '\n');
@@ -383,6 +410,9 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
383410
const int HeadImm = (int)HeadCmpMI->getOperand(2).getImm();
384411
const int TrueImm = (int)TrueCmpMI->getOperand(2).getImm();
385412

413+
int HeadImmTrueValue = HeadImm;
414+
int TrueImmTrueValue = TrueImm;
415+
386416
LLVM_DEBUG(dbgs() << "Head branch:\n");
387417
LLVM_DEBUG(dbgs() << "\tcondition: " << AArch64CC::getCondCodeName(HeadCmp)
388418
<< '\n');
@@ -393,9 +423,17 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
393423
<< '\n');
394424
LLVM_DEBUG(dbgs() << "\timmediate: " << TrueImm << '\n');
395425

396-
if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::LT) ||
397-
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::GT)) &&
398-
std::abs(TrueImm - HeadImm) == 2) {
426+
unsigned Opc = HeadCmpMI->getOpcode();
427+
if (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri)
428+
HeadImmTrueValue = -HeadImmTrueValue;
429+
430+
Opc = TrueCmpMI->getOpcode();
431+
if (Opc == AArch64::ADDSWri || Opc == AArch64::ADDSXri)
432+
TrueImmTrueValue = -TrueImmTrueValue;
433+
434+
if (((isGreaterThan(HeadCmp) && isLessThan(TrueCmp)) ||
435+
(isLessThan(HeadCmp) && isGreaterThan(TrueCmp))) &&
436+
std::abs(TrueImmTrueValue - HeadImmTrueValue) == 2) {
399437
// This branch transforms machine instructions that correspond to
400438
//
401439
// 1) (a > {TrueImm} && ...) || (a < {HeadImm} && ...)
@@ -414,9 +452,9 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
414452
modifyCmp(TrueCmpMI, TrueCmpInfo);
415453
Changed = true;
416454
}
417-
} else if (((HeadCmp == AArch64CC::GT && TrueCmp == AArch64CC::GT) ||
418-
(HeadCmp == AArch64CC::LT && TrueCmp == AArch64CC::LT)) &&
419-
std::abs(TrueImm - HeadImm) == 1) {
455+
} else if (((isGreaterThan(HeadCmp) && isGreaterThan(TrueCmp)) ||
456+
(isLessThan(HeadCmp) && isLessThan(TrueCmp))) &&
457+
std::abs(TrueImmTrueValue - HeadImmTrueValue) == 1) {
420458
// This branch transforms machine instructions that correspond to
421459
//
422460
// 1) (a > {TrueImm} && ...) || (a > {HeadImm} && ...)
@@ -429,9 +467,9 @@ bool AArch64ConditionOptimizer::runOnMachineFunction(MachineFunction &MF) {
429467

430468
// GT -> GE transformation increases immediate value, so picking the
431469
// smaller one; LT -> LE decreases immediate value so invert the choice.
432-
bool adjustHeadCond = (HeadImm < TrueImm);
433-
if (HeadCmp == AArch64CC::LT) {
434-
adjustHeadCond = !adjustHeadCond;
470+
bool adjustHeadCond = (HeadImmTrueValue < TrueImmTrueValue);
471+
if (isLessThan(HeadCmp)) {
472+
adjustHeadCond = !adjustHeadCond;
435473
}
436474

437475
if (adjustHeadCond) {

llvm/test/CodeGen/AArch64/combine-comparisons-by-cse.ll

Lines changed: 21 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -851,11 +851,11 @@ define i32 @combine_ugt_uge_10() #0 {
851851
; CHECK: // %bb.0: // %entry
852852
; CHECK-NEXT: adrp x8, :got:a
853853
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
854-
; CHECK-NEXT: ldr w9, [x8]
854+
; CHECK-NEXT: ldr w8, [x8]
855+
; CHECK-NEXT: cmp w8, #10
855856
; CHECK-NEXT: adrp x8, :got:b
856857
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
857-
; CHECK-NEXT: cmp w9, #11
858-
; CHECK-NEXT: b.lo .LBB12_3
858+
; CHECK-NEXT: b.ls .LBB12_3
859859
; CHECK-NEXT: // %bb.1: // %land.lhs.true
860860
; CHECK-NEXT: adrp x9, :got:c
861861
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
@@ -867,7 +867,6 @@ define i32 @combine_ugt_uge_10() #0 {
867867
; CHECK-NEXT: mov w0, #1 // =0x1
868868
; CHECK-NEXT: ret
869869
; CHECK-NEXT: .LBB12_3: // %lor.lhs.false
870-
; CHECK-NEXT: cmp w9, #10
871870
; CHECK-NEXT: b.lo .LBB12_6
872871
; CHECK-NEXT: .LBB12_4: // %land.lhs.true3
873872
; CHECK-NEXT: adrp x9, :got:d
@@ -918,8 +917,8 @@ define i32 @combine_ugt_ult_5() #0 {
918917
; CHECK-NEXT: adrp x8, :got:a
919918
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
920919
; CHECK-NEXT: ldr w8, [x8]
921-
; CHECK-NEXT: cmp w8, #6
922-
; CHECK-NEXT: b.lo .LBB13_3
920+
; CHECK-NEXT: cmp w8, #5
921+
; CHECK-NEXT: b.ls .LBB13_3
923922
; CHECK-NEXT: // %bb.1: // %land.lhs.true
924923
; CHECK-NEXT: adrp x8, :got:b
925924
; CHECK-NEXT: adrp x9, :got:c
@@ -933,8 +932,7 @@ define i32 @combine_ugt_ult_5() #0 {
933932
; CHECK-NEXT: mov w0, #1 // =0x1
934933
; CHECK-NEXT: ret
935934
; CHECK-NEXT: .LBB13_3: // %lor.lhs.false
936-
; CHECK-NEXT: cmp w8, #4
937-
; CHECK-NEXT: b.hi .LBB13_6
935+
; CHECK-NEXT: b.hs .LBB13_6
938936
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
939937
; CHECK-NEXT: adrp x8, :got:b
940938
; CHECK-NEXT: adrp x9, :got:d
@@ -985,11 +983,11 @@ define i32 @combine_ult_uge_5() #0 {
985983
; CHECK: // %bb.0: // %entry
986984
; CHECK-NEXT: adrp x8, :got:a
987985
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
988-
; CHECK-NEXT: ldr w9, [x8]
986+
; CHECK-NEXT: ldr w8, [x8]
987+
; CHECK-NEXT: cmp w8, #5
989988
; CHECK-NEXT: adrp x8, :got:b
990989
; CHECK-NEXT: ldr x8, [x8, :got_lo12:b]
991-
; CHECK-NEXT: cmp w9, #4
992-
; CHECK-NEXT: b.hi .LBB14_3
990+
; CHECK-NEXT: b.hs .LBB14_3
993991
; CHECK-NEXT: // %bb.1: // %land.lhs.true
994992
; CHECK-NEXT: adrp x9, :got:c
995993
; CHECK-NEXT: ldr x9, [x9, :got_lo12:c]
@@ -1001,7 +999,6 @@ define i32 @combine_ult_uge_5() #0 {
1001999
; CHECK-NEXT: mov w0, #1 // =0x1
10021000
; CHECK-NEXT: ret
10031001
; CHECK-NEXT: .LBB14_3: // %lor.lhs.false
1004-
; CHECK-NEXT: cmp w9, #5
10051002
; CHECK-NEXT: b.hi .LBB14_6
10061003
; CHECK-NEXT: .LBB14_4: // %land.lhs.true3
10071004
; CHECK-NEXT: adrp x9, :got:d
@@ -1052,8 +1049,8 @@ define i32 @combine_ult_ugt_5() #0 {
10521049
; CHECK-NEXT: adrp x8, :got:a
10531050
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
10541051
; CHECK-NEXT: ldr w8, [x8]
1055-
; CHECK-NEXT: cmp w8, #4
1056-
; CHECK-NEXT: b.hi .LBB15_3
1052+
; CHECK-NEXT: cmp w8, #5
1053+
; CHECK-NEXT: b.hs .LBB15_3
10571054
; CHECK-NEXT: // %bb.1: // %land.lhs.true
10581055
; CHECK-NEXT: adrp x8, :got:b
10591056
; CHECK-NEXT: adrp x9, :got:c
@@ -1067,8 +1064,7 @@ define i32 @combine_ult_ugt_5() #0 {
10671064
; CHECK-NEXT: mov w0, #1 // =0x1
10681065
; CHECK-NEXT: ret
10691066
; CHECK-NEXT: .LBB15_3: // %lor.lhs.false
1070-
; CHECK-NEXT: cmp w8, #6
1071-
; CHECK-NEXT: b.lo .LBB15_6
1067+
; CHECK-NEXT: b.ls .LBB15_6
10721068
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
10731069
; CHECK-NEXT: adrp x8, :got:b
10741070
; CHECK-NEXT: adrp x9, :got:d
@@ -1120,8 +1116,8 @@ define i32 @combine_ugt_ult_n5() #0 {
11201116
; CHECK-NEXT: adrp x8, :got:a
11211117
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
11221118
; CHECK-NEXT: ldr w8, [x8]
1123-
; CHECK-NEXT: cmn w8, #4
1124-
; CHECK-NEXT: b.lo .LBB16_3
1119+
; CHECK-NEXT: cmn w8, #5
1120+
; CHECK-NEXT: b.ls .LBB16_3
11251121
; CHECK-NEXT: // %bb.1: // %land.lhs.true
11261122
; CHECK-NEXT: adrp x8, :got:b
11271123
; CHECK-NEXT: adrp x9, :got:c
@@ -1135,8 +1131,7 @@ define i32 @combine_ugt_ult_n5() #0 {
11351131
; CHECK-NEXT: mov w0, #1 // =0x1
11361132
; CHECK-NEXT: ret
11371133
; CHECK-NEXT: .LBB16_3: // %lor.lhs.false
1138-
; CHECK-NEXT: cmn w8, #6
1139-
; CHECK-NEXT: b.hi .LBB16_6
1134+
; CHECK-NEXT: b.hs .LBB16_6
11401135
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
11411136
; CHECK-NEXT: adrp x8, :got:b
11421137
; CHECK-NEXT: adrp x9, :got:d
@@ -1188,8 +1183,8 @@ define i32 @combine_ult_ugt_n5() #0 {
11881183
; CHECK-NEXT: adrp x8, :got:a
11891184
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
11901185
; CHECK-NEXT: ldr w8, [x8]
1191-
; CHECK-NEXT: cmn w8, #6
1192-
; CHECK-NEXT: b.hi .LBB17_3
1186+
; CHECK-NEXT: cmn w8, #5
1187+
; CHECK-NEXT: b.hs .LBB17_3
11931188
; CHECK-NEXT: // %bb.1: // %land.lhs.true
11941189
; CHECK-NEXT: adrp x8, :got:b
11951190
; CHECK-NEXT: adrp x9, :got:c
@@ -1203,8 +1198,7 @@ define i32 @combine_ult_ugt_n5() #0 {
12031198
; CHECK-NEXT: mov w0, #1 // =0x1
12041199
; CHECK-NEXT: ret
12051200
; CHECK-NEXT: .LBB17_3: // %lor.lhs.false
1206-
; CHECK-NEXT: cmn w8, #4
1207-
; CHECK-NEXT: b.lo .LBB17_6
1201+
; CHECK-NEXT: b.ls .LBB17_6
12081202
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
12091203
; CHECK-NEXT: adrp x8, :got:b
12101204
; CHECK-NEXT: adrp x9, :got:d
@@ -1257,8 +1251,8 @@ define i32 @combine_ult_gt_n5() #0 {
12571251
; CHECK-NEXT: adrp x8, :got:a
12581252
; CHECK-NEXT: ldr x8, [x8, :got_lo12:a]
12591253
; CHECK-NEXT: ldr w8, [x8]
1260-
; CHECK-NEXT: cmn w8, #6
1261-
; CHECK-NEXT: b.hi .LBB18_3
1254+
; CHECK-NEXT: cmn w8, #5
1255+
; CHECK-NEXT: b.hs .LBB18_3
12621256
; CHECK-NEXT: // %bb.1: // %land.lhs.true
12631257
; CHECK-NEXT: adrp x8, :got:b
12641258
; CHECK-NEXT: adrp x9, :got:c
@@ -1272,8 +1266,7 @@ define i32 @combine_ult_gt_n5() #0 {
12721266
; CHECK-NEXT: mov w0, #1 // =0x1
12731267
; CHECK-NEXT: ret
12741268
; CHECK-NEXT: .LBB18_3: // %lor.lhs.false
1275-
; CHECK-NEXT: cmn w8, #4
1276-
; CHECK-NEXT: b.lt .LBB18_6
1269+
; CHECK-NEXT: b.le .LBB18_6
12771270
; CHECK-NEXT: // %bb.4: // %land.lhs.true3
12781271
; CHECK-NEXT: adrp x8, :got:b
12791272
; CHECK-NEXT: adrp x9, :got:d

0 commit comments

Comments
 (0)