Skip to content

Commit 1389980

Browse files
authored
[AArch64][SVE] Avoid extra pop of "FixedObject" with CalleeSavesAboveFrameRecord (#156452)
Previously, we would pop `FixedObject`-bytes after deallocating the SVE area, then again as part of the "AfterCSRPopSize". This could be seen in the tests `@f6` and `@f9`. This patch removes the erroneous pop, and refactors `CalleeSavesAboveFrameRecord` to reuse more of the existing GPR deallocation logic, which allows for post-decrements.
1 parent 74513f5 commit 1389980

File tree

4 files changed

+134
-116
lines changed

4 files changed

+134
-116
lines changed

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 41 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -1360,14 +1360,24 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13601360
}
13611361

13621362
bool CombineSPBump = shouldCombineCSRLocalStackBump(NumBytes);
1363-
// Assume we can't combine the last pop with the sp restore.
1364-
bool CombineAfterCSRBump = false;
1363+
1364+
unsigned ProloguePopSize = PrologueSaveSize;
13651365
if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1366+
// With CalleeSavesAboveFrameRecord ProloguePopSize is the amount of stack
1367+
// that needs to be popped until we reach the start of the SVE save area.
1368+
// The "FixedObject" stack occurs after the SVE area and must be popped
1369+
// later.
1370+
ProloguePopSize -= FixedObject;
13661371
AfterCSRPopSize += FixedObject;
1367-
} else if (!CombineSPBump && PrologueSaveSize != 0) {
1372+
}
1373+
1374+
// Assume we can't combine the last pop with the sp restore.
1375+
if (!CombineSPBump && ProloguePopSize != 0) {
13681376
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
13691377
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
1370-
AArch64InstrInfo::isSEHInstruction(*Pop))
1378+
AArch64InstrInfo::isSEHInstruction(*Pop) ||
1379+
(SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord &&
1380+
isPartOfSVECalleeSaves(Pop)))
13711381
Pop = std::prev(Pop);
13721382
// Converting the last ldp to a post-index ldp is valid only if the last
13731383
// ldp's offset is 0.
@@ -1377,18 +1387,27 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13771387
// may clobber), convert it to a post-index ldp.
13781388
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
13791389
convertCalleeSaveRestoreToSPPrePostIncDec(
1380-
Pop, DL, PrologueSaveSize, EmitCFI, MachineInstr::FrameDestroy,
1381-
PrologueSaveSize);
1390+
Pop, DL, ProloguePopSize, EmitCFI, MachineInstr::FrameDestroy,
1391+
ProloguePopSize);
1392+
} else if (SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord) {
1393+
MachineBasicBlock::iterator AfterLastPop = std::next(Pop);
1394+
if (AArch64InstrInfo::isSEHInstruction(*AfterLastPop))
1395+
++AfterLastPop;
1396+
// If not, and CalleeSavesAboveFrameRecord is enabled, deallocate
1397+
// callee-save non-SVE registers to move the stack pointer to the start of
1398+
// the SVE area.
1399+
emitFrameOffset(MBB, AfterLastPop, DL, AArch64::SP, AArch64::SP,
1400+
StackOffset::getFixed(ProloguePopSize), TII,
1401+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1402+
&HasWinCFI);
13821403
} else {
1383-
// If not, make sure to emit an add after the last ldp.
1404+
// Otherwise, make sure to emit an add after the last ldp.
13841405
// We're doing this by transferring the size to be restored from the
13851406
// adjustment *before* the CSR pops to the adjustment *after* the CSR
13861407
// pops.
1387-
AfterCSRPopSize += PrologueSaveSize;
1388-
CombineAfterCSRBump = true;
1408+
AfterCSRPopSize += ProloguePopSize;
13891409
}
13901410
}
1391-
13921411
// Move past the restores of the callee-saved registers.
13931412
// If we plan on combining the sp bump of the local stack size and the callee
13941413
// save stack size, we might need to adjust the CSR save and restore offsets.
@@ -1419,6 +1438,17 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14191438
--SEHEpilogueStartI;
14201439
}
14211440

1441+
// Determine the ranges of SVE callee-saves. This is done before emitting any
1442+
// code at the end of the epilogue (for Swift async), which can get in the way
1443+
// of finding SVE callee-saves with CalleeSavesAboveFrameRecord.
1444+
auto [PPR, ZPR] = getSVEStackFrameSizes();
1445+
auto [PPRRange, ZPRRange] = partitionSVECS(
1446+
MBB,
1447+
SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
1448+
? MBB.getFirstTerminator()
1449+
: FirstGPRRestoreI,
1450+
PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1451+
14221452
if (HasFP && AFI->hasSwiftAsyncContext())
14231453
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
14241454

@@ -1441,14 +1471,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14411471
NumBytes -= PrologueSaveSize;
14421472
assert(NumBytes >= 0 && "Negative stack allocation size!?");
14431473

1444-
auto [PPR, ZPR] = getSVEStackFrameSizes();
1445-
auto [PPRRange, ZPRRange] = partitionSVECS(
1446-
MBB,
1447-
SVELayout == SVEStackLayout::CalleeSavesAboveFrameRecord
1448-
? MBB.getFirstTerminator()
1449-
: FirstGPRRestoreI,
1450-
PPR.CalleeSavesSize, ZPR.CalleeSavesSize, /*IsEpilogue=*/true);
1451-
14521474
StackOffset SVECalleeSavesSize = ZPR.CalleeSavesSize + PPR.CalleeSavesSize;
14531475
StackOffset SVEStackSize =
14541476
SVECalleeSavesSize + PPR.LocalsSize + ZPR.LocalsSize;
@@ -1467,16 +1489,6 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14671489
NeedsWinCFI, &HasWinCFI);
14681490
}
14691491

1470-
// Deallocate callee-save non-SVE registers.
1471-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1472-
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
1473-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1474-
1475-
// Deallocate fixed objects.
1476-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1477-
StackOffset::getFixed(FixedObject), TII,
1478-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1479-
14801492
// Deallocate callee-save SVE registers.
14811493
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
14821494
SVECalleeSavesSize, TII, MachineInstr::FrameDestroy, false,
@@ -1619,7 +1631,7 @@ void AArch64EpilogueEmitter::emitEpilogue() {
16191631
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
16201632
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
16211633
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
1622-
StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
1634+
StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
16231635
}
16241636
}
16251637

llvm/test/CodeGen/AArch64/framelayout-sve-win.mir

Lines changed: 10 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -380,10 +380,8 @@ body: |
380380
; CHECK-NEXT: frame-destroy SEH_EpilogStart
381381
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
382382
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
383-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
384-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
385-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
386-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
383+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
384+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
387385
; CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.4)
388386
; CHECK-NEXT: frame-destroy SEH_SavePReg 4, 0
389387
; CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 1 :: (load (s16) from %stack.3)
@@ -430,10 +428,8 @@ body: |
430428
; CHECK-NEXT: frame-destroy SEH_EpilogStart
431429
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
432430
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
433-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
434-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
435-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
436-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
431+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
432+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
437433
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
438434
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 0
439435
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
@@ -557,10 +553,8 @@ body: |
557553
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
558554
; CHECK-NEXT: $x21, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
559555
; CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 30, 16
560-
; CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
561-
; CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 0
562-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
563-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
556+
; CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
557+
; CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -32
564558
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.21)
565559
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
566560
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.20)
@@ -745,10 +739,8 @@ body: |
745739
; CHECK-NEXT: frame-destroy SEH_EpilogStart
746740
; CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
747741
; CHECK-NEXT: frame-destroy SEH_SetFP
748-
; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
749-
; CHECK-NEXT: frame-destroy SEH_SaveFPLR 0
750-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
751-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
742+
; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
743+
; CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -16
752744
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
753745
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
754746
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
@@ -869,10 +861,8 @@ body: |
869861
; CHECK-NEXT: frame-destroy SEH_EpilogStart
870862
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 7, implicit $vg
871863
; CHECK-NEXT: frame-destroy SEH_AllocZ 7
872-
; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.6)
873-
; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
874-
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
875-
; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
864+
; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.6)
865+
; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
876866
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.8)
877867
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 1
878868
; CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.7)

0 commit comments

Comments
 (0)