Skip to content

Commit 63dc874

Browse files
committed
[AArch64][SME] Reshuffle emit[prologue|epilogue]() for splitSVEObjects (NFCI)
Requested in #142392 (comment) Change-Id: I842faddea1bd54c5e30a9985782baf5dce37e5bb
1 parent afb2628 commit 63dc874

File tree

2 files changed

+145
-142
lines changed

2 files changed

+145
-142
lines changed

llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,8 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
481481
StackHazardCSRSlotIndex = Index;
482482
}
483483

484+
/// Reports whether this function uses split SVE objects. This is currently a
/// stub that always returns false.
bool hasSplitSVEObjects() const { return false; }
485+
484486
/// Returns a copy of the stored SMEFnAttrs member.
SMEAttrs getSMEFnAttrs() const { return SMEFnAttrs; }
485487

486488
/// Returns the stored SRetReturnReg member.
unsigned getSRetReturnReg() const { return SRetReturnReg; }

llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp

Lines changed: 143 additions & 142 deletions
Original file line numberDiff line numberDiff line change
@@ -715,85 +715,75 @@ void AArch64PrologueEmitter::emitPrologue() {
715715
if (AFL.windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding))
716716
emitWindowsStackProbe(AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
717717

718-
MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
719-
720718
StackOffset PPRCalleeSavesSize =
721719
StackOffset::getScalable(AFI->getPPRCalleeSavedStackSize());
722720
StackOffset ZPRCalleeSavesSize =
723721
StackOffset::getScalable(AFI->getZPRCalleeSavedStackSize());
724722
StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
725723
StackOffset PPRLocalsSize = AFL.getPPRStackSize(MF) - PPRCalleeSavesSize;
726724
StackOffset ZPRLocalsSize = AFL.getZPRStackSize(MF) - ZPRCalleeSavesSize;
725+
std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
726+
ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;
727727

728728
StackOffset CFAOffset =
729729
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
730730
MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
731-
732731
if (!FPAfterSVECalleeSaves) {
733-
MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
734-
ZPRCalleeSavesEnd = AfterGPRSavesI;
735-
MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
736-
PPRCalleeSavesEnd = AfterGPRSavesI;
737-
738-
// Process the SVE callee-saves to determine what space needs to be
739-
// allocated.
740-
732+
// Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
733+
// areas.
734+
PPRCalleeSavesBegin = AfterGPRSavesI;
741735
if (PPRCalleeSavesSize) {
742736
LLVM_DEBUG(dbgs() << "PPRCalleeSavedStackSize = "
743737
<< PPRCalleeSavesSize.getScalable() << "\n");
744738

745-
PPRCalleeSavesBegin = AfterSVESavesI;
746-
assert(isPartOfPPRCalleeSaves(PPRCalleeSavesBegin) &&
739+
assert(isPartOfPPRCalleeSaves(*PPRCalleeSavesBegin) &&
747740
"Unexpected instruction");
748741
while (isPartOfPPRCalleeSaves(AfterSVESavesI) &&
749742
AfterSVESavesI != MBB.getFirstTerminator())
750743
++AfterSVESavesI;
751-
PPRCalleeSavesEnd = AfterSVESavesI;
752744
}
753-
745+
PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
754746
if (ZPRCalleeSavesSize) {
755747
LLVM_DEBUG(dbgs() << "ZPRCalleeSavedStackSize = "
756748
<< ZPRCalleeSavesSize.getScalable() << "\n");
757-
ZPRCalleeSavesBegin = AfterSVESavesI;
758-
assert(isPartOfZPRCalleeSaves(ZPRCalleeSavesBegin) &&
749+
assert(isPartOfZPRCalleeSaves(*ZPRCalleeSavesBegin) &&
759750
"Unexpected instruction");
760751
while (isPartOfZPRCalleeSaves(AfterSVESavesI) &&
761752
AfterSVESavesI != MBB.getFirstTerminator())
762753
++AfterSVESavesI;
763-
ZPRCalleeSavesEnd = AfterSVESavesI;
764754
}
755+
ZPRCalleeSavesEnd = AfterSVESavesI;
756+
}
757+
758+
if (EmitAsyncCFI)
759+
emitCalleeSavedSVELocations(AfterSVESavesI);
765760

761+
if (AFI->hasSplitSVEObjects()) {
762+
reportFatalInternalError("not implemented yet");
763+
} else {
766764
// Allocate space for the callee saves (if any).
767765
StackOffset LocalsSize =
768766
PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed(NumBytes);
769-
MachineBasicBlock::iterator CalleeSavesBegin =
770-
AFI->getPPRCalleeSavedStackSize() ? PPRCalleeSavesBegin
771-
: ZPRCalleeSavesBegin;
772-
allocateStackSpace(CalleeSavesBegin, 0, SVECalleeSavesSize,
773-
EmitAsyncCFI && !HasFP, CFAOffset,
774-
MFI.hasVarSizedObjects() || LocalsSize);
775-
776-
CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize() ? ZPRCalleeSavesEnd
777-
: PPRCalleeSavesEnd;
778-
}
779-
CFAOffset += SVECalleeSavesSize;
780-
781-
if (EmitAsyncCFI)
782-
emitCalleeSavedSVELocations(CalleeSavesEnd);
783-
784-
// Allocate space for the rest of the frame including SVE locals. Align the
785-
// stack as necessary.
786-
assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
787-
"Cannot use redzone with stack realignment");
788-
if (!AFL.canUseRedZone(MF)) {
789-
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
790-
// the correct value here, as NumBytes also includes padding bytes,
791-
// which shouldn't be counted here.
792-
StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
793-
allocateStackSpace(CalleeSavesEnd, RealignmentPadding,
794-
SVELocalsSize + StackOffset::getFixed(NumBytes),
795-
EmitAsyncCFI && !HasFP, CFAOffset,
796-
MFI.hasVarSizedObjects());
767+
if (!FPAfterSVECalleeSaves)
768+
allocateStackSpace(AfterGPRSavesI, 0, SVECalleeSavesSize,
769+
EmitAsyncCFI && !HasFP, CFAOffset,
770+
MFI.hasVarSizedObjects() || LocalsSize);
771+
CFAOffset += SVECalleeSavesSize;
772+
773+
// Allocate space for the rest of the frame including SVE locals. Align the
774+
// stack as necessary.
775+
assert(!(AFL.canUseRedZone(MF) && NeedsRealignment) &&
776+
"Cannot use redzone with stack realignment");
777+
if (!AFL.canUseRedZone(MF)) {
778+
// FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
779+
// the correct value here, as NumBytes also includes padding bytes,
780+
// which shouldn't be counted here.
781+
StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
782+
allocateStackSpace(AfterSVESavesI, RealignmentPadding,
783+
SVELocalsSize + StackOffset::getFixed(NumBytes),
784+
EmitAsyncCFI && !HasFP, CFAOffset,
785+
MFI.hasVarSizedObjects());
786+
}
797787
}
798788

799789
// If we need a base pointer, set it up here. It's whatever the value of the
@@ -1391,7 +1381,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13911381
if (HasFP && AFI->hasSwiftAsyncContext())
13921382
emitSwiftAsyncContextFramePointer(EpilogueEndI, DL);
13931383

1394-
StackOffset SVEStackSize = AFL.getSVEStackSize(MF);
1384+
StackOffset ZPRStackSize = AFL.getZPRStackSize(MF);
1385+
StackOffset PPRStackSize = AFL.getPPRStackSize(MF);
1386+
StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
13951387

13961388
// If there is a single SP update, insert it before the ret and we're done.
13971389
if (CombineSPBump) {
@@ -1412,111 +1404,120 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14121404
NumBytes -= PrologueSaveSize;
14131405
assert(NumBytes >= 0 && "Negative stack allocation size!?");
14141406

1415-
// Process the SVE callee-saves to determine what space needs to be
1416-
// deallocated.
1417-
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1418-
MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1419-
RestoreEnd = FirstGPRRestoreI;
1420-
int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
1421-
int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
1422-
int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1423-
1424-
if (SVECalleeSavedSize) {
1425-
if (FPAfterSVECalleeSaves)
1426-
RestoreEnd = MBB.getFirstTerminator();
1427-
1428-
RestoreBegin = std::prev(RestoreEnd);
1429-
while (RestoreBegin != MBB.begin() &&
1430-
isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
1431-
--RestoreBegin;
1432-
1433-
assert(isPartOfSVECalleeSaves(RestoreBegin) &&
1434-
isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
1435-
"Unexpected instruction");
1436-
1437-
StackOffset CalleeSavedSizeAsOffset =
1438-
StackOffset::getScalable(SVECalleeSavedSize);
1439-
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1440-
DeallocateAfter = CalleeSavedSizeAsOffset;
1441-
}
1407+
if (!AFI->hasSplitSVEObjects()) {
1408+
// Process the SVE callee-saves to determine what space needs to be
1409+
// deallocated.
1410+
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1411+
MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1412+
RestoreEnd = FirstGPRRestoreI;
1413+
int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize();
1414+
int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize();
1415+
int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1416+
1417+
if (SVECalleeSavedSize) {
1418+
if (FPAfterSVECalleeSaves)
1419+
RestoreEnd = MBB.getFirstTerminator();
1420+
1421+
RestoreBegin = std::prev(RestoreEnd);
1422+
while (RestoreBegin != MBB.begin() &&
1423+
isPartOfSVECalleeSaves(std::prev(RestoreBegin)))
1424+
--RestoreBegin;
1425+
1426+
assert(isPartOfSVECalleeSaves(RestoreBegin) &&
1427+
isPartOfSVECalleeSaves(std::prev(RestoreEnd)) &&
1428+
"Unexpected instruction");
14421429

1443-
// Deallocate the SVE area.
1444-
if (FPAfterSVECalleeSaves) {
1445-
// If the callee-save area is before FP, restoring the FP implicitly
1446-
// deallocates non-callee-save SVE allocations. Otherwise, deallocate
1447-
// them explicitly.
1448-
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1449-
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1450-
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
1451-
NeedsWinCFI, &HasWinCFI);
1430+
StackOffset CalleeSavedSizeAsOffset =
1431+
StackOffset::getScalable(SVECalleeSavedSize);
1432+
DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1433+
DeallocateAfter = CalleeSavedSizeAsOffset;
14521434
}
14531435

1454-
// Deallocate callee-save non-SVE registers.
1455-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1456-
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
1457-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1458-
1459-
// Deallocate fixed objects.
1460-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1461-
StackOffset::getFixed(FixedObject), TII,
1462-
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
1463-
1464-
// Deallocate callee-save SVE registers.
1465-
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1466-
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1467-
NeedsWinCFI, &HasWinCFI);
1468-
} else if (SVEStackSize) {
1469-
int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
1470-
// If we have stack realignment or variable-sized objects we must use the
1471-
// FP to restore SVE callee saves (as there is an unknown amount of
1472-
// data/padding between the SP and SVE CS area).
1473-
Register BaseForSVEDealloc =
1474-
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1475-
: AArch64::SP;
1476-
if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1477-
Register CalleeSaveBase = AArch64::FP;
1478-
if (int64_t CalleeSaveBaseOffset =
1479-
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
1480-
// If we have a non-zero offset to the non-SVE CS base we need to
1481-
// compute the base address by subtracting the offset in a temporary
1482-
// register first (to avoid briefly deallocating the SVE CS).
1483-
CalleeSaveBase =
1484-
MF.getRegInfo().createVirtualRegister(&AArch64::GPR64RegClass);
1485-
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1486-
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
1487-
MachineInstr::FrameDestroy);
1488-
}
1489-
// The code below will deallocate the stack space by moving the
1490-
// SP to the start of the SVE callee-save area.
1491-
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1492-
StackOffset::getScalable(-SVECalleeSavedSize), TII,
1493-
MachineInstr::FrameDestroy);
1494-
} else if (BaseForSVEDealloc == AArch64::SP) {
1495-
if (SVECalleeSavedSize) {
1496-
// Deallocate the non-SVE locals first before we can deallocate (and
1497-
// restore callee saves) from the SVE area.
1498-
emitFrameOffset(
1499-
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1500-
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
1501-
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1502-
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
1503-
NumBytes = 0;
1436+
// Deallocate the SVE area.
1437+
if (FPAfterSVECalleeSaves) {
1438+
// If the callee-save area is before FP, restoring the FP implicitly
1439+
// deallocates non-callee-save SVE allocations. Otherwise, deallocate
1440+
// them explicitly.
1441+
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
1442+
emitFrameOffset(MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1443+
DeallocateBefore, TII, MachineInstr::FrameDestroy,
1444+
false, NeedsWinCFI, &HasWinCFI);
15041445
}
15051446

1447+
// Deallocate callee-save non-SVE registers.
15061448
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1507-
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
1508-
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1509-
SVEStackSize +
1510-
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1449+
StackOffset::getFixed(AFI->getCalleeSavedStackSize()),
1450+
TII, MachineInstr::FrameDestroy, false, NeedsWinCFI,
1451+
&HasWinCFI);
1452+
1453+
// Deallocate fixed objects.
1454+
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1455+
StackOffset::getFixed(FixedObject), TII,
1456+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1457+
&HasWinCFI);
15111458

1459+
// Deallocate callee-save SVE registers.
15121460
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
15131461
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1514-
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1515-
DeallocateAfter +
1516-
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1462+
NeedsWinCFI, &HasWinCFI);
1463+
} else if (SVEStackSize) {
1464+
int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize();
1465+
// If we have stack realignment or variable-sized objects we must use the
1466+
// FP to restore SVE callee saves (as there is an unknown amount of
1467+
// data/padding between the SP and SVE CS area).
1468+
Register BaseForSVEDealloc =
1469+
(AFI->isStackRealigned() || MFI.hasVarSizedObjects()) ? AArch64::FP
1470+
: AArch64::SP;
1471+
if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1472+
Register CalleeSaveBase = AArch64::FP;
1473+
if (int64_t CalleeSaveBaseOffset =
1474+
AFI->getCalleeSaveBaseToFrameRecordOffset()) {
1475+
// If we have a non-zero offset to the non-SVE CS base we need
1476+
// to compute the base address by subtracting the offset in a
1477+
// temporary register first (to avoid briefly deallocating the SVE
1478+
// CS).
1479+
CalleeSaveBase = MBB.getParent()->getRegInfo().createVirtualRegister(
1480+
&AArch64::GPR64RegClass);
1481+
emitFrameOffset(MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1482+
StackOffset::getFixed(-CalleeSaveBaseOffset), TII,
1483+
MachineInstr::FrameDestroy);
1484+
}
1485+
// The code below will deallocate the stack space by moving the
1486+
// SP to the start of the SVE callee-save area.
1487+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1488+
StackOffset::getScalable(-SVECalleeSavedSize), TII,
1489+
MachineInstr::FrameDestroy);
1490+
} else if (BaseForSVEDealloc == AArch64::SP) {
1491+
if (SVECalleeSavedSize) {
1492+
// Deallocate the non-SVE locals first before we can deallocate (and
1493+
// restore callee saves) from the SVE area.
1494+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1495+
StackOffset::getFixed(NumBytes), TII,
1496+
MachineInstr::FrameDestroy, false, NeedsWinCFI,
1497+
&HasWinCFI, EmitCFI && !HasFP,
1498+
SVEStackSize + StackOffset::getFixed(
1499+
NumBytes + PrologueSaveSize));
1500+
NumBytes = 0;
1501+
}
1502+
1503+
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1504+
DeallocateBefore, TII, MachineInstr::FrameDestroy,
1505+
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1506+
SVEStackSize +
1507+
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1508+
1509+
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1510+
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
1511+
NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1512+
DeallocateAfter +
1513+
StackOffset::getFixed(NumBytes + PrologueSaveSize));
1514+
}
1515+
1516+
if (EmitCFI)
1517+
emitCalleeSavedSVERestores(RestoreEnd);
15171518
}
1518-
if (EmitCFI)
1519-
emitCalleeSavedSVERestores(RestoreEnd);
1519+
} else if (AFI->hasSplitSVEObjects() && SVEStackSize) {
1520+
reportFatalInternalError("not implemented yet");
15201521
}
15211522

15221523
if (!HasFP) {

0 commit comments

Comments
 (0)