@@ -715,85 +715,75 @@ void AArch64PrologueEmitter::emitPrologue() {
715715 if (AFL.windowsRequiresStackProbe (MF, NumBytes + RealignmentPadding))
716716 emitWindowsStackProbe (AfterGPRSavesI, DL, NumBytes, RealignmentPadding);
717717
718- MachineBasicBlock::iterator CalleeSavesEnd = AfterGPRSavesI;
719-
720718 StackOffset PPRCalleeSavesSize =
721719 StackOffset::getScalable (AFI->getPPRCalleeSavedStackSize ());
722720 StackOffset ZPRCalleeSavesSize =
723721 StackOffset::getScalable (AFI->getZPRCalleeSavedStackSize ());
724722 StackOffset SVECalleeSavesSize = PPRCalleeSavesSize + ZPRCalleeSavesSize;
725723 StackOffset PPRLocalsSize = AFL.getPPRStackSize (MF) - PPRCalleeSavesSize;
726724 StackOffset ZPRLocalsSize = AFL.getZPRStackSize (MF) - ZPRCalleeSavesSize;
725+ std::optional<MachineBasicBlock::iterator> ZPRCalleeSavesBegin,
726+ ZPRCalleeSavesEnd, PPRCalleeSavesBegin, PPRCalleeSavesEnd;
727727
728728 StackOffset CFAOffset =
729729 StackOffset::getFixed ((int64_t )MFI.getStackSize () - NumBytes);
730730 MachineBasicBlock::iterator AfterSVESavesI = AfterGPRSavesI;
731-
732731 if (!FPAfterSVECalleeSaves) {
733- MachineBasicBlock::iterator ZPRCalleeSavesBegin = AfterGPRSavesI,
734- ZPRCalleeSavesEnd = AfterGPRSavesI;
735- MachineBasicBlock::iterator PPRCalleeSavesBegin = AfterGPRSavesI,
736- PPRCalleeSavesEnd = AfterGPRSavesI;
737-
738- // Process the SVE callee-saves to determine what space needs to be
739- // allocated.
740-
732+ // Process the SVE callee-saves to find the starts/ends of the ZPR and PPR
733+ // areas.
734+ PPRCalleeSavesBegin = AfterGPRSavesI;
741735 if (PPRCalleeSavesSize) {
742736 LLVM_DEBUG (dbgs () << " PPRCalleeSavedStackSize = "
743737 << PPRCalleeSavesSize.getScalable () << " \n " );
744738
745- PPRCalleeSavesBegin = AfterSVESavesI;
746- assert (isPartOfPPRCalleeSaves (PPRCalleeSavesBegin) &&
739+ assert (isPartOfPPRCalleeSaves (*PPRCalleeSavesBegin) &&
747740 " Unexpected instruction" );
748741 while (isPartOfPPRCalleeSaves (AfterSVESavesI) &&
749742 AfterSVESavesI != MBB.getFirstTerminator ())
750743 ++AfterSVESavesI;
751- PPRCalleeSavesEnd = AfterSVESavesI;
752744 }
753-
745+ PPRCalleeSavesEnd = ZPRCalleeSavesBegin = AfterSVESavesI;
754746 if (ZPRCalleeSavesSize) {
755747 LLVM_DEBUG (dbgs () << " ZPRCalleeSavedStackSize = "
756748 << ZPRCalleeSavesSize.getScalable () << " \n " );
757- ZPRCalleeSavesBegin = AfterSVESavesI;
758- assert (isPartOfZPRCalleeSaves (ZPRCalleeSavesBegin) &&
749+ assert (isPartOfZPRCalleeSaves (*ZPRCalleeSavesBegin) &&
759750 " Unexpected instruction" );
760751 while (isPartOfZPRCalleeSaves (AfterSVESavesI) &&
761752 AfterSVESavesI != MBB.getFirstTerminator ())
762753 ++AfterSVESavesI;
763- ZPRCalleeSavesEnd = AfterSVESavesI;
764754 }
755+ ZPRCalleeSavesEnd = AfterSVESavesI;
756+ }
757+
758+ if (EmitAsyncCFI)
759+ emitCalleeSavedSVELocations (AfterSVESavesI);
765760
761+ if (AFI->hasSplitSVEObjects ()) {
762+ reportFatalInternalError (" not implemented yet" );
763+ } else {
766764 // Allocate space for the callee saves (if any).
767765 StackOffset LocalsSize =
768766 PPRLocalsSize + ZPRLocalsSize + StackOffset::getFixed (NumBytes);
769- MachineBasicBlock::iterator CalleeSavesBegin =
770- AFI->getPPRCalleeSavedStackSize () ? PPRCalleeSavesBegin
771- : ZPRCalleeSavesBegin;
772- allocateStackSpace (CalleeSavesBegin, 0 , SVECalleeSavesSize,
773- EmitAsyncCFI && !HasFP, CFAOffset,
774- MFI.hasVarSizedObjects () || LocalsSize);
775-
776- CalleeSavesEnd = AFI->getZPRCalleeSavedStackSize () ? ZPRCalleeSavesEnd
777- : PPRCalleeSavesEnd;
778- }
779- CFAOffset += SVECalleeSavesSize;
780-
781- if (EmitAsyncCFI)
782- emitCalleeSavedSVELocations (CalleeSavesEnd);
783-
784- // Allocate space for the rest of the frame including SVE locals. Align the
785- // stack as necessary.
786- assert (!(AFL.canUseRedZone (MF) && NeedsRealignment) &&
787- " Cannot use redzone with stack realignment" );
788- if (!AFL.canUseRedZone (MF)) {
789- // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
790- // the correct value here, as NumBytes also includes padding bytes,
791- // which shouldn't be counted here.
792- StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
793- allocateStackSpace (CalleeSavesEnd, RealignmentPadding,
794- SVELocalsSize + StackOffset::getFixed (NumBytes),
795- EmitAsyncCFI && !HasFP, CFAOffset,
796- MFI.hasVarSizedObjects ());
767+ if (!FPAfterSVECalleeSaves)
768+ allocateStackSpace (AfterGPRSavesI, 0 , SVECalleeSavesSize,
769+ EmitAsyncCFI && !HasFP, CFAOffset,
770+ MFI.hasVarSizedObjects () || LocalsSize);
771+ CFAOffset += SVECalleeSavesSize;
772+
773+ // Allocate space for the rest of the frame including SVE locals. Align the
774+ // stack as necessary.
775+ assert (!(AFL.canUseRedZone (MF) && NeedsRealignment) &&
776+ " Cannot use redzone with stack realignment" );
777+ if (!AFL.canUseRedZone (MF)) {
778+ // FIXME: in the case of dynamic re-alignment, NumBytes doesn't have
779+ // the correct value here, as NumBytes also includes padding bytes,
780+ // which shouldn't be counted here.
781+ StackOffset SVELocalsSize = PPRLocalsSize + ZPRLocalsSize;
782+ allocateStackSpace (AfterSVESavesI, RealignmentPadding,
783+ SVELocalsSize + StackOffset::getFixed (NumBytes),
784+ EmitAsyncCFI && !HasFP, CFAOffset,
785+ MFI.hasVarSizedObjects ());
786+ }
797787 }
798788
799789 // If we need a base pointer, set it up here. It's whatever the value of the
@@ -1391,7 +1381,9 @@ void AArch64EpilogueEmitter::emitEpilogue() {
13911381 if (HasFP && AFI->hasSwiftAsyncContext ())
13921382 emitSwiftAsyncContextFramePointer (EpilogueEndI, DL);
13931383
1394- StackOffset SVEStackSize = AFL.getSVEStackSize (MF);
1384+ StackOffset ZPRStackSize = AFL.getZPRStackSize (MF);
1385+ StackOffset PPRStackSize = AFL.getPPRStackSize (MF);
1386+ StackOffset SVEStackSize = ZPRStackSize + PPRStackSize;
13951387
13961388 // If there is a single SP update, insert it before the ret and we're done.
13971389 if (CombineSPBump) {
@@ -1412,111 +1404,120 @@ void AArch64EpilogueEmitter::emitEpilogue() {
14121404 NumBytes -= PrologueSaveSize;
14131405 assert (NumBytes >= 0 && " Negative stack allocation size!?" );
14141406
1415- // Process the SVE callee-saves to determine what space needs to be
1416- // deallocated.
1417- StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1418- MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1419- RestoreEnd = FirstGPRRestoreI;
1420- int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize ();
1421- int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize ();
1422- int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1423-
1424- if (SVECalleeSavedSize) {
1425- if (FPAfterSVECalleeSaves)
1426- RestoreEnd = MBB.getFirstTerminator ();
1427-
1428- RestoreBegin = std::prev (RestoreEnd);
1429- while (RestoreBegin != MBB.begin () &&
1430- isPartOfSVECalleeSaves (std::prev (RestoreBegin)))
1431- --RestoreBegin;
1432-
1433- assert (isPartOfSVECalleeSaves (RestoreBegin) &&
1434- isPartOfSVECalleeSaves (std::prev (RestoreEnd)) &&
1435- " Unexpected instruction" );
1436-
1437- StackOffset CalleeSavedSizeAsOffset =
1438- StackOffset::getScalable (SVECalleeSavedSize);
1439- DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1440- DeallocateAfter = CalleeSavedSizeAsOffset;
1441- }
1407+ if (!AFI->hasSplitSVEObjects ()) {
1408+ // Process the SVE callee-saves to determine what space needs to be
1409+ // deallocated.
1410+ StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
1411+ MachineBasicBlock::iterator RestoreBegin = FirstGPRRestoreI,
1412+ RestoreEnd = FirstGPRRestoreI;
1413+ int64_t ZPRCalleeSavedSize = AFI->getZPRCalleeSavedStackSize ();
1414+ int64_t PPRCalleeSavedSize = AFI->getPPRCalleeSavedStackSize ();
1415+ int64_t SVECalleeSavedSize = ZPRCalleeSavedSize + PPRCalleeSavedSize;
1416+
1417+ if (SVECalleeSavedSize) {
1418+ if (FPAfterSVECalleeSaves)
1419+ RestoreEnd = MBB.getFirstTerminator ();
1420+
1421+ RestoreBegin = std::prev (RestoreEnd);
1422+ while (RestoreBegin != MBB.begin () &&
1423+ isPartOfSVECalleeSaves (std::prev (RestoreBegin)))
1424+ --RestoreBegin;
1425+
1426+ assert (isPartOfSVECalleeSaves (RestoreBegin) &&
1427+ isPartOfSVECalleeSaves (std::prev (RestoreEnd)) &&
1428+ " Unexpected instruction" );
14421429
1443- // Deallocate the SVE area.
1444- if (FPAfterSVECalleeSaves) {
1445- // If the callee-save area is before FP, restoring the FP implicitly
1446- // deallocates non-callee-save SVE allocations. Otherwise, deallocate
1447- // them explicitly.
1448- if (!AFI->isStackRealigned () && !MFI.hasVarSizedObjects ()) {
1449- emitFrameOffset (MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1450- DeallocateBefore, TII, MachineInstr::FrameDestroy, false ,
1451- NeedsWinCFI, &HasWinCFI);
1430+ StackOffset CalleeSavedSizeAsOffset =
1431+ StackOffset::getScalable (SVECalleeSavedSize);
1432+ DeallocateBefore = SVEStackSize - CalleeSavedSizeAsOffset;
1433+ DeallocateAfter = CalleeSavedSizeAsOffset;
14521434 }
14531435
1454- // Deallocate callee-save non-SVE registers.
1455- emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1456- StackOffset::getFixed (AFI->getCalleeSavedStackSize ()), TII,
1457- MachineInstr::FrameDestroy, false , NeedsWinCFI, &HasWinCFI);
1458-
1459- // Deallocate fixed objects.
1460- emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1461- StackOffset::getFixed (FixedObject), TII,
1462- MachineInstr::FrameDestroy, false , NeedsWinCFI, &HasWinCFI);
1463-
1464- // Deallocate callee-save SVE registers.
1465- emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1466- DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
1467- NeedsWinCFI, &HasWinCFI);
1468- } else if (SVEStackSize) {
1469- int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize ();
1470- // If we have stack realignment or variable-sized objects we must use the
1471- // FP to restore SVE callee saves (as there is an unknown amount of
1472- // data/padding between the SP and SVE CS area).
1473- Register BaseForSVEDealloc =
1474- (AFI->isStackRealigned () || MFI.hasVarSizedObjects ()) ? AArch64::FP
1475- : AArch64::SP;
1476- if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1477- Register CalleeSaveBase = AArch64::FP;
1478- if (int64_t CalleeSaveBaseOffset =
1479- AFI->getCalleeSaveBaseToFrameRecordOffset ()) {
1480- // If we have have an non-zero offset to the non-SVE CS base we need to
1481- // compute the base address by subtracting the offest in a temporary
1482- // register first (to avoid briefly deallocating the SVE CS).
1483- CalleeSaveBase =
1484- MF.getRegInfo ().createVirtualRegister (&AArch64::GPR64RegClass);
1485- emitFrameOffset (MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1486- StackOffset::getFixed (-CalleeSaveBaseOffset), TII,
1487- MachineInstr::FrameDestroy);
1488- }
1489- // The code below will deallocate the stack space space by moving the
1490- // SP to the start of the SVE callee-save area.
1491- emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1492- StackOffset::getScalable (-SVECalleeSavedSize), TII,
1493- MachineInstr::FrameDestroy);
1494- } else if (BaseForSVEDealloc == AArch64::SP) {
1495- if (SVECalleeSavedSize) {
1496- // Deallocate the non-SVE locals first before we can deallocate (and
1497- // restore callee saves) from the SVE area.
1498- emitFrameOffset (
1499- MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1500- StackOffset::getFixed (NumBytes), TII, MachineInstr::FrameDestroy,
1501- false , NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1502- SVEStackSize + StackOffset::getFixed (NumBytes + PrologueSaveSize));
1503- NumBytes = 0 ;
1436+ // Deallocate the SVE area.
1437+ if (FPAfterSVECalleeSaves) {
1438+ // If the callee-save area is before FP, restoring the FP implicitly
1439+ // deallocates non-callee-save SVE allocations. Otherwise, deallocate
1440+ // them explicitly.
1441+ if (!AFI->isStackRealigned () && !MFI.hasVarSizedObjects ()) {
1442+ emitFrameOffset (MBB, FirstGPRRestoreI, DL, AArch64::SP, AArch64::SP,
1443+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
1444+ false , NeedsWinCFI, &HasWinCFI);
15041445 }
15051446
1447+ // Deallocate callee-save non-SVE registers.
15061448 emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1507- DeallocateBefore, TII, MachineInstr::FrameDestroy, false ,
1508- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1509- SVEStackSize +
1510- StackOffset::getFixed (NumBytes + PrologueSaveSize));
1449+ StackOffset::getFixed (AFI->getCalleeSavedStackSize ()),
1450+ TII, MachineInstr::FrameDestroy, false , NeedsWinCFI,
1451+ &HasWinCFI);
1452+
1453+ // Deallocate fixed objects.
1454+ emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1455+ StackOffset::getFixed (FixedObject), TII,
1456+ MachineInstr::FrameDestroy, false , NeedsWinCFI,
1457+ &HasWinCFI);
15111458
1459+ // Deallocate callee-save SVE registers.
15121460 emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
15131461 DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
1514- NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1515- DeallocateAfter +
1516- StackOffset::getFixed (NumBytes + PrologueSaveSize));
1462+ NeedsWinCFI, &HasWinCFI);
1463+ } else if (SVEStackSize) {
1464+ int64_t SVECalleeSavedSize = AFI->getSVECalleeSavedStackSize ();
1465+ // If we have stack realignment or variable-sized objects we must use the
1466+ // FP to restore SVE callee saves (as there is an unknown amount of
1467+ // data/padding between the SP and SVE CS area).
1468+ Register BaseForSVEDealloc =
1469+ (AFI->isStackRealigned () || MFI.hasVarSizedObjects ()) ? AArch64::FP
1470+ : AArch64::SP;
1471+ if (SVECalleeSavedSize && BaseForSVEDealloc == AArch64::FP) {
1472+ Register CalleeSaveBase = AArch64::FP;
1473+ if (int64_t CalleeSaveBaseOffset =
1474+ AFI->getCalleeSaveBaseToFrameRecordOffset ()) {
1475+ // If we have a non-zero offset to the non-SVE CS base we need
1476+ // to compute the base address by subtracting the offset in a
1477+ // temporary register first (to avoid briefly deallocating the SVE
1478+ // CS).
1479+ CalleeSaveBase = MBB.getParent ()->getRegInfo ().createVirtualRegister (
1480+ &AArch64::GPR64RegClass);
1481+ emitFrameOffset (MBB, RestoreBegin, DL, CalleeSaveBase, AArch64::FP,
1482+ StackOffset::getFixed (-CalleeSaveBaseOffset), TII,
1483+ MachineInstr::FrameDestroy);
1484+ }
1485+ // The code below will deallocate the stack space by moving the
1486+ // SP to the start of the SVE callee-save area.
1487+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, CalleeSaveBase,
1488+ StackOffset::getScalable (-SVECalleeSavedSize), TII,
1489+ MachineInstr::FrameDestroy);
1490+ } else if (BaseForSVEDealloc == AArch64::SP) {
1491+ if (SVECalleeSavedSize) {
1492+ // Deallocate the non-SVE locals first before we can deallocate (and
1493+ // restore callee saves) from the SVE area.
1494+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1495+ StackOffset::getFixed (NumBytes), TII,
1496+ MachineInstr::FrameDestroy, false , NeedsWinCFI,
1497+ &HasWinCFI, EmitCFI && !HasFP,
1498+ SVEStackSize + StackOffset::getFixed (
1499+ NumBytes + PrologueSaveSize));
1500+ NumBytes = 0 ;
1501+ }
1502+
1503+ emitFrameOffset (MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
1504+ DeallocateBefore, TII, MachineInstr::FrameDestroy,
1505+ false , NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1506+ SVEStackSize +
1507+ StackOffset::getFixed (NumBytes + PrologueSaveSize));
1508+
1509+ emitFrameOffset (MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
1510+ DeallocateAfter, TII, MachineInstr::FrameDestroy, false ,
1511+ NeedsWinCFI, &HasWinCFI, EmitCFI && !HasFP,
1512+ DeallocateAfter +
1513+ StackOffset::getFixed (NumBytes + PrologueSaveSize));
1514+ }
1515+
1516+ if (EmitCFI)
1517+ emitCalleeSavedSVERestores (RestoreEnd);
15171518 }
1518- if (EmitCFI)
1519- emitCalleeSavedSVERestores (RestoreEnd );
1519+ } else if (AFI-> hasSplitSVEObjects () && SVEStackSize) {
1520+ reportFatalInternalError ( " not implemented yet " );
15201521 }
15211522
15221523 if (!HasFP) {
0 commit comments