@@ -330,13 +330,36 @@ static int64_t getArgumentStackToRestore(MachineFunction &MF,
330330
331331static bool produceCompactUnwindFrame (MachineFunction &MF);
332332static bool needsWinCFI (const MachineFunction &MF);
333- static StackOffset getZPRStackSize (const MachineFunction &MF);
334- static StackOffset getPPRStackSize (const MachineFunction &MF);
335- static StackOffset getSVEStackSize (const MachineFunction &MF);
336333static Register findScratchNonCalleeSaveRegister (MachineBasicBlock *MBB,
337334 bool HasCall = false );
338335static bool requiresSaveVG (const MachineFunction &MF);
339- static bool hasSVEStackSize (const MachineFunction &MF);
336+
/// Returns the streaming hazard size reported by MF's AArch64 subtarget.
337+ static unsigned getStackHazardSize (const MachineFunction &MF) {
338+ return MF.getSubtarget <AArch64Subtarget>().getStreamingHazardSize ();
339+ }
340+
341+ /// Returns the size of the entire ZPR stack frame (callee saves + spills) as a scalable StackOffset.
342+ static StackOffset getZPRStackSize (const MachineFunction &MF) {
343+ const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
344+ return StackOffset::getScalable (AFI->getStackSizeZPR ());
345+ }
346+
347+ /// Returns the size of the entire PPR stack frame (callee saves + spills) as a scalable StackOffset.
348+ static StackOffset getPPRStackSize (const MachineFunction &MF) {
349+ const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
350+ return StackOffset::getScalable (AFI->getStackSizePPR ());
351+ }
352+
353+ /// Returns the size of the entire SVE stack frame: the sum of the ZPR and PPR stack sizes.
354+ static StackOffset getSVEStackSize (const MachineFunction &MF) {
355+ return getZPRStackSize (MF) + getPPRStackSize (MF);
356+ }
357+
358+ /// Returns true if PPRs are spilled as ZPRs, i.e. the PPR register class reports a spill size of 16.
359+ static bool arePPRsSpilledAsZPR (const MachineFunction &MF) {
360+ return MF.getSubtarget ().getRegisterInfo ()->getSpillSize (
361+ AArch64::PPRRegClass) == 16 ;
362+ }
340363
341364// / Returns true if a homogeneous prolog or epilog code can be emitted
342365// / for the size optimization. If possible, a frame helper call is injected.
@@ -353,8 +376,10 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
353376 // TODO: Windows is not supported yet.
354377 if (needsWinCFI (MF))
355378 return false ;
379+
356380 // TODO: SVE is not supported yet.
357- if (hasSVEStackSize (MF))
381+ auto *AFI = MF.getInfo <AArch64FunctionInfo>();
382+ if (AFI->hasSVEStackSize ())
358383 return false ;
359384
360385 // Bail on stack adjustment needed on return for simplicity.
@@ -365,7 +390,6 @@ bool AArch64FrameLowering::homogeneousPrologEpilog(
365390 if (Exit && getArgumentStackToRestore (MF, *Exit))
366391 return false ;
367392
368- auto *AFI = MF.getInfo <AArch64FunctionInfo>();
369393 if (AFI->hasSwiftAsyncContext () || AFI->hasStreamingModeChanges ())
370394 return false ;
371395
@@ -454,38 +478,6 @@ static unsigned getFixedObjectSize(const MachineFunction &MF,
454478 }
455479}
456480
457- static unsigned getStackHazardSize (const MachineFunction &MF) {
458- return MF.getSubtarget <AArch64Subtarget>().getStreamingHazardSize ();
459- }
460-
461- // / Returns the size of the entire ZPR stackframe (calleesaves + spills).
462- static StackOffset getZPRStackSize (const MachineFunction &MF) {
463- const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
464- return StackOffset::getScalable (AFI->getStackSizeZPR ());
465- }
466-
467- // / Returns the size of the entire PPR stackframe (calleesaves + spills).
468- static StackOffset getPPRStackSize (const MachineFunction &MF) {
469- const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
470- return StackOffset::getScalable (AFI->getStackSizePPR ());
471- }
472-
473- // / Returns the size of the entire SVE stackframe (PPRs + ZPRs).
474- static StackOffset getSVEStackSize (const MachineFunction &MF) {
475- return getZPRStackSize (MF) + getPPRStackSize (MF);
476- }
477-
478- static bool hasSVEStackSize (const MachineFunction &MF) {
479- const AArch64FunctionInfo *AFI = MF.getInfo <AArch64FunctionInfo>();
480- return AFI->getStackSizeZPR () > 0 || AFI->getStackSizePPR () > 0 ;
481- }
482-
483- // / Returns true if PPRs are spilled as ZPRs.
484- static bool arePPRsSpilledAsZPR (const MachineFunction &MF) {
485- return MF.getSubtarget ().getRegisterInfo ()->getSpillSize (
486- AArch64::PPRRegClass) == 16 ;
487- }
488-
489481bool AArch64FrameLowering::canUseRedZone (const MachineFunction &MF) const {
490482 if (!EnableRedZone)
491483 return false ;
@@ -511,7 +503,7 @@ bool AArch64FrameLowering::canUseRedZone(const MachineFunction &MF) const {
511503 !Subtarget.hasSVE ();
512504
513505 return !(MFI.hasCalls () || hasFP (MF) || NumBytes > RedZoneSize ||
514- hasSVEStackSize (MF ) || LowerQRegCopyThroughMem);
506+ AFI-> hasSVEStackSize () || LowerQRegCopyThroughMem);
515507}
516508
517509// / hasFPImpl - Return true if the specified function should have a dedicated
@@ -1190,7 +1182,7 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
11901182
11911183 // When there is an SVE area on the stack, always allocate the
11921184 // callee-saves and spills/locals separately.
1193- if (hasSVEStackSize (MF ))
1185+ if (AFI-> hasSVEStackSize ())
11941186 return false ;
11951187
11961188 return true ;
@@ -1634,8 +1626,8 @@ static bool isTargetWindows(const MachineFunction &MF) {
16341626 return MF.getSubtarget <AArch64Subtarget>().isTargetWindows ();
16351627}
16361628
1637- // Convenience function to determine whether I is an SVE callee save .
1638- static bool IsZPRCalleeSave (MachineBasicBlock::iterator I) {
1629+ // Convenience function to determine whether I is part of the ZPR callee saves .
1630+ static bool isPartOfZPRCalleeSaves (MachineBasicBlock::iterator I) {
16391631 switch (I->getOpcode ()) {
16401632 default :
16411633 return false ;
@@ -1655,8 +1647,8 @@ static bool IsZPRCalleeSave(MachineBasicBlock::iterator I) {
16551647 }
16561648}
16571649
1658- // Convenience function to determine whether I is an SVE predicate callee save .
1659- static bool IsPPRCalleeSave (MachineBasicBlock::iterator I) {
1650+ // Convenience function to determine whether I is part of the PPR callee saves .
1651+ static bool isPartOfPPRCalleeSaves (MachineBasicBlock::iterator I) {
16601652 switch (I->getOpcode ()) {
16611653 default :
16621654 return false ;
@@ -1667,8 +1659,9 @@ static bool IsPPRCalleeSave(MachineBasicBlock::iterator I) {
16671659 }
16681660}
16691661
1670- static bool IsSVECalleeSave (MachineBasicBlock::iterator I) {
1671- return IsZPRCalleeSave (I) || IsPPRCalleeSave (I);
1662+ // Convenience function to determine whether I is part of the SVE callee saves, i.e. part of either the ZPR or the PPR callee saves.
1663+ static bool isPartOfSVECalleeSaves (MachineBasicBlock::iterator I) {
1664+ return isPartOfZPRCalleeSaves (I) || isPartOfPPRCalleeSaves (I);
16721665}
16731666
16741667static void emitShadowCallStackPrologue (const TargetInstrInfo &TII,
@@ -1912,7 +1905,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19121905 IsFunclet ? getWinEHFuncletFrameSize (MF) : MFI.getStackSize ();
19131906 if (!AFI->hasStackFrame () && !windowsRequiresStackProbe (MF, NumBytes)) {
19141907 assert (!HasFP && " unexpected function without stack frame but with FP" );
1915- assert (!hasSVEStackSize (MF ) &&
1908+ assert (!AFI-> hasSVEStackSize () &&
19161909 " unexpected function without stack frame but with SVE objects" );
19171910 // All of the stack allocation is for locals.
19181911 AFI->setLocalStackSize (NumBytes);
@@ -1986,14 +1979,14 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
19861979 NumBytes -= FixedObject;
19871980
19881981 // Now allocate space for the GPR callee saves.
1989- while (MBBI != End && IsSVECalleeSave (MBBI))
1982+ while (MBBI != End && isPartOfSVECalleeSaves (MBBI))
19901983 ++MBBI;
19911984 MBBI = convertCalleeSaveRestoreToSPPrePostIncDec (
19921985 MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize (), NeedsWinCFI,
19931986 &HasWinCFI, EmitAsyncCFI);
19941987 NumBytes -= AFI->getCalleeSavedStackSize ();
19951988 } else if (CombineSPBump) {
1996- assert (!hasSVEStackSize (MF ) && " Cannot combine SP bump with SVE" );
1989+ assert (!AFI-> hasSVEStackSize () && " Cannot combine SP bump with SVE" );
19971990 emitFrameOffset (MBB, MBBI, DL, AArch64::SP, AArch64::SP,
19981991 StackOffset::getFixed (-NumBytes), TII,
19991992 MachineInstr::FrameSetup, false , NeedsWinCFI, &HasWinCFI,
@@ -2014,7 +2007,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
20142007 // and pre-inc if we decided to combine the callee-save and local stack
20152008 // pointer bump above.
20162009 while (MBBI != End && MBBI->getFlag (MachineInstr::FrameSetup) &&
2017- !IsSVECalleeSave (MBBI)) {
2010+ !isPartOfSVECalleeSaves (MBBI)) {
20182011 if (CombineSPBump &&
20192012 // Only fix-up frame-setup load/store instructions.
20202013 (!requiresSaveVG (MF) || !isVGInstruction (MBBI)))
@@ -2278,8 +2271,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
22782271 << PPRCalleeSavesSize.getScalable () << " \n " );
22792272
22802273 PPRCalleeSavesBegin = MBBI;
2281- assert (IsPPRCalleeSave (PPRCalleeSavesBegin) && " Unexpected instruction" );
2282- while (IsPPRCalleeSave (MBBI) && MBBI != MBB.getFirstTerminator ())
2274+ assert (isPartOfPPRCalleeSaves (PPRCalleeSavesBegin) &&
2275+ " Unexpected instruction" );
2276+ while (isPartOfPPRCalleeSaves (MBBI) && MBBI != MBB.getFirstTerminator ())
22832277 ++MBBI;
22842278 PPRCalleeSavesEnd = MBBI;
22852279 }
@@ -2288,8 +2282,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
22882282 LLVM_DEBUG (dbgs () << " ZPRCalleeSavedStackSize = "
22892283 << ZPRCalleeSavesSize.getScalable () << " \n " );
22902284 ZPRCalleeSavesBegin = MBBI;
2291- assert (IsZPRCalleeSave (ZPRCalleeSavesBegin) && " Unexpected instruction" );
2292- while (IsZPRCalleeSave (MBBI) && MBBI != MBB.getFirstTerminator ())
2285+ assert (isPartOfZPRCalleeSaves (ZPRCalleeSavesBegin) &&
2286+ " Unexpected instruction" );
2287+ while (isPartOfZPRCalleeSaves (MBBI) && MBBI != MBB.getFirstTerminator ())
22932288 ++MBBI;
22942289 ZPRCalleeSavesEnd = MBBI;
22952290 }
@@ -2523,7 +2518,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
25232518 while (LastPopI != Begin) {
25242519 --LastPopI;
25252520 if (!LastPopI->getFlag (MachineInstr::FrameDestroy) ||
2526- (!FPAfterSVECalleeSaves && IsSVECalleeSave (LastPopI))) {
2521+ (!FPAfterSVECalleeSaves && isPartOfSVECalleeSaves (LastPopI))) {
25272522 ++LastPopI;
25282523 break ;
25292524 } else if (CombineSPBump)
@@ -2608,11 +2603,12 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
26082603
26092604 RestoreBegin = std::prev (RestoreEnd);
26102605 while (RestoreBegin != MBB.begin () &&
2611- IsSVECalleeSave (std::prev (RestoreBegin)))
2606+ isPartOfSVECalleeSaves (std::prev (RestoreBegin)))
26122607 --RestoreBegin;
26132608
2614- assert (IsSVECalleeSave (RestoreBegin) &&
2615- IsSVECalleeSave (std::prev (RestoreEnd)) && " Unexpected instruction" );
2609+ assert (isPartOfSVECalleeSaves (RestoreBegin) &&
2610+ isPartOfSVECalleeSaves (std::prev (RestoreEnd)) &&
2611+ " Unexpected instruction" );
26162612
26172613 StackOffset CalleeSavedSizeAsOffset =
26182614 StackOffset::getScalable (SVECalleeSavedSize);
@@ -4315,14 +4311,14 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
43154311 bool SplitSVEObjects = false ) {
43164312 MachineFrameInfo &MFI = MF.getFrameInfo ();
43174313
4318- int64_t ZPRStack = 0 ;
4319- int64_t PPRStack = 0 ;
4314+ SVEStackSizes SVEStack{};
43204315
4321- auto [ZPROffset, PPROffset] = [&] {
4322- if (SplitSVEObjects)
4323- return std::tie (ZPRStack, PPRStack);
4324- return std::tie (ZPRStack, ZPRStack);
4325- }();
4316+ // With SplitSVEObjects we maintain separate stack offsets for predicates
4317+ // (PPRs) and SVE vectors (ZPRs). When SplitSVEObjects is disabled predicates
4318+ // are included in the SVE vector area.
4319+ int64_t &ZPROffset = SVEStack.ZPRStackSize ;
4320+ int64_t &PPROffset =
4321+ SplitSVEObjects ? SVEStack.PPRStackSize : SVEStack.ZPRStackSize ;
43264322
43274323#ifndef NDEBUG
43284324 // First process all fixed stack objects.
@@ -4404,14 +4400,7 @@ determineSVEStackObjectOffsets(MachineFunction &MF, bool AssignOffsets,
44044400
44054401 PPROffset = alignTo (PPROffset, Align (16U ));
44064402 ZPROffset = alignTo (ZPROffset, Align (16U ));
4407-
4408- if (&ZPROffset != &PPROffset) {
4409- // SplitSVEObjects (PPRs and ZPRs allocated to separate areas).
4410- return SVEStackSizes{ZPROffset, PPROffset};
4411- }
4412- // When SplitSVEObjects is disabled just attribute all the stack to ZPRs.
4413- // Determining the split is not necessary.
4414- return SVEStackSizes{ZPROffset, 0 };
4403+ return SVEStack;
44154404}
44164405
44174406SVEStackSizes
@@ -4736,8 +4725,7 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
47364725 " Upwards growing stack unsupported" );
47374726
47384727 auto [ZPRStackSize, PPRStackSize] = assignSVEStackObjectOffsets (MF);
4739- AFI->setStackSizeZPR (ZPRStackSize);
4740- AFI->setStackSizePPR (PPRStackSize);
4728+ AFI->setStackSizeSVE (ZPRStackSize, PPRStackSize);
47414729
47424730 // If this function isn't doing Win64-style C++ EH, we don't need to do
47434731 // anything.
@@ -5269,7 +5257,8 @@ StackOffset AArch64FrameLowering::getFrameIndexReferencePreferSP(
52695257 }
52705258
52715259 // Go to common code if we cannot provide sp + offset.
5272- if (MFI.hasVarSizedObjects () || hasSVEStackSize (MF) ||
5260+ if (MFI.hasVarSizedObjects () ||
5261+ MF.getInfo <AArch64FunctionInfo>()->hasSVEStackSize () ||
52735262 MF.getSubtarget ().getRegisterInfo ()->hasStackRealignment (MF))
52745263 return getFrameIndexReference (MF, FI, FrameReg);
52755264
0 commit comments