@@ -3530,8 +3530,7 @@ void AArch64FrameLowering::determineStackHazardSlot(
35303530
35313531 // Stack hazards are only needed in streaming functions.
35323532 SMEAttrs Attrs (MF.getFunction ());
3533- if (!StackHazardInNonStreaming &&
3534- Attrs.hasNonStreamingInterfaceAndBody ())
3533+ if (!StackHazardInNonStreaming && Attrs.hasNonStreamingInterfaceAndBody ())
35353534 return ;
35363535
35373536 MachineFrameInfo &MFI = MF.getFrameInfo ();
@@ -4651,9 +4650,10 @@ struct FrameObject {
46514650 // ObjectFirst==true) should be placed first.
46524651 bool GroupFirst = false ;
46534652
4654- // Used to distinguish between FP and GPR accesses.
4655- // 1 = GPR, 2 = FPR, 8 = Hazard Object .
4653+ // Used to distinguish between FP and GPR accesses. The values are chosen so
4654+ // that they sort FPR < Hazard < GPR and can be OR'd together.
46564655 unsigned Accesses = 0 ;
4656+ enum { AccessFPR = 1 , AccessHazard = 2 , AccessGPR = 4 };
46574657};
46584658
46594659class GroupBuilder {
@@ -4691,7 +4691,7 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
46914691 //
46924692 // If we want to include a stack hazard region, order FPR accesses < the
46934693 // hazard object < GPRs accesses in order to create a separation between the
4694- // two. For the Accesses field 1 = GPR , 2 = FPR, 8 = Hazard Object .
4694+ // two. For the Accesses field 1 = FPR, 2 = Hazard Object, 4 = GPR.
46954695 //
46964696 // Otherwise the "first" object goes first (closest to SP), followed by the
46974697 // members of the "first" group.
@@ -4703,16 +4703,10 @@ bool FrameObjectCompare(const FrameObject &A, const FrameObject &B) {
47034703 //
47044704 // If all else equal, sort by the object index to keep the objects in the
47054705 // original order.
4706- if (A.IsValid != B.IsValid )
4707- return A.IsValid ;
4708- if (A.Accesses == 2 && B.Accesses != 2 )
4709- return true ;
4710- if (A.Accesses == 8 && B.Accesses != 2 )
4711- return true ;
4712- return std::make_tuple (A.ObjectFirst , A.GroupFirst , A.GroupIndex ,
4713- A.ObjectIndex ) <
4714- std::make_tuple (B.ObjectFirst , B.GroupFirst , B.GroupIndex ,
4715- B.ObjectIndex );
4706+ return std::make_tuple (!A.IsValid , A.Accesses , A.ObjectFirst , A.GroupFirst ,
4707+ A.GroupIndex , A.ObjectIndex ) <
4708+ std::make_tuple (!B.IsValid , B.Accesses , B.ObjectFirst , B.GroupFirst ,
4709+ B.GroupIndex , B.ObjectIndex );
47164710}
47174711} // namespace
47184712
@@ -4729,12 +4723,24 @@ void AArch64FrameLowering::orderFrameObjects(
47294723 FrameObjects[Obj].ObjectIndex = Obj;
47304724 }
47314725
4732- // Identify stack slots that are tagged at the same time.
4726+ // Identify FPR vs GPR slots for hazards, and stack slots that are tagged at
4727+ // the same time.
47334728 GroupBuilder GB (FrameObjects);
47344729 for (auto &MBB : MF) {
47354730 for (auto &MI : MBB) {
47364731 if (MI.isDebugInstr ())
47374732 continue ;
4733+
4734+ if (AFI.hasStackHazardSlotIndex ()) {
4735+ std::optional<int > FI = getLdStFrameID (MI, MFI);
4736+ if (FI && *FI >= 0 && *FI < (int )FrameObjects.size ()) {
4737+ if (MFI.getStackID (*FI) == 2 || AArch64InstrInfo::isFpOrNEON (MI))
4738+ FrameObjects[*FI].Accesses |= FrameObject::AccessFPR;
4739+ else
4740+ FrameObjects[*FI].Accesses |= FrameObject::AccessGPR;
4741+ }
4742+ }
4743+
47384744 int OpIndex;
47394745 switch (MI.getOpcode ()) {
47404746 case AArch64::STGloop:
@@ -4768,23 +4774,20 @@ void AArch64FrameLowering::orderFrameObjects(
47684774 GB.AddMember (TaggedFI);
47694775 else
47704776 GB.EndCurrentGroup ();
4771-
4772- if (AFI.hasStackHazardSlotIndex ()) {
4773- std::optional<int > FI = getLdStFrameID (MI, MFI);
4774- if (FI && *FI >= 0 && *FI < (int )FrameObjects.size ()) {
4775- if (MFI.getStackID (*FI) == 2 || AArch64InstrInfo::isFpOrNEON (MI))
4776- FrameObjects[*FI].Accesses |= 2 ;
4777- else
4778- FrameObjects[*FI].Accesses |= 1 ;
4779- }
4780- }
47814777 }
47824778 // Groups should never span multiple basic blocks.
47834779 GB.EndCurrentGroup ();
47844780 }
47854781
4786- if (AFI.hasStackHazardSlotIndex ())
4787- FrameObjects[AFI.getStackHazardSlotIndex ()].Accesses = 8 ;
4782+ if (AFI.hasStackHazardSlotIndex ()) {
4783+ FrameObjects[AFI.getStackHazardSlotIndex ()].Accesses =
4784+ FrameObject::AccessHazard;
4785+ // If a stack object is unknown or both GPR and FPR, sort it into GPR.
4786+ for (auto &Obj : FrameObjects)
4787+ if (!Obj.Accesses ||
4788+ Obj.Accesses == (FrameObject::AccessGPR | FrameObject::AccessFPR))
4789+ Obj.Accesses = FrameObject::AccessGPR;
4790+ }
47884791
47894792 // If the function's tagged base pointer is pinned to a stack slot, we want to
47904793 // put that slot first when possible. This will likely place it at SP + 0,
0 commit comments