240240#include " llvm/Support/CommandLine.h"
241241#include " llvm/Support/Debug.h"
242242#include " llvm/Support/ErrorHandling.h"
243+ #include " llvm/Support/FormatVariadic.h"
243244#include " llvm/Support/MathExtras.h"
244245#include " llvm/Support/raw_ostream.h"
245246#include " llvm/Target/TargetMachine.h"
@@ -275,6 +276,10 @@ cl::opt<bool> EnableHomogeneousPrologEpilog(
275276// Stack hazard padding size. 0 = disabled.
276277static cl::opt<unsigned > StackHazardSize (" aarch64-stack-hazard-size" ,
277278 cl::init (0 ), cl::Hidden);
279+ // Stack hazard size for analysis remarks. StackHazardSize takes precedence.
280+ static cl::opt<unsigned >
281+ StackHazardRemarkSize (" aarch64-stack-hazard-remark-size" , cl::init(0 ),
282+ cl::Hidden);
278283// Whether to insert padding into non-streaming functions (for testing).
279284static cl::opt<bool >
280285 StackHazardInNonStreaming (" aarch64-stack-hazard-in-non-streaming" ,
@@ -2615,9 +2620,16 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
26152620 const auto &MFI = MF.getFrameInfo ();
26162621
26172622 int64_t ObjectOffset = MFI.getObjectOffset (FI);
2623+ StackOffset SVEStackSize = getSVEStackSize (MF);
2624+
2625+ // For VLA-area objects, just emit an offset at the end of the stack frame.
2626+ // Whilst not quite correct, these objects do live at the end of the frame and
2627+ // so it is more useful for analysis for the offset to reflect this.
2628+ if (MFI.isVariableSizedObjectIndex (FI)) {
2629+ return StackOffset::getFixed (-((int64_t )MFI.getStackSize ())) - SVEStackSize;
2630+ }
26182631
26192632 // This is correct in the absence of any SVE stack objects.
2620- StackOffset SVEStackSize = getSVEStackSize (MF);
26212633 if (!SVEStackSize)
26222634 return StackOffset::getFixed (ObjectOffset - getOffsetOfLocalArea ());
26232635
@@ -3528,13 +3540,9 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
35283540 return true ;
35293541}
35303542
3531- // Return the FrameID for a Load/Store instruction by looking at the MMO.
3532- static std::optional<int > getLdStFrameID (const MachineInstr &MI,
3533- const MachineFrameInfo &MFI) {
3534- if (!MI.mayLoadOrStore () || MI.getNumMemOperands () < 1 )
3535- return std::nullopt ;
3536-
3537- MachineMemOperand *MMO = *MI.memoperands_begin ();
3543+ // Return the FrameID for a MMO.
3544+ static std::optional<int > getMMOFrameID (MachineMemOperand *MMO,
3545+ const MachineFrameInfo &MFI) {
35383546 auto *PSV =
35393547 dyn_cast_or_null<FixedStackPseudoSourceValue>(MMO->getPseudoValue ());
35403548 if (PSV)
@@ -3552,6 +3560,15 @@ static std::optional<int> getLdStFrameID(const MachineInstr &MI,
35523560 return std::nullopt ;
35533561}
35543562
3563+ // Return the FrameID for a Load/Store instruction by looking at the first MMO.
3564+ static std::optional<int > getLdStFrameID (const MachineInstr &MI,
3565+ const MachineFrameInfo &MFI) {
3566+ if (!MI.mayLoadOrStore () || MI.getNumMemOperands () < 1 )
3567+ return std::nullopt ;
3568+
3569+ return getMMOFrameID (*MI.memoperands_begin (), MFI);
3570+ }
3571+
35553572// Check if a Hazard slot is needed for the current function, and if so create
35563573// one for it. The index is stored in AArch64FunctionInfo->StackHazardSlotIndex,
35573574// which can be used to determine if any hazard padding is needed.
@@ -5029,3 +5046,174 @@ void AArch64FrameLowering::inlineStackProbe(MachineFunction &MF,
50295046 MI->eraseFromParent ();
50305047 }
50315048}
5049+
5050+ struct StackAccess {
5051+ enum AccessType {
5052+ NotAccessed = 0 , // Stack object not accessed by load/store instructions.
5053+ GPR = 1 << 0 , // A general purpose register.
5054+ PPR = 1 << 1 , // A predicate register.
5055+ FPR = 1 << 2 , // A floating point/Neon/SVE register.
5056+ };
5057+
5058+ int Idx;
5059+ StackOffset Offset;
5060+ int64_t Size;
5061+ unsigned AccessTypes;
5062+
5063+ StackAccess () : Idx(0 ), Offset(), Size(0 ), AccessTypes(NotAccessed) {}
5064+
5065+ bool operator <(const StackAccess &Rhs) const {
5066+ return std::make_tuple (start (), Idx) <
5067+ std::make_tuple (Rhs.start (), Rhs.Idx );
5068+ }
5069+
5070+ bool isCPU () const {
5071+ // Predicate register load and store instructions execute on the CPU.
5072+ return AccessTypes & (AccessType::GPR | AccessType::PPR);
5073+ }
5074+ bool isSME () const { return AccessTypes & AccessType::FPR; }
5075+ bool isMixed () const { return isCPU () && isSME (); }
5076+
5077+ int64_t start () const { return Offset.getFixed () + Offset.getScalable (); }
5078+ int64_t end () const { return start () + Size; }
5079+
5080+ std::string getTypeString () const {
5081+ switch (AccessTypes) {
5082+ case AccessType::FPR:
5083+ return " FPR" ;
5084+ case AccessType::PPR:
5085+ return " PPR" ;
5086+ case AccessType::GPR:
5087+ return " GPR" ;
5088+ case AccessType::NotAccessed:
5089+ return " NA" ;
5090+ default :
5091+ return " Mixed" ;
5092+ }
5093+ }
5094+
5095+ void print (raw_ostream &OS) const {
5096+ OS << getTypeString () << " stack object at [SP"
5097+ << (Offset.getFixed () < 0 ? " " : " +" ) << Offset.getFixed ();
5098+ if (Offset.getScalable ())
5099+ OS << (Offset.getScalable () < 0 ? " " : " +" ) << Offset.getScalable ()
5100+ << " * vscale" ;
5101+ OS << " ]" ;
5102+ }
5103+ };
5104+
// Allow a StackAccess to be streamed into raw_ostream-based diagnostics
// (e.g. remark messages assembled via formatv).
static inline raw_ostream &operator<<(raw_ostream &OS, const StackAccess &SA) {
  SA.print(OS);
  return OS;
}
5109+
5110+ void AArch64FrameLowering::emitRemarks (
5111+ const MachineFunction &MF, MachineOptimizationRemarkEmitter *ORE) const {
5112+
5113+ SMEAttrs Attrs (MF.getFunction ());
5114+ if (Attrs.hasNonStreamingInterfaceAndBody ())
5115+ return ;
5116+
5117+ const uint64_t HazardSize =
5118+ (StackHazardSize) ? StackHazardSize : StackHazardRemarkSize;
5119+
5120+ if (HazardSize == 0 )
5121+ return ;
5122+
5123+ const MachineFrameInfo &MFI = MF.getFrameInfo ();
5124+ // Bail if function has no stack objects.
5125+ if (!MFI.hasStackObjects ())
5126+ return ;
5127+
5128+ std::vector<StackAccess> StackAccesses (MFI.getNumObjects ());
5129+
5130+ size_t NumFPLdSt = 0 ;
5131+ size_t NumNonFPLdSt = 0 ;
5132+
5133+ // Collect stack accesses via Load/Store instructions.
5134+ for (const MachineBasicBlock &MBB : MF) {
5135+ for (const MachineInstr &MI : MBB) {
5136+ if (!MI.mayLoadOrStore () || MI.getNumMemOperands () < 1 )
5137+ continue ;
5138+ for (MachineMemOperand *MMO : MI.memoperands ()) {
5139+ std::optional<int > FI = getMMOFrameID (MMO, MFI);
5140+ if (FI && !MFI.isDeadObjectIndex (*FI)) {
5141+ int FrameIdx = *FI;
5142+
5143+ size_t ArrIdx = FrameIdx + MFI.getNumFixedObjects ();
5144+ if (StackAccesses[ArrIdx].AccessTypes == StackAccess::NotAccessed) {
5145+ StackAccesses[ArrIdx].Idx = FrameIdx;
5146+ StackAccesses[ArrIdx].Offset =
5147+ getFrameIndexReferenceFromSP (MF, FrameIdx);
5148+ StackAccesses[ArrIdx].Size = MFI.getObjectSize (FrameIdx);
5149+ }
5150+
5151+ unsigned RegTy = StackAccess::AccessType::GPR;
5152+ if (MFI.getStackID (FrameIdx) == TargetStackID::ScalableVector) {
5153+ if (AArch64::PPRRegClass.contains (MI.getOperand (0 ).getReg ()))
5154+ RegTy = StackAccess::PPR;
5155+ else
5156+ RegTy = StackAccess::FPR;
5157+ } else if (AArch64InstrInfo::isFpOrNEON (MI)) {
5158+ RegTy = StackAccess::FPR;
5159+ }
5160+
5161+ StackAccesses[ArrIdx].AccessTypes |= RegTy;
5162+
5163+ if (RegTy == StackAccess::FPR)
5164+ ++NumFPLdSt;
5165+ else
5166+ ++NumNonFPLdSt;
5167+ }
5168+ }
5169+ }
5170+ }
5171+
5172+ if (NumFPLdSt == 0 || NumNonFPLdSt == 0 )
5173+ return ;
5174+
5175+ llvm::sort (StackAccesses);
5176+ StackAccesses.erase (llvm::remove_if (StackAccesses,
5177+ [](const StackAccess &S) {
5178+ return S.AccessTypes ==
5179+ StackAccess::NotAccessed;
5180+ }),
5181+ StackAccesses.end ());
5182+
5183+ SmallVector<const StackAccess *> MixedObjects;
5184+ SmallVector<std::pair<const StackAccess *, const StackAccess *>> HazardPairs;
5185+
5186+ if (StackAccesses.front ().isMixed ())
5187+ MixedObjects.push_back (&StackAccesses.front ());
5188+
5189+ for (auto It = StackAccesses.begin (), End = std::prev (StackAccesses.end ());
5190+ It != End; ++It) {
5191+ const auto &First = *It;
5192+ const auto &Second = *(It + 1 );
5193+
5194+ if (Second.isMixed ())
5195+ MixedObjects.push_back (&Second);
5196+
5197+ if ((First.isSME () && Second.isCPU ()) ||
5198+ (First.isCPU () && Second.isSME ())) {
5199+ uint64_t Distance = static_cast <uint64_t >(Second.start () - First.end ());
5200+ if (Distance < HazardSize)
5201+ HazardPairs.emplace_back (&First, &Second);
5202+ }
5203+ }
5204+
5205+ auto EmitRemark = [&](llvm::StringRef Str) {
5206+ ORE->emit ([&]() {
5207+ auto R = MachineOptimizationRemarkAnalysis (
5208+ " sme" , " StackHazard" , MF.getFunction ().getSubprogram (), &MF.front ());
5209+ return R << formatv (" stack hazard in '{0}': " , MF.getName ()).str () << Str;
5210+ });
5211+ };
5212+
5213+ for (const auto &P : HazardPairs)
5214+ EmitRemark (formatv (" {0} is too close to {1}" , *P.first , *P.second ).str ());
5215+
5216+ for (const auto *Obj : MixedObjects)
5217+ EmitRemark (
5218+ formatv (" {0} accessed by both GP and FP instructions" , *Obj).str ());
5219+ }
0 commit comments