From 211ec57533dc455135faa786322a3bc8561e7036 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 6 May 2025 17:45:34 +0000 Subject: [PATCH 01/22] Forward CFI directives alongside their associated instruction to a simple CFI analysis --- llvm/tools/llvm-mc/CFIAnalysis.h | 88 ++++++++++++ llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h | 151 +++++++++++++++++++++ 2 files changed, 239 insertions(+) create mode 100644 llvm/tools/llvm-mc/CFIAnalysis.h create mode 100644 llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h new file mode 100644 index 0000000000000..eeb8741fb012b --- /dev/null +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -0,0 +1,88 @@ +#ifndef LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_H +#define LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Debug.h" + +namespace llvm { + +// TODO remove it, it's just for debug purposes. +void printUntilNextLine(const char *Str) { + for (int I = 0; Str[I] != '\0' && Str[I] != '\n'; I++) + dbgs() << Str[I]; +} + +class CFIAnalysis { + MCContext &Context; + MCInstrInfo const &MCII; + +public: + CFIAnalysis(MCContext &Context, MCInstrInfo const &MCII) + : Context(Context), MCII(MCII) {} + + void update(const MCDwarfFrameInfo &DwarfFrame, const MCInst &Inst, + ArrayRef CFIDirectives) { + dbgs() << "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"; + printUntilNextLine(Inst.getLoc().getPointer()); + dbgs() << "\n"; + dbgs() << "codes: "; + Inst.print(dbgs()); + dbgs() << "\n"; + dbgs() << "------------------------------\n"; + auto *RI = Context.getRegisterInfo(); + auto CFAReg = RI->getLLVMRegNum(DwarfFrame.CurrentCfaRegister, false); + dbgs() << "The CFA register is: " << CFAReg << "\n"; + bool GoingToChangeCFA = false; + for (auto CFIDirective : CFIDirectives) { + auto Op = CFIDirective.getOperation(); + GoingToChangeCFA |= (Op == MCCFIInstruction::OpDefCfa || + Op == MCCFIInstruction::OpDefCfaOffset || + Op == MCCFIInstruction::OpDefCfaRegister || + Op == MCCFIInstruction::OpAdjustCfaOffset || + Op == MCCFIInstruction::OpLLVMDefAspaceCfa); + } + dbgs() << "------------------------------\n"; + bool ChangedCFA = false; + for (int I = 0; I < Inst.getNumOperands(); I++) { + auto &&Operand = Inst.getOperand(I); + if (!Operand.isReg()) + continue; + if (MCII.get(Inst.getOpcode()) + .hasDefOfPhysReg(Inst, Operand.getReg(), *RI)) { + dbgs() << "This instruction modifies: " << Operand.getReg().id() + << "\n"; + if (Operand.getReg() == CFAReg.value()) + ChangedCFA = true; + } + } + dbgs() << "------------------------------\n"; + dbgs() << "The instruction DOES " << (ChangedCFA ? "" : "NOT ") + << "change the CFA.\n"; + dbgs() << "The CFI directives DOES " << (GoingToChangeCFA ? "" : "NOT ") + << "change the CFA.\n"; + if (ChangedCFA && !GoingToChangeCFA) { + Context.reportError(Inst.getLoc(), + "The instruction changes CFA register value, but the " + "CFI directives don't update the CFA."); + } + // TODO needs more work + // if (!ChangedCFA && GoingToChangeCFA) { + // Context.reportError( + // Inst.getLoc(), + // "The instruction doesn't change CFA register value, but the " + // "CFI directives update the CFA."); + // } + dbgs() << "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; + ChangedCFA = false; + } +}; + +} // namespace llvm +#endif \ No newline at end of file diff --git a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h new file mode 100644 index 0000000000000..9c7747520a033 --- /dev/null +++ b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h @@ -0,0 +1,151 @@ +#ifndef LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_MC_STREAMER_H +#define LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_MC_STREAMER_H + +#include "CFIAnalysis.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/Support/Debug.h" +#include +#include +#include + +namespace llvm { + +class CFIAnalysisMCStreamer : public MCStreamer { + struct CFIDirectivesState { + int FrameIndex; // TODO remove it, no need for it + int DirectiveIndex; + + CFIDirectivesState() : FrameIndex(-1), DirectiveIndex(0) {} + + CFIDirectivesState(int FrameIndex, int InstructionIndex) + : FrameIndex(FrameIndex), DirectiveIndex(InstructionIndex) {} + } LastCFIDirectivesState; + std::vector FrameIndices; + + struct ICFI { + MCInst Instruction; + ArrayRef CFIDirectives; + + ICFI(MCInst Instruction, ArrayRef CFIDirectives) + : Instruction(Instruction), CFIDirectives(CFIDirectives) {} + }; + + std::optional LastInstruction; + std::optional LastDwarfFrameInfo; + CFIAnalysis CFIA; + + std::optional getLastICFI() { + if (!LastInstruction) + return std::nullopt; + + auto DwarfFrameInfos = getDwarfFrameInfos(); + int FrameIndex = FrameIndices.back(); + auto CurrentCFIDirectiveState = + hasUnfinishedDwarfFrameInfo() + ? CFIDirectivesState( + FrameIndex, DwarfFrameInfos[FrameIndex].Instructions.size()) + : CFIDirectivesState(); + assert(CurrentCFIDirectiveState.FrameIndex == + LastCFIDirectivesState.FrameIndex); + assert(CurrentCFIDirectiveState.DirectiveIndex >= + LastCFIDirectivesState.DirectiveIndex); + + std::vector CFIDirectives; + if (LastCFIDirectivesState.FrameIndex >= 0) { + auto CFIInstructions = DwarfFrameInfos[FrameIndex].Instructions; + CFIDirectives = std::vector( + CFIInstructions.begin() + LastCFIDirectivesState.DirectiveIndex, + CFIInstructions.begin() + CurrentCFIDirectiveState.DirectiveIndex); + } + + LastCFIDirectivesState = CurrentCFIDirectiveState; + return ICFI(LastInstruction.value(), CFIDirectives); + } + + void feedCFIA() { + if (!LastDwarfFrameInfo) { + // TODO Maybe this corner case causes bugs, when the programmer did a + // mistake in the startproc, endprocs and also made a mistake in not + // adding cfi directives for a instruction. Then this would cause to + // ignore the instruction. + auto LastICFI = getLastICFI(); + assert(!LastICFI || LastICFI->CFIDirectives.empty()); + return; + } + + if (auto ICFI = getLastICFI()) + CFIA.update(LastDwarfFrameInfo.value(), ICFI->Instruction, + ICFI->CFIDirectives); + } + +public: + CFIAnalysisMCStreamer(MCContext &Context, const MCInstrInfo &MCII) + : MCStreamer(Context), LastCFIDirectivesState(), + LastInstruction(std::nullopt), CFIA(Context, MCII) { + FrameIndices.push_back(-1); + } + + /// @name MCStreamer Interface + /// @{ + + bool hasRawTextSupport() const override { return true; } + void emitRawTextImpl(StringRef String) override {} + + bool emitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) override { + return true; + } + + void emitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + Align ByteAlignment) override {} + void beginCOFFSymbolDef(const MCSymbol *Symbol) override {} + void emitCOFFSymbolStorageClass(int StorageClass) override {} + void emitCOFFSymbolType(int Type) override {} + void endCOFFSymbolDef() override {} + void emitXCOFFSymbolLinkageWithVisibility(MCSymbol *Symbol, + MCSymbolAttr Linkage, + MCSymbolAttr Visibility) override {} + + void emitInstruction(const MCInst &Inst, + const MCSubtargetInfo &STI) override { + feedCFIA(); + LastInstruction = Inst; + if (hasUnfinishedDwarfFrameInfo()) + LastDwarfFrameInfo = getDwarfFrameInfos()[FrameIndices.back()]; + else + LastDwarfFrameInfo = std::nullopt; + } + + void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override { + feedCFIA(); + FrameIndices.push_back(getNumFrameInfos()); + LastInstruction = std::nullopt; + LastDwarfFrameInfo = std::nullopt; + LastCFIDirectivesState.FrameIndex = FrameIndices.back(); + LastCFIDirectivesState.DirectiveIndex = 0; + MCStreamer::emitCFIStartProcImpl(Frame); + } + + void emitCFIEndProcImpl(MCDwarfFrameInfo &CurFrame) override { + feedCFIA(); + // TODO this will break if the input frame are malformed. + FrameIndices.pop_back(); + LastInstruction = std::nullopt; + LastDwarfFrameInfo = std::nullopt; + LastCFIDirectivesState.FrameIndex = FrameIndices.back(); + LastCFIDirectivesState.DirectiveIndex = + LastCFIDirectivesState.FrameIndex >= 0 + ? getDwarfFrameInfos()[LastCFIDirectivesState.FrameIndex] + .Instructions.size() + : 0; + MCStreamer::emitCFIEndProcImpl(CurFrame); + } +}; + +} // namespace llvm +#endif \ No newline at end of file From 29325f7e421075281bff3ed07c6bbc20988ea9cd Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 6 May 2025 18:00:50 +0000 Subject: [PATCH 02/22] Create a new CFI analysis for each dwarf separately --- llvm/tools/llvm-mc/CFIAnalysis.h | 2 +- llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h | 21 ++++++++++++--------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index eeb8741fb012b..6abbdfba6d28f 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -50,7 +50,7 @@ class CFIAnalysis { } dbgs() << "------------------------------\n"; bool ChangedCFA = false; - for (int I = 0; I < Inst.getNumOperands(); I++) { + for (unsigned I = 0; I < Inst.getNumOperands(); I++) { auto &&Operand = Inst.getOperand(I); if (!Operand.isReg()) continue; diff --git a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h index 9c7747520a033..76ae536ab4bd3 100644 --- a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h +++ b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h @@ -7,16 +7,15 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrInfo.h" -#include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" -#include "llvm/Support/Debug.h" #include #include -#include namespace llvm { class CFIAnalysisMCStreamer : public MCStreamer { + MCInstrInfo const &MCII; + struct CFIDirectivesState { int FrameIndex; // TODO remove it, no need for it int DirectiveIndex; @@ -27,6 +26,7 @@ class CFIAnalysisMCStreamer : public MCStreamer { : FrameIndex(FrameIndex), DirectiveIndex(InstructionIndex) {} } LastCFIDirectivesState; std::vector FrameIndices; + std::vector CFIAs; struct ICFI { MCInst Instruction; @@ -38,7 +38,6 @@ class CFIAnalysisMCStreamer : public MCStreamer { std::optional LastInstruction; std::optional LastDwarfFrameInfo; - CFIAnalysis CFIA; std::optional getLastICFI() { if (!LastInstruction) @@ -79,15 +78,17 @@ class CFIAnalysisMCStreamer : public MCStreamer { return; } - if (auto ICFI = getLastICFI()) - CFIA.update(LastDwarfFrameInfo.value(), ICFI->Instruction, - ICFI->CFIDirectives); + if (auto ICFI = getLastICFI()) { + assert(!CFIAs.empty()); + CFIAs.back().update(LastDwarfFrameInfo.value(), ICFI->Instruction, + ICFI->CFIDirectives); + } } public: CFIAnalysisMCStreamer(MCContext &Context, const MCInstrInfo &MCII) - : MCStreamer(Context), LastCFIDirectivesState(), - LastInstruction(std::nullopt), CFIA(Context, MCII) { + : MCStreamer(Context), MCII(MCII), LastCFIDirectivesState(), + LastInstruction(std::nullopt) { FrameIndices.push_back(-1); } @@ -124,6 +125,7 @@ class CFIAnalysisMCStreamer : public MCStreamer { void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override { feedCFIA(); FrameIndices.push_back(getNumFrameInfos()); + CFIAs.emplace_back(getContext(), MCII); LastInstruction = std::nullopt; LastDwarfFrameInfo = std::nullopt; LastCFIDirectivesState.FrameIndex = FrameIndices.back(); @@ -135,6 +137,7 @@ class CFIAnalysisMCStreamer : public MCStreamer { feedCFIA(); // TODO this will break if the input frame are malformed. FrameIndices.pop_back(); + CFIAs.pop_back(); LastInstruction = std::nullopt; LastDwarfFrameInfo = std::nullopt; LastCFIDirectivesState.FrameIndex = FrameIndices.back(); From 946b5c6e386108f33de69f5e9535603b2e7c58c2 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 6 May 2025 23:42:31 +0000 Subject: [PATCH 03/22] Separate the logic and display --- llvm/tools/llvm-mc/CFIAnalysis.h | 93 +++++++++++++++++++------------- 1 file changed, 57 insertions(+), 36 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index 6abbdfba6d28f..f527043d3dbbb 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -2,14 +2,18 @@ #define LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_H #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/Support/Debug.h" +#include +#include namespace llvm { @@ -25,20 +29,47 @@ class CFIAnalysis { public: CFIAnalysis(MCContext &Context, MCInstrInfo const &MCII) - : Context(Context), MCII(MCII) {} + : Context(Context), MCII(MCII) { + // TODO it should look at the poluge directives and setup the + // registers' previous value state here, but for now, it's assumed that all + // values are by default `samevalue`. + } void update(const MCDwarfFrameInfo &DwarfFrame, const MCInst &Inst, ArrayRef CFIDirectives) { - dbgs() << "^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"; - printUntilNextLine(Inst.getLoc().getPointer()); - dbgs() << "\n"; - dbgs() << "codes: "; - Inst.print(dbgs()); - dbgs() << "\n"; - dbgs() << "------------------------------\n"; + + auto MCInstInfo = MCII.get(Inst.getOpcode()); auto *RI = Context.getRegisterInfo(); auto CFAReg = RI->getLLVMRegNum(DwarfFrame.CurrentCfaRegister, false); - dbgs() << "The CFA register is: " << CFAReg << "\n"; + + std::set Writes, Reads; // TODO reads is not ready for now + // FIXME this way of extracting uses is buggy: + // for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) + // Reads.insert(MCInstInfo.implicit_uses()[I]); + // for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) + // Writes.insert(MCInstInfo.implicit_defs()[I]); + + for (unsigned I = 0; I < Inst.getNumOperands(); I++) { + auto &&Operand = Inst.getOperand(I); + if (Operand.isReg()) { + // TODO it is not percise, maybe this operand is for output, then it + // means that there is no read happening here. + Reads.insert(Operand.getReg()); + } + + if (Operand.isExpr()) { + // TODO maybe the argument is not a register, but is a expression like + // `34(sp)` that has a register in it. Check if this works or not and if + // no change it somehow that it count that register as reads or writes + // too. + } + + if (Operand.isReg() && + MCInstInfo.hasDefOfPhysReg(Inst, Operand.getReg(), *RI)) + Writes.insert(Operand.getReg().id()); + } + + bool ChangedCFA = Writes.count(CFAReg->id()); bool GoingToChangeCFA = false; for (auto CFIDirective : CFIDirectives) { auto Op = CFIDirective.getOperation(); @@ -48,38 +79,28 @@ class CFIAnalysis { Op == MCCFIInstruction::OpAdjustCfaOffset || Op == MCCFIInstruction::OpLLVMDefAspaceCfa); } - dbgs() << "------------------------------\n"; - bool ChangedCFA = false; - for (unsigned I = 0; I < Inst.getNumOperands(); I++) { - auto &&Operand = Inst.getOperand(I); - if (!Operand.isReg()) - continue; - if (MCII.get(Inst.getOpcode()) - .hasDefOfPhysReg(Inst, Operand.getReg(), *RI)) { - dbgs() << "This instruction modifies: " << Operand.getReg().id() - << "\n"; - if (Operand.getReg() == CFAReg.value()) - ChangedCFA = true; - } - } - dbgs() << "------------------------------\n"; - dbgs() << "The instruction DOES " << (ChangedCFA ? "" : "NOT ") - << "change the CFA.\n"; - dbgs() << "The CFI directives DOES " << (GoingToChangeCFA ? "" : "NOT ") - << "change the CFA.\n"; if (ChangedCFA && !GoingToChangeCFA) { Context.reportError(Inst.getLoc(), "The instruction changes CFA register value, but the " "CFI directives don't update the CFA."); } - // TODO needs more work - // if (!ChangedCFA && GoingToChangeCFA) { - // Context.reportError( - // Inst.getLoc(), - // "The instruction doesn't change CFA register value, but the " - // "CFI directives update the CFA."); - // } - dbgs() << "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; + + dbgs() << "^^^^^^^^^^^^^^^^ InsCFIs ^^^^^^^^^^^^^^^^\n"; + printUntilNextLine(Inst.getLoc().getPointer()); + dbgs() << "\n"; + dbgs() << "-----------------------------------------\n"; + dbgs() << "writes into: { "; + for (auto Reg : Writes) { + dbgs() << (int)Reg << " "; + } + dbgs() << "}\n"; + dbgs() << "-----------------------------------------\n"; + dbgs() << "The CFA register is: " << CFAReg << "\n"; + dbgs() << "The instruction does " << (ChangedCFA ? "" : "NOT ") + << "change the CFA.\n"; + dbgs() << "The CFI directives does " << (GoingToChangeCFA ? "" : "NOT ") + << "change the CFA.\n"; + dbgs() << "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; ChangedCFA = false; } }; From 2376ee856cd37090719d3920e028ad4490bcb83a Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Fri, 9 May 2025 22:52:04 +0000 Subject: [PATCH 04/22] Compile llvm mc depending on bolt --- bolt/lib/Utils/CommandLineOpts.cpp | 2 +- llvm/tools/llvm-mc/CFIAnalysis.h | 35 ++++++++++++++++-- llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h | 12 ++++-- llvm/tools/llvm-mc/CMakeLists.txt | 3 ++ llvm/tools/llvm-mc/llvm-mc.cpp | 43 +++++++++++++--------- 5 files changed, 69 insertions(+), 26 deletions(-) diff --git a/bolt/lib/Utils/CommandLineOpts.cpp b/bolt/lib/Utils/CommandLineOpts.cpp index ad714371436e0..e27606078b633 100644 --- a/bolt/lib/Utils/CommandLineOpts.cpp +++ b/bolt/lib/Utils/CommandLineOpts.cpp @@ -146,7 +146,7 @@ cl::opt Lite("lite", cl::desc("skip processing of cold functions"), cl::cat(BoltCategory)); cl::opt -OutputFilename("o", +OutputFilename("ooo", cl::desc(""), cl::Optional, cl::cat(BoltOutputCategory)); diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index f527043d3dbbb..e3a09139d42af 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -1,8 +1,8 @@ #ifndef LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_H #define LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_H +#include "bolt/Core/MCPlusBuilder.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" @@ -11,12 +11,32 @@ #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" -#include +#include #include namespace llvm { +bolt::MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, + const MCInstrAnalysis *Analysis, + const MCInstrInfo *Info, + const MCRegisterInfo *RegInfo, + const MCSubtargetInfo *STI) { + dbgs() << "arch: " << Arch << ", and expected " << Triple::x86_64 << "\n"; + if (Arch == Triple::x86_64) + return bolt::createX86MCPlusBuilder(Analysis, Info, RegInfo, STI); + + // if (Arch == Triple::aarch64) + // return createAArch64MCPlusBuilder(Analysis, Info, RegInfo, STI); + + // if (Arch == Triple::riscv64) + // return createRISCVMCPlusBuilder(Analysis, Info, RegInfo, STI); + + llvm_unreachable("architecture unsupported by MCPlusBuilder"); +} + // TODO remove it, it's just for debug purposes. void printUntilNextLine(const char *Str) { for (int I = 0; Str[I] != '\0' && Str[I] != '\n'; I++) @@ -26,19 +46,24 @@ void printUntilNextLine(const char *Str) { class CFIAnalysis { MCContext &Context; MCInstrInfo const &MCII; + std::unique_ptr MCPB; public: - CFIAnalysis(MCContext &Context, MCInstrInfo const &MCII) + CFIAnalysis(MCContext &Context, MCInstrInfo const &MCII, + MCInstrAnalysis *MCIA) : Context(Context), MCII(MCII) { // TODO it should look at the poluge directives and setup the // registers' previous value state here, but for now, it's assumed that all // values are by default `samevalue`. + MCPB.reset(createMCPlusBuilder(Context.getTargetTriple().getArch(), MCIA, + &MCII, Context.getRegisterInfo(), + Context.getSubtargetInfo())); } void update(const MCDwarfFrameInfo &DwarfFrame, const MCInst &Inst, ArrayRef CFIDirectives) { - auto MCInstInfo = MCII.get(Inst.getOpcode()); + const MCInstrDesc &MCInstInfo = MCII.get(Inst.getOpcode()); auto *RI = Context.getRegisterInfo(); auto CFAReg = RI->getLLVMRegNum(DwarfFrame.CurrentCfaRegister, false); @@ -95,6 +120,8 @@ class CFIAnalysis { } dbgs() << "}\n"; dbgs() << "-----------------------------------------\n"; + dbgs() << "isMoveMem2Reg: " << MCPB->isMoveMem2Reg(Inst) << "\n"; + dbgs() << "-----------------------------------------\n"; dbgs() << "The CFA register is: " << CFAReg << "\n"; dbgs() << "The instruction does " << (ChangedCFA ? "" : "NOT ") << "change the CFA.\n"; diff --git a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h index 76ae536ab4bd3..382a80aca94b4 100644 --- a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h +++ b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h @@ -6,15 +6,18 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include +#include #include namespace llvm { class CFIAnalysisMCStreamer : public MCStreamer { MCInstrInfo const &MCII; + std::unique_ptr MCIA; struct CFIDirectivesState { int FrameIndex; // TODO remove it, no need for it @@ -86,9 +89,10 @@ class CFIAnalysisMCStreamer : public MCStreamer { } public: - CFIAnalysisMCStreamer(MCContext &Context, const MCInstrInfo &MCII) - : MCStreamer(Context), MCII(MCII), LastCFIDirectivesState(), - LastInstruction(std::nullopt) { + CFIAnalysisMCStreamer(MCContext &Context, const MCInstrInfo &MCII, + std::unique_ptr MCIA) + : MCStreamer(Context), MCII(MCII), MCIA(std::move(MCIA)), + LastCFIDirectivesState(), LastInstruction(std::nullopt) { FrameIndices.push_back(-1); } @@ -125,7 +129,7 @@ class CFIAnalysisMCStreamer : public MCStreamer { void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override { feedCFIA(); FrameIndices.push_back(getNumFrameInfos()); - CFIAs.emplace_back(getContext(), MCII); + CFIAs.emplace_back(getContext(), MCII, MCIA.get()); LastInstruction = std::nullopt; LastDwarfFrameInfo = std::nullopt; LastCFIDirectivesState.FrameIndex = FrameIndices.back(); diff --git a/llvm/tools/llvm-mc/CMakeLists.txt b/llvm/tools/llvm-mc/CMakeLists.txt index f57356f9ee0c0..368f29c62201f 100644 --- a/llvm/tools/llvm-mc/CMakeLists.txt +++ b/llvm/tools/llvm-mc/CMakeLists.txt @@ -13,3 +13,6 @@ add_llvm_tool(llvm-mc llvm-mc.cpp Disassembler.cpp ) + +target_link_libraries(llvm-mc PRIVATE LLVMBOLTCore LLVMBOLTTargetX86) +target_include_directories(llvm-mc PRIVATE "${LLVM_MAIN_SRC_DIR}/../bolt/include") diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp index b59a54d8fbc8a..32f2fa0f05a7c 100644 --- a/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/llvm/tools/llvm-mc/llvm-mc.cpp @@ -11,12 +11,14 @@ // //===----------------------------------------------------------------------===// +#include "CFIAnalysisMCStreamer.h" #include "Disassembler.h" #include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstPrinter.h" +#include "llvm/MC/MCInstrAnalysis.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCObjectWriter.h" @@ -25,6 +27,7 @@ #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" #include "llvm/MC/MCTargetOptionsCommandFlags.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/CommandLine.h" @@ -37,7 +40,11 @@ #include "llvm/Support/TargetSelect.h" #include "llvm/Support/ToolOutputFile.h" #include "llvm/Support/WithColor.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include "llvm/TargetParser/Host.h" +#include +#include using namespace llvm; @@ -114,11 +121,7 @@ static cl::opt CommentColumn("comment-column", cl::desc("Asm comments indentation"), cl::init(40)); -enum OutputFileType { - OFT_Null, - OFT_AssemblyFile, - OFT_ObjectFile -}; +enum OutputFileType { OFT_Null, OFT_AssemblyFile, OFT_ObjectFile }; static cl::opt FileType("filetype", cl::init(OFT_AssemblyFile), cl::desc("Choose an output file type:"), @@ -241,8 +244,8 @@ static const Target *GetTarget(const char *ProgName) { // Get the target specific parser. std::string Error; - const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple, - Error); + const Target *TheTarget = + TargetRegistry::lookupTarget(ArchName, TheTriple, Error); if (!TheTarget) { WithColor::error(errs(), ProgName) << Error; return nullptr; @@ -253,8 +256,8 @@ static const Target *GetTarget(const char *ProgName) { return TheTarget; } -static std::unique_ptr GetOutputStream(StringRef Path, - sys::fs::OpenFlags Flags) { +static std::unique_ptr +GetOutputStream(StringRef Path, sys::fs::OpenFlags Flags) { std::error_code EC; auto Out = std::make_unique(Path, EC, Flags); if (EC) { @@ -278,13 +281,12 @@ static void setDwarfDebugFlags(int argc, char **argv) { static std::string DwarfDebugProducer; static void setDwarfDebugProducer() { - if(!getenv("DEBUG_PRODUCER")) + if (!getenv("DEBUG_PRODUCER")) return; DwarfDebugProducer += getenv("DEBUG_PRODUCER"); } -static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, - raw_ostream &OS) { +static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, raw_ostream &OS) { AsmLexer Lexer(MAI); Lexer.setBuffer(SrcMgr.getMemoryBuffer(SrcMgr.getMainFileID())->getBuffer()); @@ -301,7 +303,7 @@ static int AsLexInput(SourceMgr &SrcMgr, MCAsmInfo &MAI, } static int fillCommandLineSymbols(MCAsmParser &Parser) { - for (auto &I: DefineSymbol) { + for (auto &I : DefineSymbol) { auto Pair = StringRef(I).split('='); auto Sym = Pair.first; auto Val = Pair.second; @@ -325,8 +327,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget, SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str, MCAsmInfo &MAI, MCSubtargetInfo &STI, MCInstrInfo &MCII, MCTargetOptions const &MCOptions) { - std::unique_ptr Parser( - createMCAsmParser(SrcMgr, Ctx, Str, MAI)); + std::unique_ptr Parser(createMCAsmParser(SrcMgr, Ctx, Str, MAI)); std::unique_ptr TAP( TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions)); @@ -337,7 +338,7 @@ static int AssembleInput(const char *ProgName, const Target *TheTarget, } int SymbolResult = fillCommandLineSymbols(*Parser); - if(SymbolResult) + if (SymbolResult) return SymbolResult; Parser->setShowParsedOperands(ShowInstOperands); Parser->setTargetParser(*TAP); @@ -519,6 +520,10 @@ int main(int argc, char **argv) { std::unique_ptr MCII(TheTarget->createMCInstrInfo()); assert(MCII && "Unable to create instruction info!"); + std::unique_ptr MCIA( + TheTarget->createMCInstrAnalysis(MCII.get())); + assert(MCIA && "Unable to create instruction analysis!"); + std::unique_ptr IP; if (FileType == OFT_AssemblyFile) { IP.reset(TheTarget->createMCInstPrinter( @@ -564,7 +569,11 @@ int main(int argc, char **argv) { std::move(CE), std::move(MAB))); } else if (FileType == OFT_Null) { - Str.reset(TheTarget->createNullStreamer(Ctx)); + auto *MyDummyStreamer = + new CFIAnalysisMCStreamer(Ctx, *MCII, std::move(MCIA)); + TheTarget->createNullTargetStreamer(*MyDummyStreamer); + Str.reset(MyDummyStreamer); + // Str.reset(TheTarget->createNullStreamer(Ctx)); } else { assert(FileType == OFT_ObjectFile && "Invalid file type!"); From 2cfa1be3d10e655994d6948c230a19ecc6d696f7 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 13 May 2025 00:17:57 +0000 Subject: [PATCH 05/22] Extract semantic info on MCInst from bolt --- llvm/tools/llvm-mc/CFIAnalysis.h | 152 +++++++++++++++++++++++++++---- 1 file changed, 136 insertions(+), 16 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index e3a09139d42af..70b8a855643b9 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -4,17 +4,21 @@ #include "bolt/Core/MCPlusBuilder.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCInst.h" #include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegister.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Error.h" #include +#include #include namespace llvm { @@ -60,7 +64,7 @@ class CFIAnalysis { Context.getSubtargetInfo())); } - void update(const MCDwarfFrameInfo &DwarfFrame, const MCInst &Inst, + void update(const MCDwarfFrameInfo &DwarfFrame, MCInst &Inst, ArrayRef CFIDirectives) { const MCInstrDesc &MCInstInfo = MCII.get(Inst.getOpcode()); @@ -71,8 +75,8 @@ class CFIAnalysis { // FIXME this way of extracting uses is buggy: // for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) // Reads.insert(MCInstInfo.implicit_uses()[I]); - // for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) - // Writes.insert(MCInstInfo.implicit_defs()[I]); + for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) + Writes.insert(MCInstInfo.implicit_defs()[I]); for (unsigned I = 0; I < Inst.getNumOperands(); I++) { auto &&Operand = Inst.getOperand(I); @@ -113,20 +117,136 @@ class CFIAnalysis { dbgs() << "^^^^^^^^^^^^^^^^ InsCFIs ^^^^^^^^^^^^^^^^\n"; printUntilNextLine(Inst.getLoc().getPointer()); dbgs() << "\n"; - dbgs() << "-----------------------------------------\n"; - dbgs() << "writes into: { "; - for (auto Reg : Writes) { - dbgs() << (int)Reg << " "; + dbgs() << "Op code: " << Inst.getOpcode() << "\n"; + dbgs() << "--------------Operands Info--------------\n"; + auto DefCount = MCInstInfo.getNumDefs(); + for (unsigned I = 0; I < Inst.getNumOperands(); I++) { + dbgs() << "Operand #" << I << ", which will be " + << (I < DefCount ? "defined" : "used") << ", is a"; + if (I == MCPB->getMemoryOperandNo(Inst)) { + dbgs() << " memory access, details are:\n"; + auto X86MemoryOperand = MCPB->evaluateX86MemoryOperand(Inst); + dbgs() << " Base Register{ reg#" << X86MemoryOperand->BaseRegNum + << " }"; + dbgs() << " + (Index Register{ reg#" << X86MemoryOperand->IndexRegNum + << " }"; + dbgs() << " * Scale{ value " << X86MemoryOperand->ScaleImm << " }"; + dbgs() << ") + Displacement{ " + << (X86MemoryOperand->DispExpr + ? "an expression" + : "value " + itostr(X86MemoryOperand->DispImm)) + << " }\n"; + // TODO, it's not correct always, it cannot be assumed (or should be + // checked) that memory operands are flatten into 4 operands in MCInc. + I += 4; + continue; + } + auto &&Operand = Inst.getOperand(I); + if (Operand.isImm()) { + dbgs() << "n immediate with value " << Operand.getImm() << "\n"; + continue; + } + if (Operand.isReg()) { + dbgs() << " reg#" << Operand.getReg() << "\n"; + continue; + } + assert(Operand.isExpr()); + dbgs() << "n unknown expression \n"; + } + if (MCInstInfo.NumImplicitDefs) { + dbgs() << "implicitly defines: { "; + for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) { + dbgs() << "reg#" << MCInstInfo.implicit_defs()[I] << " "; + } + dbgs() << "}\n"; + } + if (MCInstInfo.NumImplicitUses) { + dbgs() << "implicitly uses: { "; + for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) { + dbgs() << "reg#" << MCInstInfo.implicit_uses()[I] << " "; + } + dbgs() << "}\n"; + } + dbgs() << "----------------Move Info----------------\n"; + { // move + { // Reg2Reg + MCPhysReg From, To; + if (MCPB->isRegToRegMove(Inst, From, To)) { + dbgs() << "It's a reg to reg move.\nFrom reg#" << From << " to reg#" + << To << "\n"; + } else if (MCInstInfo.isMoveReg()) { + dbgs() << "It's reg to reg move from MCInstInfo view but not from " + "MCPlus view.\n"; + } + } + if (MCPB->isConditionalMove(Inst)) { + dbgs() << "Its a conditional move.\n"; + } + if (MCPB->isMoveMem2Reg(Inst)) { + dbgs() << "It's a move from memory to register\n"; + assert(MCPB->getMemoryOperandNo(Inst) == 1); + } + } + + dbgs() << "---------------Stack Info----------------\n"; + { // stack + int32_t SrcImm = 0; + MCPhysReg Reg = 0; + uint16_t StackPtrReg = 0; + int64_t StackOffset = 0; + uint8_t Size = 0; + bool IsSimple = false; + bool IsIndexed = false; + bool IsLoad = false; + bool IsStore = false; + bool IsStoreFromReg = false; + if (MCPB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, + SrcImm, StackPtrReg, StackOffset, Size, IsSimple, + IsIndexed)) { + dbgs() << "This instruction accesses the stack, the details are:\n"; + dbgs() << " Source immediate: " << SrcImm << "\n"; + dbgs() << " Source reg#" << Reg << "\n"; + dbgs() << " Stack pointer: reg#" << StackPtrReg << "\n"; + dbgs() << " Stack offset: " << StackOffset << "\n"; + dbgs() << " size: " << (int)Size << "\n"; + dbgs() << " Is simple: " << (IsSimple ? "yes" : "no") << "\n"; + dbgs() << " Is indexed: " << (IsIndexed ? "yes" : "no") << "\n"; + dbgs() << " Is load: " << (IsLoad ? "yes" : "no") << "\n"; + dbgs() << " Is store: " << (IsStore ? "yes" : "no") << "\n"; + dbgs() << " Is store from reg: " << (IsStoreFromReg ? "yes" : "no") + << "\n"; + } + if (MCPB->isPush(Inst)) { + dbgs() << "This is a push instruction with size " + << MCPB->getPushSize(Inst) << "\n"; + } + if (MCPB->isPop(Inst)) { + dbgs() << "This is a pop instruction with size " + << MCPB->getPopSize(Inst) << "\n"; + } + } + + dbgs() << "---------------Arith Info----------------\n"; + { // arith + MutableArrayRef MAR = {Inst}; + if (MCPB->matchAdd(MCPB->matchAnyOperand(), MCPB->matchAnyOperand()) + ->match(*RI, *MCPB, MutableArrayRef(), -1)) { + dbgs() << "It is an addition instruction.\n"; + } else if (MCInstInfo.isAdd()) { + dbgs() << "It is an addition from MCInstInfo view but not from MCPlus " + "view.\n"; + } + if (MCPB->isSUB(Inst)) { + dbgs() << "This is a subtraction.\n"; + } } - dbgs() << "}\n"; - dbgs() << "-----------------------------------------\n"; - dbgs() << "isMoveMem2Reg: " << MCPB->isMoveMem2Reg(Inst) << "\n"; - dbgs() << "-----------------------------------------\n"; - dbgs() << "The CFA register is: " << CFAReg << "\n"; - dbgs() << "The instruction does " << (ChangedCFA ? "" : "NOT ") - << "change the CFA.\n"; - dbgs() << "The CFI directives does " << (GoingToChangeCFA ? "" : "NOT ") - << "change the CFA.\n"; + + // dbgs() << "-----------------------------------------\n"; + // dbgs() << "The CFA register is: " << CFAReg << "\n"; + // dbgs() << "The instruction does " << (ChangedCFA ? "" : "NOT ") + // << "change the CFA.\n"; + // dbgs() << "The CFI directives does " << (GoingToChangeCFA ? "" : "NOT ") + // << "change the CFA.\n"; dbgs() << "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; ChangedCFA = false; } From f01a9fdebd41db5982a7d818bbe1f6632507b2a3 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 13 May 2025 22:50:03 +0000 Subject: [PATCH 06/22] Typo in evaluateStackOffsetExpr doc --- bolt/include/bolt/Core/MCPlusBuilder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/include/bolt/Core/MCPlusBuilder.h b/bolt/include/bolt/Core/MCPlusBuilder.h index cf37a984da93f..cc6f377f51741 100644 --- a/bolt/include/bolt/Core/MCPlusBuilder.h +++ b/bolt/include/bolt/Core/MCPlusBuilder.h @@ -1086,7 +1086,7 @@ class MCPlusBuilder { return false; } - /// Use \p Input1 or Input2 as the current value for the input + /// Use \p Input1 or \p Input2 as the current value for the input /// register and put in \p Output the changes incurred by executing /// \p Inst. Return false if it was not possible to perform the /// evaluation. evaluateStackOffsetExpr is restricted to operations From 610e393b0c0472052118a81937b346a2cf94e968 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 13 May 2025 22:50:40 +0000 Subject: [PATCH 07/22] Add a DS to keep track of each register CFI state --- llvm/tools/llvm-mc/CFIState.h | 160 ++++++++++++++++++++++++++++++++++ 1 file changed, 160 insertions(+) create mode 100644 llvm/tools/llvm-mc/CFIState.h diff --git a/llvm/tools/llvm-mc/CFIState.h b/llvm/tools/llvm-mc/CFIState.h new file mode 100644 index 0000000000000..b32de2b385316 --- /dev/null +++ b/llvm/tools/llvm-mc/CFIState.h @@ -0,0 +1,160 @@ +#ifndef LLVM_TOOLS_LLVM_MC_CFI_STATE_H +#define LLVM_TOOLS_LLVM_MC_CFI_STATE_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/Support/MathExtras.h" +#include +namespace llvm { + +using DWARFRegType = int64_t; + +struct RegisterCFIState { + enum Approach { + Undefined, + SameValue, + AnotherRegister, + OffsetFromCFAAddr, + OffsetFromCFAVal, + Other, + } RetrieveApproach; + + // TODO use a correct type for this. + union { + int OffsetFromCFA; + DWARFRegType Register; + } Info; + + static RegisterCFIState createUndefined() { + RegisterCFIState State; + State.RetrieveApproach = Undefined; + + return State; + } + + static RegisterCFIState createSameValue() { + RegisterCFIState State; + State.RetrieveApproach = SameValue; + + return State; + } + + static RegisterCFIState createAnotherRegister(DWARFRegType Register) { + RegisterCFIState State; + State.RetrieveApproach = AnotherRegister; + State.Info.Register = Register; + + return State; + } + + static RegisterCFIState createOffsetFromCFAAddr(int OffsetFromCFA) { + RegisterCFIState State; + State.RetrieveApproach = OffsetFromCFAAddr; + State.Info.OffsetFromCFA = OffsetFromCFA; + + return State; + } + + static RegisterCFIState createOffsetFromCFAVal(int OffsetFromCFA) { + RegisterCFIState State; + State.RetrieveApproach = OffsetFromCFAVal; + State.Info.OffsetFromCFA = OffsetFromCFA; + + return State; + } + + static RegisterCFIState createOther() { + RegisterCFIState State; + State.RetrieveApproach = Other; + + return State; + } +}; +struct CFIState { + DenseMap RegisterCFIStates; + DWARFRegType CFARegister; + int CFAOffset; + + CFIState() : CFARegister(-1 /* Change to to max unsigned */), CFAOffset(-1) {} + + CFIState(const CFIState &Other) { + CFARegister = Other.CFARegister; + CFAOffset = Other.CFAOffset; + RegisterCFIStates = Other.RegisterCFIStates; + } + + CFIState &operator=(const CFIState &Other) { + if (this != &Other) { + CFARegister = Other.CFARegister; + CFAOffset = Other.CFAOffset; + RegisterCFIStates = Other.RegisterCFIStates; + } + + return *this; + } + + CFIState(DWARFRegType CFARegister, int CFIOffset) + : CFARegister(CFARegister), CFAOffset(CFIOffset) {} + + bool apply(const MCCFIInstruction &CFIDirective) { + switch (CFIDirective.getOperation()) { + case MCCFIInstruction::OpDefCfaRegister: + CFARegister = CFIDirective.getRegister(); + break; + case MCCFIInstruction::OpDefCfaOffset: + CFAOffset = CFIDirective.getOffset(); + break; + case MCCFIInstruction::OpAdjustCfaOffset: + CFAOffset += CFIDirective.getOffset(); + break; + case MCCFIInstruction::OpDefCfa: + CFARegister = CFIDirective.getRegister(); + CFAOffset = CFIDirective.getOffset(); + break; + case MCCFIInstruction::OpOffset: + RegisterCFIStates[CFIDirective.getRegister()] = + RegisterCFIState::createOffsetFromCFAAddr(CFIDirective.getOffset()); + break; + case MCCFIInstruction::OpRegister: + RegisterCFIStates[CFIDirective.getRegister()] = + RegisterCFIState::createAnotherRegister(CFIDirective.getRegister2()); + break; + case MCCFIInstruction::OpRelOffset: + RegisterCFIStates[CFIDirective.getRegister()] = + RegisterCFIState::createOffsetFromCFAAddr(CFIDirective.getOffset() - + CFAOffset); + break; + case MCCFIInstruction::OpUndefined: + RegisterCFIStates[CFIDirective.getRegister()] = + RegisterCFIState::createUndefined(); + break; + case MCCFIInstruction::OpSameValue: + RegisterCFIStates[CFIDirective.getRegister()] = + RegisterCFIState::createSameValue(); + break; + case MCCFIInstruction::OpValOffset: + RegisterCFIStates[CFIDirective.getRegister()] = + RegisterCFIState::createOffsetFromCFAVal(CFIDirective.getOffset()); + break; + case MCCFIInstruction::OpRestoreState: + case MCCFIInstruction::OpRememberState: + case MCCFIInstruction::OpLLVMDefAspaceCfa: + case MCCFIInstruction::OpRestore: + case MCCFIInstruction::OpEscape: + case MCCFIInstruction::OpWindowSave: + case MCCFIInstruction::OpNegateRAState: + case MCCFIInstruction::OpNegateRAStateWithPC: + case MCCFIInstruction::OpGnuArgsSize: + case MCCFIInstruction::OpLabel: + // TODO it's not supported. + return false; + break; + } + + return true; + } +}; +} // namespace llvm + +#endif \ No newline at end of file From ccf43ad2d45cafc4924ee5934493c8632bd8d0e0 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Wed, 14 May 2025 18:19:55 +0000 Subject: [PATCH 08/22] Check if the CFA change is correct or not --- llvm/tools/llvm-mc/CFIAnalysis.h | 457 ++++++++++++++------- llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h | 36 +- 2 files changed, 326 insertions(+), 167 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index 70b8a855643b9..424290f3cab86 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -1,10 +1,12 @@ #ifndef LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_H #define LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_H +#include "CFIState.h" #include "bolt/Core/MCPlusBuilder.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCExpr.h" @@ -17,6 +19,8 @@ #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" +#include "llvm/Support/FormatVariadic.h" +#include #include #include #include @@ -51,51 +55,196 @@ class CFIAnalysis { MCContext &Context; MCInstrInfo const &MCII; std::unique_ptr MCPB; + MCRegisterInfo const *MCRI; + CFIState State; public: CFIAnalysis(MCContext &Context, MCInstrInfo const &MCII, MCInstrAnalysis *MCIA) - : Context(Context), MCII(MCII) { - // TODO it should look at the poluge directives and setup the + : Context(Context), MCII(MCII), MCRI(Context.getRegisterInfo()) { + // TODO it should look at the prologue directives and setup the // registers' previous value state here, but for now, it's assumed that all // values are by default `samevalue`. MCPB.reset(createMCPlusBuilder(Context.getTargetTriple().getArch(), MCIA, &MCII, Context.getRegisterInfo(), Context.getSubtargetInfo())); + + // TODO CFA offset should be the slot size, but for now I don't have any + // access to it, maybe can be read from the prologue + // TODO check what should be passed as EH? + State = CFIState(MCRI->getDwarfRegNum(MCPB->getStackPointer(), false), 8); + for (unsigned I = 0; I < MCRI->getNumRegs(); I++) { + DWARFRegType DwarfI = MCRI->getDwarfRegNum(I, false); + State.RegisterCFIStates[DwarfI] = RegisterCFIState::createSameValue(); + } + + // TODO these are temporay added to make things work. + // Setup the basic information: + State.RegisterCFIStates[MCRI->getDwarfRegNum(MCRI->getProgramCounter(), + false)] = + RegisterCFIState::createUndefined(); // For now, we don't care about the + // PC + State.RegisterCFIStates[MCRI->getDwarfRegNum(MCPB->getStackPointer(), + false)] = + RegisterCFIState::createOffsetFromCFAVal(0); // sp's old value is CFA + } + + bool doesConstantChange(const MCInst &Inst, MCPhysReg Reg, int64_t &HowMuch) { + if (MCPB->isPush(Inst) && Reg == MCPB->getStackPointer()) { + // TODO should get the stack direction here, now it assumes that it goes + // down. + HowMuch = -MCPB->getPushSize(Inst); + return true; + } + + if (MCPB->isPop(Inst) && Reg == MCPB->getStackPointer()) { + // TODO should get the stack direction here, now it assumes that it goes + // down. + HowMuch = MCPB->getPushSize(Inst); + return true; + } + + return false; + } + + // Tries to guess Reg1's value in a form of Reg2 (before Inst's execution) + + // Diff. + bool isInConstantDistanceOfEachOther(const MCInst &Inst, MCPhysReg Reg1, + MCPhysReg Reg2, int &Diff) { + { + MCPhysReg From; + MCPhysReg To; + if (MCPB->isRegToRegMove(Inst, From, To) && From == Reg2 && To == Reg1) { + Diff = 0; + return true; + } + } + + return false; + } + + void checkCFADiff(const MCInst &Inst, const CFIState &PrevState, + const CFIState &NextState, + const std::set &Reads, + const std::set &Writes) { + if (PrevState.CFARegister == NextState.CFARegister) { + if (PrevState.CFAOffset == NextState.CFAOffset) { + if (Writes.count(PrevState.CFARegister)) { + Context.reportWarning( + Inst.getLoc(), + formatv("This instruction changes reg#{0}, which is " + "the CFA register, but the CFI directives do not.", + MCRI->getLLVMRegNum(PrevState.CFARegister, false))); + } + return; + } + // The offset is changed. + if (!Writes.count(PrevState.CFARegister)) { + Context.reportWarning( + Inst.getLoc(), + formatv( + "You changed the CFA offset, but there is no modification to " + "the CFA register happening in this instruction.")); + } + int64_t HowMuch; + if (!doesConstantChange( + Inst, MCRI->getLLVMRegNum(PrevState.CFARegister, false).value(), + HowMuch)) { + Context.reportWarning( + Inst.getLoc(), + formatv("The CFA register changed, but I don't know how, finger " + "crossed the CFI directives are correct.")); + return; + } + // we know it is changed by HowMuch, so the CFA offset should be changed + // by -HowMuch, i.e. AfterState.Offset - State.Offset = -HowMuch + if (NextState.CFAOffset - PrevState.CFAOffset != -HowMuch) { + Context.reportError( + Inst.getLoc(), + formatv("The CFA offset is changed by {0}, but " + "the directives changed it by {1}. (to be exact, the new " + "offset should be {2}, but it is {3})", + -HowMuch, NextState.CFAOffset - PrevState.CFAOffset, + PrevState.CFAOffset - HowMuch, NextState.CFAOffset)); + return; + } + + // Everything OK! + return; + } + // The CFA register is changed. + // TODO move it up + MCPhysReg OldCFAReg = MCRI->getLLVMRegNum(PrevState.CFARegister, false).value(); + MCPhysReg NewCFAReg = + MCRI->getLLVMRegNum(NextState.CFARegister, false).value(); + if (!Writes.count(NextState.CFARegister)) { + Context.reportWarning( + Inst.getLoc(), + formatv( + "The new CFA register reg#{0}'s value is not assigned by this " + "instruction, try to move the new CFA def to where this " + "value is changed, now I can't infer if this change is " + "correct or not.", + NewCFAReg)); + return; + } + // Because CFA should is the CFA always stays the same: + int OffsetDiff = PrevState.CFAOffset - NextState.CFAOffset; + int Diff; + if (!isInConstantDistanceOfEachOther(Inst, NewCFAReg, OldCFAReg, Diff)) { + Context.reportWarning( + Inst.getLoc(), + formatv("Based on this instruction I cannot infer that the new and " + "old CFA registers are in {0} distance of each other. I " + "hope it's ok.", + OffsetDiff)); + return; + } + if (Diff != OffsetDiff) { + Context.reportError( + Inst.getLoc(), formatv("After changing the CFA register, the CFA " + "offset should be {0}, but it is {1}.", + PrevState.CFAOffset - Diff, NextState.CFAOffset)); + return; + } + // Everything is OK! } void update(const MCDwarfFrameInfo &DwarfFrame, MCInst &Inst, - ArrayRef CFIDirectives) { + std::pair + CFIDirectivesRange) { // FIXME this should not be a pair, + // but an ArrayRef + ArrayRef CFIDirectives(DwarfFrame.Instructions); + CFIDirectives = CFIDirectives.drop_front(CFIDirectivesRange.first) + .drop_back(DwarfFrame.Instructions.size() - + CFIDirectivesRange.second); const MCInstrDesc &MCInstInfo = MCII.get(Inst.getOpcode()); - auto *RI = Context.getRegisterInfo(); - auto CFAReg = RI->getLLVMRegNum(DwarfFrame.CurrentCfaRegister, false); + CFIState AfterState(State); + for (auto &&CFIDirective : CFIDirectives) + if (!AfterState.apply(CFIDirective)) + Context.reportWarning( + CFIDirective.getLoc(), + "I don't support this CFI directive, I assume this does nothing " + "(which will probably break other things)"); + auto CFAReg = MCRI->getLLVMRegNum(DwarfFrame.CurrentCfaRegister, false); + assert(DwarfFrame.CurrentCfaRegister == AfterState.CFARegister && + "Checking if the CFA tracking is working"); - std::set Writes, Reads; // TODO reads is not ready for now - // FIXME this way of extracting uses is buggy: - // for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) - // Reads.insert(MCInstInfo.implicit_uses()[I]); + std::set Writes, Reads; + for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) + Reads.insert(MCRI->getDwarfRegNum(MCInstInfo.implicit_uses()[I], false)); for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) - Writes.insert(MCInstInfo.implicit_defs()[I]); + Writes.insert(MCRI->getDwarfRegNum(MCInstInfo.implicit_defs()[I], false)); for (unsigned I = 0; I < Inst.getNumOperands(); I++) { auto &&Operand = Inst.getOperand(I); if (Operand.isReg()) { - // TODO it is not percise, maybe this operand is for output, then it - // means that there is no read happening here. - Reads.insert(Operand.getReg()); - } - - if (Operand.isExpr()) { - // TODO maybe the argument is not a register, but is a expression like - // `34(sp)` that has a register in it. Check if this works or not and if - // no change it somehow that it count that register as reads or writes - // too. + if (I < MCInstInfo.getNumDefs()) + Writes.insert(MCRI->getDwarfRegNum(Operand.getReg(), false)); + else + Reads.insert(MCRI->getDwarfRegNum(Operand.getReg(), false)); } - - if (Operand.isReg() && - MCInstInfo.hasDefOfPhysReg(Inst, Operand.getReg(), *RI)) - Writes.insert(Operand.getReg().id()); } bool ChangedCFA = Writes.count(CFAReg->id()); @@ -114,132 +263,137 @@ class CFIAnalysis { "CFI directives don't update the CFA."); } - dbgs() << "^^^^^^^^^^^^^^^^ InsCFIs ^^^^^^^^^^^^^^^^\n"; - printUntilNextLine(Inst.getLoc().getPointer()); - dbgs() << "\n"; - dbgs() << "Op code: " << Inst.getOpcode() << "\n"; - dbgs() << "--------------Operands Info--------------\n"; - auto DefCount = MCInstInfo.getNumDefs(); - for (unsigned I = 0; I < Inst.getNumOperands(); I++) { - dbgs() << "Operand #" << I << ", which will be " - << (I < DefCount ? "defined" : "used") << ", is a"; - if (I == MCPB->getMemoryOperandNo(Inst)) { - dbgs() << " memory access, details are:\n"; - auto X86MemoryOperand = MCPB->evaluateX86MemoryOperand(Inst); - dbgs() << " Base Register{ reg#" << X86MemoryOperand->BaseRegNum - << " }"; - dbgs() << " + (Index Register{ reg#" << X86MemoryOperand->IndexRegNum - << " }"; - dbgs() << " * Scale{ value " << X86MemoryOperand->ScaleImm << " }"; - dbgs() << ") + Displacement{ " - << (X86MemoryOperand->DispExpr - ? "an expression" - : "value " + itostr(X86MemoryOperand->DispImm)) - << " }\n"; - // TODO, it's not correct always, it cannot be assumed (or should be - // checked) that memory operands are flatten into 4 operands in MCInc. - I += 4; - continue; - } - auto &&Operand = Inst.getOperand(I); - if (Operand.isImm()) { - dbgs() << "n immediate with value " << Operand.getImm() << "\n"; - continue; - } - if (Operand.isReg()) { - dbgs() << " reg#" << Operand.getReg() << "\n"; - continue; - } - assert(Operand.isExpr()); - dbgs() << "n unknown expression \n"; - } - if (MCInstInfo.NumImplicitDefs) { - dbgs() << "implicitly defines: { "; - for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) { - dbgs() << "reg#" << MCInstInfo.implicit_defs()[I] << " "; - } - dbgs() << "}\n"; - } - if (MCInstInfo.NumImplicitUses) { - dbgs() << "implicitly uses: { "; - for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) { - dbgs() << "reg#" << MCInstInfo.implicit_uses()[I] << " "; - } - dbgs() << "}\n"; - } - dbgs() << "----------------Move Info----------------\n"; - { // move - { // Reg2Reg - MCPhysReg From, To; - if (MCPB->isRegToRegMove(Inst, From, To)) { - dbgs() << "It's a reg to reg move.\nFrom reg#" << From << " to reg#" - << To << "\n"; - } else if (MCInstInfo.isMoveReg()) { - dbgs() << "It's reg to reg move from MCInstInfo view but not from " - "MCPlus view.\n"; - } - } - if (MCPB->isConditionalMove(Inst)) { - dbgs() << "Its a conditional move.\n"; - } - if (MCPB->isMoveMem2Reg(Inst)) { - dbgs() << "It's a move from memory to register\n"; - assert(MCPB->getMemoryOperandNo(Inst) == 1); - } - } + ///////////// being diffing the CFI states + checkCFADiff(Inst, State, AfterState, Reads, Writes); + ///////////// end diffing the CFI states - dbgs() << "---------------Stack Info----------------\n"; - { // stack - int32_t SrcImm = 0; - MCPhysReg Reg = 0; - uint16_t StackPtrReg = 0; - int64_t StackOffset = 0; - uint8_t Size = 0; - bool IsSimple = false; - bool IsIndexed = false; - bool IsLoad = false; - bool IsStore = false; - bool IsStoreFromReg = false; - if (MCPB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, - SrcImm, StackPtrReg, StackOffset, Size, IsSimple, - IsIndexed)) { - dbgs() << "This instruction accesses the stack, the details are:\n"; - dbgs() << " Source immediate: " << SrcImm << "\n"; - dbgs() << " Source reg#" << Reg << "\n"; - dbgs() << " Stack pointer: reg#" << StackPtrReg << "\n"; - dbgs() << " Stack offset: " << StackOffset << "\n"; - dbgs() << " size: " << (int)Size << "\n"; - dbgs() << " Is simple: " << (IsSimple ? "yes" : "no") << "\n"; - dbgs() << " Is indexed: " << (IsIndexed ? "yes" : "no") << "\n"; - dbgs() << " Is load: " << (IsLoad ? "yes" : "no") << "\n"; - dbgs() << " Is store: " << (IsStore ? "yes" : "no") << "\n"; - dbgs() << " Is store from reg: " << (IsStoreFromReg ? "yes" : "no") - << "\n"; - } - if (MCPB->isPush(Inst)) { - dbgs() << "This is a push instruction with size " - << MCPB->getPushSize(Inst) << "\n"; - } - if (MCPB->isPop(Inst)) { - dbgs() << "This is a pop instruction with size " - << MCPB->getPopSize(Inst) << "\n"; - } - } + // dbgs() << "^^^^^^^^^^^^^^^^ InsCFIs ^^^^^^^^^^^^^^^^\n"; + // printUntilNextLine(Inst.getLoc().getPointer()); + // dbgs() << "\n"; + // dbgs() << "Op code: " << Inst.getOpcode() << "\n"; + // dbgs() << "--------------Operands Info--------------\n"; + // auto DefCount = MCInstInfo.getNumDefs(); + // for (unsigned I = 0; I < Inst.getNumOperands(); I++) { + // dbgs() << "Operand #" << I << ", which will be " + // << (I < DefCount ? "defined" : "used") << ", is a"; + // if (I == MCPB->getMemoryOperandNo(Inst)) { + // dbgs() << " memory access, details are:\n"; + // auto X86MemoryOperand = MCPB->evaluateX86MemoryOperand(Inst); + // dbgs() << " Base Register{ reg#" << X86MemoryOperand->BaseRegNum + // << " }"; + // dbgs() << " + (Index Register{ reg#" << X86MemoryOperand->IndexRegNum + // << " }"; + // dbgs() << " * Scale{ value " << X86MemoryOperand->ScaleImm << " }"; + // dbgs() << ") + Displacement{ " + // << (X86MemoryOperand->DispExpr + // ? "an expression" + // : "value " + itostr(X86MemoryOperand->DispImm)) + // << " }\n"; + // // TODO, it's not correct always, it cannot be assumed (or should be + // // checked) that memory operands are flatten into 4 operands in + // MCInc. I += 4; continue; + // } + // auto &&Operand = Inst.getOperand(I); + // if (Operand.isImm()) { + // dbgs() << "n immediate with value " << Operand.getImm() << "\n"; + // continue; + // } + // if (Operand.isReg()) { + // dbgs() << " reg#" << Operand.getReg() << "\n"; + // continue; + // } + // assert(Operand.isExpr()); + // dbgs() << "n unknown expression \n"; + // } + // if (MCInstInfo.NumImplicitDefs) { + // dbgs() << "implicitly defines: { "; + // for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) { + // dbgs() << "reg#" << MCInstInfo.implicit_defs()[I] << " "; + // } + // dbgs() << "}\n"; + // } + // if (MCInstInfo.NumImplicitUses) { + // dbgs() << "implicitly uses: { "; + // for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) { + // dbgs() << "reg#" << MCInstInfo.implicit_uses()[I] << " "; + // } + // dbgs() << "}\n"; + // } + // dbgs() << "----------------Move Info----------------\n"; + // { // move + // { // Reg2Reg + // MCPhysReg From, To; + // if (MCPB->isRegToRegMove(Inst, From, To)) { + // dbgs() << "It's a reg to reg move.\nFrom reg#" << From << " to + // reg#" + // << To << "\n"; + // } else if (MCInstInfo.isMoveReg()) { + // dbgs() << "It's reg to reg move from MCInstInfo view but not from " + // "MCPlus view.\n"; + // } + // } + // if (MCPB->isConditionalMove(Inst)) { + // dbgs() << "Its a conditional move.\n"; + // } + // if (MCPB->isMoveMem2Reg(Inst)) { + // dbgs() << "It's a move from memory to register\n"; + // assert(MCPB->getMemoryOperandNo(Inst) == 1); + // } + // } - dbgs() << "---------------Arith Info----------------\n"; - { // arith - MutableArrayRef MAR = {Inst}; - if (MCPB->matchAdd(MCPB->matchAnyOperand(), MCPB->matchAnyOperand()) - ->match(*RI, *MCPB, MutableArrayRef(), -1)) { - dbgs() << "It is an addition instruction.\n"; - } else if (MCInstInfo.isAdd()) { - dbgs() << "It is an addition from MCInstInfo view but not from MCPlus " - "view.\n"; - } - if (MCPB->isSUB(Inst)) { - dbgs() << "This is a subtraction.\n"; - } - } + // dbgs() << "---------------Stack Info----------------\n"; + // { // stack + // int32_t SrcImm = 0; + // MCPhysReg Reg = 0; + // uint16_t StackPtrReg = 0; + // int64_t StackOffset = 0; + // uint8_t Size = 0; + // bool IsSimple = false; + // bool IsIndexed = false; + // bool IsLoad = false; + // bool IsStore = false; + // bool IsStoreFromReg = false; + // if (MCPB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, + // SrcImm, StackPtrReg, StackOffset, Size, + // IsSimple, IsIndexed)) { + // dbgs() << "This instruction accesses the stack, the details are:\n"; + // dbgs() << " Source immediate: " << SrcImm << "\n"; + // dbgs() << " Source reg#" << Reg << "\n"; + // dbgs() << " Stack pointer: reg#" << StackPtrReg << "\n"; + // dbgs() << " Stack offset: " << StackOffset << "\n"; + // dbgs() << " size: " << (int)Size << "\n"; + // dbgs() << " Is simple: " << (IsSimple ? "yes" : "no") << "\n"; + // dbgs() << " Is indexed: " << (IsIndexed ? "yes" : "no") << "\n"; + // dbgs() << " Is load: " << (IsLoad ? "yes" : "no") << "\n"; + // dbgs() << " Is store: " << (IsStore ? "yes" : "no") << "\n"; + // dbgs() << " Is store from reg: " << (IsStoreFromReg ? "yes" : "no") + // << "\n"; + // } + // if (MCPB->isPush(Inst)) { + // dbgs() << "This is a push instruction with size " + // << MCPB->getPushSize(Inst) << "\n"; + // } + // if (MCPB->isPop(Inst)) { + // dbgs() << "This is a pop instruction with size " + // << MCPB->getPopSize(Inst) << "\n"; + // } + // } + + // dbgs() << "---------------Arith Info----------------\n"; + // { // arith + // MutableArrayRef MAR = {Inst}; + // if (MCPB->matchAdd(MCPB->matchAnyOperand(), MCPB->matchAnyOperand()) + // ->match(*MCRI, *MCPB, MutableArrayRef(), -1)) { + // dbgs() << "It is an addition instruction.\n"; + // } else if (MCInstInfo.isAdd()) { + // dbgs() << "It is an addition from MCInstInfo view but not from MCPlus + // " + // "view.\n"; + // } + // if (MCPB->isSUB(Inst)) { + // dbgs() << "This is a subtraction.\n"; + // } + // } // dbgs() << "-----------------------------------------\n"; // dbgs() << "The CFA register is: " << CFAReg << "\n"; @@ -247,9 +401,12 @@ class CFIAnalysis { // << "change the CFA.\n"; // dbgs() << "The CFI directives does " << (GoingToChangeCFA ? "" : "NOT ") // << "change the CFA.\n"; - dbgs() << "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; - ChangedCFA = false; + // dbgs() << "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; + + State = AfterState; } + +private: }; } // namespace llvm diff --git a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h index 382a80aca94b4..48f530e6a9811 100644 --- a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h +++ b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h @@ -33,14 +33,14 @@ class CFIAnalysisMCStreamer : public MCStreamer { struct ICFI { MCInst Instruction; - ArrayRef CFIDirectives; + std::pair CFIDirectivesRange; - ICFI(MCInst Instruction, ArrayRef CFIDirectives) - : Instruction(Instruction), CFIDirectives(CFIDirectives) {} + ICFI(MCInst Instruction, std::pair CFIDirectives) + : Instruction(Instruction), CFIDirectivesRange(CFIDirectives) {} }; std::optional LastInstruction; - std::optional LastDwarfFrameInfo; + MCDwarfFrameInfo const *LastDwarfFrameInfo; std::optional getLastICFI() { if (!LastInstruction) @@ -58,16 +58,14 @@ class CFIAnalysisMCStreamer : public MCStreamer { assert(CurrentCFIDirectiveState.DirectiveIndex >= LastCFIDirectivesState.DirectiveIndex); - std::vector CFIDirectives; + std::pair CFIDirectivesRange; if (LastCFIDirectivesState.FrameIndex >= 0) { - auto CFIInstructions = DwarfFrameInfos[FrameIndex].Instructions; - CFIDirectives = std::vector( - CFIInstructions.begin() + LastCFIDirectivesState.DirectiveIndex, - CFIInstructions.begin() + CurrentCFIDirectiveState.DirectiveIndex); + CFIDirectivesRange = {LastCFIDirectivesState.DirectiveIndex, + CurrentCFIDirectiveState.DirectiveIndex}; } LastCFIDirectivesState = CurrentCFIDirectiveState; - return ICFI(LastInstruction.value(), CFIDirectives); + return ICFI(LastInstruction.value(), CFIDirectivesRange); } void feedCFIA() { @@ -77,14 +75,15 @@ class CFIAnalysisMCStreamer : public MCStreamer { // adding cfi directives for a instruction. Then this would cause to // ignore the instruction. auto LastICFI = getLastICFI(); - assert(!LastICFI || LastICFI->CFIDirectives.empty()); + assert(!LastICFI || LastICFI->CFIDirectivesRange.first == + LastICFI->CFIDirectivesRange.second); return; } if (auto ICFI = getLastICFI()) { assert(!CFIAs.empty()); - CFIAs.back().update(LastDwarfFrameInfo.value(), ICFI->Instruction, - ICFI->CFIDirectives); + CFIAs.back().update(*LastDwarfFrameInfo, ICFI->Instruction, + ICFI->CFIDirectivesRange); } } @@ -121,9 +120,12 @@ class CFIAnalysisMCStreamer : public MCStreamer { feedCFIA(); LastInstruction = Inst; if (hasUnfinishedDwarfFrameInfo()) - LastDwarfFrameInfo = getDwarfFrameInfos()[FrameIndices.back()]; + LastDwarfFrameInfo = + &getDwarfFrameInfos() + [FrameIndices.back()]; // FIXME get this from emit yourself, + // instead of getting it in this way else - LastDwarfFrameInfo = std::nullopt; + LastDwarfFrameInfo = nullptr; } void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override { @@ -131,7 +133,7 @@ class CFIAnalysisMCStreamer : public MCStreamer { FrameIndices.push_back(getNumFrameInfos()); CFIAs.emplace_back(getContext(), MCII, MCIA.get()); LastInstruction = std::nullopt; - LastDwarfFrameInfo = std::nullopt; + LastDwarfFrameInfo = nullptr; LastCFIDirectivesState.FrameIndex = FrameIndices.back(); LastCFIDirectivesState.DirectiveIndex = 0; MCStreamer::emitCFIStartProcImpl(Frame); @@ -143,7 +145,7 @@ class CFIAnalysisMCStreamer : public MCStreamer { FrameIndices.pop_back(); CFIAs.pop_back(); LastInstruction = std::nullopt; - LastDwarfFrameInfo = std::nullopt; + LastDwarfFrameInfo = nullptr; LastCFIDirectivesState.FrameIndex = FrameIndices.back(); LastCFIDirectivesState.DirectiveIndex = LastCFIDirectivesState.FrameIndex >= 0 From aa95e823b5263d10819f6cb307a62b93a8bba1e0 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Wed, 14 May 2025 22:45:23 +0000 Subject: [PATCH 09/22] Add the CFI validation as a option to llvm-mc tool --- llvm/tools/llvm-mc/llvm-mc.cpp | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/llvm/tools/llvm-mc/llvm-mc.cpp b/llvm/tools/llvm-mc/llvm-mc.cpp index 32f2fa0f05a7c..ca4f71cb9cabd 100644 --- a/llvm/tools/llvm-mc/llvm-mc.cpp +++ b/llvm/tools/llvm-mc/llvm-mc.cpp @@ -215,6 +215,10 @@ static cl::opt NoExecStack("no-exec-stack", cl::desc("File doesn't need an exec stack"), cl::cat(MCCategory)); +static cl::opt ValidateCFI("validate-cfi", + cl::desc("Validate the CFI directives"), + cl::cat(MCCategory)); + enum ActionType { AC_AsLex, AC_Assemble, @@ -525,7 +529,12 @@ int main(int argc, char **argv) { assert(MCIA && "Unable to create instruction analysis!"); std::unique_ptr IP; - if (FileType == OFT_AssemblyFile) { + if (ValidateCFI) { + assert(FileType == OFT_Null); + auto *CFIAMCS = new CFIAnalysisMCStreamer(Ctx, *MCII, std::move(MCIA)); + TheTarget->createNullTargetStreamer(*CFIAMCS); + Str.reset(CFIAMCS); + } else if (FileType == OFT_AssemblyFile) { IP.reset(TheTarget->createMCInstPrinter( Triple(TripleName), OutputAsmVariant, *MAI, *MCII, *MRI)); @@ -569,11 +578,7 @@ int main(int argc, char **argv) { std::move(CE), std::move(MAB))); } else if (FileType == OFT_Null) { - auto *MyDummyStreamer = - new CFIAnalysisMCStreamer(Ctx, *MCII, std::move(MCIA)); - TheTarget->createNullTargetStreamer(*MyDummyStreamer); - Str.reset(MyDummyStreamer); - // Str.reset(TheTarget->createNullStreamer(Ctx)); + Str.reset(TheTarget->createNullStreamer(Ctx)); } else { assert(FileType == OFT_ObjectFile && "Invalid file type!"); From fc88bfa7ac02e8901d53d67926a32eafce4a36b0 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Wed, 14 May 2025 23:15:26 +0000 Subject: [PATCH 10/22] Provide the directives as an arrayref instead of index range --- llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h | 42 +++++++++------------- 1 file changed, 17 insertions(+), 25 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h index 48f530e6a9811..a9354d7af8a2f 100644 --- a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h +++ b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h @@ -3,7 +3,6 @@ #include "CFIAnalysis.h" #include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCInstrAnalysis.h" @@ -20,13 +19,12 @@ class CFIAnalysisMCStreamer : public MCStreamer { std::unique_ptr MCIA; struct CFIDirectivesState { - int FrameIndex; // TODO remove it, no need for it int DirectiveIndex; - CFIDirectivesState() : FrameIndex(-1), DirectiveIndex(0) {} + CFIDirectivesState() : DirectiveIndex(0) {} CFIDirectivesState(int FrameIndex, int InstructionIndex) - : FrameIndex(FrameIndex), DirectiveIndex(InstructionIndex) {} + : DirectiveIndex(InstructionIndex) {} } LastCFIDirectivesState; std::vector FrameIndices; std::vector CFIAs; @@ -53,16 +51,12 @@ class CFIAnalysisMCStreamer : public MCStreamer { ? CFIDirectivesState( FrameIndex, DwarfFrameInfos[FrameIndex].Instructions.size()) : CFIDirectivesState(); - assert(CurrentCFIDirectiveState.FrameIndex == - LastCFIDirectivesState.FrameIndex); assert(CurrentCFIDirectiveState.DirectiveIndex >= LastCFIDirectivesState.DirectiveIndex); - std::pair CFIDirectivesRange; - if (LastCFIDirectivesState.FrameIndex >= 0) { - CFIDirectivesRange = {LastCFIDirectivesState.DirectiveIndex, - CurrentCFIDirectiveState.DirectiveIndex}; - } + std::pair CFIDirectivesRange( + LastCFIDirectivesState.DirectiveIndex, + CurrentCFIDirectiveState.DirectiveIndex); LastCFIDirectivesState = CurrentCFIDirectiveState; return ICFI(LastInstruction.value(), CFIDirectivesRange); @@ -82,8 +76,12 @@ class CFIAnalysisMCStreamer : public MCStreamer { if (auto ICFI = getLastICFI()) { assert(!CFIAs.empty()); - CFIAs.back().update(*LastDwarfFrameInfo, ICFI->Instruction, - ICFI->CFIDirectivesRange); + ArrayRef CFIDirectives( + LastDwarfFrameInfo->Instructions); + CFIDirectives = CFIDirectives.drop_front(ICFI->CFIDirectivesRange.first) + .drop_back(LastDwarfFrameInfo->Instructions.size() - + ICFI->CFIDirectivesRange.second); + CFIAs.back().update(ICFI->Instruction, CFIDirectives); } } @@ -95,9 +93,6 @@ class CFIAnalysisMCStreamer : public MCStreamer { FrameIndices.push_back(-1); } - /// @name MCStreamer Interface - /// @{ - bool hasRawTextSupport() const override { return true; } void emitRawTextImpl(StringRef String) override {} @@ -121,9 +116,9 @@ class CFIAnalysisMCStreamer : public MCStreamer { LastInstruction = Inst; if (hasUnfinishedDwarfFrameInfo()) LastDwarfFrameInfo = - &getDwarfFrameInfos() - [FrameIndices.back()]; // FIXME get this from emit yourself, - // instead of getting it in this way + &getDwarfFrameInfos()[FrameIndices.back()]; // TODO get this from emit + // yourself, instead of + // getting it in this way else LastDwarfFrameInfo = nullptr; } @@ -134,7 +129,6 @@ class CFIAnalysisMCStreamer : public MCStreamer { CFIAs.emplace_back(getContext(), MCII, MCIA.get()); LastInstruction = std::nullopt; LastDwarfFrameInfo = nullptr; - LastCFIDirectivesState.FrameIndex = FrameIndices.back(); LastCFIDirectivesState.DirectiveIndex = 0; MCStreamer::emitCFIStartProcImpl(Frame); } @@ -146,12 +140,10 @@ class CFIAnalysisMCStreamer : public MCStreamer { CFIAs.pop_back(); LastInstruction = std::nullopt; LastDwarfFrameInfo = nullptr; - LastCFIDirectivesState.FrameIndex = FrameIndices.back(); + auto FrameIndex = FrameIndices.back(); LastCFIDirectivesState.DirectiveIndex = - LastCFIDirectivesState.FrameIndex >= 0 - ? getDwarfFrameInfos()[LastCFIDirectivesState.FrameIndex] - .Instructions.size() - : 0; + FrameIndex >= 0 ? getDwarfFrameInfos()[FrameIndex].Instructions.size() + : 0; MCStreamer::emitCFIEndProcImpl(CurFrame); } }; From 832d5a6079a52d37df04e4136271ed08b0988347 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Wed, 14 May 2025 23:47:48 +0000 Subject: [PATCH 11/22] Remove TODOs --- llvm/tools/llvm-mc/CFIState.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIState.h b/llvm/tools/llvm-mc/CFIState.h index b32de2b385316..5551e9778f1d2 100644 --- a/llvm/tools/llvm-mc/CFIState.h +++ b/llvm/tools/llvm-mc/CFIState.h @@ -76,7 +76,7 @@ struct CFIState { DWARFRegType CFARegister; int CFAOffset; - CFIState() : CFARegister(-1 /* Change to to max unsigned */), CFAOffset(-1) {} + CFIState() : CFARegister(-1), CFAOffset(-1) {} CFIState(const CFIState &Other) { CFARegister = Other.CFARegister; @@ -147,7 +147,7 @@ struct CFIState { case MCCFIInstruction::OpNegateRAStateWithPC: case MCCFIInstruction::OpGnuArgsSize: case MCCFIInstruction::OpLabel: - // TODO it's not supported. + // These instructions are not supported. return false; break; } From 527130325563d0920f58d059f1c2a8cc10aac490 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Wed, 14 May 2025 23:48:11 +0000 Subject: [PATCH 12/22] Wrap all the MCPB info needed into a ExtendedMCInstrAnalysis class --- llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h | 85 ++++++++++++++++++++ 1 file changed, 85 insertions(+) create mode 100644 llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h diff --git a/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h b/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h new file mode 100644 index 0000000000000..5323c4e3d1730 --- /dev/null +++ b/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h @@ -0,0 +1,85 @@ +#ifndef LLVM_TOOLS_LLVM_MC_EXTENDED_MC_INSTR_ANALYSIS_H +#define LLVM_TOOLS_LLVM_MC_EXTENDED_MC_INSTR_ANALYSIS_H + +#include "bolt/Core/MCPlusBuilder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDwarf.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegister.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/TargetRegistry.h" +#include +#include +#include + +namespace llvm { + +class ExtendedMCInstrAnalysis { +private: + std::unique_ptr MCPB; + + static bolt::MCPlusBuilder * + createMCPlusBuilder(const Triple::ArchType Arch, + const MCInstrAnalysis *Analysis, const MCInstrInfo *Info, + const MCRegisterInfo *RegInfo, + const MCSubtargetInfo *STI) { + if (Arch == Triple::x86_64) + return bolt::createX86MCPlusBuilder(Analysis, Info, RegInfo, STI); + + llvm_unreachable("architecture unsupported by ExtendedMCInstrAnalysis"); + } + +public: + ExtendedMCInstrAnalysis(MCContext &Context, MCInstrInfo const &MCII, + MCInstrAnalysis *MCIA) { + MCPB.reset(createMCPlusBuilder(Context.getTargetTriple().getArch(), MCIA, + &MCII, Context.getRegisterInfo(), + Context.getSubtargetInfo())); + } + + /// Extra semantic information needed from MC layer: + + MCPhysReg getStackPointer() const { return MCPB->getStackPointer(); } + + bool isPush(const MCInst &Inst) const { return MCPB->isPush(Inst); } + int getPushSize(const MCInst &Inst) const { return MCPB->getPushSize(Inst); } + + bool isPop(const MCInst &Inst) const { return MCPB->isPop(Inst); } + int getPopSize(const MCInst &Inst) const { return MCPB->getPopSize(Inst); } + + bool isRegToRegMove(const MCInst &Inst, MCPhysReg &From, + MCPhysReg &To) const { + return MCPB->isRegToRegMove(Inst, From, To); + } + bool isConditionalMove(const MCInst &Inst) const { + return MCPB->isConditionalMove(Inst); + } + bool isMoveMem2Reg(const MCInst &Inst) const { + return MCPB->isMoveMem2Reg(Inst); + } + bool isSUB(const MCInst &Inst) const { return MCPB->isSUB(Inst); } + + int getMemoryOperandNo(const MCInst &Inst) const { + return MCPB->getMemoryOperandNo(Inst); + } + std::optional + evaluateX86MemoryOperand(const MCInst &Inst) const { + return MCPB->evaluateX86MemoryOperand(Inst); + } + bool isStackAccess(const MCInst &Inst, bool &IsLoad, bool &IsStore, + bool &IsStoreFromReg, MCPhysReg &Reg, int32_t &SrcImm, + uint16_t &StackPtrReg, int64_t &StackOffset, uint8_t &Size, + bool &IsSimple, bool &IsIndexed) const { + return MCPB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, + SrcImm, StackPtrReg, StackOffset, Size, IsSimple, + IsIndexed); + } +}; + +} // namespace llvm + +#endif \ No newline at end of file From 84f8d18507ef92338f87d8424e8bacf26e6b41cc Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Wed, 14 May 2025 23:48:50 +0000 Subject: [PATCH 13/22] Use extended MCAnalysis instead of MCplus (directly) --- llvm/tools/llvm-mc/CFIAnalysis.h | 123 ++++++++++--------------------- 1 file changed, 39 insertions(+), 84 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index 424290f3cab86..1a05dd6de5327 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -2,6 +2,7 @@ #define LLVM_TOOLS_LLVM_MC_CFI_ANALYSIS_H #include "CFIState.h" +#include "ExtendedMCInstrAnalysis.h" #include "bolt/Core/MCPlusBuilder.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" @@ -18,7 +19,6 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/Error.h" #include "llvm/Support/FormatVariadic.h" #include #include @@ -27,24 +27,6 @@ namespace llvm { -bolt::MCPlusBuilder *createMCPlusBuilder(const Triple::ArchType Arch, - const MCInstrAnalysis *Analysis, - const MCInstrInfo *Info, - const MCRegisterInfo *RegInfo, - const MCSubtargetInfo *STI) { - dbgs() << "arch: " << Arch << ", and expected " << Triple::x86_64 << "\n"; - if (Arch == Triple::x86_64) - return bolt::createX86MCPlusBuilder(Analysis, Info, RegInfo, STI); - - // if (Arch == Triple::aarch64) - // return createAArch64MCPlusBuilder(Analysis, Info, RegInfo, STI); - - // if (Arch == Triple::riscv64) - // return createRISCVMCPlusBuilder(Analysis, Info, RegInfo, STI); - - llvm_unreachable("architecture unsupported by MCPlusBuilder"); -} - // TODO remove it, it's just for debug purposes. void printUntilNextLine(const char *Str) { for (int I = 0; Str[I] != '\0' && Str[I] != '\n'; I++) @@ -54,8 +36,8 @@ void printUntilNextLine(const char *Str) { class CFIAnalysis { MCContext &Context; MCInstrInfo const &MCII; - std::unique_ptr MCPB; MCRegisterInfo const *MCRI; + std::unique_ptr EMCIA; CFIState State; public: @@ -65,14 +47,12 @@ class CFIAnalysis { // TODO it should look at the prologue directives and setup the // registers' previous value state here, but for now, it's assumed that all // values are by default `samevalue`. - MCPB.reset(createMCPlusBuilder(Context.getTargetTriple().getArch(), MCIA, - &MCII, Context.getRegisterInfo(), - Context.getSubtargetInfo())); + EMCIA.reset(new ExtendedMCInstrAnalysis(Context, MCII, MCIA)); // TODO CFA offset should be the slot size, but for now I don't have any // access to it, maybe can be read from the prologue // TODO check what should be passed as EH? - State = CFIState(MCRI->getDwarfRegNum(MCPB->getStackPointer(), false), 8); + State = CFIState(MCRI->getDwarfRegNum(EMCIA->getStackPointer(), false), 8); for (unsigned I = 0; I < MCRI->getNumRegs(); I++) { DWARFRegType DwarfI = MCRI->getDwarfRegNum(I, false); State.RegisterCFIStates[DwarfI] = RegisterCFIState::createSameValue(); @@ -84,23 +64,23 @@ class CFIAnalysis { false)] = RegisterCFIState::createUndefined(); // For now, we don't care about the // PC - State.RegisterCFIStates[MCRI->getDwarfRegNum(MCPB->getStackPointer(), + State.RegisterCFIStates[MCRI->getDwarfRegNum(EMCIA->getStackPointer(), false)] = RegisterCFIState::createOffsetFromCFAVal(0); // sp's old value is CFA } bool doesConstantChange(const MCInst &Inst, MCPhysReg Reg, int64_t &HowMuch) { - if (MCPB->isPush(Inst) && Reg == MCPB->getStackPointer()) { + if (EMCIA->isPush(Inst) && Reg == EMCIA->getStackPointer()) { // TODO should get the stack direction here, now it assumes that it goes // down. - HowMuch = -MCPB->getPushSize(Inst); + HowMuch = -EMCIA->getPushSize(Inst); return true; } - if (MCPB->isPop(Inst) && Reg == MCPB->getStackPointer()) { + if (EMCIA->isPop(Inst) && Reg == EMCIA->getStackPointer()) { // TODO should get the stack direction here, now it assumes that it goes // down. - HowMuch = MCPB->getPushSize(Inst); + HowMuch = EMCIA->getPushSize(Inst); return true; } @@ -114,7 +94,7 @@ class CFIAnalysis { { MCPhysReg From; MCPhysReg To; - if (MCPB->isRegToRegMove(Inst, From, To) && From == Reg2 && To == Reg1) { + if (EMCIA->isRegToRegMove(Inst, From, To) && From == Reg2 && To == Reg1) { Diff = 0; return true; } @@ -174,7 +154,8 @@ class CFIAnalysis { } // The CFA register is changed. // TODO move it up - MCPhysReg OldCFAReg = MCRI->getLLVMRegNum(PrevState.CFARegister, false).value(); + MCPhysReg OldCFAReg = + MCRI->getLLVMRegNum(PrevState.CFARegister, false).value(); MCPhysReg NewCFAReg = MCRI->getLLVMRegNum(NextState.CFARegister, false).value(); if (!Writes.count(NextState.CFARegister)) { @@ -201,24 +182,17 @@ class CFIAnalysis { return; } if (Diff != OffsetDiff) { - Context.reportError( - Inst.getLoc(), formatv("After changing the CFA register, the CFA " - "offset should be {0}, but it is {1}.", - PrevState.CFAOffset - Diff, NextState.CFAOffset)); + Context.reportError(Inst.getLoc(), + formatv("After changing the CFA register, the CFA " + "offset should be {0}, but it is {1}.", + PrevState.CFAOffset - Diff, + NextState.CFAOffset)); return; } // Everything is OK! } - void update(const MCDwarfFrameInfo &DwarfFrame, MCInst &Inst, - std::pair - CFIDirectivesRange) { // FIXME this should not be a pair, - // but an ArrayRef - ArrayRef CFIDirectives(DwarfFrame.Instructions); - CFIDirectives = CFIDirectives.drop_front(CFIDirectivesRange.first) - .drop_back(DwarfFrame.Instructions.size() - - CFIDirectivesRange.second); - + void update(MCInst &Inst, ArrayRef CFIDirectives) { const MCInstrDesc &MCInstInfo = MCII.get(Inst.getOpcode()); CFIState AfterState(State); for (auto &&CFIDirective : CFIDirectives) @@ -227,9 +201,6 @@ class CFIAnalysis { CFIDirective.getLoc(), "I don't support this CFI directive, I assume this does nothing " "(which will probably break other things)"); - auto CFAReg = MCRI->getLLVMRegNum(DwarfFrame.CurrentCfaRegister, false); - assert(DwarfFrame.CurrentCfaRegister == AfterState.CFARegister && - "Checking if the CFA tracking is working"); std::set Writes, Reads; for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) @@ -246,23 +217,6 @@ class CFIAnalysis { Reads.insert(MCRI->getDwarfRegNum(Operand.getReg(), false)); } } - - bool ChangedCFA = Writes.count(CFAReg->id()); - bool GoingToChangeCFA = false; - for (auto CFIDirective : CFIDirectives) { - auto Op = CFIDirective.getOperation(); - GoingToChangeCFA |= (Op == MCCFIInstruction::OpDefCfa || - Op == MCCFIInstruction::OpDefCfaOffset || - Op == MCCFIInstruction::OpDefCfaRegister || - Op == MCCFIInstruction::OpAdjustCfaOffset || - Op == MCCFIInstruction::OpLLVMDefAspaceCfa); - } - if (ChangedCFA && !GoingToChangeCFA) { - Context.reportError(Inst.getLoc(), - "The instruction changes CFA register value, but the " - "CFI directives don't update the CFA."); - } - ///////////// being diffing the CFI states checkCFADiff(Inst, State, AfterState, Reads, Writes); ///////////// end diffing the CFI states @@ -276,9 +230,9 @@ class CFIAnalysis { // for (unsigned I = 0; I < Inst.getNumOperands(); I++) { // dbgs() << "Operand #" << I << ", which will be " // << (I < DefCount ? "defined" : "used") << ", is a"; - // if (I == MCPB->getMemoryOperandNo(Inst)) { + // if (I == EMCIA->getMemoryOperandNo(Inst)) { // dbgs() << " memory access, details are:\n"; - // auto X86MemoryOperand = MCPB->evaluateX86MemoryOperand(Inst); + // auto X86MemoryOperand = EMCIA->evaluateX86MemoryOperand(Inst); // dbgs() << " Base Register{ reg#" << X86MemoryOperand->BaseRegNum // << " }"; // dbgs() << " + (Index Register{ reg#" << X86MemoryOperand->IndexRegNum @@ -323,21 +277,21 @@ class CFIAnalysis { // { // move // { // Reg2Reg // MCPhysReg From, To; - // if (MCPB->isRegToRegMove(Inst, From, To)) { - // dbgs() << "It's a reg to reg move.\nFrom reg#" << From << " to - // reg#" + // if (EMCIA->isRegToRegMove(Inst, From, To)) { + // dbgs() << "It's a reg to reg move.\nFrom reg#" << From << " to reg + // #" // << To << "\n"; // } else if (MCInstInfo.isMoveReg()) { // dbgs() << "It's reg to reg move from MCInstInfo view but not from " // "MCPlus view.\n"; // } // } - // if (MCPB->isConditionalMove(Inst)) { + // if (EMCIA->isConditionalMove(Inst)) { // dbgs() << "Its a conditional move.\n"; // } - // if (MCPB->isMoveMem2Reg(Inst)) { + // if (EMCIA->isMoveMem2Reg(Inst)) { // dbgs() << "It's a move from memory to register\n"; - // assert(MCPB->getMemoryOperandNo(Inst) == 1); + // assert(EMCIA->getMemoryOperandNo(Inst) == 1); // } // } @@ -353,9 +307,9 @@ class CFIAnalysis { // bool IsLoad = false; // bool IsStore = false; // bool IsStoreFromReg = false; - // if (MCPB->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, - // SrcImm, StackPtrReg, StackOffset, Size, - // IsSimple, IsIndexed)) { + // if (EMCIA->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, + // SrcImm, StackPtrReg, StackOffset, Size, + // IsSimple, IsIndexed)) { // dbgs() << "This instruction accesses the stack, the details are:\n"; // dbgs() << " Source immediate: " << SrcImm << "\n"; // dbgs() << " Source reg#" << Reg << "\n"; @@ -369,28 +323,29 @@ class CFIAnalysis { // dbgs() << " Is store from reg: " << (IsStoreFromReg ? "yes" : "no") // << "\n"; // } - // if (MCPB->isPush(Inst)) { + // if (EMCIA->isPush(Inst)) { // dbgs() << "This is a push instruction with size " - // << MCPB->getPushSize(Inst) << "\n"; + // << EMCIA->getPushSize(Inst) << "\n"; // } - // if (MCPB->isPop(Inst)) { + // if (EMCIA->isPop(Inst)) { // dbgs() << "This is a pop instruction with size " - // << MCPB->getPopSize(Inst) << "\n"; + // << EMCIA->getPopSize(Inst) << "\n"; // } // } // dbgs() << "---------------Arith Info----------------\n"; // { // arith - // MutableArrayRef MAR = {Inst}; + // /* MutableArrayRef MAR = {Inst}; // if (MCPB->matchAdd(MCPB->matchAnyOperand(), MCPB->matchAnyOperand()) // ->match(*MCRI, *MCPB, MutableArrayRef(), -1)) { // dbgs() << "It is an addition instruction.\n"; - // } else if (MCInstInfo.isAdd()) { - // dbgs() << "It is an addition from MCInstInfo view but not from MCPlus - // " + // } else */ + // if (MCInstInfo.isAdd()) { + // dbgs() << "It is an addition from MCInstInfo view but not from + // MCPlus" // "view.\n"; // } - // if (MCPB->isSUB(Inst)) { + // if (EMCIA->isSUB(Inst)) { // dbgs() << "This is a subtraction.\n"; // } // } From 37186b8041a1546860bcd09245393f0a31bad53e Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Mon, 19 May 2025 18:08:23 +0000 Subject: [PATCH 14/22] Make the CFA checker to first generate, if not matched check the Read/Write conditions --- llvm/tools/llvm-mc/CFIAnalysis.h | 138 +++++++++++++++++-------------- 1 file changed, 74 insertions(+), 64 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index 1a05dd6de5327..ab858e9ae7e52 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -89,12 +89,13 @@ class CFIAnalysis { // Tries to guess Reg1's value in a form of Reg2 (before Inst's execution) + // Diff. - bool isInConstantDistanceOfEachOther(const MCInst &Inst, MCPhysReg Reg1, + bool isInConstantDistanceOfEachOther(const MCInst &Inst, MCPhysReg &Reg1, MCPhysReg Reg2, int &Diff) { { MCPhysReg From; MCPhysReg To; - if (EMCIA->isRegToRegMove(Inst, From, To) && From == Reg2 && To == Reg1) { + if (EMCIA->isRegToRegMove(Inst, From, To) && From == Reg2) { + Reg1 = To; Diff = 0; return true; } @@ -107,89 +108,98 @@ class CFIAnalysis { const CFIState &NextState, const std::set &Reads, const std::set &Writes) { + MCPhysReg PrevCFAPhysReg = + MCRI->getLLVMRegNum(PrevState.CFARegister, false).value(); + MCPhysReg NextCFAPhysReg = + MCRI->getLLVMRegNum(NextState.CFARegister, false).value(); + + // Try to guess the next state. (widen) + std::vector> PossibleNextCFAStates; + { // try generate + { // no change + if (!Writes.count(PrevState.CFARegister)) { + PossibleNextCFAStates.emplace_back(PrevState.CFARegister, + PrevState.CFAOffset); + } + } + + { // const change + int64_t HowMuch; + if (doesConstantChange(Inst, PrevCFAPhysReg, HowMuch)) { + PossibleNextCFAStates.emplace_back(PrevState.CFARegister, + PrevState.CFAOffset - HowMuch); + } + } + + { // constant distance with each other + int Diff; + MCPhysReg PossibleNewCFAReg; + if (isInConstantDistanceOfEachOther(Inst, PossibleNewCFAReg, + PrevCFAPhysReg, Diff)) { + PossibleNextCFAStates.emplace_back( + MCRI->getDwarfRegNum(PossibleNewCFAReg, false), + PrevState.CFAOffset - Diff); + } + } + } + + for (auto &&[PossibleNextCFAReg, PossibleNextCFAOffset] : + PossibleNextCFAStates) { + if (PossibleNextCFAReg != NextState.CFARegister) + continue; + + if (PossibleNextCFAOffset == NextState.CFAOffset) { + // Everything is ok! + return; + } + + Context.reportError(Inst.getLoc(), + formatv("Expected CFA [reg: {0}, offset: {1}] but " + "got [reg: {2}, offset: {3}].", + NextCFAPhysReg, PossibleNextCFAOffset, + NextCFAPhysReg, NextState.CFAOffset)); + return; + } + + // Either couldn't generate, or did, but the programmer wants to change the + // source of register for CFA to something not expected by the generator. So + // it falls back into read/write checks. + if (PrevState.CFARegister == NextState.CFARegister) { if (PrevState.CFAOffset == NextState.CFAOffset) { if (Writes.count(PrevState.CFARegister)) { - Context.reportWarning( + Context.reportError( Inst.getLoc(), formatv("This instruction changes reg#{0}, which is " "the CFA register, but the CFI directives do not.", - MCRI->getLLVMRegNum(PrevState.CFARegister, false))); + PrevCFAPhysReg)); } + + // Everything is ok! return; } // The offset is changed. + if (!Writes.count(PrevState.CFARegister)) { - Context.reportWarning( + Context.reportError( Inst.getLoc(), formatv( "You changed the CFA offset, but there is no modification to " "the CFA register happening in this instruction.")); } - int64_t HowMuch; - if (!doesConstantChange( - Inst, MCRI->getLLVMRegNum(PrevState.CFARegister, false).value(), - HowMuch)) { - Context.reportWarning( - Inst.getLoc(), - formatv("The CFA register changed, but I don't know how, finger " - "crossed the CFI directives are correct.")); - return; - } - // we know it is changed by HowMuch, so the CFA offset should be changed - // by -HowMuch, i.e. AfterState.Offset - State.Offset = -HowMuch - if (NextState.CFAOffset - PrevState.CFAOffset != -HowMuch) { - Context.reportError( - Inst.getLoc(), - formatv("The CFA offset is changed by {0}, but " - "the directives changed it by {1}. (to be exact, the new " - "offset should be {2}, but it is {3})", - -HowMuch, NextState.CFAOffset - PrevState.CFAOffset, - PrevState.CFAOffset - HowMuch, NextState.CFAOffset)); - return; - } - // Everything OK! - return; - } - // The CFA register is changed. - // TODO move it up - MCPhysReg OldCFAReg = - MCRI->getLLVMRegNum(PrevState.CFARegister, false).value(); - MCPhysReg NewCFAReg = - MCRI->getLLVMRegNum(NextState.CFARegister, false).value(); - if (!Writes.count(NextState.CFARegister)) { Context.reportWarning( Inst.getLoc(), - formatv( - "The new CFA register reg#{0}'s value is not assigned by this " - "instruction, try to move the new CFA def to where this " - "value is changed, now I can't infer if this change is " - "correct or not.", - NewCFAReg)); - return; - } - // Because CFA should is the CFA always stays the same: - int OffsetDiff = PrevState.CFAOffset - NextState.CFAOffset; - int Diff; - if (!isInConstantDistanceOfEachOther(Inst, NewCFAReg, OldCFAReg, Diff)) { - Context.reportWarning( - Inst.getLoc(), - formatv("Based on this instruction I cannot infer that the new and " - "old CFA registers are in {0} distance of each other. I " - "hope it's ok.", - OffsetDiff)); - return; - } - if (Diff != OffsetDiff) { - Context.reportError(Inst.getLoc(), - formatv("After changing the CFA register, the CFA " - "offset should be {0}, but it is {1}.", - PrevState.CFAOffset - Diff, - NextState.CFAOffset)); + "I don't know what the instruction did, but it changed the CFA reg's " + "value, and the offset is changed as well by the CFI directives."); + // Everything may be ok! return; } - // Everything is OK! + // The CFA register is changed + Context.reportWarning( + Inst.getLoc(), "The CFA register is changed to something, and I don't " + "have any idea on the new register relevance to CFA."); + // Everything may be ok! } void update(MCInst &Inst, ArrayRef CFIDirectives) { From 203a2380c11ca4bdfb48563c5b0395f89205b43a Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 20 May 2025 01:19:00 +0000 Subject: [PATCH 15/22] Check register states as well --- llvm/tools/llvm-mc/CFIAnalysis.h | 405 ++++++++++++++++++++++++++++--- llvm/tools/llvm-mc/CFIState.h | 38 +++ 2 files changed, 403 insertions(+), 40 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index ab858e9ae7e52..f0b81a85e2fd4 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -19,6 +19,7 @@ #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/TargetRegistry.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormatVariadic.h" #include #include @@ -40,6 +41,41 @@ class CFIAnalysis { std::unique_ptr EMCIA; CFIState State; +private: + // The CFI analysis only keeps track and cares about super registers, not the + // subregisters. All reads to/writes from subregisters and considered the same + // operation to super registers. Other operations like loading and stores are + // considered only if they are exactly doing the operation to or from a super + // register. + // As en example, if you spill a sub register to stack, the CFI analysis does + // not consider that a register spilling. + bool isSuperReg(MCPhysReg Reg) { return MCRI->superregs(Reg).empty(); } + + std::set getAllSuperRegs() { + std::set SuperRegs; + for (auto &&RegClass : MCRI->regclasses()) { + for (unsigned I = 0; I < RegClass.getNumRegs(); I++) { + MCPhysReg Reg = RegClass.getRegister(I); + if (!isSuperReg(Reg)) + continue; + SuperRegs.insert(Reg); + } + } + + return SuperRegs; + } + + MCPhysReg getSuperReg(MCPhysReg Reg) { + if (isSuperReg(Reg)) + return Reg; + for (auto SuperReg : MCRI->superregs(Reg)) { + if (isSuperReg(SuperReg)) + return SuperReg; + } + + llvm_unreachable("Should either be a super reg, or have a super reg"); + } + public: CFIAnalysis(MCContext &Context, MCInstrInfo const &MCII, MCInstrAnalysis *MCIA) @@ -53,7 +89,7 @@ class CFIAnalysis { // access to it, maybe can be read from the prologue // TODO check what should be passed as EH? State = CFIState(MCRI->getDwarfRegNum(EMCIA->getStackPointer(), false), 8); - for (unsigned I = 0; I < MCRI->getNumRegs(); I++) { + for (MCPhysReg I : getAllSuperRegs()) { DWARFRegType DwarfI = MCRI->getDwarfRegNum(I, false); State.RegisterCFIStates[DwarfI] = RegisterCFIState::createSameValue(); } @@ -104,6 +140,275 @@ class CFIAnalysis { return false; } + bool doStoreFromReg(const MCInst &Inst, MCPhysReg StoringReg, + MCPhysReg FromReg, int64_t &Offset) { + if (EMCIA->isPush(Inst) && FromReg == EMCIA->getStackPointer()) { + // TODO should get the stack direction here, now it assumes that it goes + // down. + Offset = -EMCIA->getPushSize(Inst); + return true; + } + + { + bool IsLoad; + bool IsStore; + bool IsStoreFromReg; + MCPhysReg SrcReg; + int32_t SrcImm; + uint16_t StackPtrReg; + int64_t StackOffset; + uint8_t Size; + bool IsSimple; + bool IsIndexed; + if (EMCIA->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, SrcReg, + SrcImm, StackPtrReg, StackOffset, Size, IsSimple, + IsIndexed)) { + // TODO make sure that simple means that it's store and does nothing + // more. + if (IsStore && IsSimple && StackPtrReg == FromReg && IsStoreFromReg && + SrcReg == StoringReg) { + Offset = StackOffset; + return true; + } + } + } + + return false; + } + + bool doLoadFromReg(const MCInst &Inst, MCPhysReg FromReg, int64_t &Offset, + MCPhysReg &LoadingReg) { + if (EMCIA->isPop(Inst) && FromReg == EMCIA->getStackPointer()) { + // TODO should get the stack direction here, now it assumes that it goes + // down. + Offset = 0; + LoadingReg = Inst.getOperand(0).getReg(); + return true; + } + + { + bool IsLoad; + bool IsStore; + bool IsStoreFromReg; + MCPhysReg SrcReg; + int32_t SrcImm; + uint16_t StackPtrReg; + int64_t StackOffset; + uint8_t Size; + bool IsSimple; + bool IsIndexed; + if (EMCIA->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, SrcReg, + SrcImm, StackPtrReg, StackOffset, Size, IsSimple, + IsIndexed)) { + // TODO make sure that simple means that it's store and does nothing + // more. + if (IsLoad && IsSimple && StackPtrReg == FromReg) { + Offset = StackOffset; + LoadingReg = SrcReg; + return true; + } + } + } + + { + if (EMCIA->isMoveMem2Reg(Inst)) { + auto X86MemAccess = EMCIA->evaluateX86MemoryOperand(Inst).value(); + if (X86MemAccess.BaseRegNum == FromReg && + (X86MemAccess.ScaleImm == 0 || X86MemAccess.IndexRegNum == 0) && + !X86MemAccess.DispExpr) { + LoadingReg = Inst.getOperand(0).getReg(); + Offset = X86MemAccess.DispImm; + return true; + } + } + } + + return false; + } + + void checkRegDiff(const MCInst &Inst, DWARFRegType Reg, + const CFIState &PrevState, const CFIState &NextState, + const RegisterCFIState &PrevRegState, + const RegisterCFIState &NextRegState, + const std::set &Reads, + const std::set &Writes) { + auto RegLLVMOpt = MCRI->getLLVMRegNum(Reg, false); + if (RegLLVMOpt == std::nullopt) { + assert(PrevRegState == NextRegState); + return; + } + MCPhysReg RegLLVM = RegLLVMOpt.value(); + + auto &&PrevRefReg = + PrevState.getReferenceRegisterForCallerValueOfRegister(Reg); + auto &&NextRefReg = + NextState.getReferenceRegisterForCallerValueOfRegister(Reg); + + std::optional PrevRefRegLLVM = + (PrevRefReg != std::nullopt + ? std::make_optional( + MCRI->getLLVMRegNum(PrevRefReg.value(), false).value()) + : std::nullopt); + std::optional NextRefRegLLVM = + (PrevRefReg != std::nullopt + ? std::make_optional( + MCRI->getLLVMRegNum(PrevRefReg.value(), false).value()) + : std::nullopt); + + MCPhysReg PrevStateCFARegLLVM = + MCRI->getLLVMRegNum(PrevState.CFARegister, false).value(); + + { // try generate + // Widen + std::vector PossibleNextRegStates; + { // storing to offset from CFA + if (PrevRegState.RetrieveApproach == RegisterCFIState::SameValue || + PrevRegState.RetrieveApproach == + RegisterCFIState::AnotherRegister) { + int64_t OffsetFromCFAReg; + if (doStoreFromReg(Inst, PrevRefRegLLVM.value(), PrevStateCFARegLLVM, + OffsetFromCFAReg)) { + PossibleNextRegStates.push_back( + RegisterCFIState::createOffsetFromCFAAddr(OffsetFromCFAReg - + PrevState.CFAOffset)); + } + } + } + + { // loading from an offset from CFA + if (PrevRegState.RetrieveApproach == + RegisterCFIState::OffsetFromCFAAddr) { + int64_t OffsetFromCFAReg; + MCPhysReg ToRegLLVM; + if (doLoadFromReg(Inst, PrevStateCFARegLLVM, OffsetFromCFAReg, + ToRegLLVM) && + OffsetFromCFAReg == PrevRegState.Info.OffsetFromCFA) { + DWARFRegType ToReg = MCRI->getDwarfRegNum(ToRegLLVM, false); + if (ToReg == Reg) { + PossibleNextRegStates.push_back( + RegisterCFIState::createSameValue()); + } else { + PossibleNextRegStates.push_back( + RegisterCFIState::createAnotherRegister(ToReg)); + } + } + } + } + + { // moved from reg to other reg + if (PrevRegState.RetrieveApproach == RegisterCFIState::SameValue || + PrevRegState.RetrieveApproach == + RegisterCFIState::AnotherRegister) { + + int Diff; + MCPhysReg PossibleRegLLVM; + if (isInConstantDistanceOfEachOther(Inst, PossibleRegLLVM, + PrevRefRegLLVM.value(), Diff)) { + DWARFRegType PossibleReg = + MCRI->getDwarfRegNum(PossibleRegLLVM, false); + if (Diff == 0) { + if (PossibleReg == Reg) { + PossibleNextRegStates.push_back( + RegisterCFIState::createSameValue()); + } else { + PossibleNextRegStates.push_back( + RegisterCFIState::createAnotherRegister(PossibleReg)); + } + } + } + } + } + + { // stay the same + bool CanStayTheSame = false; + + switch (PrevRegState.RetrieveApproach) { + case RegisterCFIState::Undefined: + case RegisterCFIState::OffsetFromCFAVal: + CanStayTheSame = true; + break; + case RegisterCFIState::SameValue: + case RegisterCFIState::AnotherRegister: + CanStayTheSame = !Writes.count(PrevRefReg.value()); + break; + case RegisterCFIState::OffsetFromCFAAddr: + case RegisterCFIState::Other: + // cannot be sure + break; + } + + if (CanStayTheSame) + PossibleNextRegStates.push_back(PrevRegState); + } + + for (auto &&PossibleNextRegState : PossibleNextRegStates) { + if (PossibleNextRegState == NextRegState) { + // Everything is ok + return; + } + } + + for (auto &&PossibleNextRegState : PossibleNextRegStates) { + if (PossibleNextRegState.RetrieveApproach != + NextRegState.RetrieveApproach) + continue; + + if (PossibleNextRegState.RetrieveApproach == + RegisterCFIState::OffsetFromCFAAddr) { + Context.reportError( + Inst.getLoc(), + formatv( + "Expected caller's value of reg#{0} should be at offset {1} " + "of CFA but the CFI directives say it's in {2}", + RegLLVM, PossibleNextRegState.Info.OffsetFromCFA, + NextRegState.Info.OffsetFromCFA)); + } + } + } + // Either couldn't generate, or the programmer changed the state to + // something that couldn't be matched to any of the generated states. So + // it falls back into read/write checks. + + if (PrevRegState == NextRegState) { + switch (PrevRegState.RetrieveApproach) { + case RegisterCFIState::SameValue: + case RegisterCFIState::AnotherRegister: + if (Writes.count(PrevRefReg.value())) { + Context.reportError( + Inst.getLoc(), + formatv("Reg#{0} caller's value is in reg#{1} which is changed " + "by this instruction, but not changed in CFI directives", + RegLLVM, PrevRefRegLLVM.value())); + return; + } + break; + default: + // Everything may be ok + break; + } + return; + } + + if (PrevRegState.RetrieveApproach == NextRegState.RetrieveApproach) { + // Everything may be ok + return; + } + + if (PrevRegState.RetrieveApproach == RegisterCFIState::Undefined) { + Context.reportError(Inst.getLoc(), + "Cannot change a register CFI information from " + "undefined to something else."); + return; + } + + Context.reportWarning(Inst.getLoc(), + formatv("The reg#{0} CFI state is changed, but I " + "don't have any idea how.", + RegLLVM)); + // Everything may be ok + return; + } + void checkCFADiff(const MCInst &Inst, const CFIState &PrevState, const CFIState &NextState, const std::set &Reads, @@ -113,9 +418,9 @@ class CFIAnalysis { MCPhysReg NextCFAPhysReg = MCRI->getLLVMRegNum(NextState.CFARegister, false).value(); - // Try to guess the next state. (widen) - std::vector> PossibleNextCFAStates; - { // try generate + { // try generate + // Widen + std::vector> PossibleNextCFAStates; { // no change if (!Writes.count(PrevState.CFARegister)) { PossibleNextCFAStates.emplace_back(PrevState.CFARegister, @@ -141,29 +446,29 @@ class CFIAnalysis { PrevState.CFAOffset - Diff); } } - } - for (auto &&[PossibleNextCFAReg, PossibleNextCFAOffset] : - PossibleNextCFAStates) { - if (PossibleNextCFAReg != NextState.CFARegister) - continue; + for (auto &&[PossibleNextCFAReg, PossibleNextCFAOffset] : + PossibleNextCFAStates) { + if (PossibleNextCFAReg != NextState.CFARegister) + continue; - if (PossibleNextCFAOffset == NextState.CFAOffset) { - // Everything is ok! + if (PossibleNextCFAOffset == NextState.CFAOffset) { + // Everything is ok! + return; + } + + Context.reportError(Inst.getLoc(), + formatv("Expected CFA [reg: {0}, offset: {1}] but " + "got [reg: {2}, offset: {3}].", + NextCFAPhysReg, PossibleNextCFAOffset, + NextCFAPhysReg, NextState.CFAOffset)); return; } - - Context.reportError(Inst.getLoc(), - formatv("Expected CFA [reg: {0}, offset: {1}] but " - "got [reg: {2}, offset: {3}].", - NextCFAPhysReg, PossibleNextCFAOffset, - NextCFAPhysReg, NextState.CFAOffset)); - return; } - // Either couldn't generate, or did, but the programmer wants to change the - // source of register for CFA to something not expected by the generator. So - // it falls back into read/write checks. + // Either couldn't generate, or did, but the programmer wants to change + // the source of register for CFA to something not expected by the + // generator. So it falls back into read/write checks. if (PrevState.CFARegister == NextState.CFARegister) { if (PrevState.CFAOffset == NextState.CFAOffset) { @@ -173,6 +478,7 @@ class CFIAnalysis { formatv("This instruction changes reg#{0}, which is " "the CFA register, but the CFI directives do not.", PrevCFAPhysReg)); + return; } // Everything is ok! @@ -190,7 +496,8 @@ class CFIAnalysis { Context.reportWarning( Inst.getLoc(), - "I don't know what the instruction did, but it changed the CFA reg's " + "I don't know what the instruction did, but it changed the CFA " + "reg's " "value, and the offset is changed as well by the CFI directives."); // Everything may be ok! return; @@ -198,7 +505,8 @@ class CFIAnalysis { // The CFA register is changed Context.reportWarning( Inst.getLoc(), "The CFA register is changed to something, and I don't " - "have any idea on the new register relevance to CFA."); + "have any idea on the new register relevance to CFA. I " + "assume CFA is preserved."); // Everything may be ok! } @@ -214,21 +522,33 @@ class CFIAnalysis { std::set Writes, Reads; for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) - Reads.insert(MCRI->getDwarfRegNum(MCInstInfo.implicit_uses()[I], false)); + Reads.insert(MCRI->getDwarfRegNum( + getSuperReg(MCInstInfo.implicit_uses()[I]), false)); for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) - Writes.insert(MCRI->getDwarfRegNum(MCInstInfo.implicit_defs()[I], false)); + Writes.insert(MCRI->getDwarfRegNum( + getSuperReg(MCInstInfo.implicit_defs()[I]), false)); for (unsigned I = 0; I < Inst.getNumOperands(); I++) { auto &&Operand = Inst.getOperand(I); if (Operand.isReg()) { if (I < MCInstInfo.getNumDefs()) - Writes.insert(MCRI->getDwarfRegNum(Operand.getReg(), false)); - else - Reads.insert(MCRI->getDwarfRegNum(Operand.getReg(), false)); + Writes.insert( + MCRI->getDwarfRegNum(getSuperReg(Operand.getReg()), false)); + else if (Operand.getReg()) + Reads.insert( + MCRI->getDwarfRegNum(getSuperReg(Operand.getReg()), false)); } } + ///////////// being diffing the CFI states checkCFADiff(Inst, State, AfterState, Reads, Writes); + + for (auto &&[Reg, RegState] : State.RegisterCFIStates) { + assert(AfterState.RegisterCFIStates.count(Reg) && + "Registers' state should not be deleted by CFI instruction."); + checkRegDiff(Inst, Reg, State, AfterState, RegState, + AfterState.RegisterCFIStates[Reg], Reads, Writes); + } ///////////// end diffing the CFI states // dbgs() << "^^^^^^^^^^^^^^^^ InsCFIs ^^^^^^^^^^^^^^^^\n"; @@ -245,7 +565,8 @@ class CFIAnalysis { // auto X86MemoryOperand = EMCIA->evaluateX86MemoryOperand(Inst); // dbgs() << " Base Register{ reg#" << X86MemoryOperand->BaseRegNum // << " }"; - // dbgs() << " + (Index Register{ reg#" << X86MemoryOperand->IndexRegNum + // dbgs() << " + (Index Register{ reg#" << + // X86MemoryOperand->IndexRegNum // << " }"; // dbgs() << " * Scale{ value " << X86MemoryOperand->ScaleImm << " }"; // dbgs() << ") + Displacement{ " @@ -253,7 +574,8 @@ class CFIAnalysis { // ? "an expression" // : "value " + itostr(X86MemoryOperand->DispImm)) // << " }\n"; - // // TODO, it's not correct always, it cannot be assumed (or should be + // // TODO, it's not correct always, it cannot be assumed (or should + // be // // checked) that memory operands are flatten into 4 operands in // MCInc. I += 4; continue; // } @@ -288,11 +610,13 @@ class CFIAnalysis { // { // Reg2Reg // MCPhysReg From, To; // if (EMCIA->isRegToRegMove(Inst, From, To)) { - // dbgs() << "It's a reg to reg move.\nFrom reg#" << From << " to reg + // dbgs() << "It's a reg to reg move.\nFrom reg#" << From << " to + // reg // #" // << To << "\n"; // } else if (MCInstInfo.isMoveReg()) { - // dbgs() << "It's reg to reg move from MCInstInfo view but not from " + // dbgs() << "It's reg to reg move from MCInstInfo view but not from + // " // "MCPlus view.\n"; // } // } @@ -320,17 +644,17 @@ class CFIAnalysis { // if (EMCIA->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, // SrcImm, StackPtrReg, StackOffset, Size, // IsSimple, IsIndexed)) { - // dbgs() << "This instruction accesses the stack, the details are:\n"; - // dbgs() << " Source immediate: " << SrcImm << "\n"; - // dbgs() << " Source reg#" << Reg << "\n"; - // dbgs() << " Stack pointer: reg#" << StackPtrReg << "\n"; - // dbgs() << " Stack offset: " << StackOffset << "\n"; - // dbgs() << " size: " << (int)Size << "\n"; + // dbgs() << "This instruction accesses the stack, the details + // are:\n"; dbgs() << " Source immediate: " << SrcImm << "\n"; dbgs() + // << " Source reg#" << Reg << "\n"; dbgs() << " Stack pointer: + // reg#" << StackPtrReg << "\n"; dbgs() << " Stack offset: " << + // StackOffset << "\n"; dbgs() << " size: " << (int)Size << "\n"; // dbgs() << " Is simple: " << (IsSimple ? "yes" : "no") << "\n"; // dbgs() << " Is indexed: " << (IsIndexed ? "yes" : "no") << "\n"; // dbgs() << " Is load: " << (IsLoad ? "yes" : "no") << "\n"; // dbgs() << " Is store: " << (IsStore ? "yes" : "no") << "\n"; - // dbgs() << " Is store from reg: " << (IsStoreFromReg ? "yes" : "no") + // dbgs() << " Is store from reg: " << (IsStoreFromReg ? "yes" : + // "no") // << "\n"; // } // if (EMCIA->isPush(Inst)) { @@ -364,7 +688,8 @@ class CFIAnalysis { // dbgs() << "The CFA register is: " << CFAReg << "\n"; // dbgs() << "The instruction does " << (ChangedCFA ? "" : "NOT ") // << "change the CFA.\n"; - // dbgs() << "The CFI directives does " << (GoingToChangeCFA ? "" : "NOT ") + // dbgs() << "The CFI directives does " << (GoingToChangeCFA ? "" : "NOT + // ") // << "change the CFA.\n"; // dbgs() << "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; diff --git a/llvm/tools/llvm-mc/CFIState.h b/llvm/tools/llvm-mc/CFIState.h index 5551e9778f1d2..b0091efecbb7b 100644 --- a/llvm/tools/llvm-mc/CFIState.h +++ b/llvm/tools/llvm-mc/CFIState.h @@ -5,7 +5,9 @@ #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCRegister.h" #include "llvm/Support/MathExtras.h" +#include #include +#include namespace llvm { using DWARFRegType = int64_t; @@ -26,6 +28,23 @@ struct RegisterCFIState { DWARFRegType Register; } Info; + bool operator==(const RegisterCFIState &OtherState) const { + if (RetrieveApproach != OtherState.RetrieveApproach) + return false; + + switch (RetrieveApproach) { + case Undefined: + case SameValue: + case Other: + return true; + case AnotherRegister: + return Info.Register == OtherState.Info.Register; + case OffsetFromCFAAddr: + case OffsetFromCFAVal: + return Info.OffsetFromCFA == OtherState.Info.OffsetFromCFA; + } + } + static RegisterCFIState createUndefined() { RegisterCFIState State; State.RetrieveApproach = Undefined; @@ -97,6 +116,25 @@ struct CFIState { CFIState(DWARFRegType CFARegister, int CFIOffset) : CFARegister(CFARegister), CFAOffset(CFIOffset) {} + std::optional + getReferenceRegisterForCallerValueOfRegister(DWARFRegType Reg) const { + assert(RegisterCFIStates.count(Reg) && + "The register should be tracked inside the register states"); + auto &&RegState = RegisterCFIStates.at(Reg); + switch (RegState.RetrieveApproach) { + case RegisterCFIState::Undefined: + case RegisterCFIState::Other: + return std::nullopt; + case RegisterCFIState::SameValue: + return Reg; + case RegisterCFIState::AnotherRegister: + return RegState.Info.Register; + case RegisterCFIState::OffsetFromCFAAddr: + case RegisterCFIState::OffsetFromCFAVal: + return CFARegister; + } + } + bool apply(const MCCFIInstruction &CFIDirective) { switch (CFIDirective.getOperation()) { case MCCFIInstruction::OpDefCfaRegister: From 5389a61dd6a30a76dd74e62ab24cee24582b85c7 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 20 May 2025 02:29:47 +0000 Subject: [PATCH 16/22] Process prologue as well --- llvm/tools/llvm-mc/CFIAnalysis.h | 11 +++-- llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h | 53 ++++++++++------------ 2 files changed, 32 insertions(+), 32 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index f0b81a85e2fd4..533ead49c300d 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -78,11 +78,9 @@ class CFIAnalysis { public: CFIAnalysis(MCContext &Context, MCInstrInfo const &MCII, - MCInstrAnalysis *MCIA) + MCInstrAnalysis *MCIA, + ArrayRef PrologueCFIDirectives) : Context(Context), MCII(MCII), MCRI(Context.getRegisterInfo()) { - // TODO it should look at the prologue directives and setup the - // registers' previous value state here, but for now, it's assumed that all - // values are by default `samevalue`. EMCIA.reset(new ExtendedMCInstrAnalysis(Context, MCII, MCIA)); // TODO CFA offset should be the slot size, but for now I don't have any @@ -103,6 +101,11 @@ class CFIAnalysis { State.RegisterCFIStates[MCRI->getDwarfRegNum(EMCIA->getStackPointer(), false)] = RegisterCFIState::createOffsetFromCFAVal(0); // sp's old value is CFA + + // Applying the prologue after default assumptions to overwrite them. + for (auto &&PrologueCFIDirective : PrologueCFIDirectives) { + State.apply(PrologueCFIDirective); + } } bool doesConstantChange(const MCInst &Inst, MCPhysReg Reg, int64_t &HowMuch) { diff --git a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h index a9354d7af8a2f..7f79653004e5e 100644 --- a/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h +++ b/llvm/tools/llvm-mc/CFIAnalysisMCStreamer.h @@ -38,12 +38,8 @@ class CFIAnalysisMCStreamer : public MCStreamer { }; std::optional LastInstruction; - MCDwarfFrameInfo const *LastDwarfFrameInfo; - - std::optional getLastICFI() { - if (!LastInstruction) - return std::nullopt; + std::pair getCFIDirectivesRange() { auto DwarfFrameInfos = getDwarfFrameInfos(); int FrameIndex = FrameIndices.back(); auto CurrentCFIDirectiveState = @@ -57,31 +53,42 @@ class CFIAnalysisMCStreamer : public MCStreamer { std::pair CFIDirectivesRange( LastCFIDirectivesState.DirectiveIndex, CurrentCFIDirectiveState.DirectiveIndex); - LastCFIDirectivesState = CurrentCFIDirectiveState; - return ICFI(LastInstruction.value(), CFIDirectivesRange); + return CFIDirectivesRange; } void feedCFIA() { - if (!LastDwarfFrameInfo) { + auto FrameIndex = FrameIndices.back(); + if (FrameIndex < 0) { // TODO Maybe this corner case causes bugs, when the programmer did a // mistake in the startproc, endprocs and also made a mistake in not // adding cfi directives for a instruction. Then this would cause to // ignore the instruction. - auto LastICFI = getLastICFI(); - assert(!LastICFI || LastICFI->CFIDirectivesRange.first == - LastICFI->CFIDirectivesRange.second); + auto CFIDirectivesRange = getCFIDirectivesRange(); + assert(!LastInstruction || + CFIDirectivesRange.first == CFIDirectivesRange.second); return; } - if (auto ICFI = getLastICFI()) { - assert(!CFIAs.empty()); - ArrayRef CFIDirectives( - LastDwarfFrameInfo->Instructions); - CFIDirectives = CFIDirectives.drop_front(ICFI->CFIDirectivesRange.first) + // TODO get this from emit yourself, instead of getting it in this way + const auto *LastDwarfFrameInfo = &getDwarfFrameInfos()[FrameIndex]; + + auto CFIDirectivesRange = getCFIDirectivesRange(); + ArrayRef CFIDirectives; + if (CFIDirectivesRange.first < CFIDirectivesRange.second) { + CFIDirectives = + ArrayRef(LastDwarfFrameInfo->Instructions); + CFIDirectives = CFIDirectives.drop_front(CFIDirectivesRange.first) .drop_back(LastDwarfFrameInfo->Instructions.size() - - ICFI->CFIDirectivesRange.second); - CFIAs.back().update(ICFI->Instruction, CFIDirectives); + CFIDirectivesRange.second); + } + + if (LastInstruction != std::nullopt) { + assert(!CFIAs.empty()); + + CFIAs.back().update(LastInstruction.value(), CFIDirectives); + } else { + CFIAs.emplace_back(getContext(), MCII, MCIA.get(), CFIDirectives); } } @@ -114,21 +121,12 @@ class CFIAnalysisMCStreamer : public MCStreamer { const MCSubtargetInfo &STI) override { feedCFIA(); LastInstruction = Inst; - if (hasUnfinishedDwarfFrameInfo()) - LastDwarfFrameInfo = - &getDwarfFrameInfos()[FrameIndices.back()]; // TODO get this from emit - // yourself, instead of - // getting it in this way - else - LastDwarfFrameInfo = nullptr; } void emitCFIStartProcImpl(MCDwarfFrameInfo &Frame) override { feedCFIA(); FrameIndices.push_back(getNumFrameInfos()); - CFIAs.emplace_back(getContext(), MCII, MCIA.get()); LastInstruction = std::nullopt; - LastDwarfFrameInfo = nullptr; LastCFIDirectivesState.DirectiveIndex = 0; MCStreamer::emitCFIStartProcImpl(Frame); } @@ -139,7 +137,6 @@ class CFIAnalysisMCStreamer : public MCStreamer { FrameIndices.pop_back(); CFIAs.pop_back(); LastInstruction = std::nullopt; - LastDwarfFrameInfo = nullptr; auto FrameIndex = FrameIndices.back(); LastCFIDirectivesState.DirectiveIndex = FrameIndex >= 0 ? getDwarfFrameInfos()[FrameIndex].Instructions.size() From fe03122d9bad89a9260792f09fb1a97091bcbbaf Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 20 May 2025 17:54:28 +0000 Subject: [PATCH 17/22] Ignore flag registers as well --- llvm/tools/llvm-mc/CFIAnalysis.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index 533ead49c300d..55a02dcb1013e 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -102,6 +102,9 @@ class CFIAnalysis { false)] = RegisterCFIState::createOffsetFromCFAVal(0); // sp's old value is CFA + State.RegisterCFIStates[MCRI->getDwarfRegNum(EMCIA->getFlagsReg(), false)] = + RegisterCFIState::createUndefined(); // Flags cannot be caller-saved + // Applying the prologue after default assumptions to overwrite them. for (auto &&PrologueCFIDirective : PrologueCFIDirectives) { State.apply(PrologueCFIDirective); @@ -119,7 +122,7 @@ class CFIAnalysis { if (EMCIA->isPop(Inst) && Reg == EMCIA->getStackPointer()) { // TODO should get the stack direction here, now it assumes that it goes // down. - HowMuch = EMCIA->getPushSize(Inst); + HowMuch = EMCIA->getPopSize(Inst); return true; } @@ -285,7 +288,8 @@ class CFIAnalysis { MCPhysReg ToRegLLVM; if (doLoadFromReg(Inst, PrevStateCFARegLLVM, OffsetFromCFAReg, ToRegLLVM) && - OffsetFromCFAReg == PrevRegState.Info.OffsetFromCFA) { + OffsetFromCFAReg - PrevState.CFAOffset == + PrevRegState.Info.OffsetFromCFA) { DWARFRegType ToReg = MCRI->getDwarfRegNum(ToRegLLVM, false); if (ToReg == Reg) { PossibleNextRegStates.push_back( From 9ded4f3d8cbd47b3808f1b42a05ae8a4eaa65cae Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 20 May 2025 17:54:44 +0000 Subject: [PATCH 18/22] Enable dump --- llvm/tools/llvm-mc/CFIState.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/llvm/tools/llvm-mc/CFIState.h b/llvm/tools/llvm-mc/CFIState.h index b0091efecbb7b..1b66d0dd40904 100644 --- a/llvm/tools/llvm-mc/CFIState.h +++ b/llvm/tools/llvm-mc/CFIState.h @@ -2,12 +2,15 @@ #define LLVM_TOOLS_LLVM_MC_CFI_STATE_H #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDwarf.h" #include "llvm/MC/MCRegister.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include #include #include +#include namespace llvm { using DWARFRegType = int64_t; @@ -28,6 +31,24 @@ struct RegisterCFIState { DWARFRegType Register; } Info; + std::string dump() { + switch (RetrieveApproach) { + case Undefined: + return "undefined"; + case SameValue: + return "same value"; + case AnotherRegister: + return formatv("stored in another register, which is reg#{0}", + Info.Register); + case OffsetFromCFAAddr: + return formatv("offset {0} from CFA", Info.OffsetFromCFA); + case OffsetFromCFAVal: + return formatv("CFA value + {0}", Info.OffsetFromCFA); + case Other: + return "other"; + } + } + bool operator==(const RegisterCFIState &OtherState) const { if (RetrieveApproach != OtherState.RetrieveApproach) return false; @@ -45,6 +66,10 @@ struct RegisterCFIState { } } + bool operator!=(const RegisterCFIState &OtherState) const { + return !(*this == OtherState); + } + static RegisterCFIState createUndefined() { RegisterCFIState State; State.RetrieveApproach = Undefined; From 6b7dd6adce4b91d8df3d74a51ff6b3cdfdfee11f Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 20 May 2025 17:55:25 +0000 Subject: [PATCH 19/22] Enable getting flag register through extended MCInstrAnalysis --- llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h b/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h index 5323c4e3d1730..547d132671f45 100644 --- a/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h +++ b/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h @@ -44,6 +44,7 @@ class ExtendedMCInstrAnalysis { /// Extra semantic information needed from MC layer: MCPhysReg getStackPointer() const { return MCPB->getStackPointer(); } + MCPhysReg getFlagsReg() const { return MCPB->getFlagsReg(); } bool isPush(const MCInst &Inst) const { return MCPB->isPush(Inst); } int getPushSize(const MCInst &Inst) const { return MCPB->getPushSize(Inst); } From 972490702ebad0591fec452dd588daeacc454b63 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Tue, 20 May 2025 18:29:25 +0000 Subject: [PATCH 20/22] Move the isolated MC semantic methods to ExtendedMCAnalysis --- llvm/tools/llvm-mc/CFIAnalysis.h | 350 +++---------------- llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h | 120 +++++++ 2 files changed, 162 insertions(+), 308 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIAnalysis.h b/llvm/tools/llvm-mc/CFIAnalysis.h index 55a02dcb1013e..57e2b7247b45f 100644 --- a/llvm/tools/llvm-mc/CFIAnalysis.h +++ b/llvm/tools/llvm-mc/CFIAnalysis.h @@ -111,127 +111,49 @@ class CFIAnalysis { } } - bool doesConstantChange(const MCInst &Inst, MCPhysReg Reg, int64_t &HowMuch) { - if (EMCIA->isPush(Inst) && Reg == EMCIA->getStackPointer()) { - // TODO should get the stack direction here, now it assumes that it goes - // down. - HowMuch = -EMCIA->getPushSize(Inst); - return true; - } - - if (EMCIA->isPop(Inst) && Reg == EMCIA->getStackPointer()) { - // TODO should get the stack direction here, now it assumes that it goes - // down. - HowMuch = EMCIA->getPopSize(Inst); - return true; - } - - return false; - } - - // Tries to guess Reg1's value in a form of Reg2 (before Inst's execution) + - // Diff. - bool isInConstantDistanceOfEachOther(const MCInst &Inst, MCPhysReg &Reg1, - MCPhysReg Reg2, int &Diff) { - { - MCPhysReg From; - MCPhysReg To; - if (EMCIA->isRegToRegMove(Inst, From, To) && From == Reg2) { - Reg1 = To; - Diff = 0; - return true; - } - } - - return false; - } + void update(MCInst &Inst, ArrayRef CFIDirectives) { + const MCInstrDesc &MCInstInfo = MCII.get(Inst.getOpcode()); + CFIState AfterState(State); + for (auto &&CFIDirective : CFIDirectives) + if (!AfterState.apply(CFIDirective)) + Context.reportWarning( + CFIDirective.getLoc(), + "I don't support this CFI directive, I assume this does nothing " + "(which will probably break other things)"); - bool doStoreFromReg(const MCInst &Inst, MCPhysReg StoringReg, - MCPhysReg FromReg, int64_t &Offset) { - if (EMCIA->isPush(Inst) && FromReg == EMCIA->getStackPointer()) { - // TODO should get the stack direction here, now it assumes that it goes - // down. - Offset = -EMCIA->getPushSize(Inst); - return true; - } + std::set Writes, Reads; + for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) + Reads.insert(MCRI->getDwarfRegNum( + getSuperReg(MCInstInfo.implicit_uses()[I]), false)); + for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) + Writes.insert(MCRI->getDwarfRegNum( + getSuperReg(MCInstInfo.implicit_defs()[I]), false)); - { - bool IsLoad; - bool IsStore; - bool IsStoreFromReg; - MCPhysReg SrcReg; - int32_t SrcImm; - uint16_t StackPtrReg; - int64_t StackOffset; - uint8_t Size; - bool IsSimple; - bool IsIndexed; - if (EMCIA->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, SrcReg, - SrcImm, StackPtrReg, StackOffset, Size, IsSimple, - IsIndexed)) { - // TODO make sure that simple means that it's store and does nothing - // more. - if (IsStore && IsSimple && StackPtrReg == FromReg && IsStoreFromReg && - SrcReg == StoringReg) { - Offset = StackOffset; - return true; - } + for (unsigned I = 0; I < Inst.getNumOperands(); I++) { + auto &&Operand = Inst.getOperand(I); + if (Operand.isReg()) { + if (I < MCInstInfo.getNumDefs()) + Writes.insert( + MCRI->getDwarfRegNum(getSuperReg(Operand.getReg()), false)); + else if (Operand.getReg()) + Reads.insert( + MCRI->getDwarfRegNum(getSuperReg(Operand.getReg()), false)); } } - return false; - } - - bool doLoadFromReg(const MCInst &Inst, MCPhysReg FromReg, int64_t &Offset, - MCPhysReg &LoadingReg) { - if (EMCIA->isPop(Inst) && FromReg == EMCIA->getStackPointer()) { - // TODO should get the stack direction here, now it assumes that it goes - // down. - Offset = 0; - LoadingReg = Inst.getOperand(0).getReg(); - return true; - } - - { - bool IsLoad; - bool IsStore; - bool IsStoreFromReg; - MCPhysReg SrcReg; - int32_t SrcImm; - uint16_t StackPtrReg; - int64_t StackOffset; - uint8_t Size; - bool IsSimple; - bool IsIndexed; - if (EMCIA->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, SrcReg, - SrcImm, StackPtrReg, StackOffset, Size, IsSimple, - IsIndexed)) { - // TODO make sure that simple means that it's store and does nothing - // more. - if (IsLoad && IsSimple && StackPtrReg == FromReg) { - Offset = StackOffset; - LoadingReg = SrcReg; - return true; - } - } - } + checkCFADiff(Inst, State, AfterState, Reads, Writes); - { - if (EMCIA->isMoveMem2Reg(Inst)) { - auto X86MemAccess = EMCIA->evaluateX86MemoryOperand(Inst).value(); - if (X86MemAccess.BaseRegNum == FromReg && - (X86MemAccess.ScaleImm == 0 || X86MemAccess.IndexRegNum == 0) && - !X86MemAccess.DispExpr) { - LoadingReg = Inst.getOperand(0).getReg(); - Offset = X86MemAccess.DispImm; - return true; - } - } + for (auto &&[Reg, RegState] : State.RegisterCFIStates) { + assert(AfterState.RegisterCFIStates.count(Reg) && + "Registers' state should not be deleted by CFI instruction."); + checkRegDiff(Inst, Reg, State, AfterState, RegState, + AfterState.RegisterCFIStates[Reg], Reads, Writes); } - return false; + State = AfterState; } +private: void checkRegDiff(const MCInst &Inst, DWARFRegType Reg, const CFIState &PrevState, const CFIState &NextState, const RegisterCFIState &PrevRegState, @@ -272,8 +194,8 @@ class CFIAnalysis { PrevRegState.RetrieveApproach == RegisterCFIState::AnotherRegister) { int64_t OffsetFromCFAReg; - if (doStoreFromReg(Inst, PrevRefRegLLVM.value(), PrevStateCFARegLLVM, - OffsetFromCFAReg)) { + if (EMCIA->doStoreFromReg(Inst, PrevRefRegLLVM.value(), + PrevStateCFARegLLVM, OffsetFromCFAReg)) { PossibleNextRegStates.push_back( RegisterCFIState::createOffsetFromCFAAddr(OffsetFromCFAReg - PrevState.CFAOffset)); @@ -286,8 +208,8 @@ class CFIAnalysis { RegisterCFIState::OffsetFromCFAAddr) { int64_t OffsetFromCFAReg; MCPhysReg ToRegLLVM; - if (doLoadFromReg(Inst, PrevStateCFARegLLVM, OffsetFromCFAReg, - ToRegLLVM) && + if (EMCIA->doLoadFromReg(Inst, PrevStateCFARegLLVM, OffsetFromCFAReg, + ToRegLLVM) && OffsetFromCFAReg - PrevState.CFAOffset == PrevRegState.Info.OffsetFromCFA) { DWARFRegType ToReg = MCRI->getDwarfRegNum(ToRegLLVM, false); @@ -309,8 +231,8 @@ class CFIAnalysis { int Diff; MCPhysReg PossibleRegLLVM; - if (isInConstantDistanceOfEachOther(Inst, PossibleRegLLVM, - PrevRefRegLLVM.value(), Diff)) { + if (EMCIA->isInConstantDistanceOfEachOther( + Inst, PossibleRegLLVM, PrevRefRegLLVM.value(), Diff)) { DWARFRegType PossibleReg = MCRI->getDwarfRegNum(PossibleRegLLVM, false); if (Diff == 0) { @@ -437,7 +359,7 @@ class CFIAnalysis { { // const change int64_t HowMuch; - if (doesConstantChange(Inst, PrevCFAPhysReg, HowMuch)) { + if (EMCIA->doesConstantChange(Inst, PrevCFAPhysReg, HowMuch)) { PossibleNextCFAStates.emplace_back(PrevState.CFARegister, PrevState.CFAOffset - HowMuch); } @@ -446,8 +368,8 @@ class CFIAnalysis { { // constant distance with each other int Diff; MCPhysReg PossibleNewCFAReg; - if (isInConstantDistanceOfEachOther(Inst, PossibleNewCFAReg, - PrevCFAPhysReg, Diff)) { + if (EMCIA->isInConstantDistanceOfEachOther(Inst, PossibleNewCFAReg, + PrevCFAPhysReg, Diff)) { PossibleNextCFAStates.emplace_back( MCRI->getDwarfRegNum(PossibleNewCFAReg, false), PrevState.CFAOffset - Diff); @@ -516,194 +438,6 @@ class CFIAnalysis { "assume CFA is preserved."); // Everything may be ok! } - - void update(MCInst &Inst, ArrayRef CFIDirectives) { - const MCInstrDesc &MCInstInfo = MCII.get(Inst.getOpcode()); - CFIState AfterState(State); - for (auto &&CFIDirective : CFIDirectives) - if (!AfterState.apply(CFIDirective)) - Context.reportWarning( - CFIDirective.getLoc(), - "I don't support this CFI directive, I assume this does nothing " - "(which will probably break other things)"); - - std::set Writes, Reads; - for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) - Reads.insert(MCRI->getDwarfRegNum( - getSuperReg(MCInstInfo.implicit_uses()[I]), false)); - for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) - Writes.insert(MCRI->getDwarfRegNum( - getSuperReg(MCInstInfo.implicit_defs()[I]), false)); - - for (unsigned I = 0; I < Inst.getNumOperands(); I++) { - auto &&Operand = Inst.getOperand(I); - if (Operand.isReg()) { - if (I < MCInstInfo.getNumDefs()) - Writes.insert( - MCRI->getDwarfRegNum(getSuperReg(Operand.getReg()), false)); - else if (Operand.getReg()) - Reads.insert( - MCRI->getDwarfRegNum(getSuperReg(Operand.getReg()), false)); - } - } - - ///////////// being diffing the CFI states - checkCFADiff(Inst, State, AfterState, Reads, Writes); - - for (auto &&[Reg, RegState] : State.RegisterCFIStates) { - assert(AfterState.RegisterCFIStates.count(Reg) && - "Registers' state should not be deleted by CFI instruction."); - checkRegDiff(Inst, Reg, State, AfterState, RegState, - AfterState.RegisterCFIStates[Reg], Reads, Writes); - } - ///////////// end diffing the CFI states - - // dbgs() << "^^^^^^^^^^^^^^^^ InsCFIs ^^^^^^^^^^^^^^^^\n"; - // printUntilNextLine(Inst.getLoc().getPointer()); - // dbgs() << "\n"; - // dbgs() << "Op code: " << Inst.getOpcode() << "\n"; - // dbgs() << "--------------Operands Info--------------\n"; - // auto DefCount = MCInstInfo.getNumDefs(); - // for (unsigned I = 0; I < Inst.getNumOperands(); I++) { - // dbgs() << "Operand #" << I << ", which will be " - // << (I < DefCount ? "defined" : "used") << ", is a"; - // if (I == EMCIA->getMemoryOperandNo(Inst)) { - // dbgs() << " memory access, details are:\n"; - // auto X86MemoryOperand = EMCIA->evaluateX86MemoryOperand(Inst); - // dbgs() << " Base Register{ reg#" << X86MemoryOperand->BaseRegNum - // << " }"; - // dbgs() << " + (Index Register{ reg#" << - // X86MemoryOperand->IndexRegNum - // << " }"; - // dbgs() << " * Scale{ value " << X86MemoryOperand->ScaleImm << " }"; - // dbgs() << ") + Displacement{ " - // << (X86MemoryOperand->DispExpr - // ? "an expression" - // : "value " + itostr(X86MemoryOperand->DispImm)) - // << " }\n"; - // // TODO, it's not correct always, it cannot be assumed (or should - // be - // // checked) that memory operands are flatten into 4 operands in - // MCInc. I += 4; continue; - // } - // auto &&Operand = Inst.getOperand(I); - // if (Operand.isImm()) { - // dbgs() << "n immediate with value " << Operand.getImm() << "\n"; - // continue; - // } - // if (Operand.isReg()) { - // dbgs() << " reg#" << Operand.getReg() << "\n"; - // continue; - // } - // assert(Operand.isExpr()); - // dbgs() << "n unknown expression \n"; - // } - // if (MCInstInfo.NumImplicitDefs) { - // dbgs() << "implicitly defines: { "; - // for (unsigned I = 0; I < MCInstInfo.NumImplicitDefs; I++) { - // dbgs() << "reg#" << MCInstInfo.implicit_defs()[I] << " "; - // } - // dbgs() << "}\n"; - // } - // if (MCInstInfo.NumImplicitUses) { - // dbgs() << "implicitly uses: { "; - // for (unsigned I = 0; I < MCInstInfo.NumImplicitUses; I++) { - // dbgs() << "reg#" << MCInstInfo.implicit_uses()[I] << " "; - // } - // dbgs() << "}\n"; - // } - // dbgs() << "----------------Move Info----------------\n"; - // { // move - // { // Reg2Reg - // MCPhysReg From, To; - // if (EMCIA->isRegToRegMove(Inst, From, To)) { - // dbgs() << "It's a reg to reg move.\nFrom reg#" << From << " to - // reg - // #" - // << To << "\n"; - // } else if (MCInstInfo.isMoveReg()) { - // dbgs() << "It's reg to reg move from MCInstInfo view but not from - // " - // "MCPlus view.\n"; - // } - // } - // if (EMCIA->isConditionalMove(Inst)) { - // dbgs() << "Its a conditional move.\n"; - // } - // if (EMCIA->isMoveMem2Reg(Inst)) { - // dbgs() << "It's a move from memory to register\n"; - // assert(EMCIA->getMemoryOperandNo(Inst) == 1); - // } - // } - - // dbgs() << "---------------Stack Info----------------\n"; - // { // stack - // int32_t SrcImm = 0; - // MCPhysReg Reg = 0; - // uint16_t StackPtrReg = 0; - // int64_t StackOffset = 0; - // uint8_t Size = 0; - // bool IsSimple = false; - // bool IsIndexed = false; - // bool IsLoad = false; - // bool IsStore = false; - // bool IsStoreFromReg = false; - // if (EMCIA->isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, Reg, - // SrcImm, StackPtrReg, StackOffset, Size, - // IsSimple, IsIndexed)) { - // dbgs() << "This instruction accesses the stack, the details - // are:\n"; dbgs() << " Source immediate: " << SrcImm << "\n"; dbgs() - // << " Source reg#" << Reg << "\n"; dbgs() << " Stack pointer: - // reg#" << StackPtrReg << "\n"; dbgs() << " Stack offset: " << - // StackOffset << "\n"; dbgs() << " size: " << (int)Size << "\n"; - // dbgs() << " Is simple: " << (IsSimple ? "yes" : "no") << "\n"; - // dbgs() << " Is indexed: " << (IsIndexed ? "yes" : "no") << "\n"; - // dbgs() << " Is load: " << (IsLoad ? "yes" : "no") << "\n"; - // dbgs() << " Is store: " << (IsStore ? "yes" : "no") << "\n"; - // dbgs() << " Is store from reg: " << (IsStoreFromReg ? "yes" : - // "no") - // << "\n"; - // } - // if (EMCIA->isPush(Inst)) { - // dbgs() << "This is a push instruction with size " - // << EMCIA->getPushSize(Inst) << "\n"; - // } - // if (EMCIA->isPop(Inst)) { - // dbgs() << "This is a pop instruction with size " - // << EMCIA->getPopSize(Inst) << "\n"; - // } - // } - - // dbgs() << "---------------Arith Info----------------\n"; - // { // arith - // /* MutableArrayRef MAR = {Inst}; - // if (MCPB->matchAdd(MCPB->matchAnyOperand(), MCPB->matchAnyOperand()) - // ->match(*MCRI, *MCPB, MutableArrayRef(), -1)) { - // dbgs() << "It is an addition instruction.\n"; - // } else */ - // if (MCInstInfo.isAdd()) { - // dbgs() << "It is an addition from MCInstInfo view but not from - // MCPlus" - // "view.\n"; - // } - // if (EMCIA->isSUB(Inst)) { - // dbgs() << "This is a subtraction.\n"; - // } - // } - - // dbgs() << "-----------------------------------------\n"; - // dbgs() << "The CFA register is: " << CFAReg << "\n"; - // dbgs() << "The instruction does " << (ChangedCFA ? "" : "NOT ") - // << "change the CFA.\n"; - // dbgs() << "The CFI directives does " << (GoingToChangeCFA ? "" : "NOT - // ") - // << "change the CFA.\n"; - // dbgs() << "vvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv\n"; - - State = AfterState; - } - -private: }; } // namespace llvm diff --git a/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h b/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h index 547d132671f45..dd23063555053 100644 --- a/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h +++ b/llvm/tools/llvm-mc/ExtendedMCInstrAnalysis.h @@ -46,6 +46,126 @@ class ExtendedMCInstrAnalysis { MCPhysReg getStackPointer() const { return MCPB->getStackPointer(); } MCPhysReg getFlagsReg() const { return MCPB->getFlagsReg(); } + bool doesConstantChange(const MCInst &Inst, MCPhysReg Reg, int64_t &HowMuch) { + if (isPush(Inst) && Reg == getStackPointer()) { + // TODO should get the stack direction here, now it assumes that it goes + // down. + HowMuch = -getPushSize(Inst); + return true; + } + + if (isPop(Inst) && Reg == getStackPointer()) { + // TODO should get the stack direction here, now it assumes that it goes + // down. + HowMuch = getPopSize(Inst); + return true; + } + + return false; + } + + // Tries to guess Reg1's value in a form of Reg2 (before Inst's execution) + + // Diff. + bool isInConstantDistanceOfEachOther(const MCInst &Inst, MCPhysReg &Reg1, + MCPhysReg Reg2, int &Diff) { + { + MCPhysReg From; + MCPhysReg To; + if (isRegToRegMove(Inst, From, To) && From == Reg2) { + Reg1 = To; + Diff = 0; + return true; + } + } + + return false; + } + + bool doStoreFromReg(const MCInst &Inst, MCPhysReg StoringReg, + MCPhysReg FromReg, int64_t &Offset) { + if (isPush(Inst) && FromReg == getStackPointer()) { + // TODO should get the stack direction here, now it assumes that it goes + // down. + Offset = -getPushSize(Inst); + return true; + } + + { + bool IsLoad; + bool IsStore; + bool IsStoreFromReg; + MCPhysReg SrcReg; + int32_t SrcImm; + uint16_t StackPtrReg; + int64_t StackOffset; + uint8_t Size; + bool IsSimple; + bool IsIndexed; + if (isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, SrcReg, SrcImm, + StackPtrReg, StackOffset, Size, IsSimple, IsIndexed)) { + // TODO make sure that simple means that it's store and does nothing + // more. + if (IsStore && IsSimple && StackPtrReg == FromReg && IsStoreFromReg && + SrcReg == StoringReg) { + Offset = StackOffset; + return true; + } + } + } + + return false; + } + + bool doLoadFromReg(const MCInst &Inst, MCPhysReg FromReg, int64_t &Offset, + MCPhysReg &LoadingReg) { + if (isPop(Inst) && FromReg == getStackPointer()) { + // TODO should get the stack direction here, now it assumes that it goes + // down. + Offset = 0; + LoadingReg = Inst.getOperand(0).getReg(); + return true; + } + + { + bool IsLoad; + bool IsStore; + bool IsStoreFromReg; + MCPhysReg SrcReg; + int32_t SrcImm; + uint16_t StackPtrReg; + int64_t StackOffset; + uint8_t Size; + bool IsSimple; + bool IsIndexed; + if (isStackAccess(Inst, IsLoad, IsStore, IsStoreFromReg, SrcReg, SrcImm, + StackPtrReg, StackOffset, Size, IsSimple, IsIndexed)) { + // TODO make sure that simple means that it's store and does nothing + // more. + if (IsLoad && IsSimple && StackPtrReg == FromReg) { + Offset = StackOffset; + LoadingReg = SrcReg; + return true; + } + } + } + + { + if (isMoveMem2Reg(Inst)) { + auto X86MemAccess = evaluateX86MemoryOperand(Inst).value(); + if (X86MemAccess.BaseRegNum == FromReg && + (X86MemAccess.ScaleImm == 0 || X86MemAccess.IndexRegNum == 0) && + !X86MemAccess.DispExpr) { + LoadingReg = Inst.getOperand(0).getReg(); + Offset = X86MemAccess.DispImm; + return true; + } + } + } + + return false; + } + +private: bool isPush(const MCInst &Inst) const { return MCPB->isPush(Inst); } int getPushSize(const MCInst &Inst) const { return MCPB->getPushSize(Inst); } From 6a58a99ce57711a4652e196f06ae6db293c50e55 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Wed, 21 May 2025 17:12:04 +0000 Subject: [PATCH 21/22] Remove unused includes --- llvm/tools/llvm-mc/CFIState.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/llvm/tools/llvm-mc/CFIState.h b/llvm/tools/llvm-mc/CFIState.h index 1b66d0dd40904..4ffcda3c5063a 100644 --- a/llvm/tools/llvm-mc/CFIState.h +++ b/llvm/tools/llvm-mc/CFIState.h @@ -2,11 +2,8 @@ #define LLVM_TOOLS_LLVM_MC_CFI_STATE_H #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringRef.h" #include "llvm/MC/MCDwarf.h" -#include "llvm/MC/MCRegister.h" #include "llvm/Support/FormatVariadic.h" -#include "llvm/Support/MathExtras.h" #include #include #include From 63b8cfbf95df8a8300edf0e2d44e029e41a1b629 Mon Sep 17 00:00:00 2001 From: Amirhossein Pashaeehir Date: Thu, 22 May 2025 18:40:39 +0000 Subject: [PATCH 22/22] Test cfi validator on following cases: - Changing the register and not informing it using cfi directives - Changing the CFA and report it wrong - Load a register from another register stored location --- .../cfi-validation/single-func-cfa-mistake.s | 33 +++++++++++++ .../single-func-missed-cfi-directive.s | 32 +++++++++++++ .../llvm-mc/cfi-validation/single-func.s | 31 ++++++++++++ .../cfi-validation/spill-two-reg-reversed.s | 47 +++++++++++++++++++ .../llvm-mc/cfi-validation/spill-two-reg.s | 43 +++++++++++++++++ .../cfi-validation/update-with-no-cfi.s | 31 ++++++++++++ 6 files changed, 217 insertions(+) create mode 100644 llvm/test/tools/llvm-mc/cfi-validation/single-func-cfa-mistake.s create mode 100644 llvm/test/tools/llvm-mc/cfi-validation/single-func-missed-cfi-directive.s create mode 100644 llvm/test/tools/llvm-mc/cfi-validation/single-func.s create mode 100644 llvm/test/tools/llvm-mc/cfi-validation/spill-two-reg-reversed.s create mode 100644 llvm/test/tools/llvm-mc/cfi-validation/spill-two-reg.s create mode 100644 llvm/test/tools/llvm-mc/cfi-validation/update-with-no-cfi.s diff --git a/llvm/test/tools/llvm-mc/cfi-validation/single-func-cfa-mistake.s b/llvm/test/tools/llvm-mc/cfi-validation/single-func-cfa-mistake.s new file mode 100644 index 0000000000000..eb55ff6b378e8 --- /dev/null +++ b/llvm/test/tools/llvm-mc/cfi-validation/single-func-cfa-mistake.s @@ -0,0 +1,33 @@ +# RUN: not llvm-mc %s --validate-cfi --filetype=null 2>&1 \ +# RUN: | FileCheck %s + .text + .globl f + .type f,@function +f: + .cfi_startproc + + .cfi_undefined %rax + + pushq %rbp + # CHECK: error: Expected CFA [reg: 61, offset: 16] but got [reg: 61, offset: 17] + .cfi_def_cfa_offset 17 + .cfi_offset %rbp, -16 + + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + movl %edi, -4(%rbp) + + movl -4(%rbp), %eax + + addl $10, %eax + + popq %rbp + .cfi_def_cfa %rsp, 8 + + retq + +.Lfunc_end0: + .size f, .Lfunc_end0-f + + .cfi_endproc diff --git a/llvm/test/tools/llvm-mc/cfi-validation/single-func-missed-cfi-directive.s b/llvm/test/tools/llvm-mc/cfi-validation/single-func-missed-cfi-directive.s new file mode 100644 index 0000000000000..044c0bbf88eb6 --- /dev/null +++ b/llvm/test/tools/llvm-mc/cfi-validation/single-func-missed-cfi-directive.s @@ -0,0 +1,32 @@ +# RUN: not llvm-mc %s --validate-cfi --filetype=null 2>&1 \ +# RUN: | FileCheck %s + .text + .globl f + .type f,@function +f: + .cfi_startproc + + .cfi_undefined %rax + + pushq %rbp + .cfi_def_cfa_offset 16 + + movq %rsp, %rbp + # CHECK: error: Reg#52 caller's value is in reg#52 which is changed by this instruction, but not changed in CFI directives + .cfi_def_cfa_register %rbp + + movl %edi, -4(%rbp) + + movl -4(%rbp), %eax + + addl $10, %eax + + popq %rbp + .cfi_def_cfa %rsp, 8 + + retq + +.Lfunc_end0: + .size f, .Lfunc_end0-f + + .cfi_endproc diff --git a/llvm/test/tools/llvm-mc/cfi-validation/single-func.s b/llvm/test/tools/llvm-mc/cfi-validation/single-func.s new file mode 100644 index 0000000000000..b2615eafce1d5 --- /dev/null +++ b/llvm/test/tools/llvm-mc/cfi-validation/single-func.s @@ -0,0 +1,31 @@ +# RUN: llvm-mc %s --validate-cfi --filetype=null + .text + .globl f + .type f,@function +f: + .cfi_startproc + + .cfi_undefined %rax + + pushq %rbp + .cfi_def_cfa_offset 16 + .cfi_offset %rbp, -16 + + movq %rsp, %rbp + .cfi_def_cfa_register %rbp + + movl %edi, -4(%rbp) + + movl -4(%rbp), %eax + + addl $10, %eax + + popq %rbp + .cfi_def_cfa %rsp, 8 + + retq + +.Lfunc_end0: + .size f, .Lfunc_end0-f + + .cfi_endproc diff --git a/llvm/test/tools/llvm-mc/cfi-validation/spill-two-reg-reversed.s b/llvm/test/tools/llvm-mc/cfi-validation/spill-two-reg-reversed.s new file mode 100644 index 0000000000000..d7baf3e871e87 --- /dev/null +++ b/llvm/test/tools/llvm-mc/cfi-validation/spill-two-reg-reversed.s @@ -0,0 +1,47 @@ +# RUN: not llvm-mc %s --validate-cfi --filetype=null 2>&1 \ +# RUN: | FileCheck %s + .text + .type _start,@function + .globl _start + .hidden _start +_start: + .cfi_startproc + + .cfi_same_value %rdi + .cfi_same_value %rsi + + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + + movq %rsp, %rbp + + pushq %rdi + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rdi, 0 + + pushq %rsi + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rsi, 0 + + popq %rsi + # CHECK: warning: The reg#55 CFI state is changed + .cfi_adjust_cfa_offset -8 + .cfi_same_value %rdi + + popq %rdi + # CHECK: warning: The reg#60 CFI state is changed + # CHECK: Reg#55 caller's value is in reg#55 which is changed by this instruction, but not changed in CFI directives + .cfi_adjust_cfa_offset -8 + .cfi_same_value %rsi + + popq %rbp + .cfi_adjust_cfa_offset -8 + .cfi_same_value %rbp + + retq + + .cfi_endproc +.Ltmp0: + .size _start, .Ltmp0-_start + .text diff --git a/llvm/test/tools/llvm-mc/cfi-validation/spill-two-reg.s b/llvm/test/tools/llvm-mc/cfi-validation/spill-two-reg.s new file mode 100644 index 0000000000000..70ea3b49d8c75 --- /dev/null +++ b/llvm/test/tools/llvm-mc/cfi-validation/spill-two-reg.s @@ -0,0 +1,43 @@ +# RUN: llvm-mc %s --validate-cfi --filetype=null + .text + .type _start,@function + .globl _start + .hidden _start +_start: + .cfi_startproc + + .cfi_same_value %rdi + .cfi_same_value %rsi + + pushq %rbp + .cfi_adjust_cfa_offset 8 + .cfi_offset %rbp, -16 + + movq %rsp, %rbp + + pushq %rdi + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rdi, 0 + + pushq %rsi + .cfi_adjust_cfa_offset 8 + .cfi_rel_offset %rsi, 0 + + popq %rsi + .cfi_adjust_cfa_offset -8 + .cfi_same_value %rsi + + popq %rdi + .cfi_adjust_cfa_offset -8 + .cfi_same_value %rdi + + popq %rbp + .cfi_adjust_cfa_offset -8 + .cfi_same_value %rbp + + retq + + .cfi_endproc +.Ltmp0: + .size _start, .Ltmp0-_start + .text diff --git a/llvm/test/tools/llvm-mc/cfi-validation/update-with-no-cfi.s b/llvm/test/tools/llvm-mc/cfi-validation/update-with-no-cfi.s new file mode 100644 index 0000000000000..1384d8a864608 --- /dev/null +++ b/llvm/test/tools/llvm-mc/cfi-validation/update-with-no-cfi.s @@ -0,0 +1,31 @@ +# RUN: not llvm-mc %s --validate-cfi --filetype=null 2>&1 \ +# RUN: | FileCheck %s + .text + .globl f + .type f,@function +f: + .cfi_startproc + + .cfi_same_value %rax + .cfi_same_value %rbx + .cfi_same_value %rcx + .cfi_same_value %rdx + + movq $10, %rax + # CHECK: error: Reg#51 caller's value is in reg#51 which is changed by this instruction, but not changed in CFI directives + + movq $10, %rbx + # CHECK: error: Reg#53 caller's value is in reg#53 which is changed by this instruction, but not changed in CFI directives + + movq $10, %rcx + # CHECK: error: Reg#54 caller's value is in reg#54 which is changed by this instruction, but not changed in CFI directives + + movq $10, %rdx + # CHECK: error: Reg#56 caller's value is in reg#56 which is changed by this instruction, but not changed in CFI directives + + retq + +.Lfunc_end0: + .size f, .Lfunc_end0-f + + .cfi_endproc