From 9761e5d53a0dc620889ca63d5e90d0110afbda7a Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Fri, 14 Mar 2025 18:29:05 +0300 Subject: [PATCH 1/3] [BOLT] Gadget scanner: factor out utility code Factor out the code for mapping from physical registers to consecutive array indexes. Introduce helper functions to print instructions and registers to prevent mixing of analysis logic and implementation details of debug output. Removed the debug printing from `Gadget::generateReport`, as it doesn't seem to add important information to what was already printed in the report itself. --- .../lib/Passes/NonPacProtectedRetAnalysis.cpp | 136 +++++++++++------- 1 file changed, 84 insertions(+), 52 deletions(-) diff --git a/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp b/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp index dc7cb275f5664..77a16379a14b9 100644 --- a/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp +++ b/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp @@ -14,6 +14,7 @@ #include "bolt/Passes/NonPacProtectedRetAnalysis.h" #include "bolt/Core/ParallelUtilities.h" #include "bolt/Passes/DataflowAnalysis.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/MC/MCInst.h" #include "llvm/Support/Format.h" @@ -58,6 +59,71 @@ raw_ostream &operator<<(raw_ostream &OS, const MCInstReference &Ref) { namespace NonPacProtectedRetAnalysis { +static void traceInst(const BinaryContext &BC, StringRef Label, + const MCInst &MI) { + dbgs() << " " << Label << ": "; + BC.printInstruction(dbgs(), MI); +} + +static void traceReg(const BinaryContext &BC, StringRef Label, + ErrorOr Reg) { + dbgs() << " " << Label << ": "; + if (Reg.getError()) + dbgs() << "(error)"; + else if (*Reg == BC.MIB->getNoRegister()) + dbgs() << "(none)"; + else + dbgs() << BC.MRI->getName(*Reg); + dbgs() << "\n"; +} + +static void traceRegMask(const BinaryContext &BC, StringRef Label, + BitVector Mask) { + dbgs() << " " << Label << ": "; + RegStatePrinter(BC).print(dbgs(), Mask); + dbgs() << "\n"; +} + +// This class represents mapping from arbitrary physical registers to +// consecutive array indexes. +class TrackedRegisters { + static const uint16_t NoIndex = -1; + const std::vector Registers; + std::vector RegToIndexMapping; + + static size_t getMappingSize(const std::vector &RegsToTrack) { + if (RegsToTrack.empty()) + return 0; + return 1 + *llvm::max_element(RegsToTrack); + } + +public: + TrackedRegisters(const std::vector &RegsToTrack) + : Registers(RegsToTrack), + RegToIndexMapping(getMappingSize(RegsToTrack), NoIndex) { + for (unsigned I = 0; I < RegsToTrack.size(); ++I) + RegToIndexMapping[RegsToTrack[I]] = I; + } + + const ArrayRef getRegisters() const { return Registers; } + + size_t getNumTrackedRegisters() const { return Registers.size(); } + + bool empty() const { return Registers.empty(); } + + bool isTracked(MCPhysReg Reg) const { + bool IsTracked = (unsigned)Reg < RegToIndexMapping.size() && + RegToIndexMapping[Reg] != NoIndex; + assert(IsTracked == llvm::is_contained(Registers, Reg)); + return IsTracked; + } + + unsigned getIndex(MCPhysReg Reg) const { + assert(isTracked(Reg) && "Register is not tracked"); + return RegToIndexMapping[Reg]; + } +}; + // The security property that is checked is: // When a register is used as the address to jump to in a return instruction, // that register must either: @@ -169,52 +235,34 @@ class PacRetAnalysis PacRetAnalysis(BinaryFunction &BF, MCPlusBuilder::AllocatorIdTy AllocId, const std::vector &RegsToTrackInstsFor) : Parent(BF, AllocId), NumRegs(BF.getBinaryContext().MRI->getNumRegs()), - RegsToTrackInstsFor(RegsToTrackInstsFor), - TrackingLastInsts(!RegsToTrackInstsFor.empty()), - Reg2StateIdx(RegsToTrackInstsFor.empty() - ? 0 - : *llvm::max_element(RegsToTrackInstsFor) + 1, - -1) { - for (unsigned I = 0; I < RegsToTrackInstsFor.size(); ++I) - Reg2StateIdx[RegsToTrackInstsFor[I]] = I; - } + RegsToTrackInstsFor(RegsToTrackInstsFor) {} virtual ~PacRetAnalysis() {} protected: const unsigned NumRegs; /// RegToTrackInstsFor is the set of registers for which the dataflow analysis /// must compute which the last set of instructions writing to it are. - const std::vector RegsToTrackInstsFor; - const bool TrackingLastInsts; - /// Reg2StateIdx maps Register to the index in the vector used in State to - /// track which instructions last wrote to this register. - std::vector Reg2StateIdx; + const TrackedRegisters RegsToTrackInstsFor; SmallPtrSet &lastWritingInsts(State &S, MCPhysReg Reg) const { - assert(Reg < Reg2StateIdx.size()); - assert(isTrackingReg(Reg)); - return S.LastInstWritingReg[Reg2StateIdx[Reg]]; + unsigned Index = RegsToTrackInstsFor.getIndex(Reg); + return S.LastInstWritingReg[Index]; } const SmallPtrSet &lastWritingInsts(const State &S, MCPhysReg Reg) const { - assert(Reg < Reg2StateIdx.size()); - assert(isTrackingReg(Reg)); - return S.LastInstWritingReg[Reg2StateIdx[Reg]]; - } - - bool isTrackingReg(MCPhysReg Reg) const { - return llvm::is_contained(RegsToTrackInstsFor, Reg); + unsigned Index = RegsToTrackInstsFor.getIndex(Reg); + return S.LastInstWritingReg[Index]; } void preflight() {} State getStartingStateAtBB(const BinaryBasicBlock &BB) { - return State(NumRegs, RegsToTrackInstsFor.size()); + return State(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters()); } State getStartingStateAtPoint(const MCInst &Point) { - return State(NumRegs, RegsToTrackInstsFor.size()); + return State(NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters()); } void doConfluence(State &StateOut, const State &StateIn) { @@ -275,7 +323,7 @@ class PacRetAnalysis Next.NonAutClobRegs |= Written; // Keep track of this instruction if it writes to any of the registers we // need to track that for: - for (MCPhysReg Reg : RegsToTrackInstsFor) + for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters()) if (Written[Reg]) lastWritingInsts(Next, Reg) = {&Point}; @@ -287,7 +335,7 @@ class PacRetAnalysis // https://github.com/llvm/llvm-project/pull/122304#discussion_r1939515516 Next.NonAutClobRegs.reset( BC.MIB->getAliases(*AutReg, /*OnlySmaller=*/true)); - if (TrackingLastInsts && isTrackingReg(*AutReg)) + if (RegsToTrackInstsFor.isTracked(*AutReg)) lastWritingInsts(Next, *AutReg).clear(); } @@ -306,7 +354,7 @@ class PacRetAnalysis std::vector getLastClobberingInsts(const MCInst Ret, BinaryFunction &BF, const BitVector &UsedDirtyRegs) const { - if (!TrackingLastInsts) + if (RegsToTrackInstsFor.empty()) return {}; auto MaybeState = getStateAt(Ret); if (!MaybeState) @@ -355,28 +403,18 @@ Analysis::computeDfState(PacRetAnalysis &PRA, BinaryFunction &BF, } MCPhysReg RetReg = *MaybeRetReg; LLVM_DEBUG({ - dbgs() << " Found RET inst: "; - BC.printInstruction(dbgs(), Inst); - dbgs() << " RetReg: " << BC.MRI->getName(RetReg) - << "; authenticatesReg: " - << BC.MIB->isAuthenticationOfReg(Inst, RetReg) << "\n"; + traceInst(BC, "Found RET inst", Inst); + traceReg(BC, "RetReg", RetReg); + traceReg(BC, "Authenticated reg", BC.MIB->getAuthenticatedReg(Inst)); }); if (BC.MIB->isAuthenticationOfReg(Inst, RetReg)) break; BitVector UsedDirtyRegs = PRA.getStateAt(Inst)->NonAutClobRegs; - LLVM_DEBUG({ - dbgs() << " NonAutClobRegs at Ret: "; - RegStatePrinter RSP(BC); - RSP.print(dbgs(), UsedDirtyRegs); - dbgs() << "\n"; - }); + LLVM_DEBUG( + { traceRegMask(BC, "NonAutClobRegs at Ret", UsedDirtyRegs); }); UsedDirtyRegs &= BC.MIB->getAliases(RetReg, /*OnlySmaller=*/true); - LLVM_DEBUG({ - dbgs() << " Intersection with RetReg: "; - RegStatePrinter RSP(BC); - RSP.print(dbgs(), UsedDirtyRegs); - dbgs() << "\n"; - }); + LLVM_DEBUG( + { traceRegMask(BC, "Intersection with RetReg", UsedDirtyRegs); }); if (UsedDirtyRegs.any()) { // This return instruction needs to be reported Result.Diagnostics.push_back(std::make_shared( @@ -472,12 +510,6 @@ void Gadget::generateReport(raw_ostream &OS, const BinaryContext &BC) const { OS << " " << (I + 1) << ". "; BC.printInstruction(OS, InstRef, InstRef.getAddress(), BF); }; - LLVM_DEBUG({ - dbgs() << " .. OverWritingRetRegInst:\n"; - for (MCInstReference Ref : OverwritingRetRegInst) { - dbgs() << " " << Ref << "\n"; - } - }); if (OverwritingRetRegInst.size() == 1) { const MCInstReference OverwInst = OverwritingRetRegInst[0]; assert(OverwInst.ParentKind == MCInstReference::BasicBlockParent); From aa84078388b0bd7705c482c06c7a2df6b45b734e Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Thu, 20 Mar 2025 14:29:52 +0300 Subject: [PATCH 2/3] Address review comments --- .../lib/Passes/NonPacProtectedRetAnalysis.cpp | 2 +- .../AArch64/gs-pauth-debug-output.s | 142 ++++++++++++++++++ 2 files changed, 143 insertions(+), 1 deletion(-) create mode 100644 bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s diff --git a/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp b/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp index 77a16379a14b9..fb21098df97f4 100644 --- a/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp +++ b/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp @@ -84,7 +84,7 @@ static void traceRegMask(const BinaryContext &BC, StringRef Label, dbgs() << "\n"; } -// This class represents mapping from arbitrary physical registers to +// This class represents mapping from a set of arbitrary physical registers to // consecutive array indexes. class TrackedRegisters { static const uint16_t NoIndex = -1; diff --git a/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s new file mode 100644 index 0000000000000..d58fe20ed9d76 --- /dev/null +++ b/bolt/test/binary-analysis/AArch64/gs-pauth-debug-output.s @@ -0,0 +1,142 @@ +// REQUIRES: asserts +// +// RUN: %clang %cflags -march=armv8.3-a %s -o %t.exe +// RUN: llvm-bolt-binary-analysis --scanners=pacret -no-threads \ +// RUN: -debug-only bolt-nonpacprotectedret %t.exe 2>&1 | FileCheck %s + +// Check the debug output generated by PAuth gadget scanner to make sure the +// that output is kept meaningful and to provide an overview of what happens +// inside the scanner. + + .globl simple + .type simple,@function +simple: + paciasp + b 1f +1: + autiasp + ret + .size simple, .-simple + +// CHECK-LABEL:Analyzing in function simple, AllocatorId 1 +// CHECK-NEXT: Binary Function "simple" { +// CHECK-NEXT: Number : 1 +// CHECK-NEXT: State : CFG constructed +// ... +// CHECK: BB Layout : [[BB0:[0-9a-zA-Z.]+]], [[BB1:[0-9a-zA-Z.]+]] +// CHECK-NEXT: } +// CHECK-NEXT: [[BB0]] (2 instructions, align : 1) +// CHECK-NEXT: Entry Point +// CHECK-NEXT: 00000000: paciasp +// CHECK-NEXT: 00000004: b [[BB1]] +// CHECK-NEXT: Successors: [[BB1]] +// CHECK-EMPTY: +// CHECK-NEXT: [[BB1]] (2 instructions, align : 1) +// CHECK-NEXT: Predecessors: [[BB0]] +// CHECK-NEXT: 00000008: autiasp +// CHECK-NEXT: 0000000c: ret +// CHECK-EMPTY: +// CHECK-NEXT: DWARF CFI Instructions: +// CHECK-NEXT: +// CHECK-NEXT: End of Function "simple" +// CHECK-EMPTY: +// CHECK-NEXT: PacRetAnalysis::ComputeNext( hint #25, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: PacRetAnalysis::ComputeNext( b [[BB1]], pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: PacRetAnalysis::Confluence( +// CHECK-NEXT: State 1: pacret-state +// CHECK-NEXT: State 2: pacret-state) +// CHECK-NEXT: merged state: pacret-state +// CHECK-NEXT: PacRetAnalysis::ComputeNext( hint #29, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: PacRetAnalysis::Confluence( +// CHECK-NEXT: State 1: pacret-state +// CHECK-NEXT: State 2: pacret-state) +// CHECK-NEXT: merged state: pacret-state +// CHECK-NEXT: PacRetAnalysis::ComputeNext( hint #29, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: After PacRetAnalysis: +// CHECK-NEXT: Binary Function "simple" { +// CHECK-NEXT: Number : 1 +// CHECK-NEXT: State : CFG constructed +// ... +// CHECK: BB Layout : [[BB0]], [[BB1]] +// CHECK-NEXT: } +// CHECK-NEXT: [[BB0]] (2 instructions, align : 1) +// CHECK-NEXT: Entry Point +// CHECK-NEXT: 00000000: paciasp # PacRetAnalysis: pacret-state +// CHECK-NEXT: 00000004: b [[BB1]] # PacRetAnalysis: pacret-state +// CHECK-NEXT: Successors: [[BB1]] +// CHECK-EMPTY: +// CHECK-NEXT: [[BB1]] (2 instructions, align : 1) +// CHECK-NEXT: Predecessors: [[BB0]] +// CHECK-NEXT: 00000008: autiasp # PacRetAnalysis: pacret-state +// CHECK-NEXT: 0000000c: ret # PacRetAnalysis: pacret-state +// CHECK-EMPTY: +// CHECK-NEXT: DWARF CFI Instructions: +// CHECK-NEXT: +// CHECK-NEXT: End of Function "simple" +// CHECK-EMPTY: +// CHECK-NEXT: Found RET inst: 00000000: ret # PacRetAnalysis: pacret-state +// CHECK-NEXT: RetReg: LR +// CHECK-NEXT: Authenticated reg: (none) +// CHECK-NEXT: NonAutClobRegs at Ret:{{[ \t]*$}} +// CHECK-NEXT: Intersection with RetReg:{{[ \t]*$}} + + .globl clobber + .type clobber,@function +clobber: + mov w30, #0 + ret + .size clobber, .-clobber + +// CHECK-LABEL:Analyzing in function clobber, AllocatorId 1 +// ... +// CHECK: PacRetAnalysis::ComputeNext( mov w30, #0x0, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: After PacRetAnalysis: +// CHECK-NEXT: Binary Function "clobber" { +// ... +// CHECK: End of Function "clobber" + +// The above output was printed after first run of analysis + +// CHECK-EMPTY: +// CHECK-NEXT: Found RET inst: 00000000: ret # PacRetAnalysis: pacret-state +// CHECK-NEXT: RetReg: LR +// CHECK-NEXT: Authenticated reg: (none) +// CHECK-NEXT: NonAutClobRegs at Ret: W30 +// CHECK-NEXT: Intersection with RetReg: W30 +// CHECK-NEXT: PacRetAnalysis::ComputeNext( mov w30, #0x0, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: PacRetAnalysis::ComputeNext( ret x30, pacret-state) +// CHECK-NEXT: .. result: (pacret-state) +// CHECK-NEXT: After PacRetAnalysis: +// CHECK-NEXT: Binary Function "clobber" { +// ... +// CHECK: End of Function "clobber" + +// The analysis was re-computed with register tracking, as an issue was found in this function. +// Re-checking the instructions: + +// CHECK-EMPTY: +// CHECK-NEXT: Found RET inst: 00000000: ret # PacRetAnalysis: pacret-state +// CHECK-NEXT: RetReg: LR +// CHECK-NEXT: Authenticated reg: (none) +// CHECK-NEXT: NonAutClobRegs at Ret: W30 +// CHECK-NEXT: Intersection with RetReg: W30 + + +// CHECK-LABEL:Analyzing in function main, AllocatorId 1 + .globl main + .type main,@function +main: + ret + .size main, .-main From f57d97edb7d1852a0a70851120bd939aac33e122 Mon Sep 17 00:00:00 2001 From: Anatoly Trosinenko Date: Thu, 20 Mar 2025 14:38:45 +0300 Subject: [PATCH 3/3] Fix link error at -O0 --- bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp b/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp index fb21098df97f4..5bec32844447b 100644 --- a/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp +++ b/bolt/lib/Passes/NonPacProtectedRetAnalysis.cpp @@ -87,7 +87,7 @@ static void traceRegMask(const BinaryContext &BC, StringRef Label, // This class represents mapping from a set of arbitrary physical registers to // consecutive array indexes. class TrackedRegisters { - static const uint16_t NoIndex = -1; + static constexpr uint16_t NoIndex = -1; const std::vector Registers; std::vector RegToIndexMapping;