@@ -126,18 +126,16 @@ class TrackedRegisters {
126126
127127// The security property that is checked is:
128128// When a register is used as the address to jump to in a return instruction,
129- // that register must either:
130- // (a) never be changed within this function, i.e. have the same value as when
131- // the function started, or
129+ // that register must be safe-to-dereference. It must either
130+ // (a) be safe-to-dereference at function entry and never be changed within this
131+ // function, i.e. have the same value as when the function started, or
132132// (b) the last write to the register must be by an authentication instruction.
133133
134134// This property is checked by using dataflow analysis to keep track of which
135- // registers have been written (def-ed), since last authenticated. Those are
136- // exactly the registers containing values that should not be trusted (as they
137- // could have changed since the last time they were authenticated). For pac-ret,
138- // any return instruction using such a register is a gadget to be reported. For
139- // PAuthABI, probably at least any indirect control flow using such a register
140- // should be reported.
135+ // registers have been written (def-ed), since last authenticated. For pac-ret,
136+ // any return instruction using a register which is not safe-to-dereference is
137+ // a gadget to be reported. For PAuthABI, probably at least any indirect control
138+ // flow using such a register should be reported.
141139
142140// Furthermore, when producing a diagnostic for a found non-pac-ret protected
143141// return, the analysis also lists the last instructions that wrote to the
@@ -156,29 +154,62 @@ class TrackedRegisters {
156154// in the gadgets to be reported. This information is used in the second run
157155// to also track which instructions last wrote to those registers.
158156
157+ // / A state representing which registers are safe to use by an instruction
158+ // / at a given program point.
159+ // /
160+ // / To simplify reasoning, let's stick with the following approach:
161+ // / * when state is updated by the data-flow analysis, the sub-, super- and
162+ // / overlapping registers are marked as needed
163+ // / * when the particular instruction is checked if it represents a gadget,
164+ // / the specific bit of BitVector should be usable to answer this.
165+ // /
166+ // / For example, on AArch64:
167+ // / * An AUTIZA X0 instruction marks both X0 and W0 (as well as W0_HI) as
168+ // / safe-to-dereference. It does not change the state of X0_X1, for example,
169+ // / as super-registers partially retain their old, unsafe values.
170+ // / * LDR X1, [X0] marks as unsafe both X1 itself and anything it overlaps
171+ // / with: W1, W1_HI, X0_X1 and so on.
172+ // / * RET (which is implicitly RET X30) is a protected return if and only if
173+ // / X30 is safe-to-dereference - the state computed for sub- and
174+ // / super-registers is not inspected.
159175struct State {
160- // / A BitVector containing the registers that have been clobbered, and
161- // / not authenticated.
162- BitVector NonAutClobRegs;
176+ // / A BitVector containing the registers that are either safe at function
177+ // / entry and were not clobbered yet, or those not clobbered since being
178+ // / authenticated.
179+ BitVector SafeToDerefRegs;
163180 // / A vector of sets, only used in the second data flow run.
164181 // / Each element in the vector represents one of the registers for which we
165182 // / track the set of last instructions that wrote to this register. For
166183 // / pac-ret analysis, the expectation is that almost all return instructions
167184 // / only use register `X30`, and therefore, this vector will probably have
168185 // / length 1 in the second run.
169186 std::vector<SmallPtrSet<const MCInst *, 4 >> LastInstWritingReg;
187+
188+ // / Construct an empty state.
170189 State () {}
190+
171191 State (unsigned NumRegs, unsigned NumRegsToTrack)
172- : NonAutClobRegs(NumRegs), LastInstWritingReg(NumRegsToTrack) {}
173- State &operator |=(const State &StateIn) {
174- NonAutClobRegs |= StateIn.NonAutClobRegs ;
192+ : SafeToDerefRegs(NumRegs), LastInstWritingReg(NumRegsToTrack) {}
193+
194+ State &merge (const State &StateIn) {
195+ if (StateIn.empty ())
196+ return *this ;
197+ if (empty ())
198+ return (*this = StateIn);
199+
200+ SafeToDerefRegs &= StateIn.SafeToDerefRegs ;
175201 for (unsigned I = 0 ; I < LastInstWritingReg.size (); ++I)
176202 for (const MCInst *J : StateIn.LastInstWritingReg [I])
177203 LastInstWritingReg[I].insert (J);
178204 return *this ;
179205 }
206+
207+ // / Returns true if this object does not store state of any registers -
208+ // / neither safe, nor unsafe ones.
209+ bool empty () const { return SafeToDerefRegs.empty (); }
210+
180211 bool operator ==(const State &RHS) const {
181- return NonAutClobRegs == RHS.NonAutClobRegs &&
212+ return SafeToDerefRegs == RHS.SafeToDerefRegs &&
182213 LastInstWritingReg == RHS.LastInstWritingReg ;
183214 }
184215 bool operator !=(const State &RHS) const { return !((*this ) == RHS); }
@@ -199,8 +230,12 @@ static void printLastInsts(
199230
200231raw_ostream &operator <<(raw_ostream &OS, const State &S) {
201232 OS << " pacret-state<" ;
202- OS << " NonAutClobRegs: " << S.NonAutClobRegs << " , " ;
203- printLastInsts (OS, S.LastInstWritingReg );
233+ if (S.empty ()) {
234+ OS << " empty" ;
235+ } else {
236+ OS << " SafeToDerefRegs: " << S.SafeToDerefRegs << " , " ;
237+ printLastInsts (OS, S.LastInstWritingReg );
238+ }
204239 OS << " >" ;
205240 return OS;
206241}
@@ -217,10 +252,16 @@ class PacStatePrinter {
217252void PacStatePrinter::print (raw_ostream &OS, const State &S) const {
218253 RegStatePrinter RegStatePrinter (BC);
219254 OS << " pacret-state<" ;
220- OS << " NonAutClobRegs: " ;
221- RegStatePrinter.print (OS, S.NonAutClobRegs );
222- OS << " , " ;
223- printLastInsts (OS, S.LastInstWritingReg );
255+ if (S.empty ()) {
256+ assert (S.SafeToDerefRegs .empty ());
257+ assert (S.LastInstWritingReg .empty ());
258+ OS << " empty" ;
259+ } else {
260+ OS << " SafeToDerefRegs: " ;
261+ RegStatePrinter.print (OS, S.SafeToDerefRegs );
262+ OS << " , " ;
263+ printLastInsts (OS, S.LastInstWritingReg );
264+ }
224265 OS << " >" ;
225266}
226267
@@ -257,14 +298,22 @@ class PacRetAnalysis
257298
258299 void preflight () {}
259300
260- State getStartingStateAtBB (const BinaryBasicBlock &BB) {
261- return State (NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters ());
301+ State createEntryState () {
302+ State S (NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters ());
303+ for (MCPhysReg Reg : BC.MIB ->getTrustedLiveInRegs ())
304+ S.SafeToDerefRegs |= BC.MIB ->getAliases (Reg, /* OnlySmaller=*/ true );
305+ return S;
262306 }
263307
264- State getStartingStateAtPoint (const MCInst &Point) {
265- return State (NumRegs, RegsToTrackInstsFor.getNumTrackedRegisters ());
308+ State getStartingStateAtBB (const BinaryBasicBlock &BB) {
309+ if (BB.isEntryPoint ())
310+ return createEntryState ();
311+
312+ return State ();
266313 }
267314
315+ State getStartingStateAtPoint (const MCInst &Point) { return State (); }
316+
268317 void doConfluence (State &StateOut, const State &StateIn) {
269318 PacStatePrinter P (BC);
270319 LLVM_DEBUG ({
@@ -277,7 +326,7 @@ class PacRetAnalysis
277326 dbgs () << " )\n " ;
278327 });
279328
280- StateOut |= StateIn;
329+ StateOut. merge ( StateIn) ;
281330
282331 LLVM_DEBUG ({
283332 dbgs () << " merged state: " ;
@@ -297,8 +346,17 @@ class PacRetAnalysis
297346 dbgs () << " )\n " ;
298347 });
299348
349+ // If this instruction is reachable, a non-empty state will be propagated
350+ // to it from the entry basic block sooner or later. Until then, it is both
351+ // more efficient and easier to reason about to skip computeNext().
352+ if (Cur.empty ()) {
353+ LLVM_DEBUG (
354+ { dbgs () << " Skipping computeNext(Point, Cur) as Cur is empty.\n " ; });
355+ return State ();
356+ }
357+
300358 State Next = Cur;
301- BitVector Written = BitVector (NumRegs, false );
359+ BitVector Clobbered (NumRegs, false );
302360 // Assume a call can clobber all registers, including callee-saved
303361 // registers. There's a good chance that callee-saved registers will be
304362 // saved on the stack at some point during execution of the callee.
@@ -307,36 +365,27 @@ class PacRetAnalysis
307365 // Also, not all functions may respect the AAPCS ABI rules about
308366 // caller/callee-saved registers.
309367 if (BC.MIB ->isCall (Point))
310- Written .set ();
368+ Clobbered .set ();
311369 else
312- // FIXME: `getWrittenRegs` only sets the register directly written in the
313- // instruction, and the smaller aliasing registers. It does not set the
314- // larger aliasing registers. To also set the larger aliasing registers,
315- // we'd have to call `getClobberedRegs`.
316- // It is unclear if there is any test case which shows a different
317- // behaviour between using `getWrittenRegs` vs `getClobberedRegs`. We'd
318- // first would like to see such a test case before making a decision
319- // on whether using `getClobberedRegs` below would be better.
320- // Also see the discussion on this at
321- // https://github.com/llvm/llvm-project/pull/122304#discussion_r1939511909
322- BC.MIB ->getWrittenRegs (Point, Written);
323- Next.NonAutClobRegs |= Written;
370+ BC.MIB ->getClobberedRegs (Point, Clobbered);
371+ Next.SafeToDerefRegs .reset (Clobbered);
324372 // Keep track of this instruction if it writes to any of the registers we
325373 // need to track that for:
326374 for (MCPhysReg Reg : RegsToTrackInstsFor.getRegisters ())
327- if (Written [Reg])
375+ if (Clobbered [Reg])
328376 lastWritingInsts (Next, Reg) = {&Point};
329377
330378 ErrorOr<MCPhysReg> AutReg = BC.MIB ->getAuthenticatedReg (Point);
331379 if (AutReg && *AutReg != BC.MIB ->getNoRegister ()) {
332- // FIXME: should we use `OnlySmaller=false` below? See similar
333- // FIXME about `getWrittenRegs` above and further discussion about this
334- // at
335- // https://github.com/llvm/llvm-project/pull/122304#discussion_r1939515516
336- Next.NonAutClobRegs .reset (
337- BC.MIB ->getAliases (*AutReg, /* OnlySmaller=*/ true ));
338- if (RegsToTrackInstsFor.isTracked (*AutReg))
339- lastWritingInsts (Next, *AutReg).clear ();
380+ // The sub-registers of *AutReg are also trusted now, but not its
381+ // super-registers (as they retain untrusted register units).
382+ BitVector AuthenticatedSubregs =
383+ BC.MIB ->getAliases (*AutReg, /* OnlySmaller=*/ true );
384+ for (MCPhysReg Reg : AuthenticatedSubregs.set_bits ()) {
385+ Next.SafeToDerefRegs .set (Reg);
386+ if (RegsToTrackInstsFor.isTracked (Reg))
387+ lastWritingInsts (Next, Reg).clear ();
388+ }
340389 }
341390
342391 LLVM_DEBUG ({
@@ -397,14 +446,11 @@ shouldReportReturnGadget(const BinaryContext &BC, const MCInstReference &Inst,
397446 });
398447 if (BC.MIB ->isAuthenticationOfReg (Inst, RetReg))
399448 return nullptr ;
400- BitVector UsedDirtyRegs = S.NonAutClobRegs ;
401- LLVM_DEBUG ({ traceRegMask (BC, " NonAutClobRegs at Ret" , UsedDirtyRegs); });
402- UsedDirtyRegs &= BC.MIB ->getAliases (RetReg, /* OnlySmaller=*/ true );
403- LLVM_DEBUG ({ traceRegMask (BC, " Intersection with RetReg" , UsedDirtyRegs); });
404- if (!UsedDirtyRegs.any ())
449+ LLVM_DEBUG ({ traceRegMask (BC, " SafeToDerefRegs" , S.SafeToDerefRegs ); });
450+ if (S.SafeToDerefRegs [RetReg])
405451 return nullptr ;
406452
407- return std::make_shared<GadgetReport>(RetKind, Inst, UsedDirtyRegs );
453+ return std::make_shared<GadgetReport>(RetKind, Inst, RetReg );
408454}
409455
410456FunctionAnalysisResult
@@ -425,6 +471,14 @@ Analysis::findGadgets(BinaryFunction &BF,
425471 MCInstReference Inst (&BB, I);
426472 const State &S = *PRA.getStateAt (Inst);
427473
474+ // If non-empty state was never propagated from the entry basic block
475+ // to Inst, assume it to be unreachable and report a warning.
476+ if (S.empty ()) {
477+ Result.Diagnostics .push_back (std::make_shared<GenericReport>(
478+ Inst, " Warning: unreachable instruction found" ));
479+ continue ;
480+ }
481+
428482 if (auto Report = shouldReportReturnGadget (BC, Inst, S))
429483 Result.Diagnostics .push_back (Report);
430484 }
0 commit comments