Skip to content

Commit d884b55

Browse files
authored
[BOLT] Introduce helpers to match MCInsts one at a time (NFC) (#138883)
Introduce a low-level instruction matching DSL to capture and/or match the operands of MCInst, single instruction at a time. Unlike the existing `MCPlusBuilder::MCInstMatcher` machinery, this DSL is intended for the use cases when the precise control over the instruction order is required. For example, when validating PtrAuth hardening, all registers are usually considered unsafe after a function call, even though callee-saved registers should preserve their old values _under normal operation_.
Usage example:
    // Bring the short names into the local scope:
    using namespace LowLevelInstMatcherDSL;
    // Declare the registers to capture:
    Reg Xn, Xm;
    // Capture the 0th and 1st operands, match the 2nd operand against the
    // just captured Xm register, match the 3rd operand against literal 0:
    if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0)))
      return AArch64::NoRegister;
    // Match the 0th operand against Xm:
    if (!matchInst(MaybeBr, AArch64::BR, Xm))
      return AArch64::NoRegister;
    // Manually check that Xm and Xn did not match the same register:
    if (Xm.get() == Xn.get())
      return AArch64::NoRegister;
    // Return the matched register:
    return Xm.get();
1 parent 6caa0d0 commit d884b55

File tree

2 files changed

+201
-56
lines changed

2 files changed

+201
-56
lines changed

bolt/include/bolt/Core/MCInstUtils.h

Lines changed: 166 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
#define BOLT_CORE_MCINSTUTILS_H
1111

1212
#include "bolt/Core/BinaryBasicBlock.h"
13+
#include "bolt/Core/MCPlus.h"
1314
#include <map>
1415
#include <variant>
1516

@@ -175,6 +176,171 @@ static inline raw_ostream &operator<<(raw_ostream &OS,
175176
return Ref.print(OS);
176177
}
177178

179+
/// Instruction-matching helpers operating on a single instruction at a time.
180+
///
181+
/// The idea is to make low-level instruction matching as readable as possible.
182+
/// The classes contained in this namespace are intended to be used as a
183+
/// domain-specific language to match MCInst with the particular opcode and
184+
/// operands.
185+
///
186+
/// The goals of this DSL include
187+
/// * matching a single instruction against the template consisting of the
188+
/// particular target-specific opcode and a pattern of operands
189+
/// * matching operands against the known values (such as 42, AArch64::X1 or
190+
/// "the value of --brk-operand=N command line argument")
191+
/// * capturing operands of an instruction ("whatever is the destination
192+
/// register of AArch64::ADDXri instruction, store it to Xd variable to be
193+
/// queried later")
194+
/// * expressing repeated operands of a single matched instruction (such as
195+
/// "ADDXri Xd, Xd, 42, 0" for an arbitrary register Xd) as well as across
196+
/// multiple calls to matchInst(), which is naturally achieved by sequentially
197+
/// capturing the operands and matching operands against the known values
198+
/// * matching multi-instruction code patterns by sequentially calling
199+
/// matchInst() while passing around already matched operands
200+
///
201+
/// The non-goals (compared to MCPlusBuilder::MCInstMatcher) include
202+
/// * matching an arbitrary tree of instructions in a single matchInst() call
203+
/// * encapsulation of target-specific knowledge ("match an increment of Xm
204+
/// by 42")
205+
///
206+
/// Unlike MCPlusBuilder::MCInstMatcher, this DSL focuses on the use cases when
207+
/// the precise control over the instruction order is important. For example,
208+
/// let's consider a target-specific function that has to match two particular
209+
/// instructions against this pattern (for two different registers Xm and Xn)
210+
///
211+
/// ADDXrs Xm, Xn, Xm, #0
212+
/// BR Xm
213+
///
214+
/// and return the register holding the branch target. Assuming the instructions
215+
/// are available as MaybeAdd and MaybeBr, the following code can be used:
216+
///
217+
/// // Bring the short names into the local scope:
218+
/// using namespace LowLevelInstMatcherDSL;
219+
/// // Declare the registers to capture:
220+
/// Reg Xn, Xm;
221+
/// // Capture the 0th and 1st operands, match the 2nd operand against the
222+
/// // just captured Xm register, match the 3rd operand against literal 0:
223+
/// if (!matchInst(MaybeAdd, AArch64::ADDXrs, Xm, Xn, Xm, Imm(0)))
224+
/// return AArch64::NoRegister;
225+
/// // Match the 0th operand against Xm:
226+
/// if (!matchInst(MaybeBr, AArch64::BR, Xm))
227+
/// return AArch64::NoRegister;
228+
/// // Manually check that Xm and Xn did not match the same register:
229+
/// if (Xm.get() == Xn.get())
230+
/// return AArch64::NoRegister;
231+
/// // Return the matched register:
232+
/// return Xm.get();
233+
///
234+
namespace LowLevelInstMatcherDSL {
235+
236+
// The base class to match an operand of type T.
237+
//
238+
// The subclasses of OpMatcher are intended to be allocated on the stack and
239+
// to only be used by passing them to matchInst() and by calling their get()
240+
// function, thus the peculiar `mutable` specifiers: to make the calling code
241+
// compact and readable, the templated matchInst() function has to accept both
242+
// long-lived Imm/Reg wrappers declared as local variables (intended to capture
243+
// the first operand's value and match the subsequent operands, whether inside
244+
// a single instruction or across multiple instructions), as well as temporary
245+
// wrappers around literal values to match, e.g. Imm(42) or Reg(AArch64::XZR).
//
// A matcher that currently holds a value only accepts operand values equal to
// it; a matcher holding std::nullopt accepts any operand value and captures
// it, so that later uses of the same matcher object must agree with it.
246+
template <typename T> class OpMatcher {
247+
// The value to compare operands against, or std::nullopt if nothing has been
// supplied or captured yet.
mutable std::optional<T> Value;
248+
// Snapshot of Value taken by remember() and written back by restore().
mutable std::optional<T> SavedValue;
249+
250+
// Remember/restore the last Value - to be called by matchInst.
251+
void remember() const { SavedValue = Value; }
252+
void restore() const { Value = SavedValue; }
253+
254+
// matchInst() is a friend because remember() and restore() are private.
template <class... OpMatchers>
255+
friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
256+
257+
protected:
258+
// ValueToMatch is the value operands must equal; std::nullopt leaves the
// matcher empty so that the first operand value it sees gets captured.
OpMatcher(std::optional<T> ValueToMatch) : Value(ValueToMatch) {}
259+
260+
// Returns true if the matcher is empty or already holds OpValue.
// Value is overwritten with OpValue in either case (see the note below).
bool matchValue(T OpValue) const {
261+
// Check that OpValue does not contradict the existing Value.
262+
bool MatchResult = !Value || *Value == OpValue;
263+
// If MatchResult is false, all matchers will be reset before returning from
264+
// matchInst, including this one, thus no need to assign conditionally.
265+
Value = OpValue;
266+
267+
return MatchResult;
268+
}
269+
270+
public:
271+
/// Returns the captured value.
///
/// Asserts that the matcher actually holds a value.
272+
T get() const {
273+
assert(Value.has_value());
274+
return *Value;
275+
}
276+
};
277+
278+
// Matcher for a register operand.
//
// Reg(SomeReg) only accepts that particular register. A default-constructed
// Reg starts out empty: it accepts the first register operand it is matched
// against and holds that register afterwards.
class Reg : public OpMatcher<MCPhysReg> {
279+
// Rejects operands that are not registers; otherwise compares (or captures)
// the register via OpMatcher::matchValue().
bool matches(const MCOperand &Op) const {
280+
if (!Op.isReg())
281+
return false;
282+
283+
return matchValue(Op.getReg());
284+
}
285+
286+
// matchInst() is a friend because matches() is private.
template <class... OpMatchers>
287+
friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
288+
289+
public:
290+
// RegToMatch: the register to match, or std::nullopt (the default) to
// capture whichever register is encountered first.
Reg(std::optional<MCPhysReg> RegToMatch = std::nullopt)
291+
: OpMatcher<MCPhysReg>(RegToMatch) {}
292+
};
293+
294+
// Matcher for an immediate operand.
//
// Imm(42) only accepts the immediate value 42. A default-constructed Imm
// starts out empty: it accepts the first immediate operand it is matched
// against and holds that value afterwards.
class Imm : public OpMatcher<int64_t> {
295+
// Rejects operands that are not immediates; otherwise compares (or captures)
// the value via OpMatcher::matchValue().
bool matches(const MCOperand &Op) const {
296+
if (!Op.isImm())
297+
return false;
298+
299+
return matchValue(Op.getImm());
300+
}
301+
302+
// matchInst() is a friend because matches() is private.
template <class... OpMatchers>
303+
friend bool matchInst(const MCInst &, unsigned, const OpMatchers &...);
304+
305+
public:
306+
// ImmToMatch: the immediate value to match, or std::nullopt (the default) to
// capture whichever immediate value is encountered first.
Imm(std::optional<int64_t> ImmToMatch = std::nullopt)
307+
: OpMatcher<int64_t>(ImmToMatch) {}
308+
};
309+
310+
/// Tries to match Inst and updates Ops on success.
311+
///
312+
/// If Inst has the specified Opcode and its operand list prefix matches Ops,
313+
/// this function returns true and updates Ops, otherwise false is returned and
314+
/// values of Ops are kept as before matchInst was called.
315+
///
316+
/// Please note that while Ops are technically passed by a const reference to
317+
/// make invocations like `matchInst(MI, Opcode, Imm(42))` possible, all their
318+
/// fields are marked mutable.
///
/// The i-th matcher in Ops is applied to the i-th operand of Inst, in order,
/// and matching stops at the first matcher that fails. The same matcher object
/// may be passed at several positions to require equal operands.
///
/// \param Inst   the instruction to inspect.
/// \param Opcode the opcode Inst is required to have.
/// \param Ops    matchers for the leading operands of Inst; may be empty, in
///               which case only the opcode is checked.
/// \returns true if the opcode and all the operands matched.
319+
template <class... OpMatchers>
320+
bool matchInst(const MCInst &Inst, unsigned Opcode, const OpMatchers &...Ops) {
321+
// The opcode is checked first, so the operand count assertion below only
// applies to instructions that do have the expected opcode.
if (Inst.getOpcode() != Opcode)
322+
return false;
323+
assert(sizeof...(Ops) <= MCPlus::getNumPrimeOperands(Inst) &&
324+
"Too many operands are matched for the Opcode");
325+
326+
// Ask each matcher to remember its current value in case of rollback.
// All snapshots are taken before any operand is matched, so a matcher that is
// passed more than once still saves its value from before this call.
327+
(Ops.remember(), ...);
328+
329+
// Check if all matchers match the corresponding operands.
// The fold over && evaluates the matchers left to right and short-circuits on
// the first failure; the trailing `&& true` makes it a binary fold.
330+
auto It = Inst.begin();
331+
auto AllMatched = (Ops.matches(*(It++)) && ... && true);
332+
333+
// If match failed, restore the original captured values.
// Every matcher is restored, including those that were never reached.
334+
if (!AllMatched) {
335+
(Ops.restore(), ...);
336+
return false;
337+
}
338+
339+
return true;
340+
}
341+
342+
} // namespace LowLevelInstMatcherDSL
343+
178344
} // namespace bolt
179345
} // namespace llvm
180346

bolt/lib/Target/AArch64/AArch64MCPlusBuilder.cpp

Lines changed: 35 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "Utils/AArch64BaseInfo.h"
2020
#include "bolt/Core/BinaryBasicBlock.h"
2121
#include "bolt/Core/BinaryFunction.h"
22+
#include "bolt/Core/MCInstUtils.h"
2223
#include "bolt/Core/MCPlusBuilder.h"
2324
#include "llvm/BinaryFormat/ELF.h"
2425
#include "llvm/MC/MCContext.h"
@@ -401,81 +402,59 @@ class AArch64MCPlusBuilder : public MCPlusBuilder {
401402

402403
// Iterate over the instructions of BB in reverse order, matching opcodes
403404
// and operands.
404-
MCPhysReg TestedReg = 0;
405-
MCPhysReg ScratchReg = 0;
405+
406406
auto It = BB.end();
407-
auto StepAndGetOpcode = [&It, &BB]() -> int {
408-
if (It == BB.begin())
409-
return -1;
410-
--It;
411-
return It->getOpcode();
407+
auto StepBack = [&]() {
408+
while (It != BB.begin()) {
409+
--It;
410+
// Skip any CFI instructions, but no other pseudos are expected here.
411+
if (!isCFI(*It))
412+
return true;
413+
}
414+
return false;
412415
};
413-
414-
switch (StepAndGetOpcode()) {
415-
default:
416-
// Not matched the branch instruction.
416+
// Step to the last non-CFI instruction.
417+
if (!StepBack())
417418
return std::nullopt;
418-
case AArch64::Bcc:
419-
// Bcc EQ, .Lon_success
420-
if (It->getOperand(0).getImm() != AArch64CC::EQ)
421-
return std::nullopt;
422-
// Not checking .Lon_success (see above).
423419

424-
// SUBSXrs XZR, TestedReg, ScratchReg, 0 (used by "CMP reg, reg" alias)
425-
if (StepAndGetOpcode() != AArch64::SUBSXrs ||
426-
It->getOperand(0).getReg() != AArch64::XZR ||
427-
It->getOperand(3).getImm() != 0)
420+
using namespace llvm::bolt::LowLevelInstMatcherDSL;
421+
Reg TestedReg;
422+
Reg ScratchReg;
423+
424+
if (matchInst(*It, AArch64::Bcc, Imm(AArch64CC::EQ) /*, .Lon_success*/)) {
425+
if (!StepBack() || !matchInst(*It, AArch64::SUBSXrs, Reg(AArch64::XZR),
426+
TestedReg, ScratchReg, Imm(0)))
428427
return std::nullopt;
429-
TestedReg = It->getOperand(1).getReg();
430-
ScratchReg = It->getOperand(2).getReg();
431428

432429
// Either XPAC(I|D) ScratchReg, ScratchReg
433430
// or XPACLRI
434-
switch (StepAndGetOpcode()) {
435-
default:
431+
if (!StepBack())
436432
return std::nullopt;
437-
case AArch64::XPACLRI:
433+
if (matchInst(*It, AArch64::XPACLRI)) {
438434
// No operands to check, but using XPACLRI forces TestedReg to be X30.
439-
if (TestedReg != AArch64::LR)
440-
return std::nullopt;
441-
break;
442-
case AArch64::XPACI:
443-
case AArch64::XPACD:
444-
if (It->getOperand(0).getReg() != ScratchReg ||
445-
It->getOperand(1).getReg() != ScratchReg)
435+
if (TestedReg.get() != AArch64::LR)
446436
return std::nullopt;
447-
break;
437+
} else if (!matchInst(*It, AArch64::XPACI, ScratchReg, ScratchReg) &&
438+
!matchInst(*It, AArch64::XPACD, ScratchReg, ScratchReg)) {
439+
return std::nullopt;
448440
}
449441

450-
// ORRXrs ScratchReg, XZR, TestedReg, 0 (used by "MOV reg, reg" alias)
451-
if (StepAndGetOpcode() != AArch64::ORRXrs)
442+
if (!StepBack() || !matchInst(*It, AArch64::ORRXrs, ScratchReg,
443+
Reg(AArch64::XZR), TestedReg, Imm(0)))
452444
return std::nullopt;
453-
if (It->getOperand(0).getReg() != ScratchReg ||
454-
It->getOperand(1).getReg() != AArch64::XZR ||
455-
It->getOperand(2).getReg() != TestedReg ||
456-
It->getOperand(3).getImm() != 0)
457-
return std::nullopt;
458-
459-
return std::make_pair(TestedReg, &*It);
460445

461-
case AArch64::TBZX:
462-
// TBZX ScratchReg, 62, .Lon_success
463-
ScratchReg = It->getOperand(0).getReg();
464-
if (It->getOperand(1).getImm() != 62)
465-
return std::nullopt;
466-
// Not checking .Lon_success (see above).
446+
return std::make_pair(TestedReg.get(), &*It);
447+
}
467448

468-
// EORXrs ScratchReg, TestedReg, TestedReg, 1
469-
if (StepAndGetOpcode() != AArch64::EORXrs)
470-
return std::nullopt;
471-
TestedReg = It->getOperand(1).getReg();
472-
if (It->getOperand(0).getReg() != ScratchReg ||
473-
It->getOperand(2).getReg() != TestedReg ||
474-
It->getOperand(3).getImm() != 1)
449+
if (matchInst(*It, AArch64::TBZX, ScratchReg, Imm(62) /*, .Lon_success*/)) {
450+
if (!StepBack() || !matchInst(*It, AArch64::EORXrs, ScratchReg, TestedReg,
451+
TestedReg, Imm(1)))
475452
return std::nullopt;
476453

477-
return std::make_pair(TestedReg, &*It);
454+
return std::make_pair(TestedReg.get(), &*It);
478455
}
456+
457+
return std::nullopt;
479458
}
480459

481460
std::optional<MCPhysReg> getAuthCheckedReg(const MCInst &Inst,

0 commit comments

Comments
 (0)