Skip to content

trace: incorporate events #4456

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 16, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#![feature(unqualified_local_imports)]
#![feature(derive_coerce_pointee)]
#![feature(arbitrary_self_types)]
#![feature(iter_advance_by)]
// Configure clippy and other lints
#![allow(
clippy::collapsible_else_if,
Expand Down
123 changes: 105 additions & 18 deletions src/shims/native_lib/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,25 @@ pub struct MemEvents {
/// A single memory access.
#[allow(dead_code)]
#[cfg_attr(target_os = "linux", derive(serde::Serialize, serde::Deserialize))]
#[derive(Debug)]
#[derive(Clone, Debug)]
pub enum AccessEvent {
/// A read may have occurred on this memory range.
/// Some instructions *may* read memory without *always* doing that,
/// so this can be an over-approximation.
/// The range info, however, is reliable if the access did happen.
/// A read occurred on this memory range.
Read(AccessRange),
/// A read may have occurred on this memory range.
/// A write may have occurred on this memory range.
/// Some instructions *may* write memory without *always* doing that,
/// so this can be an over-approximation.
/// The range info, however, is reliable if the access did happen.
Write(AccessRange),
/// If the second field is true, the access definitely happened.
Write(AccessRange, bool),
}

impl AccessEvent {
    /// Returns a copy of the memory range touched by this access, regardless
    /// of whether the event is a read or a write.
    fn get_range(&self) -> AccessRange {
        // Both variants carry the range as their first field, so a single
        // or-pattern covers them; a write's certainty flag is ignored here.
        let (AccessEvent::Read(range) | AccessEvent::Write(range, _)) = self;
        range.clone()
    }
}

/// The memory touched by a given access.
Expand All @@ -59,6 +66,12 @@ pub struct AccessRange {
pub size: usize,
}

impl AccessRange {
    /// Returns the exclusive end address of the range, i.e. the address one
    /// past the last byte covered (`addr + size`).
    ///
    /// Uses `strict_add`, so an overflowing range panics instead of wrapping.
    fn end(&self) -> usize {
        let Self { addr, size, .. } = *self;
        addr.strict_add(size)
    }
}

impl<'tcx> EvalContextExtPriv<'tcx> for crate::MiriInterpCx<'tcx> {}
trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
/// Call native host function and return the output as an immediate.
Expand Down Expand Up @@ -196,6 +209,73 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
None
}

/// Applies the `events` to Miri's internal state. The event vector must be
/// ordered sequentially by when the accesses happened, and the sizes are
/// assumed to be exact.
fn tracing_apply_accesses(&mut self, events: MemEvents) -> InterpResult<'tcx> {
let this = self.eval_context_mut();

for evt in events.acc_events {
let evt_rg = evt.get_range();
// LLVM at least permits vectorising accesses to adjacent allocations,
// so we cannot assume 1 access = 1 allocation. :(
let mut rg = evt_rg.addr..evt_rg.end();
while let Some(curr) = rg.next() {
let Some(alloc_id) = this.alloc_id_from_addr(
curr.to_u64(),
rg.len().try_into().unwrap(),
/* only_exposed_allocations */ true,
) else {
throw_ub_format!("Foreign code did an out-of-bounds access!")
};
let alloc = this.get_alloc_raw(alloc_id)?;
// The logical and physical address of the allocation coincide, so we can use
// this instead of `addr_from_alloc_id`.
let alloc_addr = alloc.get_bytes_unchecked_raw().addr();

// Determine the range inside the allocation that this access covers. This range is
// in terms of offsets from the start of `alloc`. The start of the overlap range
// will be `curr`; the end will be the minimum of the end of the allocation and the
// end of the access' range.
let overlap = curr.strict_sub(alloc_addr)
..std::cmp::min(alloc.len(), rg.end.strict_sub(alloc_addr));
// Skip forward however many bytes of the access are contained in the current
// allocation, subtracting 1 since the overlap range includes the current addr
// that was already popped off of the range.
rg.advance_by(overlap.len().strict_sub(1)).unwrap();

match evt {
AccessEvent::Read(_) => {
// FIXME: ProvenanceMap should have something like get_range().
let p_map = alloc.provenance();
for idx in overlap {
// If a provenance was read by the foreign code, expose it.
if let Some(prov) = p_map.get(Size::from_bytes(idx), this) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add a FIXME saying that we should really have a get_range on ProvenanceMap.

(Here's a next PR for you if you are interested. ;)

this.expose_provenance(prov)?;
}
}
}
AccessEvent::Write(_, certain) => {
// Sometimes we aren't certain if a write happened, in which case we
// only initialise that data if the allocation is mutable.
if certain || alloc.mutability.is_mut() {
let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
alloc.process_native_write(
&cx.tcx,
Some(AllocRange {
start: Size::from_bytes(overlap.start),
size: Size::from_bytes(overlap.len()),
}),
)
}
}
}
}
}

interp_ok(())
}
}

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
Expand All @@ -221,6 +301,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
}
};

// Do we have ptrace?
let tracing = trace::Supervisor::is_enabled();

// Get the function arguments, and convert them to `libffi`-compatible form.
let mut libffi_args = Vec::<CArg>::with_capacity(args.len());
for arg in args.iter() {
Expand All @@ -240,9 +323,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// The first time this happens, print a warning.
if !this.machine.native_call_mem_warned.replace(true) {
// Newly set, so first time we get here.
this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem {
tracing: self::trace::Supervisor::is_enabled(),
});
this.emit_diagnostic(NonHaltingDiagnostic::NativeCallSharedMem { tracing });
}

this.expose_provenance(prov)?;
Expand All @@ -269,15 +350,23 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
// be read by FFI. The `black_box` is defensive programming as LLVM likes
// to (incorrectly) optimize away ptr2int casts whose result is unused.
std::hint::black_box(alloc.get_bytes_unchecked_raw().expose_provenance());
// Expose all provenances in this allocation, since the native code can do $whatever.
for prov in alloc.provenance().provenances() {
this.expose_provenance(prov)?;

if !tracing {
// Expose all provenances in this allocation, since the native code can do $whatever.
// Can be skipped when tracing; in that case we'll expose just the actually-read parts later.
for prov in alloc.provenance().provenances() {
this.expose_provenance(prov)?;
}
}

// Prepare for possible write from native code if mutable.
if info.mutbl.is_mut() {
let (alloc, cx) = this.get_alloc_raw_mut(alloc_id)?;
alloc.process_native_write(&cx.tcx, None);
// These writes could initialize everything and wreak havoc with the pointers.
// We can skip that when tracing; in that case we'll later do that only for the memory that got actually written.
if !tracing {
alloc.process_native_write(&cx.tcx, None);
}
// Also expose *mutable* provenance for the interpreter-level allocation.
std::hint::black_box(alloc.get_bytes_unchecked_raw_mut().expose_provenance());
}
Expand All @@ -289,10 +378,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
let (ret, maybe_memevents) =
this.call_native_with_args(link_name, dest, code_ptr, libffi_args)?;

if cfg!(target_os = "linux")
&& let Some(events) = maybe_memevents
{
trace!("Registered FFI events:\n{events:#0x?}");
if tracing {
this.tracing_apply_accesses(maybe_memevents.unwrap())?;
}

this.write_immediate(*ret, dest)?;
Expand Down
23 changes: 16 additions & 7 deletions src/shims/native_lib/trace/parent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,11 +58,11 @@ impl ArchIndependentRegs for libc::user_regs_struct {
#[rustfmt::skip]
impl ArchIndependentRegs for libc::user_regs_struct {
#[inline]
fn ip(&self) -> usize { self.eip.try_into().unwrap() }
fn ip(&self) -> usize { self.eip.cast_unsigned().try_into().unwrap() }
#[inline]
fn set_ip(&mut self, ip: usize) { self.eip = ip.try_into().unwrap() }
fn set_ip(&mut self, ip: usize) { self.eip = ip.cast_signed().try_into().unwrap() }
#[inline]
fn set_sp(&mut self, sp: usize) { self.esp = sp.try_into().unwrap() }
fn set_sp(&mut self, sp: usize) { self.esp = sp.cast_signed().try_into().unwrap() }
}

/// A unified event representing something happening on the child process. Wraps
Expand Down Expand Up @@ -386,7 +386,17 @@ fn capstone_find_events(
acc_events.push(AccessEvent::Read(push.clone()));
}
if acc_ty.is_writable() {
acc_events.push(AccessEvent::Write(push));
// FIXME: This could be made certain; either determine all cases where
// only reads happen, or have an intermediate mempr_* function to first
// map the page(s) as readonly and check if a segfault occurred.

// Per https://docs.rs/iced-x86/latest/iced_x86/enum.OpAccess.html,
// we know that the possible access types are Read, CondRead, Write,
// CondWrite, ReadWrite, and ReadCondWrite. Since we got a segfault
// we know some kind of access happened so Cond{Read, Write}s are
// certain reads and writes; the only uncertainty is with an RW op
// as it might be a ReadCondWrite with the write condition unmet.
acc_events.push(AccessEvent::Write(push, !acc_ty.is_readable()));
}

return true;
Expand Down Expand Up @@ -442,8 +452,7 @@ fn handle_segfault(
// Get information on what caused the segfault. This contains the address
// that triggered it.
let siginfo = ptrace::getsiginfo(pid).unwrap();
// All x86, ARM, etc. instructions only have at most one memory operand
// (thankfully!)
// All x86 instructions only have at most one memory operand (thankfully!)
// SAFETY: si_addr is safe to call.
let addr = unsafe { siginfo.si_addr().addr() };
let page_addr = addr.strict_sub(addr.strict_rem(page_size));
Expand Down Expand Up @@ -490,7 +499,7 @@ fn handle_segfault(
ptrace::write(
pid,
(&raw const PAGE_ADDR).cast_mut().cast(),
libc::c_long::try_from(page_addr).unwrap(),
libc::c_long::try_from(page_addr.cast_signed()).unwrap(),
)
.unwrap();

Expand Down
25 changes: 25 additions & 0 deletions tests/native-lib/fail/tracing/partial_init.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
//@compile-flags: -Zmiri-native-lib-enable-tracing

// Native helper implemented outside this file.
// NOTE(review): presumably zero-initialises the first `n` bytes at `ptr`; the
// asserts in `partial_init` rely on that — confirm against the C source.
extern "C" {
fn init_n(n: i32, ptr: *mut u8);
}

fn main() {
partial_init();
}

// Initialise the first 2 elements of the slice from native code, and check
// that the 3rd is correctly deemed uninit.
// This only works because the test runs with native-lib tracing enabled, where
// just the memory that actually got written is treated as initialised.
fn partial_init() {
// Start from fully uninitialised storage so that any initialised byte must
// have come from the native call.
let mut slice = std::mem::MaybeUninit::<[u8; 3]>::uninit();
let slice_ptr = slice.as_mut_ptr().cast::<u8>();
unsafe {
// Initialize the first two elements.
init_n(2, slice_ptr);
// Both written bytes must now be readable.
assert!(*slice_ptr == 0);
assert!(*slice_ptr.offset(1) == 0);
// Reading the third is UB!
let _val = *slice_ptr.offset(2); //~ ERROR: Undefined Behavior: using uninitialized data
}
}
39 changes: 39 additions & 0 deletions tests/native-lib/fail/tracing/partial_init.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
warning: sharing memory with a native function called via FFI
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
|
LL | init_n(2, slice_ptr);
| ^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function
|
= help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
= help: in particular, Miri assumes that the native call initializes all memory it has written to
= help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
= help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
= help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
= note: BACKTRACE:
= note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC
note: inside `main`
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
|
LL | partial_init();
| ^^^^^^^^^^^^^^

error: Undefined Behavior: using uninitialized data, but this operation requires initialized memory
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
|
LL | let _val = *slice_ptr.offset(2);
| ^^^^^^^^^^^^^^^^^^^^ Undefined Behavior occurred here
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `partial_init` at tests/native-lib/fail/tracing/partial_init.rs:LL:CC
note: inside `main`
--> tests/native-lib/fail/tracing/partial_init.rs:LL:CC
|
LL | partial_init();
| ^^^^^^^^^^^^^^

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error; 1 warning emitted

29 changes: 29 additions & 0 deletions tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//@only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
//@compile-flags: -Zmiri-permissive-provenance -Zmiri-native-lib-enable-tracing

// Native helper implemented outside this file.
// NOTE(review): presumably dereferences `ptr` exactly once and derives its
// return value from what it read — confirm against the C source.
extern "C" {
fn do_one_deref(ptr: *const *const *const i32) -> usize;
}

fn main() {
unexposed_reachable_alloc();
}

// Expose 2 pointers by virtue of doing a native read and assert that the 3rd in
// the chain remains properly unexposed.
fn unexposed_reachable_alloc() {
// Pointer chain: `exposed` -> `intermediate_b` -> `intermediate_a` -> `inner`.
let inner = 42;
let intermediate_a = &raw const inner;
let intermediate_b = &raw const intermediate_a;
let exposed = &raw const intermediate_b;
// Discard the return value; it's just there so the access in C doesn't get optimised away.
unsafe { do_one_deref(exposed) };
// Native read should have exposed the address of intermediate_b...
let valid: *const i32 = std::ptr::with_exposed_provenance(intermediate_b.addr());
// but not of intermediate_a.
let invalid: *const i32 = std::ptr::with_exposed_provenance(intermediate_a.addr());
unsafe {
// Expected to succeed: the provenance for this address was exposed above.
let _ok = *valid;
// Expected to fail: nothing exposed the provenance for this address.
let _not_ok = *invalid; //~ ERROR: Undefined Behavior: memory access failed: attempting to access
}
}
39 changes: 39 additions & 0 deletions tests/native-lib/fail/tracing/unexposed_reachable_alloc.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
warning: sharing memory with a native function called via FFI
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
|
LL | unsafe { do_one_deref(exposed) };
| ^^^^^^^^^^^^^^^^^^^^^ sharing memory with a native function
|
= help: when memory is shared with a native function call, Miri can only track initialisation and provenance on a best-effort basis
= help: in particular, Miri assumes that the native call initializes all memory it has written to
= help: Miri also assumes that any part of this memory may be a pointer that is permitted to point to arbitrary exposed memory
= help: what this means is that Miri will easily miss Undefined Behavior related to incorrect usage of this shared memory, so you should not take a clean Miri run as a signal that your FFI code is UB-free
= help: tracing memory accesses in native code is not yet fully implemented, so there can be further imprecisions beyond what is documented here
= note: BACKTRACE:
= note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
note: inside `main`
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
|
LL | unexposed_reachable_alloc();
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^

error: Undefined Behavior: memory access failed: attempting to access 4 bytes, but got $HEX[noalloc] which is a dangling pointer (it has no provenance)
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
|
LL | let _not_ok = *invalid;
| ^^^^^^^^ Undefined Behavior occurred here
|
= help: this indicates a bug in the program: it performed an invalid operation, and caused Undefined Behavior
= help: see https://doc.rust-lang.org/nightly/reference/behavior-considered-undefined.html for further information
= note: BACKTRACE:
= note: inside `unexposed_reachable_alloc` at tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
note: inside `main`
--> tests/native-lib/fail/tracing/unexposed_reachable_alloc.rs:LL:CC
|
LL | unexposed_reachable_alloc();
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^

note: some details are omitted, run with `MIRIFLAGS=-Zmiri-backtrace=full` for a verbose backtrace

error: aborting due to 1 previous error; 1 warning emitted

4 changes: 4 additions & 0 deletions tests/native-lib/pass/ptr_read_access.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
//@revisions: trace notrace
//@[trace] only-target: x86_64-unknown-linux-gnu i686-unknown-linux-gnu
//@[trace] compile-flags: -Zmiri-native-lib-enable-tracing

fn main() {
test_access_pointer();
test_access_simple();
Expand Down
Loading
Loading