diff --git a/Cargo.toml b/Cargo.toml index f5d6119c..1bbfe3e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -130,3 +130,8 @@ edition = '2018' name = "concurrent-panics" required-features = ["std"] harness = false + +[[test]] +name = "current-exe-mismatch" +required-features = ["std"] +harness = false diff --git a/src/symbolize/gimli.rs b/src/symbolize/gimli.rs index 5f10122d..cd4cec58 100644 --- a/src/symbolize/gimli.rs +++ b/src/symbolize/gimli.rs @@ -184,6 +184,8 @@ cfg_if::cfg_if! { ))] { mod libs_dl_iterate_phdr; use libs_dl_iterate_phdr::native_libraries; + #[path = "gimli/parse_running_mmaps_unix.rs"] + mod parse_running_mmaps; } else if #[cfg(target_env = "libnx")] { mod libs_libnx; use libs_libnx::native_libraries; diff --git a/src/symbolize/gimli/libs_dl_iterate_phdr.rs b/src/symbolize/gimli/libs_dl_iterate_phdr.rs index a011e608..9f0304ce 100644 --- a/src/symbolize/gimli/libs_dl_iterate_phdr.rs +++ b/src/symbolize/gimli/libs_dl_iterate_phdr.rs @@ -17,6 +17,20 @@ pub(super) fn native_libraries() -> Vec { return ret; } +fn infer_current_exe(base_addr: usize) -> OsString { + if let Ok(entries) = super::parse_running_mmaps::parse_maps() { + let opt_path = entries + .iter() + .find(|e| e.ip_matches(base_addr) && e.pathname().len() > 0) + .map(|e| e.pathname()) + .cloned(); + if let Some(path) = opt_path { + return path; + } + } + env::current_exe().map(|e| e.into()).unwrap_or_default() +} + // `info` should be a valid pointers. // `vec` should be a valid pointer to a `std::Vec`. unsafe extern "C" fn callback( @@ -28,8 +42,12 @@ unsafe extern "C" fn callback( let libs = &mut *(vec as *mut Vec); let is_main_prog = info.dlpi_name.is_null() || *info.dlpi_name == 0; let name = if is_main_prog { + // The man page for dl_iterate_phdr says that the first object visited by + // callback is the main program; so the first time we encounter a + // nameless entry, we can assume it's the main program and try to infer its path. 
+ // After that, we cannot continue that assumption, and we use an empty string. if libs.is_empty() { - env::current_exe().map(|e| e.into()).unwrap_or_default() + infer_current_exe(info.dlpi_addr as usize) } else { OsString::new() } diff --git a/src/symbolize/gimli/parse_running_mmaps_unix.rs b/src/symbolize/gimli/parse_running_mmaps_unix.rs new file mode 100644 index 00000000..a196ffcf --- /dev/null +++ b/src/symbolize/gimli/parse_running_mmaps_unix.rs @@ -0,0 +1,242 @@ +// Note: This file is only currently used on targets that call out to the code +// in `mod libs_dl_iterate_phdr` (e.g. linux, freebsd, ...); it may be more +// general purpose, but it hasn't been tested elsewhere. + +use super::mystd::fs::File; +use super::mystd::io::Read; +use super::mystd::str::FromStr; +use super::{OsString, String, Vec}; + +#[derive(PartialEq, Eq, Debug)] +pub(super) struct MapsEntry { + /// start (inclusive) and limit (exclusive) of address range. + address: (usize, usize), + /// The perms field holds the permissions for the entry + /// + /// r = read + /// w = write + /// x = execute + /// s = shared + /// p = private (copy on write) + perms: [char; 4], + /// Offset into the file (or "whatever"). + offset: usize, + /// device (major, minor) + dev: (usize, usize), + /// inode on the device. 0 indicates that no inode is associated with the memory region (e.g. uninitialized data aka BSS). + inode: usize, + /// Usually the file backing the mapping. + /// + /// Note: The man page for proc includes a note about "coordination" by + /// using readelf to see the Offset field in ELF program headers. pnkfelix + /// is not yet sure if that is intended to be a comment on pathname, or what + /// form/purpose such coordination is meant to have. + /// + /// There are also some pseudo-paths: + /// "[stack]": The initial process's (aka main thread's) stack. + /// "[stack:<tid>]": a specific thread's stack. 
(This was only present for a limited range of Linux versions; it was determined to be too expensive to provide.) + /// "[vdso]": Virtual dynamically linked shared object + /// "[heap]": The process's heap + /// + /// The pathname can be blank, which means it is an anonymous mapping + /// obtained via mmap. + /// + /// Newlines in pathname are replaced with an octal escape sequence. + /// + /// The pathname may have "(deleted)" appended onto it if the file-backed + /// path has been deleted. + /// + /// Note that modifications like the latter two indicated above imply that + /// in general the pathname may be ambiguous. (I.e. you cannot tell if the + /// denoted filename actually ended with the text "(deleted)", or if that + /// was added by the maps rendering.) + pathname: OsString, +} + +pub(super) fn parse_maps() -> Result, &'static str> { + let mut v = Vec::new(); + let mut proc_self_maps = + File::open("/proc/self/maps").map_err(|_| "Couldn't open /proc/self/maps")?; + let mut buf = String::new(); + let _bytes_read = proc_self_maps + .read_to_string(&mut buf) + .map_err(|_| "Couldn't read /proc/self/maps")?; + for line in buf.lines() { + v.push(line.parse()?); + } + + Ok(v) +} + +impl MapsEntry { + pub(super) fn pathname(&self) -> &OsString { + &self.pathname + } + + pub(super) fn ip_matches(&self, ip: usize) -> bool { + self.address.0 <= ip && ip < self.address.1 + } +} + +impl FromStr for MapsEntry { + type Err = &'static str; + + // Format: address perms offset dev inode pathname + // e.g.: "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]" + // e.g.: "7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795 /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + // e.g.: "35b1a21000-35b1a22000 rw-p 00000000 00:00 0" + fn from_str(s: &str) -> Result { + let mut parts = s + .split(' ') // space-separated fields + .filter(|s| s.len() > 0); // multiple spaces implies empty strings that need to be skipped. 
+ let range_str = parts.next().ok_or("Couldn't find address")?; + let perms_str = parts.next().ok_or("Couldn't find permissions")?; + let offset_str = parts.next().ok_or("Couldn't find offset")?; + let dev_str = parts.next().ok_or("Couldn't find dev")?; + let inode_str = parts.next().ok_or("Couldn't find inode")?; + let pathname_str = parts.next().unwrap_or(""); // pathname may be omitted. + + let hex = |s| usize::from_str_radix(s, 16).map_err(|_| "Couldn't parse hex number"); + let address = { + // This could use `range_str.split_once('-')` once the MSRV passes 1.52. + if let Some(idx) = range_str.find('-') { + let (start, rest) = range_str.split_at(idx); + let (_div, limit) = rest.split_at(1); + (hex(start)?, hex(limit)?) + } else { + return Err("Couldn't parse address range"); + } + }; + let perms: [char; 4] = { + let mut chars = perms_str.chars(); + let mut c = || chars.next().ok_or("insufficient perms"); + let perms = [c()?, c()?, c()?, c()?]; + if chars.next().is_some() { + return Err("too many perms"); + } + perms + }; + let offset = hex(offset_str)?; + let dev = { + // This could use `dev_str.split_once(':')` once the MSRV passes 1.52. + if let Some(idx) = dev_str.find(':') { + let (major, rest) = dev_str.split_at(idx); + let (_div, minor) = rest.split_at(1); + (hex(major)?, hex(minor)?) + } else { + return Err("Couldn't parse dev")?; + } + }; + let inode = hex(inode_str)?; + let pathname = pathname_str.into(); + + Ok(MapsEntry { + address, + perms, + offset, + dev, + inode, + pathname, + }) + } +} + +// Make sure we can parse 64-bit sample output if we're on a 64-bit target. 
+#[cfg(target_pointer_width = "64")] +#[test] +fn check_maps_entry_parsing_64bit() { + assert_eq!( + "ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 \ + [vsyscall]" + .parse::() + .unwrap(), + MapsEntry { + address: (0xffffffffff600000, 0xffffffffff601000), + perms: ['-', '-', 'x', 'p'], + offset: 0x00000000, + dev: (0x00, 0x00), + inode: 0x0, + pathname: "[vsyscall]".into(), + } + ); + + assert_eq!( + "7f5985f46000-7f5985f48000 rw-p 00039000 103:06 76021795 \ + /usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2" + .parse::() + .unwrap(), + MapsEntry { + address: (0x7f5985f46000, 0x7f5985f48000), + perms: ['r', 'w', '-', 'p'], + offset: 0x00039000, + dev: (0x103, 0x06), + inode: 0x76021795, + pathname: "/usr/lib/x86_64-linux-gnu/ld-linux-x86-64.so.2".into(), + } + ); + assert_eq!( + "35b1a21000-35b1a22000 rw-p 00000000 00:00 0" + .parse::() + .unwrap(), + MapsEntry { + address: (0x35b1a21000, 0x35b1a22000), + perms: ['r', 'w', '-', 'p'], + offset: 0x00000000, + dev: (0x00, 0x00), + inode: 0x0, + pathname: Default::default(), + } + ); +} + +// (This output was taken from a 32-bit machine, but will work on any target) +#[test] +fn check_maps_entry_parsing_32bit() { + /* Example snippet of output: + 08056000-08077000 rw-p 00000000 00:00 0 [heap] + b7c79000-b7e02000 r--p 00000000 08:01 60662705 /usr/lib/locale/locale-archive + b7e02000-b7e03000 rw-p 00000000 00:00 0 + */ + assert_eq!( + "08056000-08077000 rw-p 00000000 00:00 0 \ + [heap]" + .parse::() + .unwrap(), + MapsEntry { + address: (0x08056000, 0x08077000), + perms: ['r', 'w', '-', 'p'], + offset: 0x00000000, + dev: (0x00, 0x00), + inode: 0x0, + pathname: "[heap]".into(), + } + ); + + assert_eq!( + "b7c79000-b7e02000 r--p 00000000 08:01 60662705 \ + /usr/lib/locale/locale-archive" + .parse::() + .unwrap(), + MapsEntry { + address: (0xb7c79000, 0xb7e02000), + perms: ['r', '-', '-', 'p'], + offset: 0x00000000, + dev: (0x08, 0x01), + inode: 0x60662705, + pathname: "/usr/lib/locale/locale-archive".into(), + } + 
); + assert_eq!( + "b7e02000-b7e03000 rw-p 00000000 00:00 0" + .parse::() + .unwrap(), + MapsEntry { + address: (0xb7e02000, 0xb7e03000), + perms: ['r', 'w', '-', 'p'], + offset: 0x00000000, + dev: (0x00, 0x00), + inode: 0x0, + pathname: Default::default(), + } + ); +} diff --git a/tests/common/mod.rs b/tests/common/mod.rs new file mode 100644 index 00000000..3c07934f --- /dev/null +++ b/tests/common/mod.rs @@ -0,0 +1,14 @@ +/// Some tests only make sense in contexts where they can re-exec the test +/// itself. Not all contexts support this, so you can call this method to find +/// out which case you are in. +pub fn cannot_reexec_the_test() -> bool { + // These run in docker containers on CI where they can't re-exec the test, + // so just skip these for CI. No other reason this can't run on those + // platforms though. + // Miri does not have support for re-execing a file + cfg!(unix) + && (cfg!(target_arch = "arm") + || cfg!(target_arch = "aarch64") + || cfg!(target_arch = "s390x")) + || cfg!(miri) +} diff --git a/tests/concurrent-panics.rs b/tests/concurrent-panics.rs index 470245cc..a44a2677 100644 --- a/tests/concurrent-panics.rs +++ b/tests/concurrent-panics.rs @@ -9,17 +9,11 @@ const PANICS: usize = 100; const THREADS: usize = 8; const VAR: &str = "__THE_TEST_YOU_ARE_LUKE"; +mod common; + fn main() { - // These run in docker containers on CI where they can't re-exec the test, - // so just skip these for CI. No other reason this can't run on those - // platforms though. - // Miri does not have support for re-execing a file - if cfg!(unix) - && (cfg!(target_arch = "arm") - || cfg!(target_arch = "aarch64") - || cfg!(target_arch = "s390x")) - || cfg!(miri) - { + // If we cannot re-exec this test, there's no point in trying to do it. 
+ if common::cannot_reexec_the_test() { println!("test result: ok"); return; } diff --git a/tests/current-exe-mismatch.rs b/tests/current-exe-mismatch.rs new file mode 100644 index 00000000..21c67bcb --- /dev/null +++ b/tests/current-exe-mismatch.rs @@ -0,0 +1,137 @@ +// rust-lang/rust#101913: when you run your program explicitly via `ld.so`, +// `std::env::current_exe` will return the path of *that* program, and not +// the Rust program itself. + +use std::io::{BufRead, BufReader}; +use std::path::{Path, PathBuf}; +use std::process::Command; + +mod common; + +fn main() { + if std::env::var(VAR).is_err() { + // the parent waits for the child; we then handle either printing + // "test result: ok", "test result: ignored", or panicking. + match parent() { + Ok(()) => { + println!("test result: ok"); + } + Err(EarlyExit::IgnoreTest(_)) => { + println!("test result: ignored"); + } + Err(EarlyExit::IoError(e)) => { + println!("{} parent encoutered IoError: {:?}", file!(), e); + panic!(); + } + } + } else { + // println!("{} running child", file!()); + child().unwrap(); + } +} + +const VAR: &str = "__THE_TEST_YOU_ARE_LUKE"; + +#[derive(Debug)] +enum EarlyExit { + IgnoreTest(String), + IoError(std::io::Error), +} + +impl From for EarlyExit { + fn from(e: std::io::Error) -> Self { + EarlyExit::IoError(e) + } +} + +fn parent() -> Result<(), EarlyExit> { + // If we cannot re-exec this test, there's no point in trying to do it. + if common::cannot_reexec_the_test() { + return Err(EarlyExit::IgnoreTest("(cannot reexec)".into())); + } + + let me = std::env::current_exe().unwrap(); + let ld_so = find_interpreter(&me)?; + + // use interp to invoke current exe, yielding child test. + // + // (if you're curious what you might compare this against, you can try + // swapping in the below definition for `result`, which is the easy case of + // not using the ld.so interpreter directly that Rust handled fine even + // prior to resolution of rust-lang/rust#101913.) 
+ // + // let result = Command::new(me).env(VAR, "1").output()?; + let result = Command::new(ld_so).env(VAR, "1").arg(&me).output().unwrap(); + + if result.status.success() { + return Ok(()); + } + println!("stdout:\n{}", String::from_utf8_lossy(&result.stdout)); + println!("stderr:\n{}", String::from_utf8_lossy(&result.stderr)); + println!("code: {}", result.status); + panic!(); +} + +fn child() -> Result<(), EarlyExit> { + let bt = backtrace::Backtrace::new(); + println!("{:?}", bt); + + let mut found_my_name = false; + + let my_filename = file!(); + 'frames: for frame in bt.frames() { + let symbols = frame.symbols(); + if symbols.is_empty() { + continue; + } + + for sym in symbols { + if let Some(filename) = sym.filename() { + if filename.ends_with(my_filename) { + // huzzah! + found_my_name = true; + break 'frames; + } + } + } + } + + assert!(found_my_name); + + Ok(()) +} + +// we use the `readelf` command to extract the path to the interpreter requested +// by our binary. +// +// if we cannot `readelf` for some reason, or if we fail to parse its output, +// then we will just give up on this test (and not treat it as a test failure). 
+/// Returns the ELF program interpreter (the dynamic loader) that
+/// `readelf -l` reports for the binary at `me`.
+///
+/// A failure to run `readelf`, a non-success exit status, or output with no
+/// interpreter line is reported as `EarlyExit::IgnoreTest`, so the caller can
+/// skip the test instead of failing it. An I/O error while reading the
+/// captured output lines is propagated as `EarlyExit::IoError`.
+fn find_interpreter(me: &Path) -> Result<PathBuf, EarlyExit> {
+    let result = Command::new("readelf")
+        .arg("-l")
+        .arg(me)
+        .output()
+        .map_err(|_err| EarlyExit::IgnoreTest("readelf invocation failed".into()))?;
+    if !result.status.success() {
+        return Err(EarlyExit::IgnoreTest("readelf returned non-success".into()));
+    }
+
+    // The line of interest has the form
+    //   [Requesting program interpreter: <path>]
+    let prefix = "[Requesting program interpreter: ";
+    for line in BufReader::new(&result.stdout[..]).lines() {
+        let line = line?;
+        let line = line.trim();
+        // This could use `line.split_once` and `suffix.rsplit_once` once the MSRV passes 1.52
+        if let Some(idx) = line.find(prefix) {
+            let (_, suffix) = line.split_at(idx + prefix.len());
+            // Everything before the last `]` on the line is the interpreter path.
+            if let Some(idx) = suffix.rfind(']') {
+                let (found_path, _ignore_remainder) = suffix.split_at(idx);
+                return Ok(found_path.into());
+            }
+        }
+    }
+
+    Err(EarlyExit::IgnoreTest(
+        "could not find interpreter from readelf output".into(),
+    ))
+}