From 82f5cdf33e15bf08548ec8b9f13bb4cad8ac5ae4 Mon Sep 17 00:00:00 2001 From: joboet Date: Sat, 30 Aug 2025 00:21:38 +0200 Subject: [PATCH 1/2] std: improve the `dlsym!` macro and add a test for it * Ensure nul-termination of the symbol name at compile-time * Use an acquire load instead of a relaxed load and acquire fence * Properly use `unsafe` and add safety comments * Add tests --- library/std/src/lib.rs | 1 + library/std/src/sys/pal/unix/weak.rs | 120 ++++++++++++--------- library/std/src/sys/pal/unix/weak/tests.rs | 32 ++++++ 3 files changed, 102 insertions(+), 51 deletions(-) create mode 100644 library/std/src/sys/pal/unix/weak/tests.rs diff --git a/library/std/src/lib.rs b/library/std/src/lib.rs index 30a1b10881784..69f640a68551f 100644 --- a/library/std/src/lib.rs +++ b/library/std/src/lib.rs @@ -350,6 +350,7 @@ #![feature(float_gamma)] #![feature(float_minimum_maximum)] #![feature(fmt_internals)] +#![feature(fn_ptr_trait)] #![feature(generic_atomic)] #![feature(hasher_prefixfree_extras)] #![feature(hashmap_internals)] diff --git a/library/std/src/sys/pal/unix/weak.rs b/library/std/src/sys/pal/unix/weak.rs index c8cf75b876c26..ad649fdbab6c6 100644 --- a/library/std/src/sys/pal/unix/weak.rs +++ b/library/std/src/sys/pal/unix/weak.rs @@ -22,11 +22,14 @@ #![allow(dead_code, unused_macros)] #![forbid(unsafe_op_in_unsafe_fn)] -use crate::ffi::CStr; -use crate::marker::PhantomData; -use crate::sync::atomic::{self, Atomic, AtomicPtr, Ordering}; +use crate::ffi::{CStr, c_char, c_void}; +use crate::marker::{FnPtr, PhantomData}; +use crate::sync::atomic::{Atomic, AtomicPtr, Ordering}; use crate::{mem, ptr}; +#[cfg(test)] +mod tests; + // We can use true weak linkage on ELF targets. #[cfg(all(unix, not(target_vendor = "apple")))] pub(crate) macro weak { @@ -64,7 +67,7 @@ impl ExternWeak { pub(crate) macro dlsym { (fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty;) => ( - dlsym!( + dlsym!( #[link_name = stringify!($name)] fn $name($($param : $t),*) -> $ret; ); @@ -73,21 +76,39 @@ pub(crate) macro dlsym { #[link_name = $sym:expr] fn $name:ident($($param:ident : $t:ty),* $(,)?) -> $ret:ty; ) => ( - static DLSYM: DlsymWeak $ret> = - DlsymWeak::new(concat!($sym, '\0')); + static DLSYM: DlsymWeak $ret> = { + let Ok(name) = CStr::from_bytes_with_nul(concat!($sym, '\0').as_bytes()) else { + panic!("symbol name may not contain NUL") + }; + + // SAFETY: Whoever calls the function pointer returned by `get()` + // is responsible for ensuring that the signature is correct. Just + // like with extern blocks, this is syntactically enforced by making + // the function pointer be unsafe. + unsafe { DlsymWeak::new(name) } + }; + let $name = &DLSYM; ) } + pub(crate) struct DlsymWeak { - name: &'static str, + /// A pointer to the nul-terminated name of the symbol. + // Use a pointer instead of `&'static CStr` to save space. + name: *const c_char, func: Atomic<*mut libc::c_void>, _marker: PhantomData, } -impl DlsymWeak { - pub(crate) const fn new(name: &'static str) -> Self { +impl DlsymWeak { + /// # Safety + /// + /// If the signature of `F` does not match the signature of the symbol (if + /// it exists), calling the function pointer returned by `get()` is + /// undefined behaviour. + pub(crate) const unsafe fn new(name: &'static CStr) -> Self { DlsymWeak { - name, + name: name.as_ptr(), func: AtomicPtr::new(ptr::without_provenance_mut(1)), _marker: PhantomData, } @@ -95,62 +116,59 @@ impl DlsymWeak { #[inline] pub(crate) fn get(&self) -> Option { - unsafe { - // Relaxed is fine here because we fence before reading through the - // pointer (see the comment below). - match self.func.load(Ordering::Relaxed) { - func if func.addr() == 1 => self.initialize(), - func if func.is_null() => None, - func => { - let func = mem::transmute_copy::<*mut libc::c_void, F>(&func); - // The caller is presumably going to read through this value - // (by calling the function we've dlsymed). This means we'd - // need to have loaded it with at least C11's consume - // ordering in order to be guaranteed that the data we read - // from the pointer isn't from before the pointer was - // stored. Rust has no equivalent to memory_order_consume, - // so we use an acquire fence (sorry, ARM). - // - // Now, in practice this likely isn't needed even on CPUs - // where relaxed and consume mean different things. The - // symbols we're loading are probably present (or not) at - // init, and even if they aren't the runtime dynamic loader - // is extremely likely have sufficient barriers internally - // (possibly implicitly, for example the ones provided by - // invoking `mprotect`). - // - // That said, none of that's *guaranteed*, and so we fence. - atomic::fence(Ordering::Acquire); - Some(func) - } - } + // The caller is presumably going to read through this value + // (by calling the function we've dlsymed). This means we'd + // need to have loaded it with at least C11's consume + // ordering in order to be guaranteed that the data we read + // from the pointer isn't from before the pointer was + // stored. Rust has no equivalent to memory_order_consume, + // so we use an acquire load (sorry, ARM). + // + // Now, in practice this likely isn't needed even on CPUs + // where relaxed and consume mean different things. The + // symbols we're loading are probably present (or not) at + // init, and even if they aren't the runtime dynamic loader + // is extremely likely have sufficient barriers internally + // (possibly implicitly, for example the ones provided by + // invoking `mprotect`). + // + // That said, none of that's *guaranteed*, so we use acquire. + match self.func.load(Ordering::Acquire) { + func if func.addr() == 1 => self.initialize(), + func if func.is_null() => None, + // SAFETY: + // `func` is not null and `F` implements `FnPtr`, thus this + // transmutation is well-defined. It is the responsibility of the + // creator of this `DlsymWeak` to ensure that calling the resulting + // function pointer does not result in undefined behaviour (though + // the `dlsym!` macro delegates this responsibility to the caller + // of the function by using `unsafe` function pointers). + // FIXME: use `transmute` once it stops complaining about generics. + func => Some(unsafe { mem::transmute_copy::<*mut c_void, F>(&func) }), } } // Cold because it should only happen during first-time initialization. #[cold] - unsafe fn initialize(&self) -> Option { - assert_eq!(size_of::(), size_of::<*mut libc::c_void>()); - - let val = unsafe { fetch(self.name) }; - // This synchronizes with the acquire fence in `get`. + fn initialize(&self) -> Option { + // SAFETY: `self.name` was created from a `&'static CStr` and is + // therefore a valid C string pointer. + let val = unsafe { libc::dlsym(libc::RTLD_DEFAULT, self.name) }; + // This synchronizes with the acquire load in `get`. self.func.store(val, Ordering::Release); if val.is_null() { None } else { + // SAFETY: see the comment in `get`. + // FIXME: use `transmute` once it stops complaining about generics. Some(unsafe { mem::transmute_copy::<*mut libc::c_void, F>(&val) }) } } } -unsafe fn fetch(name: &str) -> *mut libc::c_void { - let name = match CStr::from_bytes_with_nul(name.as_bytes()) { - Ok(cstr) => cstr, - Err(..) => return ptr::null_mut(), - }; - unsafe { libc::dlsym(libc::RTLD_DEFAULT, name.as_ptr()) } -} +unsafe impl Send for DlsymWeak {} +unsafe impl Sync for DlsymWeak {} #[cfg(not(any(target_os = "linux", target_os = "android")))] pub(crate) macro syscall { diff --git a/library/std/src/sys/pal/unix/weak/tests.rs b/library/std/src/sys/pal/unix/weak/tests.rs new file mode 100644 index 0000000000000..d807ba64e3577 --- /dev/null +++ b/library/std/src/sys/pal/unix/weak/tests.rs @@ -0,0 +1,32 @@ +use super::*; + +#[test] +fn dlsym_existing() { + const TEST_STRING: &'static CStr = c"Ferris!"; + + // Try to find a symbol that definitely exists. + dlsym! { + fn strlen(cs: *const c_char) -> usize; + } + + dlsym! { + #[link_name = "strlen"] + fn custom_name(cs: *const c_char) -> usize; + } + + let strlen = strlen.get().unwrap(); + assert_eq!(unsafe { strlen(TEST_STRING.as_ptr()) }, TEST_STRING.count_bytes()); + + let custom_name = custom_name.get().unwrap(); + assert_eq!(unsafe { custom_name(TEST_STRING.as_ptr()) }, TEST_STRING.count_bytes()); +} + +#[test] +fn dlsym_missing() { + // Try to find a symbol that definitely does not exist. + dlsym! { + fn test_symbol_that_does_not_exist() -> i32; + } + + assert!(test_symbol_that_does_not_exist.get().is_none()); +} From 4c992199598b4fddc25d52f40fa3642c9e065e60 Mon Sep 17 00:00:00 2001 From: joboet Date: Tue, 9 Sep 2025 13:17:45 +0200 Subject: [PATCH 2/2] std: only test `dlsym!` on platforms where it is actually used `dlsym` doesn't work for finding libc symbols on platforms like linux-musl, so the test will fail. --- library/std/src/sys/pal/unix/weak.rs | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/library/std/src/sys/pal/unix/weak.rs b/library/std/src/sys/pal/unix/weak.rs index ad649fdbab6c6..a3b980a3f3d85 100644 --- a/library/std/src/sys/pal/unix/weak.rs +++ b/library/std/src/sys/pal/unix/weak.rs @@ -27,6 +27,16 @@ use crate::marker::{FnPtr, PhantomData}; use crate::sync::atomic::{Atomic, AtomicPtr, Ordering}; use crate::{mem, ptr}; +// We currently only test `dlsym!`, but that doesn't work on all platforms, so +// we gate the tests to only the platforms where it is actually used. +// +// FIXME(joboet): add more tests, reorganise the whole module and get rid of +// `#[allow(dead_code, unused_macros)]`. +#[cfg(any( + target_vendor = "apple", + all(target_os = "linux", target_env = "gnu"), + target_os = "freebsd", +))] #[cfg(test)] mod tests;