From 1cff528d3e0f1c980b8c52a85f29b601468da248 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 9 Nov 2025 17:02:43 +0100 Subject: [PATCH 01/13] epoll: unblock threads when new interest is registered --- src/tools/miri/src/shims/files.rs | 20 ++ .../miri/src/shims/unix/linux_like/epoll.rs | 202 ++++++++---------- .../miri/src/shims/unix/linux_like/eventfd.rs | 4 +- .../miri/src/shims/unix/unnamed_socket.rs | 6 +- src/tools/miri/tests/deps/Cargo.lock | 96 +++++++++ src/tools/miri/tests/deps/Cargo.toml | 1 + .../fail-dep/libc/libc-epoll-data-race.rs | 2 +- .../pass-dep/libc/libc-epoll-blocking.rs | 39 ++++ .../pass-dep/libc/libc-epoll-no-blocking.rs | 11 +- 9 files changed, 259 insertions(+), 122 deletions(-) diff --git a/src/tools/miri/src/shims/files.rs b/src/tools/miri/src/shims/files.rs index 8c29cb040b55a..22c2e3d083529 100644 --- a/src/tools/miri/src/shims/files.rs +++ b/src/tools/miri/src/shims/files.rs @@ -50,6 +50,26 @@ impl FileDescriptionRef { } } +impl PartialEq for FileDescriptionRef { + fn eq(&self, other: &Self) -> bool { + self.id() == other.id() + } +} + +impl Eq for FileDescriptionRef {} + +impl PartialOrd for FileDescriptionRef { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for FileDescriptionRef { + fn cmp(&self, other: &Self) -> std::cmp::Ordering { + self.id().cmp(&other.id()) + } +} + /// Holds a weak reference to the actual file description. #[derive(Debug)] pub struct WeakFileDescriptionRef(Weak>); diff --git a/src/tools/miri/src/shims/unix/linux_like/epoll.rs b/src/tools/miri/src/shims/unix/linux_like/epoll.rs index 3133c149293db..4958cdfa5dc4b 100644 --- a/src/tools/miri/src/shims/unix/linux_like/epoll.rs +++ b/src/tools/miri/src/shims/unix/linux_like/epoll.rs @@ -1,5 +1,5 @@ use std::cell::RefCell; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, BTreeSet}; use std::io; use std::rc::{Rc, Weak}; use std::time::Duration; @@ -18,11 +18,13 @@ use crate::*; struct Epoll { /// A map of EpollEventInterests registered under this epoll instance. /// Each entry is differentiated using FdId and file descriptor value. + /// The interest are separately refcounted so that we can track, for each FD, what epoll + /// instances are currently interested in it. interest_list: RefCell>>>, /// A map of EpollEventInstance that will be returned when `epoll_wait` is called. /// Similar to interest_list, the entry is also differentiated using FdId /// and file descriptor value. - ready_list: ReadyList, + ready_list: RefCell>, /// A list of thread ids blocked on this epoll instance. blocked_tid: RefCell>, } @@ -36,9 +38,9 @@ impl VisitProvenance for Epoll { /// EpollEventInstance contains information that will be returned by epoll_wait. #[derive(Debug)] pub struct EpollEventInstance { - /// Xor-ed event types that happened to the file description. + /// Bitmask of event types that happened to the file description. events: u32, - /// Original data retrieved from `epoll_event` during `epoll_ctl`. + /// User-defined data associated with the interest that triggered this instance. data: u64, /// The release clock associated with this event. clock: VClock, @@ -99,11 +101,6 @@ pub struct EpollReadyEvents { pub epollerr: bool, } -#[derive(Debug, Default)] -struct ReadyList { - mapping: RefCell>, -} - impl EpollReadyEvents { pub fn new() -> Self { EpollReadyEvents { @@ -185,8 +182,8 @@ impl EpollInterestTable { } } - pub fn get_epoll_interest(&self, id: FdId) -> Option<&Vec>>> { - self.0.get(&id) + pub fn get_epoll_interest(&self, id: FdId) -> Option<&[Weak>]> { + self.0.get(&id).map(|v| &**v) } pub fn get_epoll_interest_mut( @@ -328,18 +325,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let epoll_key = (id, fd); - // Check the existence of fd in the interest list. - if op == epoll_ctl_add { - if interest_list.contains_key(&epoll_key) { - return this.set_last_error_and_return_i32(LibcError("EEXIST")); - } - } else { - if !interest_list.contains_key(&epoll_key) { - return this.set_last_error_and_return_i32(LibcError("ENOENT")); - } - } - - if op == epoll_ctl_add { + let new_interest = if op == epoll_ctl_add { // Create an epoll_interest. let interest = Rc::new(RefCell::new(EpollEventInterest { fd_num: fd, @@ -347,24 +333,34 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { data, weak_epfd: FileDescriptionRef::downgrade(&epfd), })); - // Notification will be returned for current epfd if there is event in the file - // descriptor we registered. - check_and_update_one_event_interest(&fd_ref, &interest, id, this)?; - - // Insert an epoll_interest to global epoll_interest list. + // Register it in the right places. this.machine.epoll_interests.insert_epoll_interest(id, Rc::downgrade(&interest)); - interest_list.insert(epoll_key, interest); + if interest_list.insert(epoll_key, interest.clone()).is_some() { + // We already had interest in this. + return this.set_last_error_and_return_i32(LibcError("EEXIST")); + } + + interest } else { // Modify the existing interest. - let epoll_interest = interest_list.get_mut(&epoll_key).unwrap(); + let Some(interest) = interest_list.get_mut(&epoll_key) else { + return this.set_last_error_and_return_i32(LibcError("ENOENT")); + }; { - let mut epoll_interest = epoll_interest.borrow_mut(); - epoll_interest.events = events; - epoll_interest.data = data; + let mut interest = interest.borrow_mut(); + interest.events = events; + interest.data = data; } - // Updating an FD interest triggers events. - check_and_update_one_event_interest(&fd_ref, epoll_interest, id, this)?; - } + interest.clone() + }; + + // Deliver events for the new interest. + send_ready_events_to_interests( + this, + fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this), + id, + std::iter::once(new_interest), + )?; interp_ok(Scalar::from_i32(0)) } else if op == epoll_ctl_del { @@ -378,7 +374,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { drop(epoll_interest); // Remove related epoll_interest from ready list. - epfd.ready_list.mapping.borrow_mut().remove(&epoll_key); + epfd.ready_list.borrow_mut().remove(&epoll_key); // Remove dangling EpollEventInterest from its global table. // .unwrap() below should succeed because the file description id must have registered @@ -462,7 +458,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { }; // We just need to know if the ready list is empty and borrow the thread_ids out. - let ready_list_empty = epfd.ready_list.mapping.borrow().is_empty(); + let ready_list_empty = epfd.ready_list.borrow().is_empty(); if timeout == 0 || !ready_list_empty { // If the ready list is not empty, or the timeout is 0, we can return immediately. return_ready_list(&epfd, dest, &event, this)?; @@ -518,50 +514,71 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(()) } - /// For a specific file description, get its ready events and update the corresponding ready - /// list. This function should be called whenever an event causes more bytes or an EOF to become - /// newly readable from an FD, and whenever more bytes can be written to an FD or no more future - /// writes are possible. + /// For a specific file description, get its ready events and send it to everyone who registered + /// interest in this FD. This function should be called whenever an event causes more bytes or + /// an EOF to become newly readable from an FD, and whenever more bytes can be written to an FD + /// or no more future writes are possible. /// /// This *will* report an event if anyone is subscribed to it, without any further filtering, so /// do not call this function when an FD didn't have anything happen to it! - fn check_and_update_readiness( - &mut self, - fd_ref: DynFileDescriptionRef, - ) -> InterpResult<'tcx, ()> { + fn epoll_send_fd_ready_events(&mut self, fd_ref: DynFileDescriptionRef) -> InterpResult<'tcx> { let this = self.eval_context_mut(); + let ready_events_bitmask = + fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this); let id = fd_ref.id(); - let mut waiter = Vec::new(); - // Get a list of EpollEventInterest that is associated to a specific file description. - if let Some(epoll_interests) = this.machine.epoll_interests.get_epoll_interest(id) { - for weak_epoll_interest in epoll_interests { - if let Some(epoll_interest) = weak_epoll_interest.upgrade() { - let is_updated = - check_and_update_one_event_interest(&fd_ref, &epoll_interest, id, this)?; - if is_updated { - // Edge-triggered notification only notify one thread even if there are - // multiple threads blocked on the same epfd. - - // This unwrap can never fail because if the current epoll instance were - // closed, the upgrade of weak_epoll_interest - // above would fail. This guarantee holds because only the epoll instance - // holds a strong ref to epoll_interest. - let epfd = epoll_interest.borrow().weak_epfd.upgrade().unwrap(); - // FIXME: We can randomly pick a thread to unblock. - if let Some(thread_id) = epfd.blocked_tid.borrow_mut().pop() { - waiter.push(thread_id); - }; - } - } - } + // Figure out who is interested in this. We need to clone this list since we can't prove + // that `send_ready_events_to_interests` won't mutate it. + let interests = this.machine.epoll_interests.get_epoll_interest(id).unwrap_or(&[]); + let interests = interests.iter().filter_map(|weak| weak.upgrade()).collect::>(); + send_ready_events_to_interests(this, ready_events_bitmask, id, interests.into_iter()) + } +} + +/// Send the latest ready events for one particular FD (identified by `event_fd_id`) to everyone in +/// the `interests` list, if they are interested in this kind of event. +fn send_ready_events_to_interests<'tcx>( + ecx: &mut MiriInterpCx<'tcx>, + event_bitmask: u32, + event_fd_id: FdId, + interests: impl Iterator>>, +) -> InterpResult<'tcx> { + #[allow(clippy::mutable_key_type)] // yeah, we know + let mut triggered_epolls = BTreeSet::new(); + for interest in interests { + let interest = interest.borrow(); + let epfd = interest.weak_epfd.upgrade().unwrap(); + // This checks if any of the events specified in epoll_event_interest.events + // match those in ready_events. + let flags = interest.events & event_bitmask; + if flags == 0 { + continue; } - waiter.sort(); - waiter.dedup(); - for thread_id in waiter { - this.unblock_thread(thread_id, BlockReason::Epoll)?; + // Geenrate a new event instance, with the flags that this one is interested in. + let mut new_instance = EpollEventInstance::new(flags, interest.data); + ecx.release_clock(|clock| { + new_instance.clock.clone_from(clock); + })?; + // Add event to ready list for this epoll instance. + // Tests confirm that we have to *overwrite* the old instance for the same key. + let mut ready_list = epfd.ready_list.borrow_mut(); + ready_list.insert((event_fd_id, interest.fd_num), new_instance); + drop(ready_list); + // Remember to wake up this epoll later. + // (We might encounter the same epoll multiple times if there are multiple interests for + // different file descriptors that references the same file description.) + triggered_epolls.insert(epfd); + } + + // For each epoll instance where an interest triggered, wake up one thread. + for epoll in triggered_epolls { + // Edge-triggered notification only notify one thread even if there are + // multiple threads blocked on the same epfd. + if let Some(thread_id) = epoll.blocked_tid.borrow_mut().pop() { + ecx.unblock_thread(thread_id, BlockReason::Epoll)?; } - interp_ok(()) } + + interp_ok(()) } /// This function takes in ready list and returns EpollEventInstance with file description @@ -582,41 +599,6 @@ fn ready_list_next( None } -/// This helper function checks whether an epoll notification should be triggered for a specific -/// epoll_interest and, if necessary, triggers the notification, and returns whether the -/// notification was added/updated. Unlike check_and_update_readiness, this function sends a -/// notification to only one epoll instance. -fn check_and_update_one_event_interest<'tcx>( - fd_ref: &DynFileDescriptionRef, - interest: &RefCell, - id: FdId, - ecx: &MiriInterpCx<'tcx>, -) -> InterpResult<'tcx, bool> { - // Get the bitmask of ready events for a file description. - let ready_events_bitmask = fd_ref.as_unix(ecx).get_epoll_ready_events()?.get_event_bitmask(ecx); - let epoll_event_interest = interest.borrow(); - let epfd = epoll_event_interest.weak_epfd.upgrade().unwrap(); - // This checks if any of the events specified in epoll_event_interest.events - // match those in ready_events. - let flags = epoll_event_interest.events & ready_events_bitmask; - // If there is any event that we are interested in being specified as ready, - // insert an epoll_return to the ready list. - if flags != 0 { - let epoll_key = (id, epoll_event_interest.fd_num); - let mut ready_list = epfd.ready_list.mapping.borrow_mut(); - let mut event_instance = EpollEventInstance::new(flags, epoll_event_interest.data); - // If we are tracking data races, remember the current clock so we can sync with it later. - ecx.release_clock(|clock| { - event_instance.clock.clone_from(clock); - })?; - // Triggers the notification by inserting it to the ready list. - ready_list.insert(epoll_key, event_instance); - interp_ok(true) - } else { - interp_ok(false) - } -} - /// Stores the ready list of the `epfd` epoll instance into `events` (which must be an array), /// and the number of returned events into `dest`. fn return_ready_list<'tcx>( @@ -625,7 +607,7 @@ fn return_ready_list<'tcx>( events: &MPlaceTy<'tcx>, ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx> { - let mut ready_list = epfd.ready_list.mapping.borrow_mut(); + let mut ready_list = epfd.ready_list.borrow_mut(); let mut num_of_events: i32 = 0; let mut array_iter = ecx.project_array_fields(events)?; diff --git a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs index 5d4f207d365e1..ea68698df2cdc 100644 --- a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs +++ b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs @@ -216,7 +216,7 @@ fn eventfd_write<'tcx>( // The state changed; we check and update the status of all supported event // types for current file description. - ecx.check_and_update_readiness(eventfd)?; + ecx.epoll_send_fd_ready_events(eventfd)?; // Return how many bytes we consumed from the user-provided buffer. return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize())); @@ -311,7 +311,7 @@ fn eventfd_read<'tcx>( // The state changed; we check and update the status of all supported event // types for current file description. - ecx.check_and_update_readiness(eventfd)?; + ecx.epoll_send_fd_ready_events(eventfd)?; // Tell userspace how many bytes we put into the buffer. return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize())); diff --git a/src/tools/miri/src/shims/unix/unnamed_socket.rs b/src/tools/miri/src/shims/unix/unnamed_socket.rs index 81703d6e176bf..d0a5ba911b3f6 100644 --- a/src/tools/miri/src/shims/unix/unnamed_socket.rs +++ b/src/tools/miri/src/shims/unix/unnamed_socket.rs @@ -96,7 +96,7 @@ impl FileDescription for AnonSocket { } } // Notify peer fd that close has happened, since that can unblock reads and writes. - ecx.check_and_update_readiness(peer_fd)?; + ecx.epoll_send_fd_ready_events(peer_fd)?; } interp_ok(Ok(())) } @@ -276,7 +276,7 @@ fn anonsocket_write<'tcx>( } // Notification should be provided for peer fd as it became readable. // The kernel does this even if the fd was already readable before, so we follow suit. - ecx.check_and_update_readiness(peer_fd)?; + ecx.epoll_send_fd_ready_events(peer_fd)?; return finish.call(ecx, Ok(write_size)); } @@ -369,7 +369,7 @@ fn anonsocket_read<'tcx>( ecx.unblock_thread(thread_id, BlockReason::UnnamedSocket)?; } // Notify epoll waiters. - ecx.check_and_update_readiness(peer_fd)?; + ecx.epoll_send_fd_ready_events(peer_fd)?; }; return finish.call(ecx, Ok(read_size)); diff --git a/src/tools/miri/tests/deps/Cargo.lock b/src/tools/miri/tests/deps/Cargo.lock index 65ca4215c6001..187411588e802 100644 --- a/src/tools/miri/tests/deps/Cargo.lock +++ b/src/tools/miri/tests/deps/Cargo.lock @@ -72,6 +72,95 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-executor" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-macro", + "futures-sink", + "futures-task", + "memchr", + "pin-project-lite", + "pin-utils", + "slab", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -190,6 +279,7 @@ name = "miri-test-deps" version = "0.1.0" dependencies = [ "cfg-if", + "futures", "getrandom 0.1.16", "getrandom 0.2.16", "getrandom 0.3.3", @@ -242,6 +332,12 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "proc-macro2" version = "1.0.95" diff --git a/src/tools/miri/tests/deps/Cargo.toml b/src/tools/miri/tests/deps/Cargo.toml index d85723f0915fb..fe1586280b5ed 100644 --- a/src/tools/miri/tests/deps/Cargo.toml +++ b/src/tools/miri/tests/deps/Cargo.toml @@ -23,6 +23,7 @@ page_size = "0.6" # Avoid pulling in all of tokio's dependencies. # However, without `net` and `signal`, tokio uses fewer relevant system APIs. tokio = { version = "1", features = ["macros", "rt-multi-thread", "time", "net", "fs", "sync", "signal", "io-util"] } +futures = { version = "0.3.0", features = ["async-await"] } [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.60", features = [ diff --git a/src/tools/miri/tests/fail-dep/libc/libc-epoll-data-race.rs b/src/tools/miri/tests/fail-dep/libc/libc-epoll-data-race.rs index f6ec5be61bb60..bca886d375995 100644 --- a/src/tools/miri/tests/fail-dep/libc/libc-epoll-data-race.rs +++ b/src/tools/miri/tests/fail-dep/libc/libc-epoll-data-race.rs @@ -32,7 +32,7 @@ fn check_epoll_wait(epfd: i32, expected_notifications: &[(u32, u for (return_event, expected_event) in slice.iter().zip(expected_notifications.iter()) { let event = return_event.events; let data = return_event.u64; - assert_eq!(event, expected_event.0, "got wrong events"); + assert_eq!(event, expected_event.0, "got wrong events bitmask"); assert_eq!(data, expected_event.1, "got wrong data"); } } diff --git a/src/tools/miri/tests/pass-dep/libc/libc-epoll-blocking.rs b/src/tools/miri/tests/pass-dep/libc/libc-epoll-blocking.rs index c97206487a101..6609dd6e25501 100644 --- a/src/tools/miri/tests/pass-dep/libc/libc-epoll-blocking.rs +++ b/src/tools/miri/tests/pass-dep/libc/libc-epoll-blocking.rs @@ -16,6 +16,7 @@ fn main() { test_epoll_block_then_unblock(); test_notification_after_timeout(); test_epoll_race(); + wakeup_on_new_interest(); } // Using `as` cast since `EPOLLET` wraps around @@ -179,3 +180,41 @@ fn test_epoll_race() { }; thread1.join().unwrap(); } + +/// Ensure that a blocked thread gets woken up when new interested are registered with the +/// epoll it is blocked on. +fn wakeup_on_new_interest() { + // Create an epoll instance. + let epfd = unsafe { libc::epoll_create1(0) }; + assert_ne!(epfd, -1); + + // Create a socketpair instance. + let mut fds = [-1, -1]; + let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) }; + assert_eq!(res, 0); + + // Write to fd[0] + let data = "abcde".as_bytes().as_ptr(); + let res = unsafe { libc_utils::write_all(fds[0], data as *const libc::c_void, 5) }; + assert_eq!(res, 5); + + // Block a thread on the epoll instance. + let t = std::thread::spawn(move || { + let expected_event = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap(); + let expected_value = u64::try_from(fds[1]).unwrap(); + check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)], -1); + }); + // Ensure the thread is blocked. + std::thread::yield_now(); + + // Register fd[1] with EPOLLIN|EPOLLOUT|EPOLLET|EPOLLRDHUP + let mut ev = libc::epoll_event { + events: (libc::EPOLLIN | libc::EPOLLOUT | libc::EPOLLET | libc::EPOLLRDHUP) as _, + u64: u64::try_from(fds[1]).unwrap(), + }; + let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) }; + assert_eq!(res, 0); + + // This should wake up the thread. + t.join().unwrap(); +} diff --git a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs index 7130790b86d61..01dfbbfaaee4f 100644 --- a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs +++ b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs @@ -70,16 +70,16 @@ fn test_epoll_socketpair() { let res = unsafe { libc_utils::write_all(fds[0], data as *const libc::c_void, 5) }; assert_eq!(res, 5); - // Register fd[1] with EPOLLIN|EPOLLOUT|EPOLLET|EPOLLRDHUP + // Register fd[1] with EPOLLOUT|EPOLLET|EPOLLRDHUP but NOT EPOLLIN let mut ev = libc::epoll_event { - events: (libc::EPOLLIN | libc::EPOLLOUT | libc::EPOLLET | libc::EPOLLRDHUP) as _, + events: (libc::EPOLLOUT | libc::EPOLLET | libc::EPOLLRDHUP) as _, u64: u64::try_from(fds[1]).unwrap(), }; let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) }; assert_eq!(res, 0); - // Check result from epoll_wait. - let expected_event = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap(); + // Check result from epoll_wait. EPOLLIN should be masked away. + let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); let expected_value = u64::try_from(fds[1]).unwrap(); check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); @@ -101,8 +101,7 @@ fn test_epoll_socketpair() { // Check result from epoll_wait. // We expect to get a read, write, HUP notification from the close since closing an FD always unblocks reads and writes on its peer. - let expected_event = - u32::try_from(libc::EPOLLRDHUP | libc::EPOLLIN | libc::EPOLLOUT | libc::EPOLLHUP).unwrap(); + let expected_event = u32::try_from(libc::EPOLLRDHUP | libc::EPOLLOUT | libc::EPOLLHUP).unwrap(); let expected_value = u64::try_from(fds[1]).unwrap(); check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); } From ac12e462c7611bc9e6495de23c6c29e5c3b22d07 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Sun, 9 Nov 2025 18:42:49 +0100 Subject: [PATCH 02/13] simplify epoll data structures: dont make the interests themselves into refcounted objects --- src/tools/miri/src/shims/files.rs | 46 ++-- .../miri/src/shims/unix/linux_like/epoll.rs | 245 +++++++++--------- .../miri/src/shims/unix/linux_like/eventfd.rs | 3 +- .../miri/src/shims/unix/unnamed_socket.rs | 3 +- src/tools/miri/src/shims/windows/fs.rs | 6 +- src/tools/miri/tests/deps/Cargo.lock | 96 ------- src/tools/miri/tests/deps/Cargo.toml | 1 - .../fail-dep/libc/libc-epoll-data-race.rs | 11 +- .../pass-dep/libc/libc-epoll-blocking.rs | 16 +- .../pass-dep/libc/libc-epoll-no-blocking.rs | 83 +++--- 10 files changed, 191 insertions(+), 319 deletions(-) diff --git a/src/tools/miri/src/shims/files.rs b/src/tools/miri/src/shims/files.rs index 22c2e3d083529..ea19c2d5016e0 100644 --- a/src/tools/miri/src/shims/files.rs +++ b/src/tools/miri/src/shims/files.rs @@ -14,7 +14,7 @@ use crate::*; /// A unique id for file descriptions. While we could use the address, considering that /// is definitely unique, the address would expose interpreter internal state when used -/// for sorting things. So instead we generate a unique id per file description is the name +/// for sorting things. So instead we generate a unique id per file description which is the same /// for all `dup`licates and is never reused. #[derive(Debug, Copy, Clone, Default, Eq, PartialEq, Ord, PartialOrd)] pub struct FdId(usize); @@ -48,25 +48,11 @@ impl FileDescriptionRef { pub fn id(&self) -> FdId { self.0.id } -} - -impl PartialEq for FileDescriptionRef { - fn eq(&self, other: &Self) -> bool { - self.id() == other.id() - } -} - -impl Eq for FileDescriptionRef {} -impl PartialOrd for FileDescriptionRef { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl Ord for FileDescriptionRef { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - self.id().cmp(&other.id()) + /// Returns the raw address of this file description. Useful for equality comparisons. + /// Use `id` instead if this can affect user-visible behavior! + pub fn addr(&self) -> usize { + Rc::as_ptr(&self.0).addr() } } @@ -90,6 +76,11 @@ impl WeakFileDescriptionRef { pub fn upgrade(&self) -> Option> { self.0.upgrade().map(FileDescriptionRef) } + + /// Returns the raw address of this file description. Useful for equality comparisons. + pub fn addr(&self) -> usize { + self.0.as_ptr().addr() + } } impl VisitProvenance for WeakFileDescriptionRef { @@ -125,12 +116,13 @@ impl FileDescriptionExt for T { communicate_allowed: bool, ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { + let addr = self.addr(); match Rc::into_inner(self.0) { Some(fd) => { - // Remove entry from the global epoll_event_interest table. - ecx.machine.epoll_interests.remove(fd.id); + // There might have been epolls interested in this FD. Remove that. + ecx.machine.epoll_interests.remove_epolls(fd.id); - fd.inner.close(communicate_allowed, ecx) + fd.inner.destroy(addr, communicate_allowed, ecx) } None => { // Not the last reference. @@ -203,9 +195,12 @@ pub trait FileDescription: std::fmt::Debug + FileDescriptionExt { throw_unsup_format!("cannot seek on {}", self.name()); } - /// Close the file descriptor. - fn close<'tcx>( + /// Destroys the file description. Only called when the last duplicate file descriptor is closed. + /// + /// `self_addr` is the address that this file description used to be stored at. + fn destroy<'tcx>( self, + _self_addr: usize, _communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> @@ -382,8 +377,9 @@ impl FileDescription for FileHandle { interp_ok((&mut &self.file).seek(offset)) } - fn close<'tcx>( + fn destroy<'tcx>( self, + _self_addr: usize, communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { diff --git a/src/tools/miri/src/shims/unix/linux_like/epoll.rs b/src/tools/miri/src/shims/unix/linux_like/epoll.rs index 4958cdfa5dc4b..d832d1078240e 100644 --- a/src/tools/miri/src/shims/unix/linux_like/epoll.rs +++ b/src/tools/miri/src/shims/unix/linux_like/epoll.rs @@ -1,30 +1,33 @@ use std::cell::RefCell; -use std::collections::{BTreeMap, BTreeSet}; +use std::collections::BTreeMap; use std::io; -use std::rc::{Rc, Weak}; use std::time::Duration; use rustc_abi::FieldIdx; use crate::concurrency::VClock; use crate::shims::files::{ - DynFileDescriptionRef, FdId, FileDescription, FileDescriptionRef, WeakFileDescriptionRef, + DynFileDescriptionRef, FdId, FdNum, FileDescription, FileDescriptionRef, WeakFileDescriptionRef, }; use crate::shims::unix::UnixFileDescription; use crate::*; +type EpollEventKey = (FdId, FdNum); + /// An `Epoll` file descriptor connects file handles and epoll events #[derive(Debug, Default)] struct Epoll { /// A map of EpollEventInterests registered under this epoll instance. /// Each entry is differentiated using FdId and file descriptor value. - /// The interest are separately refcounted so that we can track, for each FD, what epoll - /// instances are currently interested in it. - interest_list: RefCell>>>, + interest_list: RefCell>, /// A map of EpollEventInstance that will be returned when `epoll_wait` is called. /// Similar to interest_list, the entry is also differentiated using FdId /// and file descriptor value. - ready_list: RefCell>, + /// We keep this separate from `interest_list` for two reasons: there might be many + /// interests but only a few of them ready (so with a separate list it is more efficient + /// to find a ready event), and having separate `RefCell` lets us mutate the `interest_list` + /// while unblocking threads which might mutate the `ready_list`. + ready_list: RefCell>, /// A list of thread ids blocked on this epoll instance. blocked_tid: RefCell>, } @@ -35,6 +38,11 @@ impl VisitProvenance for Epoll { } } +/// Returns the range of all EpollEventKey for the given FD ID. +fn range_for_id(id: FdId) -> std::ops::RangeInclusive { + (id, 0)..=(id, i32::MAX) +} + /// EpollEventInstance contains information that will be returned by epoll_wait. #[derive(Debug)] pub struct EpollEventInstance { @@ -61,13 +69,8 @@ impl EpollEventInstance { /// see the man page: /// /// -#[derive(Debug)] +#[derive(Debug, Copy, Clone)] pub struct EpollEventInterest { - /// The file descriptor value of the file description registered. - /// This is only used for ready_list, to inform userspace which FD triggered an event. - /// For that, it is crucial to preserve the original FD number. - /// This FD number must never be "dereferenced" to a file description inside Miri. - fd_num: i32, /// The events bitmask retrieved from `epoll_event`. events: u32, /// The data retrieved from `epoll_event`. @@ -75,13 +78,10 @@ pub struct EpollEventInterest { /// but only u64 is supported for now. /// data: u64, - /// The epoll file description that this EpollEventInterest is registered under. - /// This is weak to avoid cycles, but an upgrade is always guaranteed to succeed - /// because only the `Epoll` holds a strong ref to a `EpollEventInterest`. - weak_epfd: WeakFileDescriptionRef, } /// EpollReadyEvents reflects the readiness of a file description. +#[derive(Debug)] pub struct EpollReadyEvents { /// The associated file is available for read(2) operations, in the sense that a read will not block. /// (I.e., returning EOF is considered "ready".) @@ -144,11 +144,18 @@ impl FileDescription for Epoll { "epoll" } - fn close<'tcx>( - self, + fn destroy<'tcx>( + mut self, + self_addr: usize, _communicate_allowed: bool, - _ecx: &mut MiriInterpCx<'tcx>, + ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { + // If we were interested in some FDs, we can remove that now. + let mut ids = self.interest_list.get_mut().keys().map(|(id, _num)| *id).collect::>(); + ids.dedup(); // they come out of the map sorted + for id in ids { + ecx.machine.epoll_interests.remove(id, self_addr); + } interp_ok(Ok(())) } @@ -160,41 +167,54 @@ impl FileDescription for Epoll { impl UnixFileDescription for Epoll {} /// The table of all EpollEventInterest. -/// The BTreeMap key is the FdId of an active file description registered with -/// any epoll instance. The value is a list of EpollEventInterest associated -/// with that file description. -pub struct EpollInterestTable(BTreeMap>>>); +/// This tracks, for each file description, which epoll instances have an interest in events +/// for this file description. +pub struct EpollInterestTable(BTreeMap>>); impl EpollInterestTable { pub(crate) fn new() -> Self { EpollInterestTable(BTreeMap::new()) } - pub fn insert_epoll_interest(&mut self, id: FdId, fd: Weak>) { - match self.0.get_mut(&id) { - Some(fds) => { - fds.push(fd); - } - None => { - let vec = vec![fd]; - self.0.insert(id, vec); - } - } + fn insert(&mut self, id: FdId, epoll: WeakFileDescriptionRef) { + let epolls = self.0.entry(id).or_default(); + epolls.push(epoll); } - pub fn get_epoll_interest(&self, id: FdId) -> Option<&[Weak>]> { - self.0.get(&id).map(|v| &**v) + fn remove(&mut self, id: FdId, epoll_addr: usize) { + let epolls = self.0.entry(id).or_default(); + // FIXME: linear scan. Keep the list sorted so we can do binary search? + let idx = epolls + .iter() + .position(|old_ref| old_ref.addr() == epoll_addr) + .expect("trying to remove an epoll that's not in the list"); + epolls.remove(idx); } - pub fn get_epoll_interest_mut( - &mut self, - id: FdId, - ) -> Option<&mut Vec>>> { - self.0.get_mut(&id) + fn get_epolls(&self, id: FdId) -> Option<&Vec>> { + self.0.get(&id) } - pub fn remove(&mut self, id: FdId) { - self.0.remove(&id); + pub fn remove_epolls(&mut self, id: FdId) { + if let Some(epolls) = self.0.remove(&id) { + for epoll in epolls.iter().filter_map(|e| e.upgrade()) { + // This is a still-live epoll with interest in this FD. Remove all + // relevent interests. + epoll + .interest_list + .borrow_mut() + .extract_if(range_for_id(id), |_, _| true) + // Consume the iterator. + .for_each(|_| ()); + // Also remove all events from the ready list that refer to this FD. + epoll + .ready_list + .borrow_mut() + .extract_if(range_for_id(id), |_, _| true) + // Consume the iterator. + .for_each(|_| ()); + } + } } } @@ -261,9 +281,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let epollhup = this.eval_libc_u32("EPOLLHUP"); let epollerr = this.eval_libc_u32("EPOLLERR"); - // Throw EINVAL if epfd and fd have the same value. + // Throw EFAULT if epfd and fd have the same value. if epfd_value == fd { - return this.set_last_error_and_return_i32(LibcError("EINVAL")); + return this.set_last_error_and_return_i32(LibcError("EFAULT")); } // Check if epfd is a valid epoll file descriptor. @@ -323,43 +343,33 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { ); } + // Add new interest to list. let epoll_key = (id, fd); - - let new_interest = if op == epoll_ctl_add { - // Create an epoll_interest. - let interest = Rc::new(RefCell::new(EpollEventInterest { - fd_num: fd, - events, - data, - weak_epfd: FileDescriptionRef::downgrade(&epfd), - })); - // Register it in the right places. - this.machine.epoll_interests.insert_epoll_interest(id, Rc::downgrade(&interest)); - if interest_list.insert(epoll_key, interest.clone()).is_some() { + let new_interest = EpollEventInterest { events, data }; + if op == epoll_ctl_add { + if interest_list.range(range_for_id(id)).next().is_none() { + // This is the first time this FD got added to this epoll. + // Remember that in the global list so we get notified about FD events. + this.machine.epoll_interests.insert(id, FileDescriptionRef::downgrade(&epfd)); + } + if interest_list.insert(epoll_key, new_interest).is_some() { // We already had interest in this. return this.set_last_error_and_return_i32(LibcError("EEXIST")); } - - interest } else { // Modify the existing interest. let Some(interest) = interest_list.get_mut(&epoll_key) else { return this.set_last_error_and_return_i32(LibcError("ENOENT")); }; - { - let mut interest = interest.borrow_mut(); - interest.events = events; - interest.data = data; - } - interest.clone() + *interest = new_interest; }; // Deliver events for the new interest. send_ready_events_to_interests( this, + &epfd, fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this), - id, - std::iter::once(new_interest), + std::iter::once((&epoll_key, &new_interest)), )?; interp_ok(Scalar::from_i32(0)) @@ -367,25 +377,19 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let epoll_key = (id, fd); // Remove epoll_event_interest from interest_list. - let Some(epoll_interest) = interest_list.remove(&epoll_key) else { + if interest_list.remove(&epoll_key).is_none() { + // We did not have interest in this. return this.set_last_error_and_return_i32(LibcError("ENOENT")); }; - // All related Weak will fail to upgrade after the drop. - drop(epoll_interest); + // If this was the last interest in this FD, remove us from the global list + // of who is interested in this FD. + if interest_list.range(range_for_id(id)).next().is_none() { + this.machine.epoll_interests.remove(id, epfd.addr()); + } // Remove related epoll_interest from ready list. epfd.ready_list.borrow_mut().remove(&epoll_key); - // Remove dangling EpollEventInterest from its global table. - // .unwrap() below should succeed because the file description id must have registered - // at least one epoll_interest, if not, it will fail when removing epoll_interest from - // interest list. - this.machine - .epoll_interests - .get_epoll_interest_mut(id) - .unwrap() - .retain(|event| event.upgrade().is_some()); - interp_ok(Scalar::from_i32(0)) } else { throw_unsup_format!("unsupported epoll_ctl operation: {op}"); @@ -523,30 +527,43 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { /// do not call this function when an FD didn't have anything happen to it! fn epoll_send_fd_ready_events(&mut self, fd_ref: DynFileDescriptionRef) -> InterpResult<'tcx> { let this = self.eval_context_mut(); - let ready_events_bitmask = - fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this); let id = fd_ref.id(); // Figure out who is interested in this. We need to clone this list since we can't prove - // that `send_ready_events_to_interests` won't mutate it. - let interests = this.machine.epoll_interests.get_epoll_interest(id).unwrap_or(&[]); - let interests = interests.iter().filter_map(|weak| weak.upgrade()).collect::>(); - send_ready_events_to_interests(this, ready_events_bitmask, id, interests.into_iter()) + // that `send_ready_events_to_interest` won't mutate it. + let Some(epolls) = this.machine.epoll_interests.get_epolls(id) else { + return interp_ok(()); + }; + let epolls = epolls + .iter() + .map(|weak| { + weak.upgrade() + .expect("someone forgot to remove the garbage from `machine.epoll_interests`") + }) + .collect::>(); + let event_bitmask = fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this); + for epoll in epolls { + send_ready_events_to_interests( + this, + &epoll, + event_bitmask, + epoll.interest_list.borrow().range(range_for_id(id)), + )?; + } + + interp_ok(()) } } -/// Send the latest ready events for one particular FD (identified by `event_fd_id`) to everyone in +/// Send the latest ready events for one particular FD (identified by `event_key`) to everyone in /// the `interests` list, if they are interested in this kind of event. -fn send_ready_events_to_interests<'tcx>( +fn send_ready_events_to_interests<'tcx, 'a>( ecx: &mut MiriInterpCx<'tcx>, + epoll: &Epoll, event_bitmask: u32, - event_fd_id: FdId, - interests: impl Iterator>>, + interests: impl Iterator, ) -> InterpResult<'tcx> { - #[allow(clippy::mutable_key_type)] // yeah, we know - let mut triggered_epolls = BTreeSet::new(); - for interest in interests { - let interest = interest.borrow(); - let epfd = interest.weak_epfd.upgrade().unwrap(); + let mut wakeup = false; + for (&event_key, interest) in interests { // This checks if any of the events specified in epoll_event_interest.events // match those in ready_events. let flags = interest.events & event_bitmask; @@ -560,19 +577,15 @@ fn send_ready_events_to_interests<'tcx>( })?; // Add event to ready list for this epoll instance. // Tests confirm that we have to *overwrite* the old instance for the same key. - let mut ready_list = epfd.ready_list.borrow_mut(); - ready_list.insert((event_fd_id, interest.fd_num), new_instance); - drop(ready_list); - // Remember to wake up this epoll later. - // (We might encounter the same epoll multiple times if there are multiple interests for - // different file descriptors that references the same file description.) - triggered_epolls.insert(epfd); + let mut ready_list = epoll.ready_list.borrow_mut(); + ready_list.insert(event_key, new_instance); + wakeup = true; } - - // For each epoll instance where an interest triggered, wake up one thread. - for epoll in triggered_epolls { + if wakeup { + // Wake up threads that may have been waiting for events on this epoll. + // Do this only once for all the interests. // Edge-triggered notification only notify one thread even if there are - // multiple threads blocked on the same epfd. + // multiple threads blocked on the same epoll. if let Some(thread_id) = epoll.blocked_tid.borrow_mut().pop() { ecx.unblock_thread(thread_id, BlockReason::Epoll)?; } @@ -581,24 +594,6 @@ fn send_ready_events_to_interests<'tcx>( interp_ok(()) } -/// This function takes in ready list and returns EpollEventInstance with file description -/// that is not closed. -fn ready_list_next( - ecx: &MiriInterpCx<'_>, - ready_list: &mut BTreeMap<(FdId, i32), EpollEventInstance>, -) -> Option { - while let Some((epoll_key, epoll_event_instance)) = ready_list.pop_first() { - // This ensures that we only return events that we are interested. The FD might have been closed since - // the event was generated, in which case we are not interested anymore. - // When a file description is fully closed, it gets removed from `machine.epoll_interests`, - // so we skip events whose FD is not in that map anymore. - if ecx.machine.epoll_interests.get_epoll_interest(epoll_key.0).is_some() { - return Some(epoll_event_instance); - } - } - None -} - /// Stores the ready list of the `epfd` epoll instance into `events` (which must be an array), /// and the number of returned events into `dest`. fn return_ready_list<'tcx>( @@ -612,7 +607,7 @@ fn return_ready_list<'tcx>( let mut array_iter = ecx.project_array_fields(events)?; while let Some(des) = array_iter.next(ecx)? { - if let Some(epoll_event_instance) = ready_list_next(ecx, &mut ready_list) { + if let Some((_, epoll_event_instance)) = ready_list.pop_first() { ecx.write_int_fields_named( &[ ("events", epoll_event_instance.events.into()), diff --git a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs index ea68698df2cdc..ce51f257935be 100644 --- a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs +++ b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs @@ -37,8 +37,9 @@ impl FileDescription for EventFd { "event" } - fn close<'tcx>( + fn destroy<'tcx>( self, + _self_addr: usize, _communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { diff --git a/src/tools/miri/src/shims/unix/unnamed_socket.rs b/src/tools/miri/src/shims/unix/unnamed_socket.rs index d0a5ba911b3f6..cfd453df2679b 100644 --- a/src/tools/miri/src/shims/unix/unnamed_socket.rs +++ b/src/tools/miri/src/shims/unix/unnamed_socket.rs @@ -82,8 +82,9 @@ impl FileDescription for AnonSocket { } } - fn close<'tcx>( + fn destroy<'tcx>( self, + _self_addr: usize, _communicate_allowed: bool, ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { diff --git a/src/tools/miri/src/shims/windows/fs.rs b/src/tools/miri/src/shims/windows/fs.rs index e4ec1b0130c9d..936bbf06ebf80 100644 --- a/src/tools/miri/src/shims/windows/fs.rs +++ b/src/tools/miri/src/shims/windows/fs.rs @@ -24,8 +24,9 @@ impl FileDescription for DirHandle { interp_ok(self.path.metadata()) } - fn close<'tcx>( + fn destroy<'tcx>( self, + _self_addr: usize, _communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { @@ -50,8 +51,9 @@ impl FileDescription for MetadataHandle { interp_ok(Ok(self.meta.clone())) } - fn close<'tcx>( + fn destroy<'tcx>( self, + _self_addr: usize, _communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { diff --git a/src/tools/miri/tests/deps/Cargo.lock b/src/tools/miri/tests/deps/Cargo.lock index 187411588e802..65ca4215c6001 100644 --- a/src/tools/miri/tests/deps/Cargo.lock +++ b/src/tools/miri/tests/deps/Cargo.lock @@ -72,95 +72,6 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" -[[package]] -name = "futures" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" -dependencies = [ - "futures-channel", - "futures-core", - "futures-executor", - "futures-io", - "futures-sink", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-channel" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" -dependencies = [ - "futures-core", - "futures-sink", -] - -[[package]] -name = "futures-core" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" - -[[package]] -name = "futures-executor" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e28d1d997f585e54aebc3f97d39e72338912123a67330d723fdbb564d646c9f" -dependencies = [ - "futures-core", - "futures-task", - "futures-util", -] - -[[package]] -name = "futures-io" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" - -[[package]] -name = "futures-macro" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - -[[package]] -name = "futures-sink" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" - -[[package]] -name = "futures-task" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" - -[[package]] -name = "futures-util" -version = "0.3.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" -dependencies = [ - "futures-channel", - "futures-core", - "futures-io", - "futures-macro", - "futures-sink", - "futures-task", - "memchr", - "pin-project-lite", - "pin-utils", - "slab", -] - [[package]] name = "getrandom" version = "0.1.16" @@ -279,7 +190,6 @@ name = "miri-test-deps" version = "0.1.0" dependencies = [ "cfg-if", - "futures", "getrandom 0.1.16", "getrandom 0.2.16", "getrandom 0.3.3", @@ -332,12 +242,6 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" -[[package]] -name = "pin-utils" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" - [[package]] name = "proc-macro2" version = "1.0.95" diff --git a/src/tools/miri/tests/deps/Cargo.toml b/src/tools/miri/tests/deps/Cargo.toml index fe1586280b5ed..d85723f0915fb 100644 --- a/src/tools/miri/tests/deps/Cargo.toml +++ b/src/tools/miri/tests/deps/Cargo.toml @@ -23,7 +23,6 @@ page_size = "0.6" # Avoid pulling in all of tokio's dependencies. # However, without `net` and `signal`, tokio uses fewer relevant system APIs. tokio = { version = "1", features = ["macros", "rt-multi-thread", "time", "net", "fs", "sync", "signal", "io-util"] } -futures = { version = "0.3.0", features = ["async-await"] } [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.60", features = [ diff --git a/src/tools/miri/tests/fail-dep/libc/libc-epoll-data-race.rs b/src/tools/miri/tests/fail-dep/libc/libc-epoll-data-race.rs index bca886d375995..3b1217cda126a 100644 --- a/src/tools/miri/tests/fail-dep/libc/libc-epoll-data-race.rs +++ b/src/tools/miri/tests/fail-dep/libc/libc-epoll-data-race.rs @@ -28,13 +28,10 @@ fn check_epoll_wait(epfd: i32, expected_notifications: &[(u32, u expected_notifications.len().try_into().unwrap(), "got wrong number of notifications" ); - let slice = unsafe { std::slice::from_raw_parts(array_ptr, res.try_into().unwrap()) }; - for (return_event, expected_event) in slice.iter().zip(expected_notifications.iter()) { - let event = return_event.events; - let data = return_event.u64; - assert_eq!(event, expected_event.0, "got wrong events bitmask"); - assert_eq!(data, expected_event.1, "got wrong data"); - } + let got_notifications = + unsafe { std::slice::from_raw_parts(array_ptr, res.try_into().unwrap()) }; + let got_notifications = got_notifications.iter().map(|e| (e.events, e.u64)).collect::>(); + assert_eq!(got_notifications, expected_notifications, "got wrong notifications"); } fn main() { diff --git a/src/tools/miri/tests/pass-dep/libc/libc-epoll-blocking.rs b/src/tools/miri/tests/pass-dep/libc/libc-epoll-blocking.rs index 6609dd6e25501..81d4e501c43a6 100644 --- a/src/tools/miri/tests/pass-dep/libc/libc-epoll-blocking.rs +++ b/src/tools/miri/tests/pass-dep/libc/libc-epoll-blocking.rs @@ -36,18 +36,10 @@ fn check_epoll_wait( if res < 0 { panic!("epoll_wait failed: {}", std::io::Error::last_os_error()); } - assert_eq!( - res, - expected_notifications.len().try_into().unwrap(), - "got wrong number of notifications" - ); - let slice = unsafe { std::slice::from_raw_parts(array_ptr, res.try_into().unwrap()) }; - for (return_event, expected_event) in slice.iter().zip(expected_notifications.iter()) { - let event = return_event.events; - let data = return_event.u64; - assert_eq!(event, expected_event.0, "got wrong events"); - assert_eq!(data, expected_event.1, "got wrong data"); - } + let got_notifications = + unsafe { std::slice::from_raw_parts(array_ptr, res.try_into().unwrap()) }; + let got_notifications = got_notifications.iter().map(|e| (e.events, e.u64)).collect::>(); + assert_eq!(got_notifications, expected_notifications, "got wrong notifications"); } // This test allows epoll_wait to block, then unblock without notification. diff --git a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs index 01dfbbfaaee4f..c39192766d2a5 100644 --- a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs +++ b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs @@ -41,18 +41,10 @@ fn check_epoll_wait(epfd: i32, expected_notifications: &[(u32, u if res < 0 { panic!("epoll_wait failed: {}", std::io::Error::last_os_error()); } - assert_eq!( - res, - expected_notifications.len().try_into().unwrap(), - "got wrong number of notifications" - ); - let slice = unsafe { std::slice::from_raw_parts(array_ptr, res.try_into().unwrap()) }; - for (return_event, expected_event) in slice.iter().zip(expected_notifications.iter()) { - let event = return_event.events; - let data = return_event.u64; - assert_eq!(event, expected_event.0, "got wrong events"); - assert_eq!(data, expected_event.1, "got wrong data"); - } + let got_notifications = + unsafe { std::slice::from_raw_parts(array_ptr, res.try_into().unwrap()) }; + let got_notifications = got_notifications.iter().map(|e| (e.events, e.u64)).collect::>(); + assert_eq!(got_notifications, expected_notifications, "got wrong notifications"); } fn test_epoll_socketpair() { @@ -70,16 +62,16 @@ fn test_epoll_socketpair() { let res = unsafe { libc_utils::write_all(fds[0], data as *const libc::c_void, 5) }; assert_eq!(res, 5); - // Register fd[1] with EPOLLOUT|EPOLLET|EPOLLRDHUP but NOT EPOLLIN + // Register fd[1] with EPOLLIN|EPOLLOUT|EPOLLET|EPOLLRDHUP let mut ev = libc::epoll_event { - events: (libc::EPOLLOUT | libc::EPOLLET | libc::EPOLLRDHUP) as _, + events: (libc::EPOLLIN | libc::EPOLLOUT | libc::EPOLLET | libc::EPOLLRDHUP) as _, u64: u64::try_from(fds[1]).unwrap(), }; let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) }; assert_eq!(res, 0); - // Check result from epoll_wait. EPOLLIN should be masked away. - let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); + // Check result from epoll_wait. + let expected_event = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap(); let expected_value = u64::try_from(fds[1]).unwrap(); check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); @@ -101,7 +93,8 @@ fn test_epoll_socketpair() { // Check result from epoll_wait. // We expect to get a read, write, HUP notification from the close since closing an FD always unblocks reads and writes on its peer. - let expected_event = u32::try_from(libc::EPOLLRDHUP | libc::EPOLLOUT | libc::EPOLLHUP).unwrap(); + let expected_event = + u32::try_from(libc::EPOLLIN | libc::EPOLLOUT | libc::EPOLLHUP | libc::EPOLLRDHUP).unwrap(); let expected_value = u64::try_from(fds[1]).unwrap(); check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); } @@ -229,7 +222,7 @@ fn test_two_same_fd_in_same_epoll_instance() { let res = unsafe { libc_utils::write_all(fds[0], data as *const libc::c_void, 5) }; assert_eq!(res, 5); - //Two notification should be received. + // Two notification should be received. let expected_event = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap(); let expected_value = 5 as u64; check_epoll_wait::<8>( @@ -287,7 +280,7 @@ fn test_epoll_socketpair_both_sides() { let res = unsafe { libc_utils::write_all(fds[1], data as *const libc::c_void, 5) }; assert_eq!(res, 5); - //Two notification should be received. + // Two notification should be received. let expected_event0 = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap(); let expected_value0 = fds[0] as u64; let expected_event1 = u32::try_from(libc::EPOLLOUT).unwrap(); @@ -447,12 +440,13 @@ fn test_socketpair_read() { let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) }; assert_eq!(res, 0); - // Write 5 bytes to fds[1]. - let data = "abcde".as_bytes().as_ptr(); - let res = unsafe { libc_utils::write_all(fds[1], data as *const libc::c_void, 5) }; - assert_eq!(res, 5); + // Write a bunch of data bytes to fds[1]. + let data = [42u8; 40000]; + let res = + unsafe { libc_utils::write_all(fds[1], data.as_ptr() as *const libc::c_void, data.len()) }; + assert_eq!(res, data.len() as isize); - //Two notification should be received. + // Two notification should be received. let expected_event0 = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap(); let expected_value0 = fds[0] as u64; let expected_event1 = u32::try_from(libc::EPOLLOUT).unwrap(); @@ -462,36 +456,26 @@ fn test_socketpair_read() { &[(expected_event0, expected_value0), (expected_event1, expected_value1)], ); - // Read 3 bytes from fds[0]. - let mut buf: [u8; 3] = [0; 3]; + // Read a lof of databytes from fds[0]. + // If we make this read too small, Linux won't actually trigger a notification. + // So to ensure the test works on real systems, we make a sizable read. + let mut buf = [0; 38000]; let res = unsafe { libc_utils::read_all(fds[0], buf.as_mut_ptr().cast(), buf.len() as libc::size_t) }; - assert_eq!(res, 3); - assert_eq!(buf, "abc".as_bytes()); + assert_eq!(res, buf.len() as isize); - // Notification will be provided in Miri. - // But in real systems, no notification will be provided here, since Linux prefers to avoid - // wakeups that are likely to lead to only small amounts of data being read/written. - // We make the test work in both cases, thus documenting the difference in behavior. + // fds[1] can be written now. let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); let expected_value = fds[1] as u64; - if cfg!(miri) { - check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); - } else { - check_epoll_wait::<8>(epfd, &[]); - } + check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); // Read until the buffer is empty. - let mut buf: [u8; 2] = [0; 2]; + let rest = data.len() - buf.len(); let res = - unsafe { libc_utils::read_all(fds[0], buf.as_mut_ptr().cast(), buf.len() as libc::size_t) }; - assert_eq!(res, 2); - assert_eq!(buf, "de".as_bytes()); + unsafe { libc_utils::read_all(fds[0], buf.as_mut_ptr().cast(), rest as libc::size_t) }; + assert_eq!(res, rest as isize); - // Notification will be provided. - // In real system, notification will be provided too. - let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); - let expected_value = fds[1] as u64; + // Again we get a notification that fds[1] can be written. check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); } @@ -511,7 +495,7 @@ fn test_no_notification_for_unregister_flag() { events: (libc::EPOLLOUT | libc::EPOLLET) as _, u64: u64::try_from(fds[0]).unwrap(), }; - let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) }; + let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[0], &mut ev) }; assert_eq!(res, 0); // Write to fd[1]. @@ -598,7 +582,7 @@ fn test_epoll_lost_events() { let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) }; assert_eq!(res, 0); - //Two notification should be received. But we only provide buffer for one event. + // Two notification should be received. But we only provide buffer for one event. let expected_event0 = u32::try_from(libc::EPOLLOUT).unwrap(); let expected_value0 = fds[0] as u64; check_epoll_wait::<1>(epfd, &[(expected_event0, expected_value0)]); @@ -640,7 +624,8 @@ fn test_ready_list_fetching_logic() { check_epoll_wait::<1>(epfd, &[(expected_event1, expected_value1)]); } -// In epoll_ctl, if the value of epfd equals to fd, EINVAL should be returned. +// In epoll_ctl, if the value of epfd equals to fd, EFAULT should be returned. +// (The docs say loops cause EINVAL, but experiments show it is EFAULT.) fn test_epoll_ctl_epfd_equal_fd() { // Create an epoll instance. let epfd = unsafe { libc::epoll_create1(0) }; @@ -649,7 +634,7 @@ fn test_epoll_ctl_epfd_equal_fd() { let array_ptr = std::ptr::without_provenance_mut::(0x100); let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, epfd, array_ptr) }; let e = std::io::Error::last_os_error(); - assert_eq!(e.raw_os_error(), Some(libc::EINVAL)); + assert_eq!(e.raw_os_error(), Some(libc::EFAULT)); assert_eq!(res, -1); } From 047ee0af61aa64a2a510c87663cbe989b35f5226 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 10 Nov 2025 01:08:11 +0100 Subject: [PATCH 03/13] epoll: when an event disappears entirely, remove it from the ready list --- src/tools/miri/src/shims/unix/linux_like/epoll.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tools/miri/src/shims/unix/linux_like/epoll.rs b/src/tools/miri/src/shims/unix/linux_like/epoll.rs index d832d1078240e..48585567571c6 100644 --- a/src/tools/miri/src/shims/unix/linux_like/epoll.rs +++ b/src/tools/miri/src/shims/unix/linux_like/epoll.rs @@ -564,10 +564,14 @@ fn send_ready_events_to_interests<'tcx, 'a>( ) -> InterpResult<'tcx> { let mut wakeup = false; for (&event_key, interest) in interests { + let mut ready_list = epoll.ready_list.borrow_mut(); // This checks if any of the events specified in epoll_event_interest.events // match those in ready_events. let flags = interest.events & event_bitmask; if flags == 0 { + // Make sure we *remove* any previous item from the ready list, since this + // is not ready any more. + ready_list.remove(&event_key); continue; } // Geenrate a new event instance, with the flags that this one is interested in. @@ -577,7 +581,6 @@ fn send_ready_events_to_interests<'tcx, 'a>( })?; // Add event to ready list for this epoll instance. // Tests confirm that we have to *overwrite* the old instance for the same key. - let mut ready_list = epoll.ready_list.borrow_mut(); ready_list.insert(event_key, new_instance); wakeup = true; } From a8baf33dd5fc9660997c51cc863a9241eef6776a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 10 Nov 2025 01:16:52 +0100 Subject: [PATCH 04/13] epoll: do proper edge detection inside the epoll system --- .../miri/src/shims/unix/linux_like/epoll.rs | 75 ++++++++----- .../miri/src/shims/unix/linux_like/eventfd.rs | 4 +- .../miri/src/shims/unix/unnamed_socket.rs | 22 ++-- .../pass-dep/libc/libc-epoll-no-blocking.rs | 102 +++++++++++++++--- .../tests/pass-dep/libc/libc-socketpair.rs | 6 +- 5 files changed, 157 insertions(+), 52 deletions(-) diff --git a/src/tools/miri/src/shims/unix/linux_like/epoll.rs b/src/tools/miri/src/shims/unix/linux_like/epoll.rs index 48585567571c6..573517c3d9410 100644 --- a/src/tools/miri/src/shims/unix/linux_like/epoll.rs +++ b/src/tools/miri/src/shims/unix/linux_like/epoll.rs @@ -1,5 +1,5 @@ use std::cell::RefCell; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, btree_map}; use std::io; use std::time::Duration; @@ -55,8 +55,8 @@ pub struct EpollEventInstance { } impl EpollEventInstance { - pub fn new(events: u32, data: u64) -> EpollEventInstance { - EpollEventInstance { events, data, clock: Default::default() } + pub fn new(data: u64) -> EpollEventInstance { + EpollEventInstance { events: 0, data, clock: Default::default() } } } @@ -69,10 +69,12 @@ impl EpollEventInstance { /// see the man page: /// /// -#[derive(Debug, Copy, Clone)] +#[derive(Debug)] pub struct EpollEventInterest { /// The events bitmask retrieved from `epoll_event`. events: u32, + /// The way the events looked last time we checked (for edge trigger / ET detection). + prev_events: u32, /// The data retrieved from `epoll_event`. /// libc's data field in epoll_event can store integer or pointer, /// but only u64 is supported for now. @@ -345,23 +347,28 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Add new interest to list. let epoll_key = (id, fd); - let new_interest = EpollEventInterest { events, data }; - if op == epoll_ctl_add { + let new_interest = if op == epoll_ctl_add { if interest_list.range(range_for_id(id)).next().is_none() { // This is the first time this FD got added to this epoll. // Remember that in the global list so we get notified about FD events. this.machine.epoll_interests.insert(id, FileDescriptionRef::downgrade(&epfd)); } - if interest_list.insert(epoll_key, new_interest).is_some() { - // We already had interest in this. - return this.set_last_error_and_return_i32(LibcError("EEXIST")); + match interest_list.entry(epoll_key) { + btree_map::Entry::Occupied(_) => { + // We already had interest in this. + return this.set_last_error_and_return_i32(LibcError("EEXIST")); + } + btree_map::Entry::Vacant(e) => + e.insert(EpollEventInterest { events, data, prev_events: 0 }), } } else { // Modify the existing interest. let Some(interest) = interest_list.get_mut(&epoll_key) else { return this.set_last_error_and_return_i32(LibcError("ENOENT")); }; - *interest = new_interest; + interest.events = events; + // FIXME what about the data? + interest }; // Deliver events for the new interest. @@ -369,7 +376,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this, &epfd, fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this), - std::iter::once((&epoll_key, &new_interest)), + /* force_edge */ false, + std::iter::once((&epoll_key, new_interest)), )?; interp_ok(Scalar::from_i32(0)) @@ -387,7 +395,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this.machine.epoll_interests.remove(id, epfd.addr()); } - // Remove related epoll_interest from ready list. + // Remove related event instance from ready list. epfd.ready_list.borrow_mut().remove(&epoll_key); interp_ok(Scalar::from_i32(0)) @@ -519,13 +527,16 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } /// For a specific file description, get its ready events and send it to everyone who registered - /// interest in this FD. This function should be called whenever an event causes more bytes or - /// an EOF to become newly readable from an FD, and whenever more bytes can be written to an FD - /// or no more future writes are possible. + /// interest in this FD. This function should be called whenever the result of + /// `get_epoll_ready_events` would change. /// - /// This *will* report an event if anyone is subscribed to it, without any further filtering, so - /// do not call this function when an FD didn't have anything happen to it! - fn epoll_send_fd_ready_events(&mut self, fd_ref: DynFileDescriptionRef) -> InterpResult<'tcx> { + /// If `force_edge` is set, edge-triggered interests will be triggered even if the set of + /// ready events did not change. This can lead to spurious wakeups. Use with caution! + fn epoll_send_fd_ready_events( + &mut self, + fd_ref: DynFileDescriptionRef, + force_edge: bool, + ) -> InterpResult<'tcx> { let this = self.eval_context_mut(); let id = fd_ref.id(); // Figure out who is interested in this. We need to clone this list since we can't prove @@ -546,7 +557,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this, &epoll, event_bitmask, - epoll.interest_list.borrow().range(range_for_id(id)), + force_edge, + epoll.interest_list.borrow_mut().range_mut(range_for_id(id)), )?; } @@ -560,7 +572,8 @@ fn send_ready_events_to_interests<'tcx, 'a>( ecx: &mut MiriInterpCx<'tcx>, epoll: &Epoll, event_bitmask: u32, - interests: impl Iterator, + force_edge: bool, + interests: impl Iterator, ) -> InterpResult<'tcx> { let mut wakeup = false; for (&event_key, interest) in interests { @@ -568,20 +581,30 @@ fn send_ready_events_to_interests<'tcx, 'a>( // This checks if any of the events specified in epoll_event_interest.events // match those in ready_events. let flags = interest.events & event_bitmask; + let prev = std::mem::replace(&mut interest.prev_events, flags); if flags == 0 { // Make sure we *remove* any previous item from the ready list, since this // is not ready any more. ready_list.remove(&event_key); continue; } - // Geenrate a new event instance, with the flags that this one is interested in. - let mut new_instance = EpollEventInstance::new(flags, interest.data); + // Generate new instance, or update existing one. + let instance = match ready_list.entry(event_key) { + btree_map::Entry::Occupied(e) => e.into_mut(), + btree_map::Entry::Vacant(e) => { + if !force_edge && flags == prev & flags { + // Every bit in `flags` was already set in `prev`, and there's currently + // no entry in the ready list for this. So there is nothing new and no + // prior entry to update; just skip it. + continue; + } + e.insert(EpollEventInstance::new(interest.data)) + } + }; + instance.events = flags; ecx.release_clock(|clock| { - new_instance.clock.clone_from(clock); + instance.clock.join(clock); })?; - // Add event to ready list for this epoll instance. - // Tests confirm that we have to *overwrite* the old instance for the same key. - ready_list.insert(event_key, new_instance); wakeup = true; } if wakeup { diff --git a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs index ce51f257935be..b42648ed412e6 100644 --- a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs +++ b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs @@ -217,7 +217,7 @@ fn eventfd_write<'tcx>( // The state changed; we check and update the status of all supported event // types for current file description. - ecx.epoll_send_fd_ready_events(eventfd)?; + ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ false)?; // Return how many bytes we consumed from the user-provided buffer. return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize())); @@ -312,7 +312,7 @@ fn eventfd_read<'tcx>( // The state changed; we check and update the status of all supported event // types for current file description. - ecx.epoll_send_fd_ready_events(eventfd)?; + ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ false)?; // Tell userspace how many bytes we put into the buffer. return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize())); diff --git a/src/tools/miri/src/shims/unix/unnamed_socket.rs b/src/tools/miri/src/shims/unix/unnamed_socket.rs index cfd453df2679b..6918a91f92e08 100644 --- a/src/tools/miri/src/shims/unix/unnamed_socket.rs +++ b/src/tools/miri/src/shims/unix/unnamed_socket.rs @@ -18,7 +18,7 @@ use crate::*; /// The maximum capacity of the socketpair buffer in bytes. /// This number is arbitrary as the value can always /// be configured in the real system. -const MAX_SOCKETPAIR_BUFFER_CAPACITY: usize = 212992; +const MAX_SOCKETPAIR_BUFFER_CAPACITY: usize = 0x34000; #[derive(Debug, PartialEq)] enum AnonSocketType { @@ -97,7 +97,7 @@ impl FileDescription for AnonSocket { } } // Notify peer fd that close has happened, since that can unblock reads and writes. - ecx.epoll_send_fd_ready_events(peer_fd)?; + ecx.epoll_send_fd_ready_events(peer_fd, /* force_edge */ false)?; } interp_ok(Ok(())) } @@ -275,9 +275,11 @@ fn anonsocket_write<'tcx>( for thread_id in waiting_threads { ecx.unblock_thread(thread_id, BlockReason::UnnamedSocket)?; } - // Notification should be provided for peer fd as it became readable. - // The kernel does this even if the fd was already readable before, so we follow suit. - ecx.epoll_send_fd_ready_events(peer_fd)?; + // Notify epoll waiters: we might be no longer writable, peer might now be readable. + // The notification to the peer seems to be always sent on Linux, even if the + // FD was readable before. + ecx.epoll_send_fd_ready_events(self_ref, /* force_edge */ false)?; + ecx.epoll_send_fd_ready_events(peer_fd, /* force_edge */ true)?; return finish.call(ecx, Ok(write_size)); } @@ -351,6 +353,7 @@ fn anonsocket_read<'tcx>( // Do full read / partial read based on the space available. // Conveniently, `read` exists on `VecDeque` and has exactly the desired behavior. let read_size = ecx.read_from_host(&mut readbuf.buf, len, ptr)?.unwrap(); + let readbuf_now_empty = readbuf.buf.is_empty(); // Need to drop before others can access the readbuf again. drop(readbuf); @@ -369,9 +372,14 @@ fn anonsocket_read<'tcx>( for thread_id in waiting_threads { ecx.unblock_thread(thread_id, BlockReason::UnnamedSocket)?; } - // Notify epoll waiters. - ecx.epoll_send_fd_ready_events(peer_fd)?; + // Notify epoll waiters: peer is now writable. + // Linux seems to always notify the peer if the read buffer is now empty. + // (Linux also does that if this was a "big" read, but to avoid some arbitrary + // threshold, we do not match that.) + ecx.epoll_send_fd_ready_events(peer_fd, /* force_edge */ readbuf_now_empty)?; }; + // Notify epoll waiters: we might be no longer readable. + ecx.epoll_send_fd_ready_events(self_ref, /* force_edge */ false)?; return finish.call(ecx, Ok(read_size)); } diff --git a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs index c39192766d2a5..33fce1f3fde80 100644 --- a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs +++ b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs @@ -26,6 +26,8 @@ fn main() { test_epoll_ctl_epfd_equal_fd(); test_epoll_ctl_notification(); test_issue_3858(); + test_issue_4374(); + test_issue_4374_reads(); } // Using `as` cast since `EPOLLET` wraps around @@ -56,6 +58,7 @@ fn test_epoll_socketpair() { let mut fds = [-1, -1]; let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) }; assert_eq!(res, 0); + let fds = [fds[1], fds[0]]; // Write to fd[0] let data = "abcde".as_bytes().as_ptr(); @@ -83,8 +86,9 @@ fn test_epoll_socketpair() { let res = unsafe { libc_utils::write_all(fds[0], data as *const libc::c_void, 5) }; assert_eq!(res, 5); - // This did not change the readiness of fd[1]. And yet, we're seeing the event reported - // again by the kernel, so Miri does the same. + // This did not change the readiness of fd[1], so we should get no event. + // However, Linux seems to always deliver spurious events to the peer on each write, + // so we match that. check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); // Close the peer socketpair. @@ -276,6 +280,8 @@ fn test_epoll_socketpair_both_sides() { assert_eq!(res, 0); // Write to fds[1]. + // (We do the write after the register here, unlike in `test_epoll_socketpair`, to ensure + // we cover both orders in which this could be done.) let data = "abcde".as_bytes().as_ptr(); let res = unsafe { libc_utils::write_all(fds[1], data as *const libc::c_void, 5) }; assert_eq!(res, 5); @@ -297,10 +303,9 @@ fn test_epoll_socketpair_both_sides() { assert_eq!(res, 5); assert_eq!(buf, "abcde".as_bytes()); - // Notification should be provided for fds[1]. - let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); - let expected_value = fds[1] as u64; - check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); + // The state of fds[1] does not change (was writable, is writable). + // However, we force a spurious wakeup as the read buffer just got emptied. + check_epoll_wait::<8>(epfd, &[(expected_event1, expected_value1)]); } // When file description is fully closed, epoll_wait should not provide any notification for @@ -441,7 +446,7 @@ fn test_socketpair_read() { assert_eq!(res, 0); // Write a bunch of data bytes to fds[1]. - let data = [42u8; 40000]; + let data = [42u8; 1024]; let res = unsafe { libc_utils::write_all(fds[1], data.as_ptr() as *const libc::c_void, data.len()) }; assert_eq!(res, data.len() as isize); @@ -456,18 +461,16 @@ fn test_socketpair_read() { &[(expected_event0, expected_value0), (expected_event1, expected_value1)], ); - // Read a lof of databytes from fds[0]. - // If we make this read too small, Linux won't actually trigger a notification. - // So to ensure the test works on real systems, we make a sizable read. - let mut buf = [0; 38000]; + // Read some of the data from fds[0]. + let mut buf = [0; 512]; let res = unsafe { libc_utils::read_all(fds[0], buf.as_mut_ptr().cast(), buf.len() as libc::size_t) }; assert_eq!(res, buf.len() as isize); - // fds[1] can be written now. + // fds[1] did not change, it is still writable, so we get no event. let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); let expected_value = fds[1] as u64; - check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); + check_epoll_wait::<8>(epfd, &[]); // Read until the buffer is empty. let rest = data.len() - buf.len(); @@ -475,7 +478,9 @@ fn test_socketpair_read() { unsafe { libc_utils::read_all(fds[0], buf.as_mut_ptr().cast(), rest as libc::size_t) }; assert_eq!(res, rest as isize); - // Again we get a notification that fds[1] can be written. + // Now we get a notification that fds[1] can be written. This is spurious since it + // could already be written before, but Linux seems to always emit a notification for + // the writer when a read empties the buffer. check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); } @@ -712,3 +717,72 @@ fn test_issue_3858() { let res = unsafe { libc_utils::write_all(fd, sized_8_data.as_ptr() as *const libc::c_void, 8) }; assert_eq!(res, 8); } + +/// Ensure that if a socket becomes un-writable, we don't see it any more. +fn test_issue_4374() { + // Create an epoll instance. + let epfd0 = unsafe { libc::epoll_create1(0) }; + assert_ne!(epfd0, -1); + + // Create a socketpair instance, make it non-blocking. + let mut fds = [-1, -1]; + let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) }; + assert_eq!(res, 0); + assert_eq!(unsafe { libc::fcntl(fds[0], libc::F_SETFL, libc::O_NONBLOCK) }, 0); + assert_eq!(unsafe { libc::fcntl(fds[1], libc::F_SETFL, libc::O_NONBLOCK) }, 0); + + // Register fds[0] with epoll while it is writable (but not readable). + let mut ev = libc::epoll_event { events: EPOLL_IN_OUT_ET, u64: fds[0] as u64 }; + let res = unsafe { libc::epoll_ctl(epfd0, libc::EPOLL_CTL_ADD, fds[0], &mut ev) }; + assert_eq!(res, 0); + + // Fill up fds[0] so that it is not writable any more. + let zeros = [0u8; 512]; + loop { + let res = unsafe { + libc_utils::write_all(fds[0], zeros.as_ptr() as *const libc::c_void, zeros.len()) + }; + if res < 0 { + break; + } + } + + // This should have canceled the previous readiness, so now we get nothing. + check_epoll_wait::<1>(epfd0, &[]); +} + +/// Same as above, but for becoming un-readable. +fn test_issue_4374_reads() { + // Create an epoll instance. + let epfd0 = unsafe { libc::epoll_create1(0) }; + assert_ne!(epfd0, -1); + + // Create a socketpair instance, make it non-blocking. + let mut fds = [-1, -1]; + let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) }; + assert_eq!(res, 0); + assert_eq!(unsafe { libc::fcntl(fds[0], libc::F_SETFL, libc::O_NONBLOCK) }, 0); + assert_eq!(unsafe { libc::fcntl(fds[1], libc::F_SETFL, libc::O_NONBLOCK) }, 0); + + // Write to fds[1] so that fds[0] becomes readable. + let data = "abcde".as_bytes().as_ptr(); + let res: i32 = unsafe { + libc_utils::write_all(fds[1], data as *const libc::c_void, 5).try_into().unwrap() + }; + assert_eq!(res, 5); + + // Register fds[0] with epoll while it is readable. + let mut ev = libc::epoll_event { events: EPOLL_IN_OUT_ET, u64: fds[0] as u64 }; + let res = unsafe { libc::epoll_ctl(epfd0, libc::EPOLL_CTL_ADD, fds[0], &mut ev) }; + assert_eq!(res, 0); + + // Read fds[0] so it is no longer readable. + let mut buf = [0u8; 512]; + let res = unsafe { libc_utils::read_all(fds[0], buf.as_mut_ptr() as *mut libc::c_void, 5) }; + assert_eq!(res, 5); + + // We should now still see a notification, but only about it being writable. + let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); + let expected_value = fds[0] as u64; + check_epoll_wait::<1>(epfd0, &[(expected_event, expected_value)]); +} diff --git a/src/tools/miri/tests/pass-dep/libc/libc-socketpair.rs b/src/tools/miri/tests/pass-dep/libc/libc-socketpair.rs index 9c211ffbdbe4a..ce3927ce48ca7 100644 --- a/src/tools/miri/tests/pass-dep/libc/libc-socketpair.rs +++ b/src/tools/miri/tests/pass-dep/libc/libc-socketpair.rs @@ -187,11 +187,11 @@ fn test_blocking_write() { let mut fds = [-1, -1]; let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) }; assert_eq!(res, 0); - let arr1: [u8; 212992] = [1; 212992]; + let arr1: [u8; 0x34000] = [1; 0x34000]; // Exhaust the space in the buffer so the subsequent write will block. let res = - unsafe { libc_utils::write_all(fds[0], arr1.as_ptr() as *const libc::c_void, 212992) }; - assert_eq!(res, 212992); + unsafe { libc_utils::write_all(fds[0], arr1.as_ptr() as *const libc::c_void, arr1.len()) }; + assert_eq!(res, 0x34000); let thread1 = thread::spawn(move || { let data = "abc".as_bytes().as_ptr(); // The write below will be blocked because the buffer is already full. From 6c6aa0f26bc6be5d4e5a3f8208ac3883cd2680c0 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Mon, 10 Nov 2025 04:54:57 +0000 Subject: [PATCH 05/13] Prepare for merging from rust-lang/rust This updates the rust-version file to 8401398e1f14a24670ee1a3203713dc2f0f8b3a8. --- src/tools/miri/rust-version | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/rust-version b/src/tools/miri/rust-version index 1ce491a50eae4..04d41c96f5c08 100644 --- a/src/tools/miri/rust-version +++ b/src/tools/miri/rust-version @@ -1 +1 @@ -ceb7df7e6f17c92c7d49f7e4f02df0e68bc9b38b +8401398e1f14a24670ee1a3203713dc2f0f8b3a8 From e8aa88fb1be525d9f5a8420fa9cb45a994897401 Mon Sep 17 00:00:00 2001 From: The Miri Cronjob Bot Date: Mon, 10 Nov 2025 05:03:25 +0000 Subject: [PATCH 06/13] fmt --- src/tools/miri/src/machine.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 304ae00b01a92..2235b3c020ac7 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1353,7 +1353,10 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { } #[inline(always)] - fn runtime_checks(ecx: &InterpCx<'tcx, Self>, r: mir::RuntimeChecks) -> InterpResult<'tcx, bool> { + fn runtime_checks( + ecx: &InterpCx<'tcx, Self>, + r: mir::RuntimeChecks, + ) -> InterpResult<'tcx, bool> { interp_ok(r.value(&ecx.tcx.sess)) } From 3c569193ddd4a17e5fcd6a33130a8013b3c7d036 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 10 Nov 2025 08:19:32 +0100 Subject: [PATCH 07/13] clippy --- src/tools/miri/src/machine.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tools/miri/src/machine.rs b/src/tools/miri/src/machine.rs index 2235b3c020ac7..498058a279f83 100644 --- a/src/tools/miri/src/machine.rs +++ b/src/tools/miri/src/machine.rs @@ -1357,7 +1357,7 @@ impl<'tcx> Machine<'tcx> for MiriMachine<'tcx> { ecx: &InterpCx<'tcx, Self>, r: mir::RuntimeChecks, ) -> InterpResult<'tcx, bool> { - interp_ok(r.value(&ecx.tcx.sess)) + interp_ok(r.value(ecx.tcx.sess)) } #[inline(always)] From 4412a0aa70bbde59b7426eb1eecf63ccfc5a928a Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 10 Nov 2025 01:03:33 +0100 Subject: [PATCH 08/13] eventfd: force spurious wakeups, matching Linux --- .../miri/src/shims/unix/linux_like/eventfd.rs | 8 ++++-- .../pass-dep/libc/libc-epoll-no-blocking.rs | 28 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs index b42648ed412e6..31ce7af3930e1 100644 --- a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs +++ b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs @@ -217,7 +217,10 @@ fn eventfd_write<'tcx>( // The state changed; we check and update the status of all supported event // types for current file description. - ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ false)?; + // Linux seems to cause spurious wakeups here, and Tokio seems to rely on that + // (see + // and also ). + ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ true)?; // Return how many bytes we consumed from the user-provided buffer. return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize())); @@ -312,7 +315,8 @@ fn eventfd_read<'tcx>( // The state changed; we check and update the status of all supported event // types for current file description. - ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ false)?; + // Linux seems to always emit do notifications here, even if we were already writable. + ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ true)?; // Tell userspace how many bytes we put into the buffer. return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize())); diff --git a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs index 33fce1f3fde80..4599423e6e6bb 100644 --- a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs +++ b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs @@ -258,6 +258,34 @@ fn test_epoll_eventfd() { let expected_event = u32::try_from(libc::EPOLLIN | libc::EPOLLOUT).unwrap(); let expected_value = u64::try_from(fd).unwrap(); check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); + + // Write to the eventfd again. + let res = unsafe { libc_utils::write_all(fd, sized_8_data.as_ptr() as *const libc::c_void, 8) }; + assert_eq!(res, 8); + + // This does not change the status, so we should get no event. + // However, Linux performs a spurious wakeup. + check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); + + // Read from the eventfd. + let mut buf = [0u8; 8]; + let res = unsafe { libc_utils::read_all(fd, buf.as_mut_ptr().cast(), 8) }; + assert_eq!(res, 8); + + // This consumes the event, so the read status is gone. However, deactivation + // does not trigger an event. + // Still, we see a spurious wakeup. + let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); + check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); + + // Write the maximum possible value. + let sized_8_data: [u8; 8] = (u64::MAX - 1).to_ne_bytes(); + let res = unsafe { libc_utils::write_all(fd, sized_8_data.as_ptr() as *const libc::c_void, 8) }; + assert_eq!(res, 8); + + // This reactivates reads, therefore triggering an event. Writing is no longer possible. + let expected_event = u32::try_from(libc::EPOLLIN).unwrap(); + check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); } // When read/write happened on one side of the socketpair, only the other side will be notified. From 7b4802d34d4e8fd6db6f6127fef939fe56504a85 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 10 Nov 2025 08:32:56 +0100 Subject: [PATCH 09/13] fix EPOLL_CTL_MOD --- .../miri/src/shims/unix/linux_like/epoll.rs | 27 +++++------ .../pass-dep/libc/libc-epoll-no-blocking.rs | 46 ++++++++++++++----- 2 files changed, 46 insertions(+), 27 deletions(-) diff --git a/src/tools/miri/src/shims/unix/linux_like/epoll.rs b/src/tools/miri/src/shims/unix/linux_like/epoll.rs index 573517c3d9410..b5fc7eeb95316 100644 --- a/src/tools/miri/src/shims/unix/linux_like/epoll.rs +++ b/src/tools/miri/src/shims/unix/linux_like/epoll.rs @@ -44,7 +44,7 @@ fn range_for_id(id: FdId) -> std::ops::RangeInclusive { } /// EpollEventInstance contains information that will be returned by epoll_wait. -#[derive(Debug)] +#[derive(Debug, Default)] pub struct EpollEventInstance { /// Bitmask of event types that happened to the file description. events: u32, @@ -54,12 +54,6 @@ pub struct EpollEventInstance { clock: VClock, } -impl EpollEventInstance { - pub fn new(data: u64) -> EpollEventInstance { - EpollEventInstance { events: 0, data, clock: Default::default() } - } -} - /// EpollEventInterest registers the file description information to an epoll /// instance during a successful `epoll_ctl` call. It also stores additional /// information needed to check and update readiness state for `epoll_wait`. @@ -345,8 +339,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { ); } - // Add new interest to list. + // Add new interest to list. Experiments show that we need to reset all state + // on `EPOLL_CTL_MOD`, including the edge tracking. let epoll_key = (id, fd); + let new_interest = EpollEventInterest { events, data, prev_events: 0 }; let new_interest = if op == epoll_ctl_add { if interest_list.range(range_for_id(id)).next().is_none() { // This is the first time this FD got added to this epoll. @@ -358,25 +354,24 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // We already had interest in this. return this.set_last_error_and_return_i32(LibcError("EEXIST")); } - btree_map::Entry::Vacant(e) => - e.insert(EpollEventInterest { events, data, prev_events: 0 }), + btree_map::Entry::Vacant(e) => e.insert(new_interest), } } else { // Modify the existing interest. let Some(interest) = interest_list.get_mut(&epoll_key) else { return this.set_last_error_and_return_i32(LibcError("ENOENT")); }; - interest.events = events; - // FIXME what about the data? + *interest = new_interest; interest }; // Deliver events for the new interest. + let force_edge = true; // makes no difference since we reset `prev_events` send_ready_events_to_interests( this, &epfd, fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this), - /* force_edge */ false, + force_edge, std::iter::once((&epoll_key, new_interest)), )?; @@ -588,7 +583,8 @@ fn send_ready_events_to_interests<'tcx, 'a>( ready_list.remove(&event_key); continue; } - // Generate new instance, or update existing one. + // Generate new instance, or update existing one. It is crucial that whe we are done, + // if an interest exists in the ready list, then it matches the latest events and data! let instance = match ready_list.entry(event_key) { btree_map::Entry::Occupied(e) => e.into_mut(), btree_map::Entry::Vacant(e) => { @@ -598,10 +594,11 @@ fn send_ready_events_to_interests<'tcx, 'a>( // prior entry to update; just skip it. continue; } - e.insert(EpollEventInstance::new(interest.data)) + e.insert(EpollEventInstance::default()) } }; instance.events = flags; + instance.data = interest.data; ecx.release_clock(|clock| { instance.clock.join(clock); })?; diff --git a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs index 4599423e6e6bb..569675a5e3c4d 100644 --- a/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs +++ b/src/tools/miri/tests/pass-dep/libc/libc-epoll-no-blocking.rs @@ -105,6 +105,7 @@ fn test_epoll_socketpair() { // This test first registers a file description with a flag that does not lead to notification, // then EPOLL_CTL_MOD to add another flag that will lead to notification. +// Also check that the new data value set via MOD is applied properly. fn test_epoll_ctl_mod() { // Create an epoll instance. let epfd = unsafe { libc::epoll_create1(0) }; @@ -115,28 +116,49 @@ fn test_epoll_ctl_mod() { let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) }; assert_eq!(res, 0); - // Register fd[1] with EPOLLIN|EPOLLET. - let mut ev = libc::epoll_event { - events: (libc::EPOLLIN | libc::EPOLLET) as _, - u64: u64::try_from(fds[1]).unwrap(), - }; + // Register fd[1] with EPOLLIN|EPOLLET, and data of "0". + let mut ev = libc::epoll_event { events: (libc::EPOLLIN | libc::EPOLLET) as _, u64: 0 }; let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_ADD, fds[1], &mut ev) }; assert_eq!(res, 0); // Check result from epoll_wait. No notification would be returned. check_epoll_wait::<8>(epfd, &[]); - // Use EPOLL_CTL_MOD to change to EPOLLOUT flag. - let mut ev = libc::epoll_event { - events: (libc::EPOLLOUT | libc::EPOLLET) as _, - u64: u64::try_from(fds[1]).unwrap(), - }; + // Use EPOLL_CTL_MOD to change to EPOLLOUT flag and data. + let mut ev = libc::epoll_event { events: (libc::EPOLLOUT | libc::EPOLLET) as _, u64: 1 }; let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_MOD, fds[1], &mut ev) }; assert_eq!(res, 0); - // Check result from epoll_wait. EPOLLOUT notification is expected. + // Check result from epoll_wait. EPOLLOUT notification and new data is expected. let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); - let expected_value = u64::try_from(fds[1]).unwrap(); + let expected_value = 1; + check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); + + // Write to fds[1] and read from fds[0] to make the notification ready again + // (relying on there always being an event when the buffer gets emptied). + let data = "abc".as_bytes(); + let res = unsafe { libc_utils::write_all(fds[1], data.as_ptr().cast(), data.len()) }; + assert_eq!(res, 3); + let mut buf = [0u8; 3]; + let res = unsafe { libc_utils::read_all(fds[0], buf.as_mut_ptr().cast(), buf.len()) }; + assert_eq!(res, 3); + + // Now that the event is already ready, change the "data" value. + let mut ev = libc::epoll_event { events: (libc::EPOLLOUT | libc::EPOLLET) as _, u64: 2 }; + let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_MOD, fds[1], &mut ev) }; + assert_eq!(res, 0); + + // Receive event, with latest data value. + let expected_event = u32::try_from(libc::EPOLLOUT).unwrap(); + let expected_value = 2; + check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); + + // Do another update that changes nothing. + let mut ev = libc::epoll_event { events: (libc::EPOLLOUT | libc::EPOLLET) as _, u64: 2 }; + let res = unsafe { libc::epoll_ctl(epfd, libc::EPOLL_CTL_MOD, fds[1], &mut ev) }; + assert_eq!(res, 0); + + // This re-triggers the event, even if it's the same flags as before. check_epoll_wait::<8>(epfd, &[(expected_event, expected_value)]); } From fc4748ca6ae539ba1e474225d03414c7db53e2b0 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 10 Nov 2025 08:58:27 +0100 Subject: [PATCH 10/13] make EpollInterestTable sorted to avoid linear scan --- src/tools/miri/src/shims/unix/linux_like/epoll.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/tools/miri/src/shims/unix/linux_like/epoll.rs b/src/tools/miri/src/shims/unix/linux_like/epoll.rs index b5fc7eeb95316..8d22806fe6867 100644 --- a/src/tools/miri/src/shims/unix/linux_like/epoll.rs +++ b/src/tools/miri/src/shims/unix/linux_like/epoll.rs @@ -164,7 +164,7 @@ impl UnixFileDescription for Epoll {} /// The table of all EpollEventInterest. /// This tracks, for each file description, which epoll instances have an interest in events -/// for this file description. +/// for this file description. The `Vec` is sorted by `addr`! pub struct EpollInterestTable(BTreeMap>>); impl EpollInterestTable { @@ -174,15 +174,16 @@ impl EpollInterestTable { fn insert(&mut self, id: FdId, epoll: WeakFileDescriptionRef) { let epolls = self.0.entry(id).or_default(); - epolls.push(epoll); + let idx = epolls + .binary_search_by_key(&epoll.addr(), |e| e.addr()) + .expect_err("trying to add an epoll that's already in the list"); + epolls.insert(idx, epoll); } fn remove(&mut self, id: FdId, epoll_addr: usize) { let epolls = self.0.entry(id).or_default(); - // FIXME: linear scan. Keep the list sorted so we can do binary search? let idx = epolls - .iter() - .position(|old_ref| old_ref.addr() == epoll_addr) + .binary_search_by_key(&epoll_addr, |e| e.addr()) .expect("trying to remove an epoll that's not in the list"); epolls.remove(idx); } From fe6bc020fe9bbe0e8041b4e84da6f6c59f6f7509 Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 10 Nov 2025 11:39:01 +0100 Subject: [PATCH 11/13] avoid using the address of an FD; now that we sort by that it may be user-visible --- src/tools/miri/src/shims/files.rs | 18 ++--------- .../miri/src/shims/unix/linux_like/epoll.rs | 30 +++++++++---------- .../miri/src/shims/unix/linux_like/eventfd.rs | 4 +-- .../miri/src/shims/unix/unnamed_socket.rs | 4 +-- src/tools/miri/src/shims/windows/fs.rs | 6 ++-- 5 files changed, 25 insertions(+), 37 deletions(-) diff --git a/src/tools/miri/src/shims/files.rs b/src/tools/miri/src/shims/files.rs index ea19c2d5016e0..f86933029341e 100644 --- a/src/tools/miri/src/shims/files.rs +++ b/src/tools/miri/src/shims/files.rs @@ -48,12 +48,6 @@ impl FileDescriptionRef { pub fn id(&self) -> FdId { self.0.id } - - /// Returns the raw address of this file description. Useful for equality comparisons. - /// Use `id` instead if this can affect user-visible behavior! - pub fn addr(&self) -> usize { - Rc::as_ptr(&self.0).addr() - } } /// Holds a weak reference to the actual file description. @@ -76,11 +70,6 @@ impl WeakFileDescriptionRef { pub fn upgrade(&self) -> Option> { self.0.upgrade().map(FileDescriptionRef) } - - /// Returns the raw address of this file description. Useful for equality comparisons. - pub fn addr(&self) -> usize { - self.0.as_ptr().addr() - } } impl VisitProvenance for WeakFileDescriptionRef { @@ -116,13 +105,12 @@ impl FileDescriptionExt for T { communicate_allowed: bool, ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { - let addr = self.addr(); match Rc::into_inner(self.0) { Some(fd) => { // There might have been epolls interested in this FD. Remove that. ecx.machine.epoll_interests.remove_epolls(fd.id); - fd.inner.destroy(addr, communicate_allowed, ecx) + fd.inner.destroy(fd.id, communicate_allowed, ecx) } None => { // Not the last reference. @@ -200,7 +188,7 @@ pub trait FileDescription: std::fmt::Debug + FileDescriptionExt { /// `self_addr` is the address that this file description used to be stored at. fn destroy<'tcx>( self, - _self_addr: usize, + _self_id: FdId, _communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> @@ -379,7 +367,7 @@ impl FileDescription for FileHandle { fn destroy<'tcx>( self, - _self_addr: usize, + _self_id: FdId, communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { diff --git a/src/tools/miri/src/shims/unix/linux_like/epoll.rs b/src/tools/miri/src/shims/unix/linux_like/epoll.rs index 8d22806fe6867..865cdb2533cfd 100644 --- a/src/tools/miri/src/shims/unix/linux_like/epoll.rs +++ b/src/tools/miri/src/shims/unix/linux_like/epoll.rs @@ -142,7 +142,7 @@ impl FileDescription for Epoll { fn destroy<'tcx>( mut self, - self_addr: usize, + self_id: FdId, _communicate_allowed: bool, ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { @@ -150,7 +150,7 @@ impl FileDescription for Epoll { let mut ids = self.interest_list.get_mut().keys().map(|(id, _num)| *id).collect::>(); ids.dedup(); // they come out of the map sorted for id in ids { - ecx.machine.epoll_interests.remove(id, self_addr); + ecx.machine.epoll_interests.remove(id, self_id); } interp_ok(Ok(())) } @@ -164,37 +164,38 @@ impl UnixFileDescription for Epoll {} /// The table of all EpollEventInterest. /// This tracks, for each file description, which epoll instances have an interest in events -/// for this file description. The `Vec` is sorted by `addr`! -pub struct EpollInterestTable(BTreeMap>>); +/// for this file description. The `FdId` is the ID of the epoll instance, so that we can recognize +/// it later when it is slated for removal. The vector is sorted by that ID. +pub struct EpollInterestTable(BTreeMap)>>); impl EpollInterestTable { pub(crate) fn new() -> Self { EpollInterestTable(BTreeMap::new()) } - fn insert(&mut self, id: FdId, epoll: WeakFileDescriptionRef) { + fn insert(&mut self, id: FdId, epoll: &FileDescriptionRef) { let epolls = self.0.entry(id).or_default(); let idx = epolls - .binary_search_by_key(&epoll.addr(), |e| e.addr()) + .binary_search_by_key(&epoll.id(), |&(id, _)| id) .expect_err("trying to add an epoll that's already in the list"); - epolls.insert(idx, epoll); + epolls.insert(idx, (epoll.id(), FileDescriptionRef::downgrade(epoll))); } - fn remove(&mut self, id: FdId, epoll_addr: usize) { + fn remove(&mut self, id: FdId, epoll_id: FdId) { let epolls = self.0.entry(id).or_default(); let idx = epolls - .binary_search_by_key(&epoll_addr, |e| e.addr()) + .binary_search_by_key(&epoll_id, |&(id, _)| id) .expect("trying to remove an epoll that's not in the list"); epolls.remove(idx); } - fn get_epolls(&self, id: FdId) -> Option<&Vec>> { - self.0.get(&id) + fn get_epolls(&self, id: FdId) -> Option>> { + self.0.get(&id).map(|epolls| epolls.iter().map(|(_id, epoll)| epoll)) } pub fn remove_epolls(&mut self, id: FdId) { if let Some(epolls) = self.0.remove(&id) { - for epoll in epolls.iter().filter_map(|e| e.upgrade()) { + for epoll in epolls.iter().filter_map(|(_id, epoll)| epoll.upgrade()) { // This is a still-live epoll with interest in this FD. Remove all // relevent interests. epoll @@ -348,7 +349,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { if interest_list.range(range_for_id(id)).next().is_none() { // This is the first time this FD got added to this epoll. // Remember that in the global list so we get notified about FD events. - this.machine.epoll_interests.insert(id, FileDescriptionRef::downgrade(&epfd)); + this.machine.epoll_interests.insert(id, &epfd); } match interest_list.entry(epoll_key) { btree_map::Entry::Occupied(_) => { @@ -388,7 +389,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // If this was the last interest in this FD, remove us from the global list // of who is interested in this FD. if interest_list.range(range_for_id(id)).next().is_none() { - this.machine.epoll_interests.remove(id, epfd.addr()); + this.machine.epoll_interests.remove(id, epfd.id()); } // Remove related event instance from ready list. @@ -541,7 +542,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { return interp_ok(()); }; let epolls = epolls - .iter() .map(|weak| { weak.upgrade() .expect("someone forgot to remove the garbage from `machine.epoll_interests`") diff --git a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs index 31ce7af3930e1..1b5f1db43953b 100644 --- a/src/tools/miri/src/shims/unix/linux_like/eventfd.rs +++ b/src/tools/miri/src/shims/unix/linux_like/eventfd.rs @@ -4,7 +4,7 @@ use std::io; use std::io::ErrorKind; use crate::concurrency::VClock; -use crate::shims::files::{FileDescription, FileDescriptionRef, WeakFileDescriptionRef}; +use crate::shims::files::{FdId, FileDescription, FileDescriptionRef, WeakFileDescriptionRef}; use crate::shims::unix::UnixFileDescription; use crate::shims::unix::linux_like::epoll::{EpollReadyEvents, EvalContextExt as _}; use crate::*; @@ -39,7 +39,7 @@ impl FileDescription for EventFd { fn destroy<'tcx>( self, - _self_addr: usize, + _self_id: FdId, _communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { diff --git a/src/tools/miri/src/shims/unix/unnamed_socket.rs b/src/tools/miri/src/shims/unix/unnamed_socket.rs index 6918a91f92e08..3e2949759377e 100644 --- a/src/tools/miri/src/shims/unix/unnamed_socket.rs +++ b/src/tools/miri/src/shims/unix/unnamed_socket.rs @@ -9,7 +9,7 @@ use std::io::ErrorKind; use crate::concurrency::VClock; use crate::shims::files::{ - EvalContextExt as _, FileDescription, FileDescriptionRef, WeakFileDescriptionRef, + EvalContextExt as _, FdId, FileDescription, FileDescriptionRef, WeakFileDescriptionRef, }; use crate::shims::unix::UnixFileDescription; use crate::shims::unix::linux_like::epoll::{EpollReadyEvents, EvalContextExt as _}; @@ -84,7 +84,7 @@ impl FileDescription for AnonSocket { fn destroy<'tcx>( self, - _self_addr: usize, + _self_id: FdId, _communicate_allowed: bool, ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { diff --git a/src/tools/miri/src/shims/windows/fs.rs b/src/tools/miri/src/shims/windows/fs.rs index 936bbf06ebf80..7192d8aec8510 100644 --- a/src/tools/miri/src/shims/windows/fs.rs +++ b/src/tools/miri/src/shims/windows/fs.rs @@ -6,7 +6,7 @@ use std::time::SystemTime; use bitflags::bitflags; -use crate::shims::files::{FileDescription, FileHandle}; +use crate::shims::files::{FdId, FileDescription, FileHandle}; use crate::shims::windows::handle::{EvalContextExt as _, Handle}; use crate::*; @@ -26,7 +26,7 @@ impl FileDescription for DirHandle { fn destroy<'tcx>( self, - _self_addr: usize, + _self_id: FdId, _communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { @@ -53,7 +53,7 @@ impl FileDescription for MetadataHandle { fn destroy<'tcx>( self, - _self_addr: usize, + _self_id: FdId, _communicate_allowed: bool, _ecx: &mut MiriInterpCx<'tcx>, ) -> InterpResult<'tcx, io::Result<()>> { From b340f0830feab51b639f9c6c43ddfa7fa8df6a3b Mon Sep 17 00:00:00 2001 From: Ralf Jung Date: Mon, 10 Nov 2025 11:42:02 +0100 Subject: [PATCH 12/13] add the tokio poll_fns test that triggered this investigation --- src/tools/miri/tests/deps/Cargo.lock | 80 +++++++++ src/tools/miri/tests/deps/Cargo.toml | 1 + .../miri/tests/pass-dep/tokio/poll_fns.rs | 161 ++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 100644 src/tools/miri/tests/pass-dep/tokio/poll_fns.rs diff --git a/src/tools/miri/tests/deps/Cargo.lock b/src/tools/miri/tests/deps/Cargo.lock index 65ca4215c6001..2549396251672 100644 --- a/src/tools/miri/tests/deps/Cargo.lock +++ b/src/tools/miri/tests/deps/Cargo.lock @@ -72,6 +72,79 @@ version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +[[package]] +name = "futures" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +dependencies = [ + "futures-channel", + "futures-core", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + +[[package]] +name = "futures-channel" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +dependencies = [ + "futures-core", + "futures-sink", +] + +[[package]] +name = "futures-core" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" + +[[package]] +name = "futures-io" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" + +[[package]] +name = "futures-macro" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "futures-sink" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" + +[[package]] +name = "futures-task" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" + +[[package]] +name = "futures-util" +version = "0.3.31" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +dependencies = [ + "futures-core", + "futures-macro", + "futures-sink", + "futures-task", + "pin-project-lite", + "pin-utils", +] + [[package]] name = "getrandom" version = "0.1.16" @@ -190,6 +263,7 @@ name = "miri-test-deps" version = "0.1.0" dependencies = [ "cfg-if", + "futures", "getrandom 0.1.16", "getrandom 0.2.16", "getrandom 0.3.3", @@ -242,6 +316,12 @@ version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "proc-macro2" version = "1.0.95" diff --git a/src/tools/miri/tests/deps/Cargo.toml b/src/tools/miri/tests/deps/Cargo.toml index d85723f0915fb..fd301fc5cf321 100644 --- a/src/tools/miri/tests/deps/Cargo.toml +++ b/src/tools/miri/tests/deps/Cargo.toml @@ -23,6 +23,7 @@ page_size = "0.6" # Avoid pulling in all of tokio's dependencies. # However, without `net` and `signal`, tokio uses fewer relevant system APIs. tokio = { version = "1", features = ["macros", "rt-multi-thread", "time", "net", "fs", "sync", "signal", "io-util"] } +futures = { version = "0.3.0", default-features = false, features = ["alloc", "async-await"] } [target.'cfg(windows)'.dependencies] windows-sys = { version = "0.60", features = [ diff --git a/src/tools/miri/tests/pass-dep/tokio/poll_fns.rs b/src/tools/miri/tests/pass-dep/tokio/poll_fns.rs new file mode 100644 index 0000000000000..738c80b51ad4e --- /dev/null +++ b/src/tools/miri/tests/pass-dep/tokio/poll_fns.rs @@ -0,0 +1,161 @@ +//! This is a stand-alone version of the `poll_fns` test in Tokio. It hits various +//! interesting edge cases in the epoll logic, making it a good integration test. +//! It also seems to depend on Tokio internals, so if Tokio changes we have have to update +//! or remove the test. + +//@only-target: linux # We only support tokio on Linux + +use std::fs::File; +use std::io::{ErrorKind, Read, Write}; +use std::os::fd::FromRawFd; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::task::{Context, Waker}; +use std::time::Duration; + +use futures::poll; +use tokio::io::unix::AsyncFd; + +macro_rules! assert_pending { + ($e:expr) => {{ + use core::task::Poll; + match $e { + Poll::Pending => {} + Poll::Ready(v) => panic!("ready; value = {:?}", v), + } + }}; +} + +struct TestWaker { + inner: Arc, + waker: Waker, +} + +#[derive(Default)] +struct TestWakerInner { + awoken: AtomicBool, +} + +impl futures::task::ArcWake for TestWakerInner { + fn wake_by_ref(arc_self: &Arc) { + arc_self.awoken.store(true, Ordering::SeqCst); + } +} + +impl TestWaker { + fn new() -> Self { + let inner: Arc = Default::default(); + + Self { inner: inner.clone(), waker: futures::task::waker(inner) } + } + + fn awoken(&self) -> bool { + self.inner.awoken.swap(false, Ordering::SeqCst) + } + + fn context(&self) -> Context<'_> { + Context::from_waker(&self.waker) + } +} + +fn socketpair() -> (File, File) { + let mut fds = [-1, -1]; + let res = unsafe { libc::socketpair(libc::AF_UNIX, libc::SOCK_STREAM, 0, fds.as_mut_ptr()) }; + assert_eq!(res, 0); + + assert_eq!(unsafe { libc::fcntl(fds[0], libc::F_SETFL, libc::O_NONBLOCK) }, 0); + assert_eq!(unsafe { libc::fcntl(fds[1], libc::F_SETFL, libc::O_NONBLOCK) }, 0); + + unsafe { (File::from_raw_fd(fds[0]), File::from_raw_fd(fds[1])) } +} + +fn drain(mut fd: &File, mut amt: usize) { + let mut buf = [0u8; 512]; + while amt > 0 { + match fd.read(&mut buf[..]) { + Err(e) if e.kind() == ErrorKind::WouldBlock => {} + Ok(0) => panic!("unexpected EOF"), + Err(e) => panic!("unexpected error: {e:?}"), + Ok(x) => amt -= x, + } + } +} + +fn main() { + tokio::runtime::Builder::new_current_thread() + .enable_all() + .build() + .unwrap() + .block_on(the_test()); +} + +async fn the_test() { + let (a, b) = socketpair(); + let afd_a = Arc::new(AsyncFd::new(a).unwrap()); + let afd_b = Arc::new(AsyncFd::new(b).unwrap()); + + // Fill up the write side of A + let mut bytes = 0; + while let Ok(amt) = afd_a.get_ref().write(&[0; 512]) { + bytes += amt; + } + + let waker = TestWaker::new(); + + assert_pending!(afd_a.as_ref().poll_read_ready(&mut waker.context())); + + let afd_a_2 = afd_a.clone(); + let r_barrier = Arc::new(tokio::sync::Barrier::new(2)); + let barrier_clone = r_barrier.clone(); + + let read_fut = tokio::spawn(async move { + // Move waker onto this task first + assert_pending!(poll!(std::future::poll_fn(|cx| afd_a_2.as_ref().poll_read_ready(cx)))); + barrier_clone.wait().await; + + let _ = std::future::poll_fn(|cx| afd_a_2.as_ref().poll_read_ready(cx)).await; + }); + + let afd_a_2 = afd_a.clone(); + let w_barrier = Arc::new(tokio::sync::Barrier::new(2)); + let barrier_clone = w_barrier.clone(); + + let mut write_fut = tokio::spawn(async move { + // Move waker onto this task first + assert_pending!(poll!(std::future::poll_fn(|cx| afd_a_2.as_ref().poll_write_ready(cx)))); + barrier_clone.wait().await; + + let _ = std::future::poll_fn(|cx| afd_a_2.as_ref().poll_write_ready(cx)).await; + }); + + r_barrier.wait().await; + w_barrier.wait().await; + + let readable = afd_a.readable(); + tokio::pin!(readable); + + tokio::select! { + _ = &mut readable => unreachable!(), + _ = tokio::task::yield_now() => {} + } + + // Make A readable. We expect that 'readable' and 'read_fut' will both complete quickly + afd_b.get_ref().write_all(b"0").unwrap(); + + let _ = tokio::join!(readable, read_fut); + + // Our original waker should _not_ be awoken (poll_read_ready retains only the last context) + assert!(!waker.awoken()); + + // The writable side should not be awoken + tokio::select! { + _ = &mut write_fut => unreachable!(), + _ = tokio::time::sleep(Duration::from_millis(50)) => {} + } + + // Make it writable now + drain(afd_b.get_ref(), bytes); + + // now we should be writable (ie - the waker for poll_write should still be registered after we wake the read side) + let _ = write_fut.await; +} From 050412a5ab514fc47739e5852a0a4307c6d1a046 Mon Sep 17 00:00:00 2001 From: Folkert de Vries Date: Mon, 10 Nov 2025 15:31:13 +0100 Subject: [PATCH 13/13] add shim for avx512 ternarylogic functions --- src/tools/miri/src/shims/x86/avx512.rs | 85 +++++++++++++++++++ src/tools/miri/src/shims/x86/mod.rs | 6 ++ .../pass/shims/x86/intrinsics-x86-avx512.rs | 72 ++++++++++++++++ 3 files changed, 163 insertions(+) create mode 100644 src/tools/miri/src/shims/x86/avx512.rs diff --git a/src/tools/miri/src/shims/x86/avx512.rs b/src/tools/miri/src/shims/x86/avx512.rs new file mode 100644 index 0000000000000..e15b99beba8b4 --- /dev/null +++ b/src/tools/miri/src/shims/x86/avx512.rs @@ -0,0 +1,85 @@ +use rustc_abi::CanonAbi; +use rustc_middle::ty::Ty; +use rustc_span::Symbol; +use rustc_target::callconv::FnAbi; + +use crate::*; + +impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} +pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { + fn emulate_x86_avx512_intrinsic( + &mut self, + link_name: Symbol, + abi: &FnAbi<'tcx, Ty<'tcx>>, + args: &[OpTy<'tcx>], + dest: &MPlaceTy<'tcx>, + ) -> InterpResult<'tcx, EmulateItemResult> { + let this = self.eval_context_mut(); + // Prefix should have already been checked. + let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.avx512.").unwrap(); + + match unprefixed_name { + // Used by the ternarylogic functions. + "pternlog.d.128" | "pternlog.d.256" | "pternlog.d.512" => { + this.expect_target_feature_for_intrinsic(link_name, "avx512f")?; + if matches!(unprefixed_name, "pternlog.d.128" | "pternlog.d.256") { + this.expect_target_feature_for_intrinsic(link_name, "avx512vl")?; + } + + let [a, b, c, imm8] = + this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?; + + assert_eq!(dest.layout, a.layout); + assert_eq!(dest.layout, b.layout); + assert_eq!(dest.layout, c.layout); + + // The signatures of these operations are: + // + // ``` + // fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16; + // fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8; + // fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4; + // ``` + // + // The element type is always a 32-bit integer, the width varies. + + let (a, _a_len) = this.project_to_simd(a)?; + let (b, _b_len) = this.project_to_simd(b)?; + let (c, _c_len) = this.project_to_simd(c)?; + let (dest, dest_len) = this.project_to_simd(dest)?; + + // Compute one lane with ternary table. + let tern = |xa: u32, xb: u32, xc: u32, imm: u32| -> u32 { + let mut out = 0u32; + // At each bit position, select bit from imm8 at index = (a << 2) | (b << 1) | c + for bit in 0..32 { + let ia = (xa >> bit) & 1; + let ib = (xb >> bit) & 1; + let ic = (xc >> bit) & 1; + let idx = (ia << 2) | (ib << 1) | ic; + let v = (imm >> idx) & 1; + out |= v << bit; + } + out + }; + + let imm8 = this.read_scalar(imm8)?.to_u32()? & 0xFF; + for i in 0..dest_len { + let a_lane = this.project_index(&a, i)?; + let b_lane = this.project_index(&b, i)?; + let c_lane = this.project_index(&c, i)?; + let d_lane = this.project_index(&dest, i)?; + + let va = this.read_scalar(&a_lane)?.to_u32()?; + let vb = this.read_scalar(&b_lane)?.to_u32()?; + let vc = this.read_scalar(&c_lane)?.to_u32()?; + + let r = tern(va, vb, vc, imm8); + this.write_scalar(Scalar::from_u32(r), &d_lane)?; + } + } + _ => return interp_ok(EmulateItemResult::NotSupported), + } + interp_ok(EmulateItemResult::NeedsReturn) + } +} diff --git a/src/tools/miri/src/shims/x86/mod.rs b/src/tools/miri/src/shims/x86/mod.rs index 63d2b2d044b42..c730609a4a57f 100644 --- a/src/tools/miri/src/shims/x86/mod.rs +++ b/src/tools/miri/src/shims/x86/mod.rs @@ -13,6 +13,7 @@ use crate::*; mod aesni; mod avx; mod avx2; +mod avx512; mod bmi; mod gfni; mod sha; @@ -152,6 +153,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { this, link_name, abi, args, dest, ); } + name if name.starts_with("avx512.") => { + return avx512::EvalContextExt::emulate_x86_avx512_intrinsic( + this, link_name, abi, args, dest, + ); + } _ => return interp_ok(EmulateItemResult::NotSupported), } diff --git a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-avx512.rs b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-avx512.rs index 65d7b57d1ce51..c22227a8c6599 100644 --- a/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-avx512.rs +++ b/src/tools/miri/tests/pass/shims/x86/intrinsics-x86-avx512.rs @@ -17,6 +17,7 @@ fn main() { unsafe { test_avx512bitalg(); test_avx512vpopcntdq(); + test_avx512ternarylogic(); } } @@ -191,6 +192,77 @@ unsafe fn test_avx512vpopcntdq() { test_mm_popcnt_epi64(); } +#[target_feature(enable = "avx512f,avx512vl")] +unsafe fn test_avx512ternarylogic() { + #[target_feature(enable = "avx512f")] + unsafe fn test_mm512_ternarylogic_epi32() { + let a = _mm512_set4_epi32(0b100, 0b110, 0b001, 0b101); + let b = _mm512_set4_epi32(0b010, 0b011, 0b001, 0b110); + let c = _mm512_set4_epi32(0b001, 0b000, 0b001, 0b111); + + // Identity of A. + let r = _mm512_ternarylogic_epi32::<0b1111_0000>(a, b, c); + assert_eq_m512i(r, a); + + // Bitwise xor. + let r = _mm512_ternarylogic_epi32::<0b10010110>(a, b, c); + let e = _mm512_set4_epi32(0b111, 0b101, 0b001, 0b100); + assert_eq_m512i(r, e); + + // Majority (2 or more bits set). + let r = _mm512_ternarylogic_epi32::<0b1110_1000>(a, b, c); + let e = _mm512_set4_epi32(0b000, 0b010, 0b001, 0b111); + assert_eq_m512i(r, e); + } + test_mm512_ternarylogic_epi32(); + + #[target_feature(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_ternarylogic_epi32() { + let _mm256_set4_epi32 = |a, b, c, d| _mm256_setr_epi32(a, b, c, d, a, b, c, d); + + let a = _mm256_set4_epi32(0b100, 0b110, 0b001, 0b101); + let b = _mm256_set4_epi32(0b010, 0b011, 0b001, 0b110); + let c = _mm256_set4_epi32(0b001, 0b000, 0b001, 0b111); + + // Identity of A. + let r = _mm256_ternarylogic_epi32::<0b1111_0000>(a, b, c); + assert_eq_m256i(r, a); + + // Bitwise xor. + let r = _mm256_ternarylogic_epi32::<0b10010110>(a, b, c); + let e = _mm256_set4_epi32(0b111, 0b101, 0b001, 0b100); + assert_eq_m256i(r, e); + + // Majority (2 or more bits set). + let r = _mm256_ternarylogic_epi32::<0b1110_1000>(a, b, c); + let e = _mm256_set4_epi32(0b000, 0b010, 0b001, 0b111); + assert_eq_m256i(r, e); + } + test_mm256_ternarylogic_epi32(); + + #[target_feature(enable = "avx512f,avx512vl")] + unsafe fn test_mm_ternarylogic_epi32() { + let a = _mm_setr_epi32(0b100, 0b110, 0b001, 0b101); + let b = _mm_setr_epi32(0b010, 0b011, 0b001, 0b110); + let c = _mm_setr_epi32(0b001, 0b000, 0b001, 0b111); + + // Identity of A. + let r = _mm_ternarylogic_epi32::<0b1111_0000>(a, b, c); + assert_eq_m128i(r, a); + + // Bitwise xor. + let r = _mm_ternarylogic_epi32::<0b10010110>(a, b, c); + let e = _mm_setr_epi32(0b111, 0b101, 0b001, 0b100); + assert_eq_m128i(r, e); + + // Majority (2 or more bits set). + let r = _mm_ternarylogic_epi32::<0b1110_1000>(a, b, c); + let e = _mm_setr_epi32(0b000, 0b010, 0b001, 0b111); + assert_eq_m128i(r, e); + } + test_mm_ternarylogic_epi32(); +} + #[track_caller] unsafe fn assert_eq_m512i(a: __m512i, b: __m512i) { assert_eq!(transmute::<_, [i32; 16]>(a), transmute::<_, [i32; 16]>(b))