Skip to content

Commit 15299b7

Browse files
authored
Merge pull request #4676 from RalfJung/epoll-edge
epoll: do proper edge detection inside the epoll system
2 parents 656fe2f + b340f08 commit 15299b7

File tree

10 files changed

+498
-108
lines changed

10 files changed

+498
-108
lines changed

src/tools/miri/src/shims/files.rs

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -48,12 +48,6 @@ impl<T: ?Sized> FileDescriptionRef<T> {
4848
pub fn id(&self) -> FdId {
4949
self.0.id
5050
}
51-
52-
/// Returns the raw address of this file description. Useful for equality comparisons.
53-
/// Use `id` instead if this can affect user-visible behavior!
54-
pub fn addr(&self) -> usize {
55-
Rc::as_ptr(&self.0).addr()
56-
}
5751
}
5852

5953
/// Holds a weak reference to the actual file description.
@@ -76,11 +70,6 @@ impl<T: ?Sized> WeakFileDescriptionRef<T> {
7670
pub fn upgrade(&self) -> Option<FileDescriptionRef<T>> {
7771
self.0.upgrade().map(FileDescriptionRef)
7872
}
79-
80-
/// Returns the raw address of this file description. Useful for equality comparisons.
81-
pub fn addr(&self) -> usize {
82-
self.0.as_ptr().addr()
83-
}
8473
}
8574

8675
impl<T> VisitProvenance for WeakFileDescriptionRef<T> {
@@ -116,13 +105,12 @@ impl<T: FileDescription + 'static> FileDescriptionExt for T {
116105
communicate_allowed: bool,
117106
ecx: &mut MiriInterpCx<'tcx>,
118107
) -> InterpResult<'tcx, io::Result<()>> {
119-
let addr = self.addr();
120108
match Rc::into_inner(self.0) {
121109
Some(fd) => {
122110
// There might have been epolls interested in this FD. Remove that.
123111
ecx.machine.epoll_interests.remove_epolls(fd.id);
124112

125-
fd.inner.destroy(addr, communicate_allowed, ecx)
113+
fd.inner.destroy(fd.id, communicate_allowed, ecx)
126114
}
127115
None => {
128116
// Not the last reference.
@@ -200,7 +188,7 @@ pub trait FileDescription: std::fmt::Debug + FileDescriptionExt {
200188
/// `self_addr` is the address that this file description used to be stored at.
201189
fn destroy<'tcx>(
202190
self,
203-
_self_addr: usize,
191+
_self_id: FdId,
204192
_communicate_allowed: bool,
205193
_ecx: &mut MiriInterpCx<'tcx>,
206194
) -> InterpResult<'tcx, io::Result<()>>
@@ -379,7 +367,7 @@ impl FileDescription for FileHandle {
379367

380368
fn destroy<'tcx>(
381369
self,
382-
_self_addr: usize,
370+
_self_id: FdId,
383371
communicate_allowed: bool,
384372
_ecx: &mut MiriInterpCx<'tcx>,
385373
) -> InterpResult<'tcx, io::Result<()>> {

src/tools/miri/src/shims/unix/linux_like/epoll.rs

Lines changed: 72 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
use std::cell::RefCell;
2-
use std::collections::BTreeMap;
2+
use std::collections::{BTreeMap, btree_map};
33
use std::io;
44
use std::time::Duration;
55

@@ -44,7 +44,7 @@ fn range_for_id(id: FdId) -> std::ops::RangeInclusive<EpollEventKey> {
4444
}
4545

4646
/// EpollEventInstance contains information that will be returned by epoll_wait.
47-
#[derive(Debug)]
47+
#[derive(Debug, Default)]
4848
pub struct EpollEventInstance {
4949
/// Bitmask of event types that happened to the file description.
5050
events: u32,
@@ -54,12 +54,6 @@ pub struct EpollEventInstance {
5454
clock: VClock,
5555
}
5656

57-
impl EpollEventInstance {
58-
pub fn new(events: u32, data: u64) -> EpollEventInstance {
59-
EpollEventInstance { events, data, clock: Default::default() }
60-
}
61-
}
62-
6357
/// EpollEventInterest registers the file description information to an epoll
6458
/// instance during a successful `epoll_ctl` call. It also stores additional
6559
/// information needed to check and update readiness state for `epoll_wait`.
@@ -69,10 +63,12 @@ impl EpollEventInstance {
6963
/// see the man page:
7064
///
7165
/// <https://man7.org/linux/man-pages/man2/epoll_ctl.2.html>
72-
#[derive(Debug, Copy, Clone)]
66+
#[derive(Debug)]
7367
pub struct EpollEventInterest {
7468
/// The events bitmask retrieved from `epoll_event`.
7569
events: u32,
70+
/// The way the events looked last time we checked (for edge trigger / ET detection).
71+
prev_events: u32,
7672
/// The data retrieved from `epoll_event`.
7773
/// libc's data field in epoll_event can store integer or pointer,
7874
/// but only u64 is supported for now.
@@ -146,15 +142,15 @@ impl FileDescription for Epoll {
146142

147143
fn destroy<'tcx>(
148144
mut self,
149-
self_addr: usize,
145+
self_id: FdId,
150146
_communicate_allowed: bool,
151147
ecx: &mut MiriInterpCx<'tcx>,
152148
) -> InterpResult<'tcx, io::Result<()>> {
153149
// If we were interested in some FDs, we can remove that now.
154150
let mut ids = self.interest_list.get_mut().keys().map(|(id, _num)| *id).collect::<Vec<_>>();
155151
ids.dedup(); // they come out of the map sorted
156152
for id in ids {
157-
ecx.machine.epoll_interests.remove(id, self_addr);
153+
ecx.machine.epoll_interests.remove(id, self_id);
158154
}
159155
interp_ok(Ok(()))
160156
}
@@ -168,36 +164,38 @@ impl UnixFileDescription for Epoll {}
168164

169165
/// The table of all EpollEventInterest.
170166
/// This tracks, for each file description, which epoll instances have an interest in events
171-
/// for this file description.
172-
pub struct EpollInterestTable(BTreeMap<FdId, Vec<WeakFileDescriptionRef<Epoll>>>);
167+
/// for this file description. The `FdId` is the ID of the epoll instance, so that we can recognize
168+
/// it later when it is slated for removal. The vector is sorted by that ID.
169+
pub struct EpollInterestTable(BTreeMap<FdId, Vec<(FdId, WeakFileDescriptionRef<Epoll>)>>);
173170

174171
impl EpollInterestTable {
175172
pub(crate) fn new() -> Self {
176173
EpollInterestTable(BTreeMap::new())
177174
}
178175

179-
fn insert(&mut self, id: FdId, epoll: WeakFileDescriptionRef<Epoll>) {
176+
fn insert(&mut self, id: FdId, epoll: &FileDescriptionRef<Epoll>) {
180177
let epolls = self.0.entry(id).or_default();
181-
epolls.push(epoll);
178+
let idx = epolls
179+
.binary_search_by_key(&epoll.id(), |&(id, _)| id)
180+
.expect_err("trying to add an epoll that's already in the list");
181+
epolls.insert(idx, (epoll.id(), FileDescriptionRef::downgrade(epoll)));
182182
}
183183

184-
fn remove(&mut self, id: FdId, epoll_addr: usize) {
184+
fn remove(&mut self, id: FdId, epoll_id: FdId) {
185185
let epolls = self.0.entry(id).or_default();
186-
// FIXME: linear scan. Keep the list sorted so we can do binary search?
187186
let idx = epolls
188-
.iter()
189-
.position(|old_ref| old_ref.addr() == epoll_addr)
187+
.binary_search_by_key(&epoll_id, |&(id, _)| id)
190188
.expect("trying to remove an epoll that's not in the list");
191189
epolls.remove(idx);
192190
}
193191

194-
fn get_epolls(&self, id: FdId) -> Option<&Vec<WeakFileDescriptionRef<Epoll>>> {
195-
self.0.get(&id)
192+
fn get_epolls(&self, id: FdId) -> Option<impl Iterator<Item = &WeakFileDescriptionRef<Epoll>>> {
193+
self.0.get(&id).map(|epolls| epolls.iter().map(|(_id, epoll)| epoll))
196194
}
197195

198196
pub fn remove_epolls(&mut self, id: FdId) {
199197
if let Some(epolls) = self.0.remove(&id) {
200-
for epoll in epolls.iter().filter_map(|e| e.upgrade()) {
198+
for epoll in epolls.iter().filter_map(|(_id, epoll)| epoll.upgrade()) {
201199
// This is a still-live epoll with interest in this FD. Remove all
202200
// relevent interests.
203201
epoll
@@ -343,33 +341,40 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
343341
);
344342
}
345343

346-
// Add new interest to list.
344+
// Add new interest to list. Experiments show that we need to reset all state
345+
// on `EPOLL_CTL_MOD`, including the edge tracking.
347346
let epoll_key = (id, fd);
348-
let new_interest = EpollEventInterest { events, data };
349-
if op == epoll_ctl_add {
347+
let new_interest = EpollEventInterest { events, data, prev_events: 0 };
348+
let new_interest = if op == epoll_ctl_add {
350349
if interest_list.range(range_for_id(id)).next().is_none() {
351350
// This is the first time this FD got added to this epoll.
352351
// Remember that in the global list so we get notified about FD events.
353-
this.machine.epoll_interests.insert(id, FileDescriptionRef::downgrade(&epfd));
352+
this.machine.epoll_interests.insert(id, &epfd);
354353
}
355-
if interest_list.insert(epoll_key, new_interest).is_some() {
356-
// We already had interest in this.
357-
return this.set_last_error_and_return_i32(LibcError("EEXIST"));
354+
match interest_list.entry(epoll_key) {
355+
btree_map::Entry::Occupied(_) => {
356+
// We already had interest in this.
357+
return this.set_last_error_and_return_i32(LibcError("EEXIST"));
358+
}
359+
btree_map::Entry::Vacant(e) => e.insert(new_interest),
358360
}
359361
} else {
360362
// Modify the existing interest.
361363
let Some(interest) = interest_list.get_mut(&epoll_key) else {
362364
return this.set_last_error_and_return_i32(LibcError("ENOENT"));
363365
};
364366
*interest = new_interest;
367+
interest
365368
};
366369

367370
// Deliver events for the new interest.
371+
let force_edge = true; // makes no difference since we reset `prev_events`
368372
send_ready_events_to_interests(
369373
this,
370374
&epfd,
371375
fd_ref.as_unix(this).get_epoll_ready_events()?.get_event_bitmask(this),
372-
std::iter::once((&epoll_key, &new_interest)),
376+
force_edge,
377+
std::iter::once((&epoll_key, new_interest)),
373378
)?;
374379

375380
interp_ok(Scalar::from_i32(0))
@@ -384,10 +389,10 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
384389
// If this was the last interest in this FD, remove us from the global list
385390
// of who is interested in this FD.
386391
if interest_list.range(range_for_id(id)).next().is_none() {
387-
this.machine.epoll_interests.remove(id, epfd.addr());
392+
this.machine.epoll_interests.remove(id, epfd.id());
388393
}
389394

390-
// Remove related epoll_interest from ready list.
395+
// Remove related event instance from ready list.
391396
epfd.ready_list.borrow_mut().remove(&epoll_key);
392397

393398
interp_ok(Scalar::from_i32(0))
@@ -519,13 +524,16 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
519524
}
520525

521526
/// For a specific file description, get its ready events and send it to everyone who registered
522-
/// interest in this FD. This function should be called whenever an event causes more bytes or
523-
/// an EOF to become newly readable from an FD, and whenever more bytes can be written to an FD
524-
/// or no more future writes are possible.
527+
/// interest in this FD. This function should be called whenever the result of
528+
/// `get_epoll_ready_events` would change.
525529
///
526-
/// This *will* report an event if anyone is subscribed to it, without any further filtering, so
527-
/// do not call this function when an FD didn't have anything happen to it!
528-
fn epoll_send_fd_ready_events(&mut self, fd_ref: DynFileDescriptionRef) -> InterpResult<'tcx> {
530+
/// If `force_edge` is set, edge-triggered interests will be triggered even if the set of
531+
/// ready events did not change. This can lead to spurious wakeups. Use with caution!
532+
fn epoll_send_fd_ready_events(
533+
&mut self,
534+
fd_ref: DynFileDescriptionRef,
535+
force_edge: bool,
536+
) -> InterpResult<'tcx> {
529537
let this = self.eval_context_mut();
530538
let id = fd_ref.id();
531539
// Figure out who is interested in this. We need to clone this list since we can't prove
@@ -534,7 +542,6 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
534542
return interp_ok(());
535543
};
536544
let epolls = epolls
537-
.iter()
538545
.map(|weak| {
539546
weak.upgrade()
540547
.expect("someone forgot to remove the garbage from `machine.epoll_interests`")
@@ -546,7 +553,8 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
546553
this,
547554
&epoll,
548555
event_bitmask,
549-
epoll.interest_list.borrow().range(range_for_id(id)),
556+
force_edge,
557+
epoll.interest_list.borrow_mut().range_mut(range_for_id(id)),
550558
)?;
551559
}
552560

@@ -560,25 +568,41 @@ fn send_ready_events_to_interests<'tcx, 'a>(
560568
ecx: &mut MiriInterpCx<'tcx>,
561569
epoll: &Epoll,
562570
event_bitmask: u32,
563-
interests: impl Iterator<Item = (&'a EpollEventKey, &'a EpollEventInterest)>,
571+
force_edge: bool,
572+
interests: impl Iterator<Item = (&'a EpollEventKey, &'a mut EpollEventInterest)>,
564573
) -> InterpResult<'tcx> {
565574
let mut wakeup = false;
566575
for (&event_key, interest) in interests {
576+
let mut ready_list = epoll.ready_list.borrow_mut();
567577
// This checks if any of the events specified in epoll_event_interest.events
568578
// match those in ready_events.
569579
let flags = interest.events & event_bitmask;
580+
let prev = std::mem::replace(&mut interest.prev_events, flags);
570581
if flags == 0 {
582+
// Make sure we *remove* any previous item from the ready list, since this
583+
// is not ready any more.
584+
ready_list.remove(&event_key);
571585
continue;
572586
}
573-
// Geenrate a new event instance, with the flags that this one is interested in.
574-
let mut new_instance = EpollEventInstance::new(flags, interest.data);
587+
// Generate new instance, or update existing one. It is crucial that whe we are done,
588+
// if an interest exists in the ready list, then it matches the latest events and data!
589+
let instance = match ready_list.entry(event_key) {
590+
btree_map::Entry::Occupied(e) => e.into_mut(),
591+
btree_map::Entry::Vacant(e) => {
592+
if !force_edge && flags == prev & flags {
593+
// Every bit in `flags` was already set in `prev`, and there's currently
594+
// no entry in the ready list for this. So there is nothing new and no
595+
// prior entry to update; just skip it.
596+
continue;
597+
}
598+
e.insert(EpollEventInstance::default())
599+
}
600+
};
601+
instance.events = flags;
602+
instance.data = interest.data;
575603
ecx.release_clock(|clock| {
576-
new_instance.clock.clone_from(clock);
604+
instance.clock.join(clock);
577605
})?;
578-
// Add event to ready list for this epoll instance.
579-
// Tests confirm that we have to *overwrite* the old instance for the same key.
580-
let mut ready_list = epoll.ready_list.borrow_mut();
581-
ready_list.insert(event_key, new_instance);
582606
wakeup = true;
583607
}
584608
if wakeup {

src/tools/miri/src/shims/unix/linux_like/eventfd.rs

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use std::io;
44
use std::io::ErrorKind;
55

66
use crate::concurrency::VClock;
7-
use crate::shims::files::{FileDescription, FileDescriptionRef, WeakFileDescriptionRef};
7+
use crate::shims::files::{FdId, FileDescription, FileDescriptionRef, WeakFileDescriptionRef};
88
use crate::shims::unix::UnixFileDescription;
99
use crate::shims::unix::linux_like::epoll::{EpollReadyEvents, EvalContextExt as _};
1010
use crate::*;
@@ -39,7 +39,7 @@ impl FileDescription for EventFd {
3939

4040
fn destroy<'tcx>(
4141
self,
42-
_self_addr: usize,
42+
_self_id: FdId,
4343
_communicate_allowed: bool,
4444
_ecx: &mut MiriInterpCx<'tcx>,
4545
) -> InterpResult<'tcx, io::Result<()>> {
@@ -217,7 +217,10 @@ fn eventfd_write<'tcx>(
217217

218218
// The state changed; we check and update the status of all supported event
219219
// types for current file description.
220-
ecx.epoll_send_fd_ready_events(eventfd)?;
220+
// Linux seems to cause spurious wakeups here, and Tokio seems to rely on that
221+
// (see <https://github.com/rust-lang/miri/pull/4676#discussion_r2510528994>
222+
// and also <https://www.illumos.org/issues/16700>).
223+
ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ true)?;
221224

222225
// Return how many bytes we consumed from the user-provided buffer.
223226
return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));
@@ -312,7 +315,8 @@ fn eventfd_read<'tcx>(
312315

313316
// The state changed; we check and update the status of all supported event
314317
// types for current file description.
315-
ecx.epoll_send_fd_ready_events(eventfd)?;
318+
// Linux seems to always emit do notifications here, even if we were already writable.
319+
ecx.epoll_send_fd_ready_events(eventfd, /* force_edge */ true)?;
316320

317321
// Tell userspace how many bytes we put into the buffer.
318322
return finish.call(ecx, Ok(buf_place.layout.size.bytes_usize()));

0 commit comments

Comments
 (0)