Skip to content

Commit dcd2dd9

Browse files
authored
Rollup merge of rust-lang#146503 - joboet:macos-condvar-timeout, r=ibraheemdev
std: improve handling of timed condition variable waits on macOS Fixes rust-lang#37440 (for good). This fixes two issues with `Condvar::wait_timeout` on macOS: Apple's implementation of `pthread_cond_timedwait` internally converts the absolute timeout to a relative one, measured in nanoseconds, but fails to consider overflow when doing so. This results in `wait_timeout` returning much earlier than anticipated when passed a duration that is slightly longer than `u64::MAX` nanoseconds (around 584 years). The existing clamping introduced by rust-lang#42604 to address rust-lang#37440 unfortunately used a maximum duration of 1000 years and thus still runs into the bug when run on older macOS versions (or with `PTHREAD_MUTEX_USE_ULOCK` set to a value other than "1"). See rust-lang#37440 (comment) for context. Reducing the maximum duration alone however would not be enough to make the implementation completely correct. As macOS does not support `pthread_condattr_setclock`, the deadline passed to `pthread_cond_timedwait` is measured against the wall-time clock. `std` currently calculates the deadline by retrieving the current time and adding the duration to that, only for macOS to convert the deadline back to a relative duration by [retrieving the current time itself](https://github.com/apple-oss-distributions/libpthread/blob/1ebf56b3a702df53213c2996e5e128a535d2577e/src/pthread_cond.c#L802-L819) (this conversion is performed before the aforementioned problematic one). Thus, if the wall-time clock is adjusted between the `std` lookup and the system lookup, the relative duration could have changed, possibly even to a value larger than $2^{64}\ \textrm{ns}$. Luckily however, macOS supports the non-standard, tongue-twisting `pthread_cond_timedwait_relative_np` function which avoids the wall-clock-time roundtrip by taking a relative timeout. Even apart from that, this function is perfectly suited for `std`'s purposes: it is public (albeit badly-documented) API, [available since macOS 10.4](https://github.com/apple-oss-distributions/libpthread/blob/1ebf56b3a702df53213c2996e5e128a535d2577e/include/pthread/pthread.h#L555-L559) (that's way below our minimum of 10.12) and completely resilient against wall-time changes as all timeouts are [measured against the monotonic clock](https://github.com/apple-oss-distributions/xnu/blob/e3723e1f17661b24996789d8afc084c0c3303b26/bsd/kern/sys_ulock.c#L741) inside the kernel. Thus, this PR switches `Condvar::wait_timeout` to `pthread_cond_timedwait_relative_np`, making sure to clamp the duration to a maximum of $2^{64} - 1 \ \textrm{ns}$. I've added a miri shim as well, so the only thing missing is a definition of `pthread_cond_timedwait_relative_np` inside `libc`.
2 parents 252974a + fe1238e commit dcd2dd9

File tree

8 files changed

+179
-29
lines changed

8 files changed

+179
-29
lines changed

library/Cargo.lock

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,9 @@ dependencies = [
139139

140140
[[package]]
141141
name = "libc"
142-
version = "0.2.175"
142+
version = "0.2.177"
143143
source = "registry+https://github.com/rust-lang/crates.io-index"
144-
checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543"
144+
checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976"
145145
dependencies = [
146146
"rustc-std-workspace-core",
147147
]

library/std/Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ miniz_oxide = { version = "0.8.0", optional = true, default-features = false }
3333
addr2line = { version = "0.25.0", optional = true, default-features = false }
3434

3535
[target.'cfg(not(all(windows, target_env = "msvc")))'.dependencies]
36-
libc = { version = "0.2.172", default-features = false, features = [
36+
libc = { version = "0.2.177", default-features = false, features = [
3737
'rustc-dep-of-std',
3838
], public = true }
3939

library/std/src/sys/pal/unix/sync/condvar.rs

Lines changed: 80 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,6 @@
11
use super::Mutex;
22
use crate::cell::UnsafeCell;
33
use crate::pin::Pin;
4-
#[cfg(not(target_os = "nto"))]
5-
use crate::sys::pal::time::TIMESPEC_MAX;
6-
#[cfg(target_os = "nto")]
7-
use crate::sys::pal::time::TIMESPEC_MAX_CAPPED;
8-
use crate::sys::pal::time::Timespec;
94
use crate::time::Duration;
105

116
pub struct Condvar {
@@ -47,27 +42,29 @@ impl Condvar {
4742
let r = unsafe { libc::pthread_cond_wait(self.raw(), mutex.raw()) };
4843
debug_assert_eq!(r, 0);
4944
}
45+
}
5046

47+
#[cfg(not(target_vendor = "apple"))]
48+
impl Condvar {
5149
/// # Safety
5250
/// * `init` must have been called on this instance.
5351
/// * `mutex` must be locked by the current thread.
5452
/// * This condition variable may only be used with the same mutex.
5553
pub unsafe fn wait_timeout(&self, mutex: Pin<&Mutex>, dur: Duration) -> bool {
54+
#[cfg(not(target_os = "nto"))]
55+
use crate::sys::pal::time::TIMESPEC_MAX;
56+
#[cfg(target_os = "nto")]
57+
use crate::sys::pal::time::TIMESPEC_MAX_CAPPED;
58+
use crate::sys::pal::time::Timespec;
59+
5660
let mutex = mutex.raw();
5761

58-
// OSX implementation of `pthread_cond_timedwait` is buggy
59-
// with super long durations. When duration is greater than
60-
// 0x100_0000_0000_0000 seconds, `pthread_cond_timedwait`
61-
// in macOS Sierra returns error 316.
62-
//
63-
// This program demonstrates the issue:
64-
// https://gist.github.com/stepancheg/198db4623a20aad2ad7cddb8fda4a63c
65-
//
66-
// To work around this issue, the timeout is clamped to 1000 years.
67-
//
68-
// Cygwin implementation is based on NT API and a super large timeout
69-
// makes the syscall block forever.
70-
#[cfg(any(target_vendor = "apple", target_os = "cygwin"))]
62+
// Cygwin's implementation is based on the NT API, which measures time
63+
// in units of 100 ns. Unfortunately, Cygwin does not properly guard
64+
// against overflow when converting the time, hence we clamp the interval
65+
// to 1000 years, which will only become a problem in around 27000 years,
66+
// when the next rollover is less than 1000 years away...
67+
#[cfg(target_os = "cygwin")]
7168
let dur = Duration::min(dur, Duration::from_secs(1000 * 365 * 86400));
7269

7370
let timeout = Timespec::now(Self::CLOCK).checked_add_duration(&dur);
@@ -84,6 +81,57 @@ impl Condvar {
8481
}
8582
}
8683

84+
// Apple platforms (since macOS version 10.4 and iOS version 2.0) have
85+
// `pthread_cond_timedwait_relative_np`, a non-standard extension that
86+
// measures timeouts based on the monotonic clock and is thus resilient
87+
// against wall-clock changes.
88+
#[cfg(target_vendor = "apple")]
89+
impl Condvar {
90+
/// # Safety
91+
/// * `init` must have been called on this instance.
92+
/// * `mutex` must be locked by the current thread.
93+
/// * This condition variable may only be used with the same mutex.
94+
pub unsafe fn wait_timeout(&self, mutex: Pin<&Mutex>, dur: Duration) -> bool {
95+
let mutex = mutex.raw();
96+
97+
// The macOS implementation of `pthread_cond_timedwait` internally
98+
// converts the timeout passed to `pthread_cond_timedwait_relative_np`
99+
// to nanoseconds. Unfortunately, the "psynch" variant of condvars does
100+
// not guard against overflow during the conversion[^1], which means
101+
// that `pthread_cond_timedwait_relative_np` will return `ETIMEDOUT`
102+
// much earlier than expected if the relative timeout is longer than
103+
// `u64::MAX` nanoseconds.
104+
//
105+
// This can be observed even on newer platforms (by setting the environment
106+
// variable PTHREAD_MUTEX_USE_ULOCK to a value other than "1") by calling e.g.
107+
// ```
108+
// condvar.wait_timeout(..., Duration::from_secs(u64::MAX.div_ceil(1_000_000_000));
109+
// ```
110+
// (see #37440, especially
111+
// https://github.com/rust-lang/rust/issues/37440#issuecomment-3285958326).
112+
//
113+
// To work around this issue, always clamp the timeout to u64::MAX nanoseconds,
114+
// even if the "ulock" variant is used (which does guard against overflow).
115+
//
116+
// [^1]: https://github.com/apple-oss-distributions/libpthread/blob/1ebf56b3a702df53213c2996e5e128a535d2577e/kern/kern_synch.c#L1269
117+
const MAX_DURATION: Duration = Duration::from_nanos(u64::MAX);
118+
119+
let (dur, clamped) = if dur <= MAX_DURATION { (dur, false) } else { (MAX_DURATION, true) };
120+
121+
let timeout = libc::timespec {
122+
// This cannot overflow because of the clamping above.
123+
tv_sec: dur.as_secs() as i64,
124+
tv_nsec: dur.subsec_nanos() as i64,
125+
};
126+
127+
let r = unsafe { libc::pthread_cond_timedwait_relative_np(self.raw(), mutex, &timeout) };
128+
assert!(r == libc::ETIMEDOUT || r == 0);
129+
// Report clamping as a spurious wakeup. Who knows, maybe some
130+
// interstellar space probe will rely on this ;-).
131+
r == 0 || clamped
132+
}
133+
}
134+
87135
#[cfg(not(any(
88136
target_os = "android",
89137
target_vendor = "apple",
@@ -125,10 +173,23 @@ impl Condvar {
125173
}
126174
}
127175

176+
#[cfg(target_vendor = "apple")]
177+
impl Condvar {
178+
// `pthread_cond_timedwait_relative_np` measures the timeout
179+
// based on the monotonic clock.
180+
pub const PRECISE_TIMEOUT: bool = true;
181+
182+
/// # Safety
183+
/// May only be called once per instance of `Self`.
184+
pub unsafe fn init(self: Pin<&mut Self>) {
185+
// `PTHREAD_COND_INITIALIZER` is fully supported and we don't need to
186+
// change clocks, so there's nothing to do here.
187+
}
188+
}
189+
128190
// `pthread_condattr_setclock` is unfortunately not supported on these platforms.
129191
#[cfg(any(
130192
target_os = "android",
131-
target_vendor = "apple",
132193
target_os = "espidf",
133194
target_os = "horizon",
134195
target_os = "l4re",

library/std/tests/sync/condvar.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,3 +267,35 @@ nonpoison_and_poison_unwrap_test!(
267267
}
268268
}
269269
);
270+
271+
// Some platforms internally cast the timeout duration into nanoseconds.
272+
// If they fail to consider overflow during the conversion (I'm looking
273+
// at you, macOS), `wait_timeout` will return immediately and indicate a
274+
// timeout for durations that are slightly longer than u64::MAX nanoseconds.
275+
// `std` should guard against this by clamping the timeout.
276+
// See #37440 for context.
277+
nonpoison_and_poison_unwrap_test!(
278+
name: timeout_nanoseconds,
279+
test_body: {
280+
use locks::Mutex;
281+
use locks::Condvar;
282+
283+
let sent = Mutex::new(false);
284+
let cond = Condvar::new();
285+
286+
thread::scope(|s| {
287+
s.spawn(|| {
288+
thread::sleep(Duration::from_secs(2));
289+
maybe_unwrap(sent.set(true));
290+
cond.notify_all();
291+
});
292+
293+
let guard = maybe_unwrap(sent.lock());
294+
// If there is internal overflow, this call will return almost
295+
// immediately, before the other thread has reached the `notify_all`
296+
let (guard, res) = maybe_unwrap(cond.wait_timeout(guard, Duration::from_secs(u64::MAX.div_ceil(1_000_000_000))));
297+
assert!(!res.timed_out());
298+
assert!(*guard);
299+
})
300+
}
301+
);

src/tools/miri/src/shims/unix/foreign_items.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
815815
"pthread_cond_timedwait" => {
816816
let [cond, mutex, abstime] =
817817
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
818-
this.pthread_cond_timedwait(cond, mutex, abstime, dest)?;
818+
this.pthread_cond_timedwait(cond, mutex, abstime, dest, /* macos_relative_np */ false)?;
819819
}
820820
"pthread_cond_destroy" => {
821821
let [cond] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;

src/tools/miri/src/shims/unix/macos/foreign_items.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -307,6 +307,12 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
307307
this.os_unfair_lock_assert_not_owner(lock_op)?;
308308
}
309309

310+
"pthread_cond_timedwait_relative_np" => {
311+
let [cond, mutex, reltime] =
312+
this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
313+
this.pthread_cond_timedwait(cond, mutex, reltime, dest, /* macos_relative_np */ true)?;
314+
}
315+
310316
_ => return interp_ok(EmulateItemResult::NotSupported),
311317
};
312318

src/tools/miri/src/shims/unix/sync.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -834,8 +834,9 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
834834
&mut self,
835835
cond_op: &OpTy<'tcx>,
836836
mutex_op: &OpTy<'tcx>,
837-
abstime_op: &OpTy<'tcx>,
837+
timeout_op: &OpTy<'tcx>,
838838
dest: &MPlaceTy<'tcx>,
839+
macos_relative_np: bool,
839840
) -> InterpResult<'tcx> {
840841
let this = self.eval_context_mut();
841842

@@ -844,7 +845,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
844845

845846
// Extract the timeout.
846847
let duration = match this
847-
.read_timespec(&this.deref_pointer_as(abstime_op, this.libc_ty_layout("timespec"))?)?
848+
.read_timespec(&this.deref_pointer_as(timeout_op, this.libc_ty_layout("timespec"))?)?
848849
{
849850
Some(duration) => duration,
850851
None => {
@@ -853,14 +854,23 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
853854
return interp_ok(());
854855
}
855856
};
856-
if data.clock == TimeoutClock::RealTime {
857-
this.check_no_isolation("`pthread_cond_timedwait` with `CLOCK_REALTIME`")?;
858-
}
857+
858+
let (clock, anchor) = if macos_relative_np {
859+
// `pthread_cond_timedwait_relative_np` always measures time against the
860+
// monotonic clock, regardless of the condvar clock.
861+
(TimeoutClock::Monotonic, TimeoutAnchor::Relative)
862+
} else {
863+
if data.clock == TimeoutClock::RealTime {
864+
this.check_no_isolation("`pthread_cond_timedwait` with `CLOCK_REALTIME`")?;
865+
}
866+
867+
(data.clock, TimeoutAnchor::Absolute)
868+
};
859869

860870
this.condvar_wait(
861871
data.condvar_ref,
862872
mutex_ref,
863-
Some((data.clock, TimeoutAnchor::Absolute, duration)),
873+
Some((clock, anchor, duration)),
864874
Scalar::from_i32(0),
865875
this.eval_libc("ETIMEDOUT"), // retval_timeout
866876
dest.clone(),
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//@only-target: apple # `pthread_cond_timedwait_relative_np` is a non-standard extension
2+
3+
use std::time::Instant;
4+
5+
// FIXME: remove once this is in libc.
6+
mod libc {
7+
pub use ::libc::*;
8+
unsafe extern "C" {
9+
pub unsafe fn pthread_cond_timedwait_relative_np(
10+
cond: *mut libc::pthread_cond_t,
11+
lock: *mut libc::pthread_mutex_t,
12+
timeout: *const libc::timespec,
13+
) -> libc::c_int;
14+
}
15+
}
16+
17+
fn main() {
18+
unsafe {
19+
let mut mutex: libc::pthread_mutex_t = libc::PTHREAD_MUTEX_INITIALIZER;
20+
let mut cond: libc::pthread_cond_t = libc::PTHREAD_COND_INITIALIZER;
21+
22+
// Wait for 100 ms.
23+
let timeout = libc::timespec { tv_sec: 0, tv_nsec: 100_000_000 };
24+
25+
assert_eq!(libc::pthread_mutex_lock(&mut mutex as *mut _), 0);
26+
27+
let current_time = Instant::now();
28+
assert_eq!(
29+
libc::pthread_cond_timedwait_relative_np(&mut cond, &mut mutex, &timeout),
30+
libc::ETIMEDOUT
31+
);
32+
let elapsed_time = current_time.elapsed().as_millis();
33+
// This is actually deterministic (since isolation remains enabled),
34+
// but can change slightly with Rust updates.
35+
assert!(90 <= elapsed_time && elapsed_time <= 110);
36+
37+
assert_eq!(libc::pthread_mutex_unlock(&mut mutex), 0);
38+
assert_eq!(libc::pthread_mutex_destroy(&mut mutex), 0);
39+
assert_eq!(libc::pthread_cond_destroy(&mut cond), 0);
40+
}
41+
}

0 commit comments

Comments
 (0)