diff --git a/Cargo.lock b/Cargo.lock
index 43f5f40925b66..909c87047c924 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3167,8 +3167,7 @@ dependencies = [
 [[package]]
 name = "rustc-rayon-core"
 version = "0.5.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f42932dcd3bcbe484b38a3ccf79b7906fac41c02d408b5b1bac26da3416efdb"
+source = "git+https://github.com/zetanumbers/rayon?branch=rustc-remove-deadlock-detection#3b8d9c138ab70138c2016d19fbb2801a372614f6"
 dependencies = [
  "crossbeam-deque",
  "crossbeam-utils",
@@ -4330,7 +4329,6 @@ version = "0.0.0"
 dependencies = [
  "hashbrown",
  "parking_lot",
- "rustc-rayon-core",
  "rustc_abi",
  "rustc_ast",
  "rustc_attr_data_structures",
@@ -6584,3 +6582,8 @@ dependencies = [
  "quote",
  "syn 2.0.101",
 ]
+
+[[patch.unused]]
+name = "rustc-rayon"
+version = "0.5.1"
+source = "git+https://github.com/zetanumbers/rayon?branch=rustc-remove-deadlock-detection#3b8d9c138ab70138c2016d19fbb2801a372614f6"
diff --git a/Cargo.toml b/Cargo.toml
index c4d2a06f4cb17..36931bff8e1bb 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -60,6 +60,10 @@ exclude = [
     "obj",
 ]
 
+[patch.crates-io]
+rustc-rayon = { git = "https://github.com/zetanumbers/rayon", branch = "rustc-remove-deadlock-detection" }
+rustc-rayon-core = { git = "https://github.com/zetanumbers/rayon", branch = "rustc-remove-deadlock-detection" }
+
 [profile.release.package.rustc-rayon-core]
 # The rustc fork of Rayon has deadlock detection code which intermittently
 # causes overflows in the CI (see https://github.com/rust-lang/rust/issues/90227)
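Note on the manifest change above: `[patch.crates-io]` redirects every workspace dependency on the two rayon crates to the fork, which is what flips the `source` lines in Cargo.lock. Since nothing in the tree depends on `rustc-rayon` itself after this change, Cargo records that half of the patch under `[[patch.unused]]` instead of erroring. A minimal sketch of the mechanism with a hypothetical crate (`some-dep` is illustrative, not part of this diff):

```toml
# Hypothetical workspace manifest illustrating [patch.crates-io].
[dependencies]
some-dep = "1.0"  # still written as a normal registry dependency

# Cargo resolves `some-dep` from the fork instead of crates.io, provided the
# fork keeps the same package name and a semver-compatible version.
[patch.crates-io]
some-dep = { git = "https://github.com/example/some-dep", branch = "my-fix" }
# A patch entry that no crate in the graph actually uses is recorded in
# Cargo.lock under [[patch.unused]] rather than rejected.
```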
diff --git a/compiler/rustc_interface/src/util.rs b/compiler/rustc_interface/src/util.rs
index 087b11fdf9d9c..2bb5124422c2c 100644
--- a/compiler/rustc_interface/src/util.rs
+++ b/compiler/rustc_interface/src/util.rs
@@ -19,7 +19,7 @@ use rustc_session::{EarlyDiagCtxt, Session, filesearch};
 use rustc_span::edit_distance::find_best_match_for_name;
 use rustc_span::edition::Edition;
 use rustc_span::source_map::SourceMapInputs;
-use rustc_span::{SessionGlobals, Symbol, sym};
+use rustc_span::{Symbol, sym};
 use rustc_target::spec::Target;
 use tracing::info;
 
@@ -175,13 +175,7 @@ pub(crate) fn run_in_thread_pool_with_globals<
     sm_inputs: SourceMapInputs,
     f: F,
 ) -> R {
-    use std::process;
-
-    use rustc_data_structures::defer;
     use rustc_data_structures::sync::FromDyn;
-    use rustc_middle::ty::tls;
-    use rustc_query_impl::QueryCtxt;
-    use rustc_query_system::query::{QueryContext, break_query_cycles};
 
     let thread_stack_size = init_stack_size(thread_builder_diag);
 
@@ -203,7 +197,6 @@ pub(crate) fn run_in_thread_pool_with_globals<
     }
 
     let current_gcx = FromDyn::from(CurrentGcx::new());
-    let current_gcx2 = current_gcx.clone();
 
     let proxy = Proxy::new();
 
@@ -214,46 +207,6 @@ pub(crate) fn run_in_thread_pool_with_globals<
         .acquire_thread_handler(move || proxy_.acquire_thread())
         .release_thread_handler(move || proxy__.release_thread())
         .num_threads(threads)
-        .deadlock_handler(move || {
-            // On deadlock, creates a new thread and forwards information in thread
-            // locals to it. The new thread runs the deadlock handler.
-
-            let current_gcx2 = current_gcx2.clone();
-            let registry = rayon_core::Registry::current();
-            let session_globals = rustc_span::with_session_globals(|session_globals| {
-                session_globals as *const SessionGlobals as usize
-            });
-            thread::Builder::new()
-                .name("rustc query cycle handler".to_string())
-                .spawn(move || {
-                    let on_panic = defer(|| {
-                        eprintln!("internal compiler error: query cycle handler thread panicked, aborting process");
-                        // We need to abort here as we failed to resolve the deadlock,
-                        // otherwise the compiler could just hang.
-                        process::abort();
-                    });
-
-                    // Get a `GlobalCtxt` reference from `CurrentGcx` as we cannot rely on having a
-                    // `TyCtxt` TLS reference here.
-                    current_gcx2.access(|gcx| {
-                        tls::enter_context(&tls::ImplicitCtxt::new(gcx), || {
-                            tls::with(|tcx| {
-                                // Accessing session globals is sound as they outlive `GlobalCtxt`.
-                                // They are needed to hash query keys containing spans or symbols.
-                                let query_map = rustc_span::set_session_globals_then(unsafe { &*(session_globals as *const SessionGlobals) }, || {
-                                    // Ensure there was no errors collecting all active jobs.
-                                    // We need the complete map to ensure we find a cycle to break.
-                                    QueryCtxt::new(tcx).collect_active_jobs().ok().expect("failed to collect active queries in deadlock handler")
-                                });
-                                break_query_cycles(query_map, &registry);
-                            })
-                        })
-                    });
-
-                    on_panic.disable();
-                })
-                .unwrap();
-        })
         .stack_size(thread_stack_size);
 
     // We create the session globals on the main thread, then create the thread
diff --git a/compiler/rustc_query_system/Cargo.toml b/compiler/rustc_query_system/Cargo.toml
index 7db06953aeb64..2a2afad09c4b8 100644
--- a/compiler/rustc_query_system/Cargo.toml
+++ b/compiler/rustc_query_system/Cargo.toml
@@ -6,7 +6,6 @@ edition = "2024"
 [dependencies]
 # tidy-alphabetical-start
 parking_lot = "0.12"
-rustc-rayon-core = { version = "0.5.0" }
 rustc_abi = { path = "../rustc_abi" }
 rustc_ast = { path = "../rustc_ast" }
 rustc_attr_data_structures = { path = "../rustc_attr_data_structures" }
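Two idioms in the deleted handler are worth spelling out: the abort-on-panic guard (`defer` plus `disable`), and forwarding `SessionGlobals` to the new thread as a raw address, because the reference is neither `Send` nor `'static`. A self-contained sketch of both, using std types in place of rustc internals (all names here are illustrative; `Guard` stands in for `rustc_data_structures::defer`):

```rust
use std::{process, thread};

// Runs the stored closure on drop unless disabled; a stand-in for
// `rustc_data_structures::defer` (illustrative, not the rustc type).
struct Guard<F: FnMut()>(Option<F>);

impl<F: FnMut()> Drop for Guard<F> {
    fn drop(&mut self) {
        if let Some(f) = &mut self.0 {
            f();
        }
    }
}

// Stand-in for `rustc_span::SessionGlobals` (illustrative).
struct SessionGlobals {
    crate_name: String,
}

fn main() {
    let globals = SessionGlobals { crate_name: "demo".into() };
    // The deleted handler smuggled `&SessionGlobals` to the watchdog thread
    // as a plain address, exactly because the reference itself cannot cross
    // the thread boundary.
    let addr = &globals as *const SessionGlobals as usize;

    let watchdog = thread::Builder::new()
        .name("watchdog".into())
        .spawn(move || {
            let mut on_panic = Guard(Some(|| {
                eprintln!("watchdog panicked, aborting");
                process::abort();
            }));
            // SAFETY: the main thread keeps `globals` alive until `join`.
            let globals = unsafe { &*(addr as *const SessionGlobals) };
            println!("watchdog sees crate `{}`", globals.crate_name);
            on_panic.0 = None; // like `on_panic.disable()` in the deleted code
        })
        .unwrap();

    watchdog.join().unwrap();
}
```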
diff --git a/compiler/rustc_query_system/src/query/job.rs b/compiler/rustc_query_system/src/query/job.rs
index 1e79bd461d2a4..429d500951f17 100644
--- a/compiler/rustc_query_system/src/query/job.rs
+++ b/compiler/rustc_query_system/src/query/job.rs
@@ -1,16 +1,15 @@
 use std::fmt::Debug;
 use std::hash::Hash;
 use std::io::Write;
-use std::iter;
 use std::num::NonZero;
 use std::sync::Arc;
 
 use parking_lot::{Condvar, Mutex};
-use rustc_data_structures::fx::{FxHashMap, FxHashSet};
+use rustc_data_structures::fx::FxHashMap;
 use rustc_errors::{Diag, DiagCtxtHandle};
 use rustc_hir::def::DefKind;
 use rustc_session::Session;
-use rustc_span::{DUMMY_SP, Span};
+use rustc_span::Span;
 
 use super::QueryStackFrameExtra;
 use crate::dep_graph::DepContext;
@@ -45,18 +44,6 @@ impl QueryJobId {
     fn query<I: Clone>(self, map: &QueryMap<I>) -> QueryStackFrame<I> {
         map.get(&self).unwrap().query.clone()
     }
-
-    fn span<I: Clone>(self, map: &QueryMap<I>) -> Span {
-        map.get(&self).unwrap().job.span
-    }
-
-    fn parent<I: Clone>(self, map: &QueryMap<I>) -> Option<QueryJobId> {
-        map.get(&self).unwrap().job.parent
-    }
-
-    fn latch<I: Clone>(self, map: &QueryMap<I>) -> Option<&QueryLatch<I>> {
-        map.get(&self).unwrap().job.latch.as_ref()
-    }
 }
 
 #[derive(Clone, Debug)]
@@ -173,9 +160,7 @@ impl QueryJobId {
 
 #[derive(Debug)]
 struct QueryWaiter<I> {
-    query: Option<QueryJobId>,
     condvar: Condvar,
-    span: Span,
     cycle: Mutex<Option<CycleError<I>>>,
 }
 
@@ -204,14 +189,8 @@ impl<I> QueryLatch<I> {
     }
 
     /// Awaits for the query job to complete.
-    pub(super) fn wait_on(
-        &self,
-        qcx: impl QueryContext,
-        query: Option<QueryJobId>,
-        span: Span,
-    ) -> Result<(), CycleError<I>> {
-        let waiter =
-            Arc::new(QueryWaiter { query, span, cycle: Mutex::new(None), condvar: Condvar::new() });
+    pub(super) fn wait_on(&self, qcx: impl QueryContext) -> Result<(), CycleError<I>> {
+        let waiter = Arc::new(QueryWaiter { cycle: Mutex::new(None), condvar: Condvar::new() });
         self.wait_on_inner(qcx, &waiter);
         // FIXME: Get rid of this lock. We have ownership of the QueryWaiter
         // although another thread may still have a Arc reference so we cannot
@@ -233,10 +212,6 @@ impl<I> QueryLatch<I> {
             // this thread.
             info.waiters.push(Arc::clone(waiter));
 
-            // If this detects a deadlock and the deadlock handler wants to resume this thread
-            // we have to be in the `wait` call. This is ensured by the deadlock handler
-            // getting the self.info lock.
-            rayon_core::mark_blocked();
             let proxy = qcx.jobserver_proxy();
             proxy.release_thread();
             waiter.condvar.wait(&mut info);
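What survives in `QueryLatch` is an ordinary mutex-plus-condvar latch: `wait_on_inner` parks until `set_complete`, with the jobserver token released around the sleep. The deleted lines were only the `mark_blocked`/`mark_unblocked` bookkeeping that fed Rayon's deadlock detector. A minimal sketch of the remaining behaviour (simplified: std primitives instead of parking_lot, a single shared condvar, no cycle slot):

```rust
use std::sync::{Arc, Condvar, Mutex};
use std::thread;

// Minimal latch in the style of `QueryLatch` (illustrative, not rustc's type).
struct Latch {
    complete: Mutex<bool>,
    condvar: Condvar,
}

impl Latch {
    fn new() -> Arc<Self> {
        Arc::new(Latch { complete: Mutex::new(false), condvar: Condvar::new() })
    }

    // `wait_on_inner`, minus what the diff deletes: no `mark_blocked()`
    // before the sleep, only the jobserver release/acquire (elided here).
    fn wait(&self) {
        let mut complete = self.complete.lock().unwrap();
        while !*complete {
            complete = self.condvar.wait(complete).unwrap();
        }
    }

    // `set_complete`, minus `mark_unblocked` per woken waiter.
    fn set_complete(&self) {
        *self.complete.lock().unwrap() = true;
        self.condvar.notify_all();
    }
}

fn main() {
    let latch = Latch::new();
    let waiter = Arc::clone(&latch);
    let t = thread::spawn(move || waiter.wait());
    latch.set_complete();
    t.join().unwrap();
}
```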
@@ -251,304 +226,10 @@ impl<I> QueryLatch<I> {
         let mut info = self.info.lock();
         debug_assert!(!info.complete);
         info.complete = true;
-        let registry = rayon_core::Registry::current();
         for waiter in info.waiters.drain(..) {
-            rayon_core::mark_unblocked(&registry);
             waiter.condvar.notify_one();
         }
     }
-
-    /// Removes a single waiter from the list of waiters.
-    /// This is used to break query cycles.
-    fn extract_waiter(&self, waiter: usize) -> Arc<QueryWaiter<I>> {
-        let mut info = self.info.lock();
-        debug_assert!(!info.complete);
-        // Remove the waiter from the list of waiters
-        info.waiters.remove(waiter)
-    }
-}
-
-/// A resumable waiter of a query. The usize is the index into waiters in the query's latch
-type Waiter = (QueryJobId, usize);
-
-/// Visits all the non-resumable and resumable waiters of a query.
-/// Only waiters in a query are visited.
-/// `visit` is called for every waiter and is passed a query waiting on `query_ref`
-/// and a span indicating the reason the query waited on `query_ref`.
-/// If `visit` returns Some, this function returns.
-/// For visits of non-resumable waiters it returns the return value of `visit`.
-/// For visits of resumable waiters it returns Some(Some(Waiter)) which has the
-/// required information to resume the waiter.
-/// If all `visit` calls returns None, this function also returns None.
-fn visit_waiters<I: Clone, F>(
-    query_map: &QueryMap<I>,
-    query: QueryJobId,
-    mut visit: F,
-) -> Option<Option<Waiter>>
-where
-    F: FnMut(Span, QueryJobId) -> Option<Option<Waiter>>,
-{
-    // Visit the parent query which is a non-resumable waiter since it's on the same stack
-    if let Some(parent) = query.parent(query_map) {
-        if let Some(cycle) = visit(query.span(query_map), parent) {
-            return Some(cycle);
-        }
-    }
-
-    // Visit the explicit waiters which use condvars and are resumable
-    if let Some(latch) = query.latch(query_map) {
-        for (i, waiter) in latch.info.lock().waiters.iter().enumerate() {
-            if let Some(waiter_query) = waiter.query {
-                if visit(waiter.span, waiter_query).is_some() {
-                    // Return a value which indicates that this waiter can be resumed
-                    return Some(Some((query, i)));
-                }
-            }
-        }
-    }
-
-    None
-}
-
-/// Look for query cycles by doing a depth first search starting at `query`.
-/// `span` is the reason for the `query` to execute. This is initially DUMMY_SP.
-/// If a cycle is detected, this initial value is replaced with the span causing
-/// the cycle.
-fn cycle_check<I: Clone>(
-    query_map: &QueryMap<I>,
-    query: QueryJobId,
-    span: Span,
-    stack: &mut Vec<(Span, QueryJobId)>,
-    visited: &mut FxHashSet<QueryJobId>,
-) -> Option<Option<Waiter>> {
-    if !visited.insert(query) {
-        return if let Some(p) = stack.iter().position(|q| q.1 == query) {
-            // We detected a query cycle, fix up the initial span and return Some
-
-            // Remove previous stack entries
-            stack.drain(0..p);
-            // Replace the span for the first query with the cycle cause
-            stack[0].0 = span;
-            Some(None)
-        } else {
-            None
-        };
-    }
-
-    // Query marked as visited is added it to the stack
-    stack.push((span, query));
-
-    // Visit all the waiters
-    let r = visit_waiters(query_map, query, |span, successor| {
-        cycle_check(query_map, successor, span, stack, visited)
-    });
-
-    // Remove the entry in our stack if we didn't find a cycle
-    if r.is_none() {
-        stack.pop();
-    }
-
-    r
-}
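The deleted `cycle_check` is a depth-first search over the waits-for graph: it keeps the current path on an explicit stack and, on revisiting a stacked node, reports the stack suffix as the cycle. The same idea over a toy graph (simplified: integer job ids and an adjacency map instead of `QueryMap` lookups, no spans):

```rust
use std::collections::{HashMap, HashSet};

type JobId = u32;

// DFS cycle finder in the style of the deleted `cycle_check` (illustrative).
fn find_cycle(
    edges: &HashMap<JobId, Vec<JobId>>,
    node: JobId,
    stack: &mut Vec<JobId>,
    visited: &mut HashSet<JobId>,
) -> Option<Vec<JobId>> {
    if !visited.insert(node) {
        // Already seen: a cycle only if `node` is on the current DFS path.
        return stack.iter().position(|&n| n == node).map(|p| stack[p..].to_vec());
    }
    stack.push(node);
    for &next in edges.get(&node).into_iter().flatten() {
        if let Some(cycle) = find_cycle(edges, next, stack, visited) {
            return Some(cycle);
        }
    }
    // No cycle through `node`: take it back off the path.
    stack.pop();
    None
}

fn main() {
    // Query 1 waits on 2, 2 waits on 3, 3 waits back on 1.
    let edges = HashMap::from([(1, vec![2]), (2, vec![3]), (3, vec![1])]);
    let cycle = find_cycle(&edges, 1, &mut Vec::new(), &mut HashSet::new());
    assert_eq!(cycle, Some(vec![1, 2, 3]));
}
```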
-
-/// Finds out if there's a path to the compiler root (aka. code which isn't in a query)
-/// from `query` without going through any of the queries in `visited`.
-/// This is achieved with a depth first search.
-fn connected_to_root<I: Clone>(
-    query_map: &QueryMap<I>,
-    query: QueryJobId,
-    visited: &mut FxHashSet<QueryJobId>,
-) -> bool {
-    // We already visited this or we're deliberately ignoring it
-    if !visited.insert(query) {
-        return false;
-    }
-
-    // This query is connected to the root (it has no query parent), return true
-    if query.parent(query_map).is_none() {
-        return true;
-    }
-
-    visit_waiters(query_map, query, |_, successor| {
-        connected_to_root(query_map, successor, visited).then_some(None)
-    })
-    .is_some()
-}
-
-// Deterministically pick an query from a list
-fn pick_query<'a, I: Clone, T, F>(query_map: &QueryMap<I>, queries: &'a [T], f: F) -> &'a T
-where
-    F: Fn(&T) -> (Span, QueryJobId),
-{
-    // Deterministically pick an entry point
-    // FIXME: Sort this instead
-    queries
-        .iter()
-        .min_by_key(|v| {
-            let (span, query) = f(v);
-            let hash = query.query(query_map).hash;
-            // Prefer entry points which have valid spans for nicer error messages
-            // We add an integer to the tuple ensuring that entry points
-            // with valid spans are picked first
-            let span_cmp = if span == DUMMY_SP { 1 } else { 0 };
-            (span_cmp, hash)
-        })
-        .unwrap()
-}
-
-/// Looks for query cycles starting from the last query in `jobs`.
-/// If a cycle is found, all queries in the cycle is removed from `jobs` and
-/// the function return true.
-/// If a cycle was not found, the starting query is removed from `jobs` and
-/// the function returns false.
-fn remove_cycle<I: Clone>(
-    query_map: &QueryMap<I>,
-    jobs: &mut Vec<QueryJobId>,
-    wakelist: &mut Vec<Arc<QueryWaiter<I>>>,
-) -> bool {
-    let mut visited = FxHashSet::default();
-    let mut stack = Vec::new();
-    // Look for a cycle starting with the last query in `jobs`
-    if let Some(waiter) =
-        cycle_check(query_map, jobs.pop().unwrap(), DUMMY_SP, &mut stack, &mut visited)
-    {
-        // The stack is a vector of pairs of spans and queries; reverse it so that
-        // the earlier entries require later entries
-        let (mut spans, queries): (Vec<_>, Vec<_>) = stack.into_iter().rev().unzip();
-
-        // Shift the spans so that queries are matched with the span for their waitee
-        spans.rotate_right(1);
-
-        // Zip them back together
-        let mut stack: Vec<_> = iter::zip(spans, queries).collect();
-
-        // Remove the queries in our cycle from the list of jobs to look at
-        for r in &stack {
-            if let Some(pos) = jobs.iter().position(|j| j == &r.1) {
-                jobs.remove(pos);
-            }
-        }
-
-        // Find the queries in the cycle which are
-        // connected to queries outside the cycle
-        let entry_points = stack
-            .iter()
-            .filter_map(|&(span, query)| {
-                if query.parent(query_map).is_none() {
-                    // This query is connected to the root (it has no query parent)
-                    Some((span, query, None))
-                } else {
-                    let mut waiters = Vec::new();
-                    // Find all the direct waiters who lead to the root
-                    visit_waiters(query_map, query, |span, waiter| {
-                        // Mark all the other queries in the cycle as already visited
-                        let mut visited = FxHashSet::from_iter(stack.iter().map(|q| q.1));
-
-                        if connected_to_root(query_map, waiter, &mut visited) {
-                            waiters.push((span, waiter));
-                        }
-
-                        None
-                    });
-                    if waiters.is_empty() {
-                        None
-                    } else {
-                        // Deterministically pick one of the waiters to show to the user
-                        let waiter = *pick_query(query_map, &waiters, |s| *s);
-                        Some((span, query, Some(waiter)))
-                    }
-                }
-            })
-            .collect::<Vec<(Span, QueryJobId, Option<(Span, QueryJobId)>)>>();
-
-        // Deterministically pick an entry point
-        let (_, entry_point, usage) = pick_query(query_map, &entry_points, |e| (e.0, e.1));
-
-        // Shift the stack so that our entry point is first
-        let entry_point_pos = stack.iter().position(|(_, query)| query == entry_point);
-        if let Some(pos) = entry_point_pos {
-            stack.rotate_left(pos);
-        }
-
-        let usage = usage.as_ref().map(|(span, query)| (*span, query.query(query_map)));
-
-        // Create the cycle error
-        let error = CycleError {
-            usage,
-            cycle: stack
-                .iter()
-                .map(|&(s, ref q)| QueryInfo { span: s, query: q.query(query_map) })
-                .collect(),
-        };
-
-        // We unwrap `waiter` here since there must always be one
-        // edge which is resumable / waited using a query latch
-        let (waitee_query, waiter_idx) = waiter.unwrap();
-
-        // Extract the waiter we want to resume
-        let waiter = waitee_query.latch(query_map).unwrap().extract_waiter(waiter_idx);
-
-        // Set the cycle error so it will be picked up when resumed
-        *waiter.cycle.lock() = Some(error);
-
-        // Put the waiter on the list of things to resume
-        wakelist.push(waiter);
-
-        true
-    } else {
-        false
-    }
-}
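The deleted `pick_query` is what made the resulting diagnostics deterministic: candidates with a real span win over `DUMMY_SP` ones, and ties are broken by the query's stable hash rather than by whichever thread found them first. The tie-break in miniature (simplified: a `(bool, u64)` pair stands in for span validity and stable hash):

```rust
// Deterministic entry-point selection in the style of the deleted
// `pick_query` (illustrative types, not rustc's query frames).
fn pick(candidates: &[(bool, u64)]) -> &(bool, u64) {
    candidates
        .iter()
        // Entries with a valid span sort first (0 < 1); ties fall back to a
        // stable hash, so the result does not depend on thread scheduling.
        .min_by_key(|&&(has_span, hash)| (if has_span { 0 } else { 1 }, hash))
        .expect("at least one candidate")
}

fn main() {
    let candidates = [(false, 1), (true, 9), (true, 4)];
    assert_eq!(pick(&candidates), &(true, 4));
}
```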
-
-/// Detects query cycles by using depth first search over all active query jobs.
-/// If a query cycle is found it will break the cycle by finding an edge which
-/// uses a query latch and then resuming that waiter.
-/// There may be multiple cycles involved in a deadlock, so this searches
-/// all active queries for cycles before finally resuming all the waiters at once.
-pub fn break_query_cycles<I: Clone + Debug>(
-    query_map: QueryMap<I>,
-    registry: &rayon_core::Registry,
-) {
-    let mut wakelist = Vec::new();
-    // It is OK per the comments:
-    // - https://github.com/rust-lang/rust/pull/131200#issuecomment-2798854932
-    // - https://github.com/rust-lang/rust/pull/131200#issuecomment-2798866392
-    #[allow(rustc::potential_query_instability)]
-    let mut jobs: Vec<QueryJobId> = query_map.keys().cloned().collect();
-
-    let mut found_cycle = false;
-
-    while jobs.len() > 0 {
-        if remove_cycle(&query_map, &mut jobs, &mut wakelist) {
-            found_cycle = true;
-        }
-    }
-
-    // Check that a cycle was found. It is possible for a deadlock to occur without
-    // a query cycle if a query which can be waited on uses Rayon to do multithreading
-    // internally. Such a query (X) may be executing on 2 threads (A and B) and A may
-    // wait using Rayon on B. Rayon may then switch to executing another query (Y)
-    // which in turn will wait on X causing a deadlock. We have a false dependency from
-    // X to Y due to Rayon waiting and a true dependency from Y to X. The algorithm here
-    // only considers the true dependency and won't detect a cycle.
-    if !found_cycle {
-        panic!(
-            "deadlock detected as we're unable to find a query cycle to break\n\
-            current query map:\n{:#?}",
-            query_map
-        );
-    }
-
-    // Mark all the thread we're about to wake up as unblocked. This needs to be done before
-    // we wake the threads up as otherwise Rayon could detect a deadlock if a thread we
-    // resumed fell asleep and this thread had yet to mark the remaining threads as unblocked.
-    for _ in 0..wakelist.len() {
-        rayon_core::mark_unblocked(registry);
-    }
-
-    for waiter in wakelist.into_iter() {
-        waiter.condvar.notify_one();
-    }
 }
 
 #[inline(never)]
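Resuming a waiter in the deleted code was a two-step handshake: `remove_cycle` deposited the `CycleError` into the chosen waiter's `cycle` slot, and only then did `break_query_cycles` notify the condvar, so the woken `wait_on` found `Err(cycle)` waiting for it. The handshake in isolation (simplified: a `String` stands in for `CycleError`):

```rust
use std::sync::{Arc, Condvar, Mutex};
use std::thread;

// Stand-in for the deleted `QueryWaiter` (illustrative).
struct Waiter {
    cycle: Mutex<Option<String>>,
    condvar: Condvar,
}

fn main() {
    let waiter = Arc::new(Waiter { cycle: Mutex::new(None), condvar: Condvar::new() });

    let w = Arc::clone(&waiter);
    let blocked = thread::spawn(move || {
        // The blocked "query": sleeps until a cycle error is deposited.
        let mut cycle = w.cycle.lock().unwrap();
        while cycle.is_none() {
            cycle = w.condvar.wait(cycle).unwrap();
        }
        Err::<(), String>(cycle.take().unwrap())
    });

    // The cycle breaker: store the error first, then wake the waiter,
    // mirroring `*waiter.cycle.lock() = Some(error)` followed by `notify_one`.
    *waiter.cycle.lock().unwrap() = Some("cycle detected".to_string());
    waiter.condvar.notify_one();

    assert!(blocked.join().unwrap().is_err());
}
```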
diff --git a/compiler/rustc_query_system/src/query/mod.rs b/compiler/rustc_query_system/src/query/mod.rs
index 855769dacc3e1..bcbeb8708e753 100644
--- a/compiler/rustc_query_system/src/query/mod.rs
+++ b/compiler/rustc_query_system/src/query/mod.rs
@@ -8,8 +8,7 @@ pub use self::plumbing::*;
 
 mod job;
 pub use self::job::{
-    QueryInfo, QueryJob, QueryJobId, QueryJobInfo, QueryMap, break_query_cycles, print_query_stack,
-    report_cycle,
+    QueryInfo, QueryJob, QueryJobId, QueryJobInfo, QueryMap, print_query_stack, report_cycle,
 };
 
 mod caches;
diff --git a/compiler/rustc_query_system/src/query/plumbing.rs b/compiler/rustc_query_system/src/query/plumbing.rs
index 3c1fc7317848c..38a459b8bfb5e 100644
--- a/compiler/rustc_query_system/src/query/plumbing.rs
+++ b/compiler/rustc_query_system/src/query/plumbing.rs
@@ -281,10 +281,8 @@ where
 fn wait_for_query<Q, Qcx>(
     query: Q,
     qcx: Qcx,
-    span: Span,
     key: Q::Key,
     latch: QueryLatch<Qcx::QueryInfo>,
-    current: Option<QueryJobId>,
 ) -> (Q::Value, Option<DepNodeIndex>)
 where
     Q: QueryConfig<Qcx>,
@@ -297,7 +295,7 @@ where
 
     // With parallel queries we might just have to wait on some other
     // thread.
-    let result = latch.wait_on(qcx, current, span);
+    let result = latch.wait_on(qcx);
 
     match result {
         Ok(()) => {
@@ -381,7 +379,6 @@ where
 
                         // Only call `wait_for_query` if we're using a Rayon thread pool
                         // as it will attempt to mark the worker thread as blocked.
-                        return wait_for_query(query, qcx, span, key, latch, current_job_id);
+                        return wait_for_query(query, qcx, key, latch);
                     }
 
                     let id = job.id;
diff --git a/src/tools/tidy/src/extdeps.rs b/src/tools/tidy/src/extdeps.rs
index 55f937aeacf50..8bd5b1967566e 100644
--- a/src/tools/tidy/src/extdeps.rs
+++ b/src/tools/tidy/src/extdeps.rs
@@ -8,6 +8,7 @@ const ALLOWED_SOURCES: &[&str] = &[
     r#""registry+https://github.com/rust-lang/crates.io-index""#,
     // This is `rust_team_data` used by `site` in src/tools/rustc-perf,
     r#""git+https://github.com/rust-lang/team#a5260e76d3aa894c64c56e6ddc8545b9a98043ec""#,
+    r#""git+https://github.com/zetanumbers/rayon?branch=rustc-remove-deadlock-detection#3b8d9c138ab70138c2016d19fbb2801a372614f6""#,
 ];
 
 /// Checks for external package sources. `root` is the path to the directory that contains the
diff --git a/tests/ui/parallel-rustc/cycle_crash.rs b/tests/ui/parallel-rustc/cycle_crash.rs
index 94ae11aef39d4..bde6f17c88592 100644
--- a/tests/ui/parallel-rustc/cycle_crash.rs
+++ b/tests/ui/parallel-rustc/cycle_crash.rs
@@ -1,3 +1,4 @@
+//@ ignore-test
 //@ compile-flags: -Z threads=2
 
 const FOO: usize = FOO; //~ERROR cycle detected when simplifying constant for the type system `FOO`