diff --git a/Cargo.lock b/Cargo.lock index beeb201dc51..bea4bc729fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5886,6 +5886,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "nexus-background-task-interface" +version = "0.1.0" +dependencies = [ + "omicron-workspace-hack", + "thiserror 1.0.69", + "tokio", +] + [[package]] name = "nexus-client" version = "0.1.0" @@ -7312,6 +7321,7 @@ dependencies = [ "macaddr", "mg-admin-client", "nexus-auth", + "nexus-background-task-interface", "nexus-client", "nexus-config", "nexus-db-lookup", diff --git a/Cargo.toml b/Cargo.toml index 52bb4d489d8..a9262efafc4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -77,6 +77,7 @@ members = [ "nexus-sled-agent-shared", "nexus/authz-macros", "nexus/auth", + "nexus/background-task-interface", "nexus/db-errors", "nexus/db-fixed-data", "nexus/db-lookup", @@ -227,6 +228,7 @@ default-members = [ "nexus-sled-agent-shared", "nexus/authz-macros", "nexus/auth", + "nexus/background-task-interface", "nexus/db-errors", "nexus/db-fixed-data", "nexus/db-lookup", @@ -500,6 +502,7 @@ mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "8 ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "8452936a53c3b16e53cbbf4e34e5e59899afc965" } multimap = "0.10.0" nexus-auth = { path = "nexus/auth" } +nexus-background-task-interface = { path = "nexus/background-task-interface" } nexus-client = { path = "clients/nexus-client" } nexus-config = { path = "nexus-config" } nexus-db-errors = { path = "nexus/db-errors" } diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index eaac2386186..0570aeb0690 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -52,6 +52,7 @@ ipnetwork.workspace = true itertools.workspace = true lldpd_client.workspace = true macaddr.workspace = true +nexus-background-task-interface.workspace = true # Not under "dev-dependencies"; these also need to be implemented for # integration tests. nexus-config.workspace = true diff --git a/nexus/background-task-interface/Cargo.toml b/nexus/background-task-interface/Cargo.toml new file mode 100644 index 00000000000..b91bba0a00d --- /dev/null +++ b/nexus/background-task-interface/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "nexus-background-task-interface" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[lints] +workspace = true + +[dependencies] +omicron-workspace-hack.workspace = true +thiserror.workspace = true +tokio.workspace = true diff --git a/nexus/background-task-interface/src/activator.rs b/nexus/background-task-interface/src/activator.rs new file mode 100644 index 00000000000..da8d12f49c0 --- /dev/null +++ b/nexus/background-task-interface/src/activator.rs @@ -0,0 +1,95 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::sync::{ + Arc, + atomic::{AtomicBool, Ordering}, +}; + +use thiserror::Error; +use tokio::sync::Notify; + +/// Activates a background task +/// +/// For more on what this means, see the documentation at +/// `nexus/src/app/background/mod.rs`. +/// +/// Activators are created with [`Activator::new()`] and then wired up to +/// specific background tasks using Nexus's `Driver::register()`. If you call +/// `Activator::activate()` before the activator is wired up to a background +/// task, then once the Activator _is_ wired up to a task, that task will +/// immediately be activated. +/// +/// Activators are designed specifically so they can be created before the +/// corresponding task has been created and then wired up with just an +/// `&Activator` (not a `&mut Activator`). See the +/// `nexus/src/app/background/mod.rs` documentation for more on why. +#[derive(Clone)] +pub struct Activator(Arc); + +/// Shared state for an `Activator`. +struct ActivatorInner { + pub(super) notify: Notify, + pub(super) wired_up: AtomicBool, +} + +impl Activator { + /// Create an activator that is not yet wired up to any background task + pub fn new() -> Activator { + Self(Arc::new(ActivatorInner { + notify: Notify::new(), + wired_up: AtomicBool::new(false), + })) + } + + /// Activate the background task that this Activator has been wired up to + /// + /// If this Activator has not yet been wired up, then whenever it _is_ wired + /// up, that task will be immediately activated. + pub fn activate(&self) { + self.0.notify.notify_one(); + } + + /// Sets the task as wired up. + /// + /// Returns an error if the task was already wired up. + pub fn mark_wired_up(&self) -> Result<(), AlreadyWiredUpError> { + match self.0.wired_up.compare_exchange( + false, + true, + Ordering::SeqCst, + Ordering::SeqCst, + ) { + Ok(false) => Ok(()), + Ok(true) => unreachable!( + "on success, the return value is always \ + the previous value (false)" + ), + Err(true) => Err(AlreadyWiredUpError {}), + Err(false) => unreachable!( + "on failure, the return value is always \ + the previous and current value (true)" + ), + } + } + + /// Blocks until the background task that this Activator has been wired up + /// to is activated. + /// + /// If this Activator has not yet been wired up, then whenever it _is_ wired + /// up, that task will be immediately activated. + pub async fn activated(&self) { + debug_assert!( + self.0.wired_up.load(Ordering::SeqCst), + "nothing should await activation from an activator that hasn't \ + been wired up" + ); + self.0.notify.notified().await + } +} + +/// Indicates that an activator was wired up more than once. +#[derive(Debug, Error)] +#[error("activator was already wired up")] +pub struct AlreadyWiredUpError {} diff --git a/nexus/background-task-interface/src/init.rs b/nexus/background-task-interface/src/init.rs new file mode 100644 index 00000000000..de3a11ba600 --- /dev/null +++ b/nexus/background-task-interface/src/init.rs @@ -0,0 +1,64 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::Activator; + +/// Interface for activating various background tasks and read data that they +/// expose to Nexus at-large +pub struct BackgroundTasks { + // Handles to activate specific background tasks + pub task_internal_dns_config: Activator, + pub task_internal_dns_servers: Activator, + pub task_external_dns_config: Activator, + pub task_external_dns_servers: Activator, + pub task_metrics_producer_gc: Activator, + pub task_external_endpoints: Activator, + pub task_nat_cleanup: Activator, + pub task_bfd_manager: Activator, + pub task_inventory_collection: Activator, + pub task_support_bundle_collector: Activator, + pub task_physical_disk_adoption: Activator, + pub task_decommissioned_disk_cleaner: Activator, + pub task_phantom_disks: Activator, + pub task_blueprint_loader: Activator, + pub task_blueprint_executor: Activator, + pub task_blueprint_rendezvous: Activator, + pub task_crdb_node_id_collector: Activator, + pub task_service_zone_nat_tracker: Activator, + pub task_switch_port_settings_manager: Activator, + pub task_v2p_manager: Activator, + pub task_region_replacement: Activator, + pub task_region_replacement_driver: Activator, + pub task_instance_watcher: Activator, + pub task_instance_updater: Activator, + pub task_instance_reincarnation: Activator, + pub task_service_firewall_propagation: Activator, + pub task_abandoned_vmm_reaper: Activator, + pub task_vpc_route_manager: Activator, + pub task_saga_recovery: Activator, + pub task_lookup_region_port: Activator, + pub task_region_snapshot_replacement_start: Activator, + pub task_region_snapshot_replacement_garbage_collection: Activator, + pub task_region_snapshot_replacement_step: Activator, + pub task_region_snapshot_replacement_finish: Activator, + pub task_tuf_artifact_replication: Activator, + pub task_read_only_region_replacement_start: Activator, + + // Handles to activate background tasks that do not get used by Nexus + // at-large. These background tasks are implementation details as far as + // the rest of Nexus is concerned. These handles don't even really need to + // be here, but it's convenient. + pub task_internal_dns_propagation: Activator, + pub task_external_dns_propagation: Activator, +} + +impl BackgroundTasks { + /// Activate the specified background task + /// + /// If the task is currently running, it will be activated again when it + /// finishes. + pub fn activate(&self, task: &Activator) { + task.activate(); + } +} diff --git a/nexus/background-task-interface/src/lib.rs b/nexus/background-task-interface/src/lib.rs new file mode 100644 index 00000000000..6a9c096c12b --- /dev/null +++ b/nexus/background-task-interface/src/lib.rs @@ -0,0 +1,17 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Common interface for describint and activating Nexus background tasks. +//! +//! This crate defines the [`BackgroundTasks`] type, which lists out all of the +//! background tasks within Nexus, and provides handles to activate them. +//! +//! For more about background tasks, see the documentation at +//! `nexus/src/app/background/mod.rs`. + +mod activator; +mod init; + +pub use activator::*; +pub use init::*; diff --git a/nexus/src/app/background/driver.rs b/nexus/src/app/background/driver.rs index cafc7011ac8..853ba798bbd 100644 --- a/nexus/src/app/background/driver.rs +++ b/nexus/src/app/background/driver.rs @@ -15,6 +15,7 @@ use futures::FutureExt; use futures::StreamExt; use futures::future::BoxFuture; use futures::stream::FuturesUnordered; +use nexus_background_task_interface::Activator; use nexus_db_queries::context::OpContext; use nexus_types::internal_api::views::ActivationReason; use nexus_types::internal_api::views::CurrentStatus; @@ -23,12 +24,8 @@ use nexus_types::internal_api::views::LastResult; use nexus_types::internal_api::views::LastResultCompleted; use nexus_types::internal_api::views::TaskStatus; use std::collections::BTreeMap; -use std::sync::Arc; -use std::sync::atomic::AtomicBool; -use std::sync::atomic::Ordering; use std::time::Duration; use std::time::Instant; -use tokio::sync::Notify; use tokio::sync::watch; use tokio::time::MissedTickBehavior; @@ -118,17 +115,10 @@ impl Driver { // requested. The caller provides their own Activator, which just // provides a specific Notify for us to use here. let activator = taskdef.activator; - if let Err(previous) = activator.0.wired_up.compare_exchange( - false, - true, - Ordering::SeqCst, - Ordering::SeqCst, - ) { + if let Err(error) = activator.mark_wired_up() { panic!( - "attempted to wire up the same background task handle \ - twice (previous \"wired_up\" = {}): currently attempting \ - to wire it up to task {:?}", - previous, name + "{error}: currently attempting to wire it up to task {:?}", + name ); } @@ -141,7 +131,7 @@ impl Driver { let task_exec = TaskExec::new( taskdef.period, taskdef.task_impl, - Arc::clone(&activator.0), + activator.clone(), opctx, status_tx, ); @@ -241,59 +231,6 @@ pub struct TaskDefinition<'a, N: ToString, D: ToString> { pub activator: &'a Activator, } -/// Activates a background task -/// -/// See [`crate::app::background`] module-level documentation for more on what -/// that means. -/// -/// Activators are created with [`Activator::new()`] and then wired up to -/// specific background tasks using [`Driver::register()`]. If you call -/// `Activator::activate()` before the activator is wired up to a background -/// task, then once the Activator _is_ wired up to a task, that task will -/// immediately be activated. -/// -/// Activators are designed specifically so they can be created before the -/// corresponding task has been created and then wired up with just an -/// `&Activator` (not a `&mut Activator`). See the [`super::init`] module-level -/// documentation for more on why. -#[derive(Clone)] -pub struct Activator(Arc); - -/// Shared state for an `Activator`. -struct ActivatorInner { - pub(super) notify: Notify, - pub(super) wired_up: AtomicBool, -} - -impl Activator { - /// Create an activator that is not yet wired up to any background task - pub fn new() -> Activator { - Self(Arc::new(ActivatorInner { - notify: Notify::new(), - wired_up: AtomicBool::new(false), - })) - } - - /// Activate the background task that this Activator has been wired up to - /// - /// If this Activator has not yet been wired up with [`Driver::register()`], - /// then whenever it _is_ wired up, that task will be immediately activated. - pub fn activate(&self) { - self.0.notify.notify_one(); - } -} - -impl ActivatorInner { - async fn activated(&self) { - debug_assert!( - self.wired_up.load(Ordering::SeqCst), - "nothing should await activation from an activator that hasn't \ - been wired up" - ); - self.notify.notified().await - } -} - /// Encapsulates state needed by the background tokio task to manage activation /// of the background task struct TaskExec { @@ -303,7 +240,7 @@ struct TaskExec { imp: Box, /// used to receive notifications from the Driver that someone has requested /// explicit activation - activation: Arc, + activation: Activator, /// passed through to the background task impl when activated opctx: OpContext, /// used to send current status back to the Driver @@ -316,7 +253,7 @@ impl TaskExec { fn new( period: Duration, imp: Box, - activation: Arc, + activation: Activator, opctx: OpContext, status_tx: watch::Sender, ) -> TaskExec { diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 026f85fca3a..d72a9345926 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -87,7 +87,6 @@ //! It's not foolproof but hopefully these mechanisms will catch the easy //! mistakes. -use super::Activator; use super::Driver; use super::driver::TaskDefinition; use super::tasks::abandoned_vmm_reaper; @@ -128,6 +127,8 @@ use super::tasks::vpc_routes; use crate::Nexus; use crate::app::oximeter::PRODUCER_LEASE_DURATION; use crate::app::saga::StartSaga; +use nexus_background_task_interface::Activator; +use nexus_background_task_interface::BackgroundTasks; use nexus_config::BackgroundTaskConfig; use nexus_config::DnsTasksConfig; use nexus_db_model::DnsGroup; @@ -143,70 +144,15 @@ use tokio::sync::watch; use update_common::artifacts::ArtifactsWithPlan; use uuid::Uuid; -/// Interface for activating various background tasks and read data that they -/// expose to Nexus at-large -pub struct BackgroundTasks { - // Handles to activate specific background tasks - pub task_internal_dns_config: Activator, - pub task_internal_dns_servers: Activator, - pub task_external_dns_config: Activator, - pub task_external_dns_servers: Activator, - pub task_metrics_producer_gc: Activator, - pub task_external_endpoints: Activator, - pub task_nat_cleanup: Activator, - pub task_bfd_manager: Activator, - pub task_inventory_collection: Activator, - pub task_support_bundle_collector: Activator, - pub task_physical_disk_adoption: Activator, - pub task_decommissioned_disk_cleaner: Activator, - pub task_phantom_disks: Activator, - pub task_blueprint_loader: Activator, - pub task_blueprint_executor: Activator, - pub task_blueprint_rendezvous: Activator, - pub task_crdb_node_id_collector: Activator, - pub task_service_zone_nat_tracker: Activator, - pub task_switch_port_settings_manager: Activator, - pub task_v2p_manager: Activator, - pub task_region_replacement: Activator, - pub task_region_replacement_driver: Activator, - pub task_instance_watcher: Activator, - pub task_instance_updater: Activator, - pub task_instance_reincarnation: Activator, - pub task_service_firewall_propagation: Activator, - pub task_abandoned_vmm_reaper: Activator, - pub task_vpc_route_manager: Activator, - pub task_saga_recovery: Activator, - pub task_lookup_region_port: Activator, - pub task_region_snapshot_replacement_start: Activator, - pub task_region_snapshot_replacement_garbage_collection: Activator, - pub task_region_snapshot_replacement_step: Activator, - pub task_region_snapshot_replacement_finish: Activator, - pub task_tuf_artifact_replication: Activator, - pub task_read_only_region_replacement_start: Activator, - - // Handles to activate background tasks that do not get used by Nexus - // at-large. These background tasks are implementation details as far as - // the rest of Nexus is concerned. These handles don't even really need to - // be here, but it's convenient. - task_internal_dns_propagation: Activator, - task_external_dns_propagation: Activator, - - // Data exposed by various background tasks to the rest of Nexus - /// list of currently configured external endpoints - pub external_endpoints: +/// Internal state for communication between Nexus and background tasks. +/// +/// This is not part of the larger `BackgroundTask` type because it contains +/// references to internal types. +pub(crate) struct BackgroundTasksInternal { + pub(crate) external_endpoints: watch::Receiver>, } -impl BackgroundTasks { - /// Activate the specified background task - /// - /// If the task is currently running, it will be activated again when it - /// finishes. - pub fn activate(&self, task: &Activator) { - task.activate(); - } -} - /// Initializes the background task subsystem /// /// See the module-level documentation for more on the two-phase initialization @@ -227,7 +173,9 @@ impl BackgroundTasksInitializer { /// call `start()` to actually start the tasks /// * a long-lived `BackgroundTasks` object that you can use to activate any /// of the tasks that will be started and read data that they provide - pub fn new() -> (BackgroundTasksInitializer, BackgroundTasks) { + pub fn new() + -> (BackgroundTasksInitializer, BackgroundTasks, BackgroundTasksInternal) + { let (external_endpoints_tx, external_endpoints_rx) = watch::channel(None); @@ -277,11 +225,13 @@ impl BackgroundTasksInitializer { task_internal_dns_propagation: Activator::new(), task_external_dns_propagation: Activator::new(), + }; + let internal = BackgroundTasksInternal { external_endpoints: external_endpoints_rx, }; - (initializer, background_tasks) + (initializer, background_tasks, internal) } /// Starts all the Nexus background tasks @@ -348,9 +298,6 @@ impl BackgroundTasksInitializer { // call to `Driver::register()` below. That's what actually wires // up the Activator to the corresponding background task. - // The following fields can be safely ignored here because they're - // already wired up as needed. - external_endpoints: _, // Do NOT add a `..` catch-all here! See above. } = &background_tasks; diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 5b24907b0f4..26b0e34995c 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -133,11 +133,11 @@ mod init; mod status; mod tasks; -pub use driver::Activator; pub use driver::Driver; -pub use init::BackgroundTasks; pub use init::BackgroundTasksData; pub use init::BackgroundTasksInitializer; +pub(crate) use init::BackgroundTasksInternal; +pub use nexus_background_task_interface::Activator; pub use tasks::saga_recovery::SagaRecoveryHelpers; use futures::future::BoxFuture; diff --git a/nexus/src/app/external_endpoints.rs b/nexus/src/app/external_endpoints.rs index 00cc83b2b44..c126e0c92dd 100644 --- a/nexus/src/app/external_endpoints.rs +++ b/nexus/src/app/external_endpoints.rs @@ -677,7 +677,7 @@ impl super::Nexus { endpoint_for_authority( log, &requested_authority, - &self.background_tasks.external_endpoints, + &self.background_tasks_internal.external_endpoints, ) } } diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 649dd870d7e..8c61b2e3936 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -6,6 +6,7 @@ use crate::app::switch_port; use ipnetwork::IpNetwork; +use nexus_background_task_interface::BackgroundTasks; use nexus_db_lookup::LookupPath; use nexus_db_model::ExternalIp; use nexus_db_model::IpAttachState; @@ -27,7 +28,6 @@ use std::str::FromStr; use uuid::Uuid; use super::Nexus; -use super::background::BackgroundTasks; impl Nexus { /// Returns the set of switches with uplinks configured and boundary diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 3509d370e18..1d637734438 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -15,6 +15,7 @@ use crate::populate::populate_start; use ::oximeter::types::ProducerRegistry; use anyhow::anyhow; use internal_dns_types::names::ServiceName; +use nexus_background_task_interface::BackgroundTasks; use nexus_config::NexusConfig; use nexus_config::RegionAllocationStrategy; use nexus_config::Tunables; @@ -226,7 +227,10 @@ pub struct Nexus { background_tasks_driver: OnceLock, /// Handles to various specific tasks - background_tasks: background::BackgroundTasks, + background_tasks: BackgroundTasks, + + /// Internal state related to background tasks + background_tasks_internal: background::BackgroundTasksInternal, /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, @@ -368,8 +372,11 @@ impl Nexus { Arc::clone(&db_datastore) as Arc, ); - let (background_tasks_initializer, background_tasks) = - background::BackgroundTasksInitializer::new(); + let ( + background_tasks_initializer, + background_tasks, + background_tasks_internal, + ) = background::BackgroundTasksInitializer::new(); let external_resolver = { if config.deployment.external_dns_servers.is_empty() { @@ -440,6 +447,7 @@ impl Nexus { .clone(), background_tasks_driver: OnceLock::new(), background_tasks, + background_tasks_internal, default_region_allocation_strategy: config .pkg .default_region_allocation_strategy @@ -559,7 +567,7 @@ impl Nexus { // Wait for the background task to complete at least once. We don't // care about its value. To do this, we need our own copy of the // channel. - let mut rx = self.background_tasks.external_endpoints.clone(); + let mut rx = self.background_tasks_internal.external_endpoints.clone(); let _ = rx.wait_for(|s| s.is_some()).await; if !tls_enabled { return None; @@ -569,7 +577,7 @@ impl Nexus { .with_no_client_auth() .with_cert_resolver(Arc::new(NexusCertResolver::new( self.log.new(o!("component" => "NexusCertResolver")), - self.background_tasks.external_endpoints.clone(), + self.background_tasks_internal.external_endpoints.clone(), ))); rustls_cfg.alpn_protocols = vec![b"h2".to_vec(), b"http/1.1".to_vec()]; Some(rustls_cfg)