diff --git a/Cargo.lock b/Cargo.lock index 466e63de7dc..4c7bf0d71ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11678,6 +11678,7 @@ dependencies = [ "omicron-uuid-kinds", "omicron-workspace-hack", "proptest", + "schemars", "scopeguard", "serde", "serde_json", diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index bf651cda923..223ca38dde9 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -7351,30 +7351,28 @@ fn inv_collection_print_sleds(collection: &Collection) { "LAST RECONCILED CONFIG", &last_reconciliation.last_reconciled_config, ); - let disk_errs = collect_config_reconciler_errors( - &last_reconciliation.external_disks, - ); - let dataset_errs = collect_config_reconciler_errors( - &last_reconciliation.datasets, - ); - let zone_errs = collect_config_reconciler_errors( - &last_reconciliation.zones, - ); - for (label, errs) in [ - ("disk", disk_errs), - ("dataset", dataset_errs), - ("zone", zone_errs), - ] { - if errs.is_empty() { - println!(" all {label}s reconciled successfully"); - } else { - println!( - " {} {label} reconciliation errors:", - errs.len() - ); - for err in errs { - println!(" {err}"); - } + } + let disk_errs = collect_config_reconciler_errors( + &last_reconciliation.external_disks, + ); + let dataset_errs = + collect_config_reconciler_errors(&last_reconciliation.datasets); + let zone_errs = + collect_config_reconciler_errors(&last_reconciliation.zones); + for (label, errs) in [ + ("disk", disk_errs), + ("dataset", dataset_errs), + ("zone", zone_errs), + ] { + if errs.is_empty() { + println!(" all {label}s reconciled successfully"); + } else { + println!( + " {} {label} reconciliation errors:", + errs.len() + ); + for err in errs { + println!(" {err}"); } } } diff --git a/nexus-sled-agent-shared/src/inventory.rs b/nexus-sled-agent-shared/src/inventory.rs index dc9f019e5c0..70a87cba41d 100644 --- a/nexus-sled-agent-shared/src/inventory.rs +++ b/nexus-sled-agent-shared/src/inventory.rs @@ -12,6 +12,7 @@ use chrono::{DateTime, Utc}; use daft::Diffable; use id_map::IdMap; use id_map::IdMappable; +use omicron_common::disk::{DatasetKind, DatasetName}; use omicron_common::ledger::Ledgerable; use omicron_common::{ api::{ @@ -327,6 +328,10 @@ impl OmicronZoneConfig { Some(self.id), ) } + + pub fn dataset_name(&self) -> Option { + self.zone_type.dataset_name() + } } /// Describes a persistent ZFS dataset associated with an Omicron zone @@ -613,6 +618,41 @@ impl OmicronZoneType { | OmicronZoneType::Oximeter { .. } => None, } } + + /// If this kind of zone has an associated dataset, return the dataset's + /// name. Otherwise, return `None`. + pub fn dataset_name(&self) -> Option { + let (dataset, dataset_kind) = match self { + OmicronZoneType::BoundaryNtp { .. } + | OmicronZoneType::InternalNtp { .. } + | OmicronZoneType::Nexus { .. } + | OmicronZoneType::Oximeter { .. } + | OmicronZoneType::CruciblePantry { .. } => None, + OmicronZoneType::Clickhouse { dataset, .. } => { + Some((dataset, DatasetKind::Clickhouse)) + } + OmicronZoneType::ClickhouseKeeper { dataset, .. } => { + Some((dataset, DatasetKind::ClickhouseKeeper)) + } + OmicronZoneType::ClickhouseServer { dataset, .. } => { + Some((dataset, DatasetKind::ClickhouseServer)) + } + OmicronZoneType::CockroachDb { dataset, .. } => { + Some((dataset, DatasetKind::Cockroach)) + } + OmicronZoneType::Crucible { dataset, .. } => { + Some((dataset, DatasetKind::Crucible)) + } + OmicronZoneType::ExternalDns { dataset, .. 
} => { + Some((dataset, DatasetKind::ExternalDns)) + } + OmicronZoneType::InternalDns { dataset, .. } => { + Some((dataset, DatasetKind::InternalDns)) + } + }?; + + Some(DatasetName::new(dataset.pool_name, dataset_kind)) + } } /// Like [`OmicronZoneType`], but without any associated data. diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json index 73e2e4de6e0..e9400245755 100644 --- a/schema/all-zones-requests.json +++ b/schema/all-zones-requests.json @@ -1,7 +1,7 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "title": "OmicronZonesConfigLocal", - "description": "Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus wants for all of its zones) with the locally-determined configuration for these zones.", + "description": "Legacy type of the ledgered zone config.", "type": "object", "required": [ "ledger_generation", @@ -10,20 +10,10 @@ ], "properties": { "ledger_generation": { - "description": "ledger-managed generation number\n\nThis generation is managed by the ledger facility itself. It's bumped whenever we write a new ledger. In practice, we don't currently have any reason to bump this _for a given Omicron generation_ so it's somewhat redundant. In principle, if we needed to modify the ledgered configuration due to some event that doesn't change the Omicron config (e.g., if we wanted to move the root filesystem to a different path), we could do that by bumping this generation.", - "allOf": [ - { - "$ref": "#/definitions/Generation" - } - ] + "$ref": "#/definitions/Generation" }, "omicron_generation": { - "description": "generation of the Omicron-provided part of the configuration\n\nThis generation number is outside of Sled Agent's control. We store exactly what we were given and use this number to decide when to fail requests to establish an outdated configuration.\n\nYou can think of this as a major version number, with `ledger_generation` being a minor version number. 
See `is_newer_than()`.", - "allOf": [ - { - "$ref": "#/definitions/Generation" - } - ] + "$ref": "#/definitions/Generation" }, "zones": { "type": "array", @@ -269,7 +259,6 @@ } }, "OmicronZoneConfigLocal": { - "description": "Combines the Nexus-provided `OmicronZoneConfig` (which describes what Nexus wants for this zone) with any locally-determined configuration (like the path to the root filesystem)", "type": "object", "required": [ "root", diff --git a/sled-agent/config-reconciler/Cargo.toml b/sled-agent/config-reconciler/Cargo.toml index 9d173f35861..f5f8a61e4e1 100644 --- a/sled-agent/config-reconciler/Cargo.toml +++ b/sled-agent/config-reconciler/Cargo.toml @@ -45,6 +45,7 @@ expectorate.workspace = true illumos-utils = { workspace = true, features = ["testing"] } omicron-test-utils.workspace = true proptest.workspace = true +schemars.workspace = true scopeguard.workspace = true serde_json.workspace = true sled-storage = { workspace = true, features = ["testing"] } diff --git a/sled-agent/config-reconciler/src/dataset_serialization_task.rs b/sled-agent/config-reconciler/src/dataset_serialization_task.rs index 9cc35285545..05556c3abf5 100644 --- a/sled-agent/config-reconciler/src/dataset_serialization_task.rs +++ b/sled-agent/config-reconciler/src/dataset_serialization_task.rs @@ -25,9 +25,6 @@ use illumos_utils::zfs::DestroyDatasetError; use illumos_utils::zfs::Mountpoint; use illumos_utils::zfs::WhichDatasets; use illumos_utils::zfs::Zfs; -use illumos_utils::zpool::PathInPool; -use illumos_utils::zpool::ZpoolOrRamdisk; -use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::InventoryDataset; use omicron_common::disk::DatasetConfig; use omicron_common::disk::DatasetKind; @@ -37,7 +34,6 @@ use omicron_common::zpool_name::ZpoolName; use omicron_uuid_kinds::DatasetUuid; use sled_storage::config::MountConfig; use sled_storage::dataset::CRYPT_DATASET; -use sled_storage::dataset::U2_DEBUG_DATASET; use sled_storage::dataset::ZONE_DATASET; use sled_storage::manager::NestedDatasetConfig; use sled_storage::manager::NestedDatasetListOptions; @@ -91,7 +87,7 @@ pub enum DatasetEnsureError { } impl DatasetEnsureError { - fn is_retryable(&self) -> bool { + pub(crate) fn is_retryable(&self) -> bool { match self { // These errors might be retryable; there are probably cases where // they won't be, but we need more context than we have available @@ -161,89 +157,13 @@ pub enum NestedDatasetListError { }, } -#[derive(Debug, Clone, Default)] -pub(crate) struct DatasetEnsureResult(IdMap); - -impl DatasetEnsureResult { - pub(crate) fn has_retryable_error(&self) -> bool { - self.0.iter().any(|result| match &result.state { - DatasetState::Ensured => false, - DatasetState::FailedToEnsure(err) => err.is_retryable(), - }) - } - - pub(crate) fn to_inventory( - &self, - ) -> BTreeMap { - self.0 - .iter() - .map(|dataset| match &dataset.state { - DatasetState::Ensured => { - (dataset.config.id, ConfigReconcilerInventoryResult::Ok) - } - DatasetState::FailedToEnsure(err) => ( - dataset.config.id, - ConfigReconcilerInventoryResult::Err { - message: InlineErrorChain::new(err).to_string(), - }, - ), - }) - .collect() - } - - pub(crate) fn all_mounted_debug_datasets<'a>( - &'a self, - mount_config: &'a MountConfig, - ) -> impl Iterator + 'a { - self.all_mounted_datasets(mount_config, DatasetKind::Debug) - } - - pub(crate) fn all_mounted_zone_root_datasets<'a>( - &'a self, - mount_config: &'a MountConfig, - ) -> impl Iterator + 'a { - 
self.all_mounted_datasets(mount_config, DatasetKind::TransientZoneRoot) - } - - fn all_mounted_datasets<'a>( - &'a self, - mount_config: &'a MountConfig, - kind: DatasetKind, - ) -> impl Iterator + 'a { - // We're a helper called by the pub methods on this type, so we only - // have to handle the `kind`s they call us with. - let mountpoint = match &kind { - DatasetKind::Debug => U2_DEBUG_DATASET, - DatasetKind::TransientZoneRoot => ZONE_DATASET, - _ => unreachable!( - "private function called with unexpected kind {kind:?}" - ), - }; - self.0 - .iter() - .filter(|result| match &result.state { - DatasetState::Ensured => true, - DatasetState::FailedToEnsure(_) => false, - }) - .filter(move |result| *result.config.name.kind() == kind) - .map(|result| { - let pool = *result.config.name.pool(); - PathInPool { - pool: ZpoolOrRamdisk::Zpool(pool), - path: pool - .dataset_mountpoint(&mount_config.root, mountpoint), - } - }) - } -} - -#[derive(Debug, Clone)] -struct SingleDatasetEnsureResult { - config: DatasetConfig, - state: DatasetState, +#[derive(Debug)] +pub(crate) struct DatasetEnsureResult { + pub(crate) config: DatasetConfig, + pub(crate) result: Result<(), Arc>, } -impl IdMappable for SingleDatasetEnsureResult { +impl IdMappable for DatasetEnsureResult { type Id = DatasetUuid; fn id(&self) -> Self::Id { @@ -251,16 +171,18 @@ impl IdMappable for SingleDatasetEnsureResult { } } -#[derive(Debug, Clone)] -enum DatasetState { - Ensured, - FailedToEnsure(Arc), -} - #[derive(Debug, Clone)] pub(crate) struct DatasetTaskHandle(mpsc::Sender); impl DatasetTaskHandle { + // For testing, create a handle on which requests will always fail with a + // `DatasetTaskError`. + #[cfg(any(test, feature = "testing"))] + pub(crate) fn spawn_noop() -> Self { + let (tx, _rx) = mpsc::channel(1); + Self(tx) + } + pub fn spawn_dataset_task( mount_config: Arc, currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, @@ -296,7 +218,7 @@ impl DatasetTaskHandle { mount_config, currently_managed_zpools_rx, request_rx, - datasets: DatasetEnsureResult::default(), + ensured_datasets: BTreeSet::new(), log: base_log.new(slog::o!("component" => "DatasetTask")), } .run(zfs), @@ -317,7 +239,7 @@ impl DatasetTaskHandle { pub async fn datasets_ensure( &self, datasets: IdMap, - ) -> Result { + ) -> Result, DatasetTaskError> { self.try_send_request(|tx| DatasetTaskRequest::DatasetsEnsure { datasets, tx, @@ -403,7 +325,7 @@ struct DatasetTask { mount_config: Arc, request_rx: mpsc::Receiver, currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver, - datasets: DatasetEnsureResult, + ensured_datasets: BTreeSet, log: Logger, } @@ -426,8 +348,7 @@ impl DatasetTask { _ = tx.0.send(self.inventory(zpools, zfs).await); } DatasetTaskRequest::DatasetsEnsure { datasets, tx } => { - self.datasets_ensure(datasets, zfs).await; - _ = tx.0.send(self.datasets.clone()); + _ = tx.0.send(self.datasets_ensure(datasets, zfs).await); } DatasetTaskRequest::NestedDatasetMount { name, tx } => { _ = tx.0.send(self.nested_dataset_mount(name, zfs).await); @@ -482,7 +403,9 @@ impl DatasetTask { &mut self, config: IdMap, zfs: &T, - ) { + ) -> IdMap { + let mut ensure_results = IdMap::new(); + // There's an implicit hierarchy inside the list of `DatasetConfig`s: // // 1. 
Each zpool may contain many datasets @@ -535,9 +458,9 @@ impl DatasetTask { "dataset" => ?dataset, ); let err = DatasetEnsureError::ZpoolNotFound(*zpool); - self.datasets.0.insert(SingleDatasetEnsureResult { + ensure_results.insert(DatasetEnsureResult { config: dataset, - state: DatasetState::FailedToEnsure(Arc::new(err)), + result: Err(Arc::new(err)), }); continue; } @@ -613,7 +536,7 @@ impl DatasetTask { const DATASET_ENSURE_CONCURRENCY_LIMIT: usize = 16; let mut non_transient_zones = futures::stream::iter( non_transient_zone_configs.into_iter().map(|dataset| async move { - let state = match Self::ensure_one_dataset( + let result = Self::ensure_one_dataset( DatasetCreationDetails::Config( &dataset, old_datasets.get(&dataset.name.full_name()), @@ -622,18 +545,14 @@ impl DatasetTask { &log, zfs, ) - .await - { - Ok(state) => state, - Err(err) => DatasetState::FailedToEnsure(Arc::new(err)), - }; - (dataset, state) + .await; + (dataset, result.map_err(Arc::new)) }), ) .buffer_unordered(DATASET_ENSURE_CONCURRENCY_LIMIT); - while let Some((config, state)) = non_transient_zones.next().await { - self.datasets.0.insert(SingleDatasetEnsureResult { config, state }); + while let Some((config, result)) = non_transient_zones.next().await { + ensure_results.insert(DatasetEnsureResult { config, result }); } // For each transient zone dataset: either ensure it or mark down why we @@ -649,30 +568,28 @@ impl DatasetTask { else { let err = DatasetEnsureError::TransientZoneRootNoConfig(zpool); - self.datasets.0.insert(SingleDatasetEnsureResult { + ensure_results.insert(DatasetEnsureResult { config: dataset, - state: DatasetState::FailedToEnsure(Arc::new(err)), + result: Err(Arc::new(err)), }); continue; }; // Have we successfully ensured that parent dataset? - match self - .datasets - .0 + match ensure_results .get(zpool_transient_zone_root_dataset_id) - .map(|d| &d.state) + .map(|d| &d.result) { - Some(DatasetState::Ensured) => (), - Some(DatasetState::FailedToEnsure(err)) => { + Some(Ok(())) => (), + Some(Err(err)) => { let err = DatasetEnsureError::TransientZoneRootFailure { zpool, err: Arc::clone(err), }; - self.datasets.0.insert(SingleDatasetEnsureResult { + ensure_results.insert(DatasetEnsureResult { config: dataset, - state: DatasetState::FailedToEnsure(Arc::new(err)), + result: Err(Arc::new(err)), }); continue; } @@ -685,7 +602,7 @@ impl DatasetTask { } transient_zone_futures.push(async move { - let state = match Self::ensure_one_dataset( + let result = Self::ensure_one_dataset( DatasetCreationDetails::Config( &dataset, old_datasets.get(&dataset.name.full_name()), @@ -694,32 +611,44 @@ impl DatasetTask { &log, zfs, ) - .await - { - Ok(state) => state, - Err(err) => DatasetState::FailedToEnsure(Arc::new(err)), - }; - (dataset, state) + .await; + (dataset, result.map_err(Arc::new)) }); } } let mut transient_zones = futures::stream::iter(transient_zone_futures) .buffer_unordered(DATASET_ENSURE_CONCURRENCY_LIMIT); - while let Some((config, state)) = transient_zones.next().await { - self.datasets.0.insert(SingleDatasetEnsureResult { config, state }); + while let Some((config, result)) = transient_zones.next().await { + ensure_results.insert(DatasetEnsureResult { config, result }); } + + // Remember all successfully-ensured datasets (used by + // `nested_dataset_ensure()` to check that any nested datasets' parents + // have been ensured). 
+ self.ensured_datasets = ensure_results + .iter() + .filter_map(|d| { + if d.result.is_ok() { + Some(d.config.name.clone()) + } else { + None + } + }) + .collect(); + + ensure_results } /// Compare `dataset`'s properties against `old_dataset` (an set of /// recently-retrieved properties from ZFS). If we already know /// the state of `dataset` based on those properties, return `Some(state)`; /// otherwise, return `None`. - fn is_dataset_state_known( + fn is_dataset_ensure_result_known( dataset: &DatasetConfig, old_dataset: Option<&DatasetProperties>, log: &Logger, - ) -> Option { + ) -> Option> { let log = log.new(slog::o!("dataset" => dataset.name.full_name())); let Some(old_dataset) = old_dataset else { @@ -736,13 +665,11 @@ impl DatasetTask { // We cannot do anything here: we already have a dataset with this // name, but it has a different ID. Nexus has sent us bad // information (or we have a bug somewhere); refuse to proceed. - return Some(DatasetState::FailedToEnsure(Arc::new( - DatasetEnsureError::UuidMismatch { - name: dataset.name.full_name(), - expected: dataset.id, - got: old_id, - }, - ))); + return Some(Err(DatasetEnsureError::UuidMismatch { + name: dataset.name.full_name(), + expected: dataset.id, + got: old_id, + })); } let old_props = match SharedDatasetConfig::try_from(old_dataset) { @@ -778,7 +705,7 @@ impl DatasetTask { } info!(log, "No changes necessary, returning early"); - return Some(DatasetState::Ensured); + return Some(Ok(())); } // Ensures a dataset exists within a zpool. @@ -792,7 +719,7 @@ impl DatasetTask { mount_config: &MountConfig, log: &Logger, zfs: &T, - ) -> Result { + ) -> Result<(), DatasetEnsureError> { info!(log, "ensure_dataset"; "details" => ?details); // Unpack the particulars of the kind of dataset we're creating. @@ -810,10 +737,10 @@ impl DatasetTask { DatasetCreationDetails::Config(config, old_props) => { // Do we alread know the state of this dataset based on // `old_props`? - if let Some(state) = - Self::is_dataset_state_known(config, old_props, log) - { - return Ok(state); + if let Some(result) = Self::is_dataset_ensure_result_known( + config, old_props, log, + ) { + return result; } let dataset_id = Some(config.id); @@ -848,8 +775,7 @@ impl DatasetTask { id: dataset_id, additional_options: None, }) - .await?; - Ok(DatasetState::Ensured) + .await } async fn nested_dataset_mount( @@ -876,13 +802,7 @@ impl DatasetTask { let log = self.log.new(slog::o!("request" => "nested_dataset_ensure")); // Has our parent dataset been mounted? - // - // TODO-cleanup Could we get the parent dataset ID instead of its name? - // Then we could do a lookup instead of a scan. - if !self.datasets.0.iter().any(|result| { - result.config.name == config.name.root - && matches!(result.state, DatasetState::Ensured) - }) { + if !self.ensured_datasets.contains(&config.name.root) { return Err(NestedDatasetEnsureError::ParentDatasetNotMounted( config.name.root, )); @@ -1014,7 +934,7 @@ enum DatasetTaskRequest { }, DatasetsEnsure { datasets: IdMap, - tx: DebugIgnore>, + tx: DebugIgnore>>, }, NestedDatasetMount { name: NestedDatasetLocation, @@ -1396,21 +1316,20 @@ mod tests { // The returned map should record success for all datasets on managed // zpools and errors on all unmanaged pools. 
- assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); let mut num_datasets_on_managed_pools = 0; for dataset in &datasets { - let single_result = result - .0 + let single_dataset = result .get(&dataset.id) .expect("result contains entry for each dataset"); if managed_pools.contains(dataset.name.pool()) { - assert_matches!(single_result.state, DatasetState::Ensured); + assert_matches!(single_dataset.result, Ok(())); num_datasets_on_managed_pools += 1; } else { assert_matches!( - &single_result.state, - DatasetState::FailedToEnsure(err) + &single_dataset.result, + Err(err) if matches!(**err, DatasetEnsureError::ZpoolNotFound(_)) ); } @@ -1533,35 +1452,32 @@ mod tests { // the `Succeed` behavior // * errors for all other transient zones (with the specific error // depending on whether the parent failed or was omitted) - assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); for dataset in &datasets { let behavior = pools .get(dataset.name.pool()) .expect("datasets only exist for pools we have"); - let result = result - .0 + let result = &result .get(&dataset.id) - .expect("result contains entry for each dataset"); + .expect("result contains entry for each dataset") + .result; match (behavior, dataset.name.kind()) { ( TransientZoneRootBehavior::Succeed, DatasetKind::TransientZoneRoot | DatasetKind::TransientZone { .. }, - ) => assert_matches!(result.state, DatasetState::Ensured), + ) => assert_matches!(result, Ok(())), ( TransientZoneRootBehavior::Fail, DatasetKind::TransientZoneRoot, - ) => assert_matches!( - result.state, - DatasetState::FailedToEnsure(_) - ), + ) => assert_matches!(result, Err(_)), ( TransientZoneRootBehavior::Fail, DatasetKind::TransientZone { .. }, ) => assert_matches!( - &result.state, - DatasetState::FailedToEnsure(err) if matches!( + result, + Err(err) if matches!( **err, DatasetEnsureError::TransientZoneRootFailure { .. } ) @@ -1570,8 +1486,8 @@ mod tests { TransientZoneRootBehavior::Omit, DatasetKind::TransientZone { .. }, ) => assert_matches!( - &result.state, - DatasetState::FailedToEnsure(err) if matches!( + result, + Err(err) if matches!( **err, DatasetEnsureError::TransientZoneRootNoConfig(_) ) ), @@ -1649,11 +1565,11 @@ mod tests { // Our in-memory ZFS will return an error if we tried to mount a // transient zone before its parent zone root, so it's sufficient to // check that all the datasets ensured successfully. - assert_eq!(result.0.len(), datasets.len()); - for single_result in result.0 { + assert_eq!(result.len(), datasets.len()); + for single_result in result { assert_matches!( - single_result.state, - DatasetState::Ensured, + single_result.result, + Ok(()), "bad state for {:?}", single_result.config ); @@ -1712,13 +1628,13 @@ mod tests { .expect("no task error"); // Each dataset should have been ensured exactly once. 
- assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); { let zfs = zfs.inner.lock().unwrap(); for dataset in &datasets { assert_matches!( - result.0.get(&dataset.id).unwrap().state, - DatasetState::Ensured + result.get(&dataset.id).unwrap().result, + Ok(()) ); assert_eq!( zfs.ensure_call_counts.get(&dataset.name.full_name()), @@ -1733,13 +1649,13 @@ mod tests { .datasets_ensure(datasets.clone()) .await .expect("no task error"); - assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); { let zfs = zfs.inner.lock().unwrap(); for dataset in &datasets { assert_matches!( - result.0.get(&dataset.id).unwrap().state, - DatasetState::Ensured + result.get(&dataset.id).unwrap().result, + Ok(()) ); assert_eq!( zfs.ensure_call_counts.get(&dataset.name.full_name()), @@ -1783,13 +1699,13 @@ mod tests { .datasets_ensure(datasets.clone()) .await .expect("no task error"); - assert_eq!(result.0.len(), datasets.len()); + assert_eq!(result.len(), datasets.len()); { let zfs = zfs.inner.lock().unwrap(); for dataset in &datasets { assert_matches!( - result.0.get(&dataset.id).unwrap().state, - DatasetState::Ensured + result.get(&dataset.id).unwrap().result, + Ok(()) ); let expected_count = if mutated_datasets.contains(&dataset.id) { 2 } else { 1 }; @@ -1893,10 +1809,8 @@ mod tests { .datasets_ensure(datasets.clone()) .await .expect("no task error"); - assert_eq!(result.0.len(), datasets.len()); - assert!( - result.0.iter().all(|r| matches!(r.state, DatasetState::Ensured)) - ); + assert_eq!(result.len(), datasets.len()); + assert!(result.iter().all(|r| matches!(r.result, Ok(())))); // Try to ensure each of the nested datasets. This should succeed for // any where the debug dataset was mounted, and fail with an appropriate @@ -2260,14 +2174,13 @@ mod illumos_tests { .datasets_ensure([dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); // Calling "datasets_ensure" with the same input should succeed. @@ -2275,14 +2188,13 @@ mod illumos_tests { .datasets_ensure([dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); harness.cleanup().await; @@ -2334,14 +2246,13 @@ mod illumos_tests { .datasets_ensure([dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); // Creating the dataset should have mounted it @@ -2356,14 +2267,13 @@ mod illumos_tests { .datasets_ensure([dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); // ... and doing so mounts the dataset again. 
@@ -2418,9 +2328,9 @@ mod illumos_tests { .datasets_ensure(dataset_configs.clone()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), dataset_configs.len()); - for result in &result.0 { - assert_matches!(result.state, DatasetState::Ensured); + assert_eq!(result.len(), dataset_configs.len()); + for result in &result { + assert_matches!(result.result, Ok(())); } // Creating the dataset should have mounted it @@ -2496,9 +2406,9 @@ mod illumos_tests { .datasets_ensure(datasets.clone()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), datasets.len()); - for result in &result.0 { - assert_matches!(result.state, DatasetState::Ensured); + assert_eq!(result.len(), datasets.len()); + for result in &result { + assert_matches!(result.result, Ok(())); } // Calling "datasets_ensure" with the same input should succeed. @@ -2506,9 +2416,9 @@ mod illumos_tests { .datasets_ensure(datasets.clone()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), datasets.len()); - for result in &result.0 { - assert_matches!(result.state, DatasetState::Ensured); + assert_eq!(result.len(), datasets.len()); + for result in &result { + assert_matches!(result.result, Ok(())); } harness.cleanup().await; @@ -2535,14 +2445,13 @@ mod illumos_tests { .datasets_ensure([debug_dataset.clone()].into_iter().collect()) .await .expect("task should not fail"); - assert_eq!(result.0.len(), 1); + assert_eq!(result.len(), 1); assert_matches!( result - .0 .get(&debug_dataset.id) .expect("result contains entry for dataset") - .state, - DatasetState::Ensured + .result, + Ok(()) ); // Start querying the state of nested datasets. diff --git a/sled-agent/config-reconciler/src/ledger/legacy_configs.rs b/sled-agent/config-reconciler/src/ledger/legacy_configs.rs index 7bd3e9b7c6f..587303397ad 100644 --- a/sled-agent/config-reconciler/src/ledger/legacy_configs.rs +++ b/sled-agent/config-reconciler/src/ledger/legacy_configs.rs @@ -218,6 +218,7 @@ fn merge_old_configs( /// Legacy type of the ledgered zone config. 
#[derive(Debug, Clone, Deserialize, Serialize)] +#[cfg_attr(test, derive(schemars::JsonSchema))] struct OmicronZonesConfigLocal { omicron_generation: Generation, ledger_generation: Generation, @@ -237,9 +238,11 @@ impl Ledgerable for OmicronZonesConfigLocal { } #[derive(Debug, Clone, Deserialize, Serialize)] +#[cfg_attr(test, derive(schemars::JsonSchema))] struct OmicronZoneConfigLocal { zone: OmicronZoneConfig, #[serde(rename = "root")] + #[cfg_attr(test, schemars(with = "String"))] _root: Utf8PathBuf, } @@ -264,6 +267,15 @@ pub(super) mod tests { const MERGED_CONFIG_PATH: &str = "test-data/expectorate/merged-sled-config.json"; + #[test] + fn test_old_config_schema() { + let schema = schemars::schema_for!(OmicronZonesConfigLocal); + expectorate::assert_contents( + "../../schema/all-zones-requests.json", + &serde_json::to_string_pretty(&schema).unwrap(), + ); + } + #[test] fn test_merge_old_configs() { let disks: OmicronPhysicalDisksConfig = { diff --git a/sled-agent/config-reconciler/src/reconciler_task.rs b/sled-agent/config-reconciler/src/reconciler_task.rs index e09c586e142..95c782b47a7 100644 --- a/sled-agent/config-reconciler/src/reconciler_task.rs +++ b/sled-agent/config-reconciler/src/reconciler_task.rs @@ -9,19 +9,23 @@ use chrono::Utc; use either::Either; use futures::future; use illumos_utils::zpool::PathInPool; +use illumos_utils::zpool::ZpoolOrRamdisk; use key_manager::StorageKeyRequester; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventory; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult; use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryStatus; use nexus_sled_agent_shared::inventory::OmicronSledConfig; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use sled_storage::config::MountConfig; +use sled_storage::dataset::U2_DEBUG_DATASET; +use sled_storage::dataset::ZONE_DATASET; use sled_storage::disk::Disk; use slog::Logger; use slog::info; use slog::warn; -use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::HashSet; use std::sync::Arc; @@ -30,15 +34,16 @@ use std::time::Instant; use tokio::sync::watch; use crate::TimeSyncConfig; -use crate::dataset_serialization_task::DatasetEnsureResult; use crate::dataset_serialization_task::DatasetTaskHandle; use crate::ledger::CurrentSledConfig; use crate::raw_disks::RawDisksReceiver; use crate::sled_agent_facilities::SledAgentFacilities; +mod datasets; mod external_disks; mod zones; +use self::datasets::OmicronDatasets; use self::external_disks::ExternalDisks; use self::zones::OmicronZones; @@ -66,17 +71,18 @@ pub(crate) fn spawn( currently_managed_zpools_tx, external_disks_tx, ); + let datasets = OmicronDatasets::new(dataset_task); let zones = OmicronZones::new(mount_config, time_sync_config); tokio::spawn( ReconcilerTask { key_requester, - dataset_task, current_config_rx, reconciler_result_tx, raw_disks_rx, external_disks, + datasets, zones, log, } @@ -115,8 +121,7 @@ impl ReconcilerResult { }; Either::Right( latest_result - .datasets - .all_mounted_debug_datasets(&self.mount_config), + .all_mounted_datasets(&self.mount_config, DatasetKind::Debug), ) } @@ -126,11 +131,10 @@ impl ReconcilerResult { let Some(latest_result) = &self.latest_result else { return Either::Left(std::iter::empty()); }; - Either::Right( - latest_result - .datasets - .all_mounted_zone_root_datasets(&self.mount_config), - ) + 
Either::Right(latest_result.all_mounted_datasets( + &self.mount_config, + DatasetKind::TransientZoneRoot, + )) } pub(crate) fn to_inventory( @@ -192,7 +196,7 @@ struct LatestReconciliationResult { sled_config: OmicronSledConfig, external_disks_inventory: BTreeMap, - datasets: DatasetEnsureResult, + datasets: BTreeMap, zones_inventory: BTreeMap, timesync_status: TimeSyncStatus, } @@ -202,19 +206,54 @@ impl LatestReconciliationResult { ConfigReconcilerInventory { last_reconciled_config: self.sled_config.clone(), external_disks: self.external_disks_inventory.clone(), - datasets: self.datasets.to_inventory(), + datasets: self.datasets.clone(), zones: self.zones_inventory.clone(), } } + + fn all_mounted_datasets<'a>( + &'a self, + mount_config: &'a MountConfig, + kind: DatasetKind, + ) -> impl Iterator + 'a { + // This is a private method only called by this file; we only have to + // handle the specific `DatasetKind`s used by our callers. + let mountpoint = match &kind { + DatasetKind::Debug => U2_DEBUG_DATASET, + DatasetKind::TransientZoneRoot => ZONE_DATASET, + _ => unreachable!( + "private function called with unexpected kind {kind:?}" + ), + }; + self.datasets + .iter() + // Filter down to successfully-ensured datasets + .filter_map(|(dataset_id, result)| match result { + ConfigReconcilerInventoryResult::Ok => { + self.sled_config.datasets.get(dataset_id) + } + ConfigReconcilerInventoryResult::Err { .. } => None, + }) + // Filter down to matching dataset kinds + .filter(move |config| *config.name.kind() == kind) + .map(|config| { + let pool = *config.name.pool(); + PathInPool { + pool: ZpoolOrRamdisk::Zpool(pool), + path: pool + .dataset_mountpoint(&mount_config.root, mountpoint), + } + }) + } } struct ReconcilerTask { key_requester: StorageKeyRequester, - dataset_task: DatasetTaskHandle, current_config_rx: watch::Receiver, reconciler_result_tx: watch::Sender, raw_disks_rx: RawDisksReceiver, external_disks: ExternalDisks, + datasets: OmicronDatasets, zones: OmicronZones, log: Logger, } @@ -369,10 +408,12 @@ impl ReconcilerTask { ) .await; - // Next, delete datasets that need to be deleted. + // Next, remove datasets we have but that aren't present in the config. // - // TODO We don't do this yet: + // Note: this doesn't actually delete them yet! // https://github.com/oxidecomputer/omicron/issues/6177 + self.datasets + .remove_datasets_if_needed(&sled_config.datasets, &self.log); // Finally, remove any external disks we're no longer supposed to use // (either due to config changes or the raw disk being gone). @@ -398,30 +439,9 @@ impl ReconcilerTask { .await; // Ensure all the datasets we want exist. - let datasets = match self - .dataset_task - .datasets_ensure(sled_config.datasets.clone()) - .await - { - Ok(result) => result, - Err(err) => { - warn!( - self.log, "failed to contact dataset task"; - InlineErrorChain::new(&err), - ); - // If we can't contact the dataset task, reuse the result from - // our previous attempt. This should still be correct (until we - // start deleting datasets, at which point we'll need a more - // holistic tracker for dataset status like we already have for - // disks and zones). 
- self.reconciler_result_tx - .borrow() - .latest_result - .as_ref() - .map(|inner| inner.datasets.clone()) - .unwrap_or_else(DatasetEnsureResult::default) - } - }; + self.datasets + .ensure_datasets_if_needed(sled_config.datasets.clone(), &self.log) + .await; // Collect the current timesync status (needed to start any new zones, // and also we want to report it as part of each reconciler result). @@ -438,14 +458,12 @@ impl ReconcilerTask { // the old instance. match zone_shutdown_result { Ok(()) => { - let currently_managed_zpools = - self.external_disks.currently_managed_zpools(); self.zones .start_zones_if_needed( &sled_config.zones, sled_agent_facilities, timesync_status.is_synchronized(), - ¤tly_managed_zpools, + &self.datasets, &self.log, ) .await; @@ -465,7 +483,7 @@ impl ReconcilerTask { let result = if !timesync_status.is_synchronized() || self.external_disks.has_retryable_error() || self.zones.has_retryable_error() - || datasets.has_retryable_error() + || self.datasets.has_retryable_error() { ReconciliationResult::ShouldRetry } else { @@ -475,7 +493,7 @@ impl ReconcilerTask { let inner = LatestReconciliationResult { sled_config, external_disks_inventory: self.external_disks.to_inventory(), - datasets, + datasets: self.datasets.to_inventory(), zones_inventory: self.zones.to_inventory(), timesync_status, }; diff --git a/sled-agent/config-reconciler/src/reconciler_task/datasets.rs b/sled-agent/config-reconciler/src/reconciler_task/datasets.rs new file mode 100644 index 00000000000..5232d67a1af --- /dev/null +++ b/sled-agent/config-reconciler/src/reconciler_task/datasets.rs @@ -0,0 +1,247 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Module for Omicron datasets. +//! +//! This module does not spawn a separate tokio task: our parent reconciler task +//! owns an [`OmicronDatasets`] and is able to mutate it in place during +//! reconciliation. However, we do need a [`DatasetTaskHandle`] to perform some +//! operations. This handle is shared with other "needs to perform dataset +//! operations" consumers (e.g., inventory requests perform operations to check +//! the live state of datasets directly from ZFS). 
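+//!
+//! A rough usage sketch of the call pattern this module expects (variable
+//! names here are illustrative; see `reconciler_task.rs` for the actual
+//! wiring):
+//!
+//! ```ignore
+//! let mut datasets = OmicronDatasets::new(dataset_task_handle);
+//! // Drop state for datasets no longer present in the incoming sled config.
+//! datasets.remove_datasets_if_needed(&sled_config.datasets, &log);
+//! // Ask the dataset task to ensure every dataset the config wants.
+//! datasets.ensure_datasets_if_needed(sled_config.datasets.clone(), &log).await;
+//! // Before starting a zone, confirm its dataset dependencies were ensured.
+//! let zone_root_path = datasets.validate_zone_storage(&zone_config, &mount_config)?;
+//! ```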
+
+use crate::dataset_serialization_task::DatasetEnsureError;
+use crate::dataset_serialization_task::DatasetEnsureResult;
+use crate::dataset_serialization_task::DatasetTaskHandle;
+use id_map::IdMap;
+use id_map::IdMappable;
+use illumos_utils::zpool::PathInPool;
+use illumos_utils::zpool::ZpoolOrRamdisk;
+use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult;
+use nexus_sled_agent_shared::inventory::OmicronZoneConfig;
+use omicron_common::disk::DatasetConfig;
+use omicron_common::disk::DatasetKind;
+use omicron_common::disk::DatasetName;
+use omicron_uuid_kinds::DatasetUuid;
+use sled_storage::config::MountConfig;
+use sled_storage::dataset::ZONE_DATASET;
+use slog::Logger;
+use slog::warn;
+use slog_error_chain::InlineErrorChain;
+use std::collections::BTreeMap;
+use std::sync::Arc;
+
+#[derive(Debug, thiserror::Error)]
+pub(super) enum ZoneDatasetDependencyError {
+    #[error("zone config is missing a filesystem pool")]
+    MissingFilesystemPool,
+    #[error(
+        "zone's transient root dataset is not available: {}", .0.full_name(),
+    )]
+    TransientZoneDatasetNotAvailable(DatasetName),
+    #[error("zone's durable dataset is not available: {}", .0.full_name())]
+    DurableDatasetNotAvailable(DatasetName),
+}
+
+#[derive(Debug)]
+pub(super) struct OmicronDatasets {
+    datasets: IdMap<OmicronDataset>,
+    dataset_task: DatasetTaskHandle,
+}
+
+impl OmicronDatasets {
+    #[cfg(any(test, feature = "testing"))]
+    pub(super) fn with_datasets<I>(datasets: I) -> Self
+    where
+        I: Iterator<Item = (DatasetConfig, Result<(), DatasetEnsureError>)>,
+    {
+        let dataset_task = DatasetTaskHandle::spawn_noop();
+        let datasets = datasets
+            .map(|(config, result)| OmicronDataset {
+                config,
+                state: match result {
+                    Ok(()) => DatasetState::Ensured,
+                    Err(err) => DatasetState::FailedToEnsure(Arc::new(err)),
+                },
+            })
+            .collect();
+        Self { datasets, dataset_task }
+    }
+
+    pub(super) fn new(dataset_task: DatasetTaskHandle) -> Self {
+        Self { datasets: IdMap::default(), dataset_task }
+    }
+
+    /// Confirm that any dataset dependencies of `zone` have been ensured
+    /// successfully, returning the path for the zone's filesystem root.
+    pub(super) fn validate_zone_storage(
+        &self,
+        zone: &OmicronZoneConfig,
+        mount_config: &MountConfig,
+    ) -> Result<PathInPool, ZoneDatasetDependencyError> {
+        // Confirm that there's an ensured `TransientZoneRoot` dataset on this
+        // zone's filesystem pool.
+        let Some(filesystem_pool) = zone.filesystem_pool else {
+            // This should never happen: Reconfigurator guarantees all zones
+            // have filesystem pools. `filesystem_pool` is non-optional in
+            // blueprints; we should make it non-optional in `OmicronZoneConfig`
+            // too: https://github.com/oxidecomputer/omicron/issues/8216
+            return Err(ZoneDatasetDependencyError::MissingFilesystemPool);
+        };
+
+        let transient_dataset_name = DatasetName::new(
+            filesystem_pool,
+            DatasetKind::TransientZone { name: zone.zone_name() },
+        );
+
+        // TODO-cleanup It would be nicer if the zone included the filesystem
+        // dataset ID, so we could just do a lookup here instead of searching.
+        // https://github.com/oxidecomputer/omicron/issues/7214
+        if !self.datasets.iter().any(|d| {
+            matches!(d.state, DatasetState::Ensured)
+                && d.config.name == transient_dataset_name
+        }) {
+            return Err(
+                ZoneDatasetDependencyError::TransientZoneDatasetNotAvailable(
+                    transient_dataset_name,
+                ),
+            );
+        }
+
+        let zone_root_path = PathInPool {
+            pool: ZpoolOrRamdisk::Zpool(filesystem_pool),
+            // TODO-cleanup Should we get this path from the dataset we found
+            // above?
+ path: filesystem_pool + .dataset_mountpoint(&mount_config.root, ZONE_DATASET), + }; + + // Confirm that the durable dataset for this zone has been ensured, if + // it has one. + let Some(durable_dataset) = zone.dataset_name() else { + return Ok(zone_root_path); + }; + + // TODO-cleanup As above, if we had an ID we could look up instead of + // searching. + if !self.datasets.iter().any(|d| { + matches!(d.state, DatasetState::Ensured) + && d.config.name == durable_dataset + }) { + return Err( + ZoneDatasetDependencyError::DurableDatasetNotAvailable( + durable_dataset, + ), + ); + } + + Ok(zone_root_path) + } + + pub(super) fn remove_datasets_if_needed( + &mut self, + datasets: &IdMap, + log: &Logger, + ) { + let mut datasets_to_remove = Vec::new(); + + for dataset in &self.datasets { + if !datasets.contains_key(&dataset.config.id) { + datasets_to_remove.push(dataset.config.id); + } + } + + for dataset_id in datasets_to_remove { + // TODO We should delete these datasets! (We should also delete any + // on-disk Omicron datasets that aren't present in `config`). + // + // https://github.com/oxidecomputer/omicron/issues/6177 + let dataset = self.datasets.remove(&dataset_id).expect( + "datasets_to_remove only has existing datasets by construction", + ); + warn!( + log, "leaking ZFS dataset (should be deleted: omicron#6177)"; + "id" => %dataset_id, + "name" => dataset.config.name.full_name(), + ) + } + } + + pub(super) async fn ensure_datasets_if_needed( + &mut self, + datasets: IdMap, + log: &Logger, + ) { + let results = match self.dataset_task.datasets_ensure(datasets).await { + Ok(results) => results, + Err(err) => { + // If we can't contact the dataset task, we leave + // `self.datasets` untouched (i.e., reuse whatever datasets we + // have from the last time we successfully contacted the dataset + // task). 
+ warn!( + log, "failed to contact dataset task"; + InlineErrorChain::new(&err), + ); + return; + } + }; + + for DatasetEnsureResult { config, result } in results { + let state = match result { + Ok(()) => DatasetState::Ensured, + Err(err) => DatasetState::FailedToEnsure(err), + }; + self.datasets.insert(OmicronDataset { config, state }); + } + } + + pub(super) fn has_retryable_error(&self) -> bool { + self.datasets.iter().any(|d| match &d.state { + DatasetState::Ensured => false, + DatasetState::FailedToEnsure(err) => err.is_retryable(), + }) + } + + pub(crate) fn to_inventory( + &self, + ) -> BTreeMap { + self.datasets + .iter() + .map(|dataset| { + let result = match &dataset.state { + DatasetState::Ensured => { + ConfigReconcilerInventoryResult::Ok + } + DatasetState::FailedToEnsure(err) => { + ConfigReconcilerInventoryResult::Err { + message: InlineErrorChain::new(err).to_string(), + } + } + }; + (dataset.config.id, result) + }) + .collect() + } +} + +#[derive(Debug)] +struct OmicronDataset { + config: DatasetConfig, + state: DatasetState, +} + +impl IdMappable for OmicronDataset { + type Id = DatasetUuid; + + fn id(&self) -> Self::Id { + self.config.id + } +} + +#[derive(Debug)] +enum DatasetState { + Ensured, + FailedToEnsure(Arc), +} diff --git a/sled-agent/config-reconciler/src/reconciler_task/zones.rs b/sled-agent/config-reconciler/src/reconciler_task/zones.rs index 0bf14582d76..e0a6e370445 100644 --- a/sled-agent/config-reconciler/src/reconciler_task/zones.rs +++ b/sled-agent/config-reconciler/src/reconciler_task/zones.rs @@ -32,6 +32,7 @@ use slog::Logger; use slog::info; use slog::warn; use slog_error_chain::InlineErrorChain; +use std::borrow::Cow; use std::collections::BTreeMap; use std::net::IpAddr; use std::net::Ipv6Addr; @@ -39,7 +40,8 @@ use std::num::NonZeroUsize; use std::str::FromStr as _; use std::sync::Arc; -use super::CurrentlyManagedZpools; +use super::OmicronDatasets; +use super::datasets::ZoneDatasetDependencyError; #[derive(Debug, Clone)] pub enum TimeSyncStatus { @@ -246,7 +248,30 @@ impl OmicronZones { desired_zones: &IdMap, sled_agent_facilities: &T, is_time_synchronized: bool, - all_u2_pools: &CurrentlyManagedZpools, + datasets: &OmicronDatasets, + log: &Logger, + ) { + self.start_zones_if_needed_impl( + desired_zones, + sled_agent_facilities, + &RealZoneFacilities, + is_time_synchronized, + datasets, + log, + ) + .await + } + + async fn start_zones_if_needed_impl< + T: SledAgentFacilities, + U: ZoneFacilities, + >( + &mut self, + desired_zones: &IdMap, + sled_agent_facilities: &T, + zone_facilities: &U, + is_time_synchronized: bool, + datasets: &OmicronDatasets, log: &Logger, ) { // Filter desired zones down to just those that we need to start. See @@ -293,34 +318,132 @@ impl OmicronZones { }); // Build up the futures for starting each zone. - let all_u2_pools = all_u2_pools.clone().into_vec(); let start_futures = zones_to_start.map(|zone| { - sled_agent_facilities - .start_omicron_zone( - zone, - &self.mount_config, - is_time_synchronized, - &all_u2_pools, - ) - .map(move |result| { - ( - zone.clone(), - result.map_err(ZoneStartError::SledAgentStartFailed), - ) - }) + self.start_single_zone( + zone, + sled_agent_facilities, + zone_facilities, + is_time_synchronized, + datasets, + log, + ) + .map(move |result| (zone.clone(), result)) }); // Concurrently start all zones, then record the results. 
let start_results = future::join_all(start_futures).await; for (config, result) in start_results { let state = match result { - Ok(running_zone) => ZoneState::Running(Arc::new(running_zone)), + Ok(state) => state, Err(err) => ZoneState::FailedToStart(err), }; self.zones.insert(OmicronZone { config, state }); } } + async fn start_single_zone( + &self, + zone: &OmicronZoneConfig, + sled_agent_facilities: &T, + zone_facilities: &U, + is_time_synchronized: bool, + datasets: &OmicronDatasets, + log: &Logger, + ) -> Result { + // Ensure no zone by this name exists. This should only happen in the + // event of a sled-agent restart, in which case all the zones the + // previous sled-agent process had started are still running. + if let Some(state) = self + .ensure_removed_before_starting( + zone, + sled_agent_facilities, + zone_facilities, + log, + ) + .await? + { + return Ok(state); + } + + // Ensure that time is sync'd, if needed by this zone. + if zone.zone_type.requires_timesync() && !is_time_synchronized { + return Err(ZoneStartError::TimeNotSynchronized); + } + + // Ensure all dataset dependencies of this zone are okay. + let zone_root_path = + datasets.validate_zone_storage(zone, &self.mount_config)?; + + // The zone is not running - start it. + match sled_agent_facilities + .start_omicron_zone(zone, zone_root_path) + .await + { + Ok(running_zone) => Ok(ZoneState::Running(Arc::new(running_zone))), + Err(err) => Err(ZoneStartError::SledAgentStartFailed(err)), + } + } + + // The return type of this function is strange. The possible values are: + // + // * `Ok(None)` - the zone is not running + // * `Err(_)` - we had an error related to zone startup + // * `Ok(Some(state))` - the zone is still running and is in some state that + // we need to do more work to handle (e.g., we found a running zone but + // failed to shut it down cleanly, in which case we'll return + // `Ok(Some(ZoneState::PartiallyShutDown { .. }))`). In this case, our + // caller should do no further work to try to start `zone`, and should + // instead bubble the `state` up to be recorded. + async fn ensure_removed_before_starting< + T: SledAgentFacilities, + U: ZoneFacilities, + >( + &self, + zone: &OmicronZoneConfig, + sled_agent_facilities: &T, + zone_facilities: &U, + log: &Logger, + ) -> Result, ZoneStartError> { + let zone_name = ZoneName::new(zone); + + // If no zone by this name exists, there's nothing to remove. + if !zone_facilities.zone_with_name_exists(&zone_name).await? { + return Ok(None); + } + + // NOTE: We might want to tell the sled-agent's metrics task to stop + // tracking any links in this zone. However, we don't have very easy + // access to them, without running a command in the zone. These links + // are about to be deleted, and the metrics task will expire them after + // a while anyway, but it might be worth the trouble to do that in the + // future. + // + // Skipping that for now, follow the normal zone shutdown process + // _after_ metrics (i.e., shut down and clean up the zone). + // + // TODO-correctness There's a (very unlikely?) chance that this cleanup + // isn't right: if the running zone (which we have no active knowledge + // of) was started with a different `OmicronZoneConfig`, the cleanup + // steps we do here might not be right. 
+ match resume_shutdown_from_stop( + zone, + sled_agent_facilities, + zone_facilities, + &zone_name, + log, + ) + .await + { + Ok(()) => Ok(None), + Err((state, err)) => { + // We didn't fail to _start_ the zone, so it doesn't make sense + // to return a `ZoneStartError`, but the zone is in a state that + // we need to remember. + Ok(Some(ZoneState::PartiallyShutDown { state, err })) + } + } + } + /// Check the timesync status from a running NTP zone (if it exists) pub(super) async fn check_timesync(&self) -> TimeSyncStatus { match &self.timesync_config { @@ -486,7 +609,8 @@ impl OmicronZone { state: PartiallyShutDownState::FailedToStop(running_zone), .. } => { - self.resume_shutdown_from_stop( + resume_shutdown_from_stop( + &self.config, sled_agent_facilities, zone_facilities, running_zone, @@ -498,14 +622,24 @@ impl OmicronZone { state: PartiallyShutDownState::FailedToCleanUp, .. } => { - self.resume_shutdown_from_cleanup( + resume_shutdown_from_cleanup( + &self.config, sled_agent_facilities, zone_facilities, &log, ) .await } - ZoneState::FailedToStart(_) => { + // With these errors, we never even tried to start the zone, so + // there's no cleanup required: we can just return. + ZoneState::FailedToStart(ZoneStartError::TimeNotSynchronized) + | ZoneState::FailedToStart(ZoneStartError::CheckZoneExists(_)) + | ZoneState::FailedToStart(ZoneStartError::DatasetDependency(_)) => { + Ok(()) + } + ZoneState::FailedToStart(ZoneStartError::SledAgentStartFailed( + err, + )) => { // TODO-correctness What do we need to do to try to shut down a // zone that we tried to start? We need fine-grained status of // what startup things succeeded that need to be cleaned up. For @@ -514,7 +648,8 @@ impl OmicronZone { log, "need to shut down zone that failed to start, but this \ is currently unimplemented: assuming no cleanup work \ - required" + required"; + "start-err" => InlineErrorChain::new(err.as_ref()), ); Ok(()) } @@ -548,83 +683,83 @@ impl OmicronZone { )); } - self.resume_shutdown_from_stop( + resume_shutdown_from_stop( + &self.config, sled_agent_facilities, zone_facilities, - running_zone, + &ZoneName::from(running_zone.name()), log, ) .await } +} - async fn resume_shutdown_from_stop< - T: SledAgentFacilities, - U: ZoneFacilities, - >( - &self, - sled_agent_facilities: &T, - zone_facilities: &U, - running_zone: &Arc, - log: &Logger, - ) -> Result<(), (PartiallyShutDownState, ZoneShutdownError)> { - if let Err(err) = zone_facilities.halt_zone(running_zone, log).await { +async fn resume_shutdown_from_stop< + T: SledAgentFacilities, + U: ZoneFacilities, +>( + config: &OmicronZoneConfig, + sled_agent_facilities: &T, + zone_facilities: &U, + zone_name: &ZoneName<'_>, + log: &Logger, +) -> Result<(), (PartiallyShutDownState, ZoneShutdownError)> { + if let Err(err) = zone_facilities.halt_zone(zone_name, log).await { + warn!( + log, + "Failed to stop running zone"; + InlineErrorChain::new(&err), + ); + return Err(( + PartiallyShutDownState::FailedToStop(zone_name.to_static()), + err, + )); + } + + resume_shutdown_from_cleanup( + config, + sled_agent_facilities, + zone_facilities, + log, + ) + .await +} + +async fn resume_shutdown_from_cleanup< + T: SledAgentFacilities, + U: ZoneFacilities, +>( + config: &OmicronZoneConfig, + sled_agent_facilities: &T, + zone_facilities: &U, + log: &Logger, +) -> Result<(), (PartiallyShutDownState, ZoneShutdownError)> { + // Special teardown for internal DNS zones: delete the global zone + // address we created for it, and tell DDM to stop advertising the + // prefix of that 
address. + if let OmicronZoneType::InternalDns { + gz_address, gz_address_index, .. + } = &config.zone_type + { + let addrobj = AddrObject::new( + &sled_agent_facilities.underlay_vnic().0, + &internal_dns_addrobj_name(*gz_address_index), + ) + .expect("internal DNS address object name is well-formed"); + if let Err(err) = zone_facilities.delete_gz_address(addrobj).await { warn!( log, - "Failed to stop running zone"; + "Failed to delete internal-dns gz address"; InlineErrorChain::new(&err), ); - return Err(( - PartiallyShutDownState::FailedToStop(Arc::clone(running_zone)), - err, - )); + return Err((PartiallyShutDownState::FailedToCleanUp, err)); } - self.resume_shutdown_from_cleanup( - sled_agent_facilities, - zone_facilities, - log, - ) - .await + sled_agent_facilities + .ddm_remove_internal_dns_prefix(Ipv6Subnet::new(*gz_address)); } - async fn resume_shutdown_from_cleanup< - T: SledAgentFacilities, - U: ZoneFacilities, - >( - &self, - sled_agent_facilities: &T, - zone_facilities: &U, - log: &Logger, - ) -> Result<(), (PartiallyShutDownState, ZoneShutdownError)> { - // Special teardown for internal DNS zones: delete the global zone - // address we created for it, and tell DDM to stop advertising the - // prefix of that address. - if let OmicronZoneType::InternalDns { - gz_address, - gz_address_index, - .. - } = &self.config.zone_type - { - let addrobj = AddrObject::new( - &sled_agent_facilities.underlay_vnic().0, - &internal_dns_addrobj_name(*gz_address_index), - ) - .expect("internal DNS address object name is well-formed"); - if let Err(err) = zone_facilities.delete_gz_address(addrobj).await { - warn!( - log, - "Failed to delete internal-dns gz address"; - InlineErrorChain::new(&err), - ); - return Err((PartiallyShutDownState::FailedToCleanUp, err)); - } - - sled_agent_facilities - .ddm_remove_internal_dns_prefix(Ipv6Subnet::new(*gz_address)); - } - - Ok(()) - } + Ok(()) } fn internal_dns_addrobj_name(gz_address_index: u32) -> String { @@ -688,19 +823,64 @@ enum ZoneState { FailedToStart(ZoneStartError), } +#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)] +struct ZoneName<'a>(Cow<'a, str>); + +impl<'a> From<&'a str> for ZoneName<'a> { + fn from(value: &'a str) -> Self { + Self(Cow::Borrowed(value)) + } +} + +impl From for ZoneName<'static> { + fn from(value: String) -> Self { + Self(Cow::Owned(value)) + } +} + +impl ZoneName<'_> { + fn new(config: &OmicronZoneConfig) -> Self { + Self(Cow::Owned(config.zone_name())) + } + + fn to_string(&self) -> String { + self.0.clone().into_owned() + } + + fn to_static(&self) -> ZoneName<'static> { + ZoneName(Cow::Owned(self.0.clone().into_owned())) + } +} + #[derive(Debug)] enum PartiallyShutDownState { FailedToUntrackMetrics(Arc), - FailedToStop(Arc), + FailedToStop(ZoneName<'static>), FailedToCleanUp, } #[derive(Debug, thiserror::Error)] enum ZoneStartError { + #[error("could not determine whether zone already exists")] + CheckZoneExists(#[from] CheckZoneExistsError), + #[error("Time not yet synchronized")] + TimeNotSynchronized, + #[error(transparent)] + DatasetDependency(#[from] ZoneDatasetDependencyError), #[error("sled agent failed to start service")] SledAgentStartFailed(#[source] anyhow::Error), } +#[derive(Debug, thiserror::Error)] +enum CheckZoneExistsError { + #[error("failed to find zone {name}")] + FindByName { + name: String, + #[source] + err: AdmError, + }, +} + #[derive(Debug, thiserror::Error)] enum ZoneShutdownError { #[error("failed to untrack metrics")] @@ -712,9 +892,14 @@ enum ZoneShutdownError { } trait ZoneFacilities 
{ + async fn zone_with_name_exists( + &self, + name: &ZoneName<'_>, + ) -> Result; + async fn halt_zone( &self, - zone: &RunningZone, + zone: &ZoneName, log: &Logger, ) -> Result<(), ZoneShutdownError>; @@ -727,20 +912,32 @@ trait ZoneFacilities { struct RealZoneFacilities; impl ZoneFacilities for RealZoneFacilities { + async fn zone_with_name_exists( + &self, + name: &ZoneName<'_>, + ) -> Result { + match Zones::real_api().find(&name.0).await { + Ok(maybe_zone) => Ok(maybe_zone.is_some()), + Err(err) => Err(CheckZoneExistsError::FindByName { + name: name.to_string(), + err, + }), + } + } + async fn halt_zone( &self, - zone: &RunningZone, + zone: &ZoneName<'_>, log: &Logger, ) -> Result<(), ZoneShutdownError> { - // We don't use `zone.stop()` here because it doesn't allow repeated - // attempts after a failure: - // https://github.com/oxidecomputer/omicron/issues/7881. Instead, use - // the lower-level `Zones::halt_and_remove_logged()` function directly. - // This may leave our `RunningZone` is a bogus state where it still - // holds a `zoneid_t` that doesn't exist anymore, but if we're in the - // shutdown path we never use that `zoneid_t`. + // We don't use `RunningZone::stop()` here because it doesn't allow + // repeated attempts after a failure + // (https://github.com/oxidecomputer/omicron/issues/7881) and because in + // the case of "an unexpected zone is running", all we have is the name. + // Instead, use the lower-level `Zones::halt_and_remove_logged()` + // function directly. Zones::real_api() - .halt_and_remove_logged(log, zone.name()) + .halt_and_remove_logged(log, &zone.0) .await .map_err(ZoneShutdownError::HaltAndRemove) } @@ -758,8 +955,9 @@ impl ZoneFacilities for RealZoneFacilities { #[cfg(test)] mod tests { use super::*; - use crate::CurrentlyManagedZpoolsReceiver; + use crate::dataset_serialization_task::DatasetEnsureError; use anyhow::anyhow; + use assert_matches::assert_matches; use camino_tempfile::Utf8TempDir; use illumos_utils::dladm::Etherstub; use illumos_utils::dladm::EtherstubVnic; @@ -771,7 +969,12 @@ mod tests { use nexus_sled_agent_shared::inventory::OmicronZoneDataset; use nexus_sled_agent_shared::inventory::OmicronZoneImageSource; use omicron_common::address::SLED_PREFIX; + use omicron_common::disk::DatasetConfig; + use omicron_common::disk::DatasetKind; + use omicron_common::disk::DatasetName; + use omicron_common::disk::SharedDatasetConfig; use omicron_test_utils::dev; + use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::ZpoolUuid; use std::collections::BTreeSet; use std::collections::VecDeque; @@ -833,11 +1036,17 @@ mod tests { #[derive(Debug, Default)] struct FakeZoneFacilitiesInner { + existing_zones: BTreeSet, halt_responses: Option>>, removed_gz_addresses: BTreeSet, } impl FakeZoneFacilities { + fn push_existing_zone(&self, name: String) { + let mut inner = self.inner.lock().unwrap(); + inner.existing_zones.insert(name); + } + fn push_halt_response(&self, response: Result<(), ZoneShutdownError>) { let mut inner = self.inner.lock().unwrap(); inner.halt_responses.get_or_insert_default().push_back(response); @@ -845,9 +1054,17 @@ mod tests { } impl ZoneFacilities for FakeZoneFacilities { + async fn zone_with_name_exists( + &self, + name: &ZoneName<'_>, + ) -> Result { + let inner = self.inner.lock().unwrap(); + Ok(inner.existing_zones.contains(&*name.0)) + } + async fn halt_zone( &self, - _zone: &RunningZone, + zone: &ZoneName<'_>, _log: &Logger, ) -> Result<(), ZoneShutdownError> { // If a test has called `push_halt_response`, respsect that; 
@@ -855,9 +1072,18 @@ mod tests { let mut inner = self.inner.lock().unwrap(); match inner.halt_responses.as_mut() { Some(resp) => { - resp.pop_front().expect("have a response for halt_zone()") + let resp = resp + .pop_front() + .expect("have a response for halt_zone()"); + if resp.is_ok() { + inner.existing_zones.remove(&*zone.0); + } + resp + } + None => { + inner.existing_zones.remove(&*zone.0); + Ok(()) } - None => Ok(()), } } @@ -908,9 +1134,7 @@ mod tests { async fn start_omicron_zone( &self, _zone_config: &OmicronZoneConfig, - _mount_config: &MountConfig, - _is_time_synchronized: bool, - _all_u2_pools: &[ZpoolName], + _zone_root_path: PathInPool, ) -> anyhow::Result { let mut inner = self.inner.lock().unwrap(); inner @@ -965,6 +1189,69 @@ mod tests { } } + #[derive(Default)] + struct DatasetsBuilder { + datasets: Vec<(DatasetConfig, Result<(), DatasetEnsureError>)>, + } + + impl DatasetsBuilder { + fn push_root( + &mut self, + zone: &OmicronZoneConfig, + result: Result<(), DatasetEnsureError>, + ) { + let Some(pool) = zone.filesystem_pool else { + return; + }; + self.datasets.push(( + DatasetConfig { + id: DatasetUuid::new_v4(), + name: DatasetName::new( + pool, + DatasetKind::TransientZone { name: zone.zone_name() }, + ), + inner: SharedDatasetConfig::default(), + }, + result, + )); + } + + fn push_durable( + &mut self, + zone: &OmicronZoneConfig, + result: Result<(), DatasetEnsureError>, + ) { + let Some(dataset) = zone.dataset_name() else { + return; + }; + self.datasets.push(( + DatasetConfig { + id: DatasetUuid::new_v4(), + name: dataset, + inner: SharedDatasetConfig::default(), + }, + result, + )); + } + + fn build(self) -> OmicronDatasets { + OmicronDatasets::with_datasets(self.datasets.into_iter()) + } + } + + // Helper to build an all-dependencies-met `OmicronDatasets` for the given + // zone config. + fn make_datasets<'a>( + zones: impl Iterator, + ) -> OmicronDatasets { + let mut builder = DatasetsBuilder::default(); + for zone in zones { + builder.push_root(zone, Ok(())); + builder.push_durable(zone, Ok(())); + } + builder.build() + } + #[tokio::test] async fn shutdown_retries_after_failed_halt() { let logctx = dev::test_setup_log("shutdown_retries_after_failed_halt"); @@ -1055,26 +1342,24 @@ mod tests { let fake_zone_id = OmicronZoneUuid::new_v4(); let desired_zones: IdMap<_> = [make_zone_config(fake_zone_id)].into_iter().collect(); - let currently_managed_zpools = - CurrentlyManagedZpoolsReceiver::fake_static( - desired_zones.iter().map(|z| z.filesystem_pool.unwrap()), - ) - .current(); + let datasets = make_datasets(desired_zones.iter()); // Configure our fake sled-agent to fail to start a zone. let sled_agent_facilities = FakeSledAgentFacilities::default(); sled_agent_facilities.push_start_response(Err(anyhow!("test-boom"))); + let zone_facilities = FakeZoneFacilities::default(); // Starting with no zones, we should try and fail to start the one zone // in `desired_zones`. let mut zones = OmicronZones::new(nonexistent_mount_config(), TimeSyncConfig::Skip); zones - .start_zones_if_needed( + .start_zones_if_needed_impl( &desired_zones, &sled_agent_facilities, + &zone_facilities, true, - ¤tly_managed_zpools, + &datasets, &logctx.log, ) .await; @@ -1104,11 +1389,12 @@ mod tests { // Starting from the "zone failed to start" state, we should try again // to start the zone (and succeed this time). 
zones - .start_zones_if_needed( + .start_zones_if_needed_impl( &desired_zones, &sled_agent_facilities, + &zone_facilities, true, - ¤tly_managed_zpools, + &datasets, &logctx.log, ) .await; @@ -1128,6 +1414,168 @@ mod tests { logctx.cleanup_successful(); } + #[tokio::test] + async fn start_zone_stops_preexisting_zones() { + let logctx = dev::test_setup_log("start_zone_stops_preexisting_zones"); + + // Construct a zone we want to start. + let fake_zone = make_zone_config(OmicronZoneUuid::new_v4()); + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + let datasets = make_datasets(desired_zones.iter()); + + // Configure our fake zone facilities to report a zone with this name as + // already running. + let sled_agent_facilities = FakeSledAgentFacilities::default(); + let zone_facilities = FakeZoneFacilities::default(); + zone_facilities.push_existing_zone(fake_zone.zone_name()); + + let mut zones = + OmicronZones::new(nonexistent_mount_config(), TimeSyncConfig::Skip); + + // Set up our fake sled-agent to return success once the old zone has + // been halted. + let fake_zone_builder = FakeZoneBuilder::new(); + sled_agent_facilities.push_start_response(Ok(fake_zone_builder + .make_running_zone("test", logctx.log.clone()) + .await)); + + // Start zones: this should halt the preexisting zone. + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + true, + &datasets, + &logctx.log, + ) + .await; + + assert_eq!( + zone_facilities.inner.lock().unwrap().existing_zones, + BTreeSet::new() + ); + + assert_eq!(zones.zones.len(), 1); + let zone_should_be_running = + zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!( + zone_should_be_running.config, + *desired_zones.get(&fake_zone.id).unwrap() + ); + match &zone_should_be_running.state { + ZoneState::Running(_) => (), + other => panic!("unexpected zone state: {other:?}"), + } + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn start_zone_fails_if_time_not_synced_when_required() { + let logctx = dev::test_setup_log( + "start_zone_fails_if_time_not_synced_when_required", + ); + + // Construct a zone we want to start, of a type that requires time to be + // sync'd. + let fake_zone = make_zone_config(OmicronZoneUuid::new_v4()); + assert!(fake_zone.zone_type.requires_timesync()); + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + let datasets = make_datasets(desired_zones.iter()); + + let zone_facilities = FakeZoneFacilities::default(); + let sled_agent_facilities = FakeSledAgentFacilities::default(); + + let mut zones = OmicronZones::new( + nonexistent_mount_config(), + TimeSyncConfig::Normal, + ); + + // Start zones: this should refuse to start the zone. + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + false, // is_time_synchronized + &datasets, + &logctx.log, + ) + .await; + + assert_eq!(zones.zones.len(), 1); + let zone = zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!(zone.config, *desired_zones.get(&fake_zone.id).unwrap()); + + // The zone should now be in the expected error state. 
+ match &zone.state { + ZoneState::FailedToStart(err) => { + assert_matches!(err, ZoneStartError::TimeNotSynchronized); + } + other => panic!("unexpected zone state: {other:?}"), + } + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn start_zone_fails_if_halting_preexisting_zone_fails() { + let logctx = dev::test_setup_log( + "start_zone_fails_if_halting_preexisting_zone_fails", + ); + + // Construct a zone we want to start. + let fake_zone = make_zone_config(OmicronZoneUuid::new_v4()); + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + let datasets = make_datasets(desired_zones.iter()); + + // Configure our fake zone facilities to report a zone with this name as + // already running, and configure halting this zone to fail. + let zone_facilities = FakeZoneFacilities::default(); + zone_facilities.push_existing_zone(fake_zone.zone_name()); + zone_facilities.push_halt_response(Err( + ZoneShutdownError::UntrackMetrics(anyhow!("boom")), + )); + let sled_agent_facilities = FakeSledAgentFacilities::default(); + + let mut zones = + OmicronZones::new(nonexistent_mount_config(), TimeSyncConfig::Skip); + + // Start zones: this should try and fail to halt the preexisting zone. + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + true, + &datasets, + &logctx.log, + ) + .await; + + assert_eq!( + zone_facilities.inner.lock().unwrap().existing_zones, + [fake_zone.zone_name()].into_iter().collect::>(), + ); + + assert_eq!(zones.zones.len(), 1); + let zone = zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!(zone.config, *desired_zones.get(&fake_zone.id).unwrap()); + + // The zone should now be in the "partially shut down" state. + match &zone.state { + ZoneState::PartiallyShutDown { state, err } => { + assert_matches!(state, PartiallyShutDownState::FailedToStop(_)); + let err = InlineErrorChain::new(err).to_string(); + assert!(err.contains("boom"), "unexpected error: {err}"); + } + other => panic!("unexpected zone state: {other:?}"), + } + + logctx.cleanup_successful(); + } + #[tokio::test] async fn shutdown_dns_does_dns_specific_cleanup() { let logctx = @@ -1194,4 +1642,161 @@ mod tests { logctx.cleanup_successful(); } + + #[tokio::test] + async fn start_zone_fails_if_missing_root_dataset() { + let logctx = + dev::test_setup_log("start_zone_fails_if_missing_root_dataset"); + + // Construct a zone we want to start. + let fake_zone = make_zone_config(OmicronZoneUuid::new_v4()); + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + + // datasets0: missing root dataset entirely + let datasets0 = { + let mut builder = DatasetsBuilder::default(); + for zone in &desired_zones { + builder.push_durable(zone, Ok(())); + } + builder.build() + }; + + // datasets1: root exists but failed to ensure + let datasets1 = { + let mut builder = DatasetsBuilder::default(); + for zone in &desired_zones { + builder.push_root( + zone, + Err(DatasetEnsureError::TestError("boom")), + ); + builder.push_durable(zone, Ok(())); + } + builder.build() + }; + + let zone_facilities = FakeZoneFacilities::default(); + let sled_agent_facilities = FakeSledAgentFacilities::default(); + + // Both dataset variations should fail the same way. 
+ for datasets in [&datasets0, &datasets1] { + let mut zones = OmicronZones::new( + nonexistent_mount_config(), + TimeSyncConfig::Skip, + ); + + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + true, + datasets, + &logctx.log, + ) + .await; + + assert_eq!(zones.zones.len(), 1); + let zone = zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!(zone.config, *desired_zones.get(&fake_zone.id).unwrap()); + + // The zone should now be in the "partially shut down" state. + match &zone.state { + ZoneState::FailedToStart(err) => { + assert_matches!( + err, + ZoneStartError::DatasetDependency( + ZoneDatasetDependencyError::TransientZoneDatasetNotAvailable(_) + ) + ); + } + other => panic!("unexpected zone state: {other:?}"), + } + } + + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn start_zone_fails_if_missing_durable_dataset() { + let logctx = + dev::test_setup_log("start_zone_fails_if_missing_durable_dataset"); + + // Construct a zone we want to start, using a zone type that has a + // durable dataset. + let fake_zone = OmicronZoneConfig { + id: OmicronZoneUuid::new_v4(), + filesystem_pool: Some(ZpoolName::new_external(ZpoolUuid::new_v4())), + zone_type: OmicronZoneType::Crucible { + address: "[::1]:0".parse().unwrap(), + dataset: OmicronZoneDataset { + pool_name: ZpoolName::new_external(ZpoolUuid::new_v4()), + }, + }, + image_source: OmicronZoneImageSource::InstallDataset, + }; + let desired_zones: IdMap<_> = [fake_zone.clone()].into_iter().collect(); + + // datasets0: missing durable dataset entirely + let datasets0 = { + let mut builder = DatasetsBuilder::default(); + for zone in &desired_zones { + builder.push_root(zone, Ok(())); + } + builder.build() + }; + + // datasets1: durable exists but failed to ensure + let datasets1 = { + let mut builder = DatasetsBuilder::default(); + for zone in &desired_zones { + builder.push_root(zone, Ok(())); + builder.push_durable( + zone, + Err(DatasetEnsureError::TestError("boom")), + ); + } + builder.build() + }; + + let zone_facilities = FakeZoneFacilities::default(); + let sled_agent_facilities = FakeSledAgentFacilities::default(); + + // Both dataset variations should fail the same way. + for datasets in [&datasets0, &datasets1] { + let mut zones = OmicronZones::new( + nonexistent_mount_config(), + TimeSyncConfig::Skip, + ); + + zones + .start_zones_if_needed_impl( + &desired_zones, + &sled_agent_facilities, + &zone_facilities, + true, + datasets, + &logctx.log, + ) + .await; + + assert_eq!(zones.zones.len(), 1); + let zone = zones.zones.get(&fake_zone.id).expect("zone is present"); + assert_eq!(zone.config, *desired_zones.get(&fake_zone.id).unwrap()); + + // The zone should now be in the "partially shut down" state. 
+ match &zone.state { + ZoneState::FailedToStart(err) => { + assert_matches!( + err, + ZoneStartError::DatasetDependency( + ZoneDatasetDependencyError::DurableDatasetNotAvailable(_) + ) + ); + } + other => panic!("unexpected zone state: {other:?}"), + } + } + + logctx.cleanup_successful(); + } } diff --git a/sled-agent/config-reconciler/src/sled_agent_facilities.rs b/sled-agent/config-reconciler/src/sled_agent_facilities.rs index 98f3a164cc2..8445b51d21b 100644 --- a/sled-agent/config-reconciler/src/sled_agent_facilities.rs +++ b/sled-agent/config-reconciler/src/sled_agent_facilities.rs @@ -7,12 +7,11 @@ use illumos_utils::dladm::EtherstubVnic; use illumos_utils::running_zone::RunningZone; -use illumos_utils::zpool::ZpoolName; +use illumos_utils::zpool::PathInPool; use nexus_sled_agent_shared::inventory::OmicronZoneConfig; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use sled_agent_types::zone_bundle::ZoneBundleCause; -use sled_storage::config::MountConfig; use std::future::Future; use tufaceous_artifact::ArtifactHash; @@ -38,9 +37,7 @@ pub trait SledAgentFacilities: Send + Sync + 'static { fn start_omicron_zone( &self, zone_config: &OmicronZoneConfig, - mount_config: &MountConfig, - is_time_synchronized: bool, - all_u2_pools: &[ZpoolName], + zone_root_path: PathInPool, ) -> impl Future> + Send; /// Stop tracking metrics for a zone's datalinks. diff --git a/sled-agent/src/lib.rs b/sled-agent/src/lib.rs index 97d1493467d..4e7c27ea052 100644 --- a/sled-agent/src/lib.rs +++ b/sled-agent/src/lib.rs @@ -28,7 +28,6 @@ mod instance_manager; mod long_running_tasks; mod metrics; mod nexus; -pub mod params; mod probe_manager; mod profile; pub mod rack_setup; diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs deleted file mode 100644 index 56411092a70..00000000000 --- a/sled-agent/src/params.rs +++ /dev/null @@ -1,70 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use nexus_sled_agent_shared::inventory::{OmicronZoneConfig, OmicronZoneType}; -use omicron_common::disk::{DatasetKind, DatasetName}; -pub use sled_hardware::DendriteAsic; -use std::net::SocketAddrV6; - -/// Extension trait for `OmicronZoneType` and `OmicronZoneConfig`. -/// -/// This lives here because it requires extra dependencies that -/// nexus-sled-agent-shared doesn't have. -pub(crate) trait OmicronZoneTypeExt { - fn as_omicron_zone_type(&self) -> &OmicronZoneType; - - /// If this kind of zone has an associated dataset, return the dataset's name. - /// Otherwise, return `None`. - fn dataset_name(&self) -> Option { - self.dataset_name_and_address().map(|(name, _)| name) - } - - /// If this kind of zone has an associated dataset, return the dataset's name - /// and the associated "service address". Otherwise, return `None`. - fn dataset_name_and_address(&self) -> Option<(DatasetName, SocketAddrV6)> { - let (dataset, dataset_kind, address) = match self.as_omicron_zone_type() - { - OmicronZoneType::BoundaryNtp { .. } - | OmicronZoneType::InternalNtp { .. } - | OmicronZoneType::Nexus { .. } - | OmicronZoneType::Oximeter { .. } - | OmicronZoneType::CruciblePantry { .. } => None, - OmicronZoneType::Clickhouse { dataset, address, .. } => { - Some((dataset, DatasetKind::Clickhouse, address)) - } - OmicronZoneType::ClickhouseKeeper { dataset, address, .. 
} => { - Some((dataset, DatasetKind::ClickhouseKeeper, address)) - } - OmicronZoneType::ClickhouseServer { dataset, address, .. } => { - Some((dataset, DatasetKind::ClickhouseServer, address)) - } - OmicronZoneType::CockroachDb { dataset, address, .. } => { - Some((dataset, DatasetKind::Cockroach, address)) - } - OmicronZoneType::Crucible { dataset, address, .. } => { - Some((dataset, DatasetKind::Crucible, address)) - } - OmicronZoneType::ExternalDns { dataset, http_address, .. } => { - Some((dataset, DatasetKind::ExternalDns, http_address)) - } - OmicronZoneType::InternalDns { dataset, http_address, .. } => { - Some((dataset, DatasetKind::InternalDns, http_address)) - } - }?; - - Some((DatasetName::new(dataset.pool_name, dataset_kind), *address)) - } -} - -impl OmicronZoneTypeExt for OmicronZoneType { - fn as_omicron_zone_type(&self) -> &OmicronZoneType { - self - } -} - -impl OmicronZoneTypeExt for OmicronZoneConfig { - fn as_omicron_zone_type(&self) -> &OmicronZoneType { - &self.zone_type - } -} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 0400aff0b3f..a60da9210db 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -32,7 +32,6 @@ use crate::bootstrap::early_networking::{ use crate::config::SidecarRevision; use crate::ddm_reconciler::DdmReconciler; use crate::metrics::MetricsRequestQueue; -use crate::params::{DendriteAsic, OmicronZoneTypeExt}; use crate::profile::*; use anyhow::anyhow; use camino::{Utf8Path, Utf8PathBuf}; @@ -63,11 +62,9 @@ use illumos_utils::{PFEXEC, execute}; use internal_dns_resolver::Resolver; use internal_dns_types::names::BOUNDARY_NTP_DNS_NAME; use internal_dns_types::names::DNS_ZONE; -use itertools::Itertools; use nexus_config::{ConfigDropshotWithTls, DeploymentConfig}; use nexus_sled_agent_shared::inventory::{ - OmicronZoneConfig, OmicronZoneImageSource, OmicronZoneType, - OmicronZonesConfig, ZoneKind, + OmicronZoneConfig, OmicronZoneImageSource, OmicronZoneType, ZoneKind, }; use omicron_common::address::AZ_PREFIX; use omicron_common::address::DENDRITE_PORT; @@ -92,19 +89,16 @@ use omicron_common::backoff::{ BackoffError, retry_notify, retry_policy_internal_service_aggressive, }; use omicron_common::disk::{DatasetKind, DatasetName}; -use omicron_common::ledger::Ledgerable; use omicron_ddm_admin_client::DdmError; use omicron_uuid_kinds::OmicronZoneUuid; -use rand::prelude::SliceRandom; use sled_agent_config_reconciler::InternalDisksReceiver; use sled_agent_types::sled::SWITCH_ZONE_BASEBOARD_FILE; use sled_agent_zone_images::ZoneImageSourceResolver; +use sled_hardware::DendriteAsic; use sled_hardware::SledMode; use sled_hardware::is_gimlet; use sled_hardware::underlay; use sled_hardware_types::Baseboard; -use sled_storage::config::MountConfig; -use sled_storage::dataset::ZONE_DATASET; use slog::Logger; use slog_error_chain::InlineErrorChain; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; @@ -150,9 +144,6 @@ pub enum Error { #[error("Sled Agent not initialized yet")] SledAgentNotReady, - #[error("No U.2 devices found with a {ZONE_DATASET} mountpoint")] - U2NotFound, - #[error("Switch zone error: {0}")] SwitchZone(anyhow::Error), @@ -195,9 +186,6 @@ pub enum Error { #[error(transparent)] ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), - #[error("Failed to initialize zones: {errors:?}")] - ZoneEnsure { errors: Vec<(String, Error)> }, - #[error("Error contacting ddmd: {0}")] DdmError(#[from] DdmError), @@ -228,34 +216,9 @@ pub enum Error { #[error("Failed to get address: {0}")] 
GetAddressFailure(#[from] illumos_utils::zone::GetAddressError), - #[error( - "Failed to launch zone {zone} because ZFS value cannot be accessed" - )] - GetZfsValue { - zone: String, - #[source] - source: illumos_utils::zfs::GetValueError, - }, - - #[error( - "Cannot launch {zone} with {dataset} (saw {prop_name} = {prop_value}, expected {prop_value_expected})" - )] - DatasetNotReady { - zone: String, - dataset: String, - prop_name: String, - prop_value: String, - prop_value_expected: String, - }, - #[error("NTP zone not ready")] NtpZoneNotReady, - // This isn't exactly "NtpZoneNotReady" -- it can happen when the NTP zone - // is up, but time is still in the process of synchronizing. - #[error("Time not yet synchronized")] - TimeNotSynchronized, - #[error("Execution error: {0}")] ExecutionError(#[from] illumos_utils::ExecutionError), @@ -334,39 +297,6 @@ impl From for omicron_common::api::external::Error { Error::RequestedZoneConfigOutdated { .. } => { omicron_common::api::external::Error::conflict(&err.to_string()) } - Error::TimeNotSynchronized => { - omicron_common::api::external::Error::unavail(&err.to_string()) - } - Error::ZoneEnsure { errors } => { - // As a special case, if any zones failed to timesync, - // prioritize that error. - // - // This conversion to a 503 error was requested in - // https://github.com/oxidecomputer/omicron/issues/4776 , - // and we preserve that behavior here, even though we may - // launch many zones at the same time. - if let Some(err) = errors.iter().find_map(|(_, err)| { - if matches!(err, Error::TimeNotSynchronized) { - Some(err) - } else { - None - } - }) { - omicron_common::api::external::Error::unavail( - &err.to_string(), - ) - } else { - let internal_message = errors - .iter() - .map(|(name, err)| { - format!("failed to start {name}: {err:?}") - }) - .join("\n"); - omicron_common::api::external::Error::InternalError { - internal_message, - } - } - } _ => omicron_common::api::external::Error::InternalError { internal_message: err.to_string(), }, @@ -492,96 +422,6 @@ impl RealSystemApi { impl SystemApi for RealSystemApi {} -/// Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus -/// wants for all of its zones) with the locally-determined configuration for -/// these zones. -#[derive( - Clone, - Debug, - Eq, - PartialEq, - serde::Serialize, - serde::Deserialize, - schemars::JsonSchema, -)] -pub struct OmicronZonesConfigLocal { - /// generation of the Omicron-provided part of the configuration - /// - /// This generation number is outside of Sled Agent's control. We store - /// exactly what we were given and use this number to decide when to - /// fail requests to establish an outdated configuration. - /// - /// You can think of this as a major version number, with - /// `ledger_generation` being a minor version number. See - /// `is_newer_than()`. - pub omicron_generation: Generation, - - /// ledger-managed generation number - /// - /// This generation is managed by the ledger facility itself. It's bumped - /// whenever we write a new ledger. In practice, we don't currently have - /// any reason to bump this _for a given Omicron generation_ so it's - /// somewhat redundant. In principle, if we needed to modify the ledgered - /// configuration due to some event that doesn't change the Omicron config - /// (e.g., if we wanted to move the root filesystem to a different path), we - /// could do that by bumping this generation. 
- pub ledger_generation: Generation, - pub zones: Vec, -} - -impl Ledgerable for OmicronZonesConfigLocal { - fn is_newer_than(&self, other: &OmicronZonesConfigLocal) -> bool { - self.omicron_generation > other.omicron_generation - || (self.omicron_generation == other.omicron_generation - && self.ledger_generation >= other.ledger_generation) - } - - fn generation_bump(&mut self) { - self.ledger_generation = self.ledger_generation.next(); - } -} - -impl OmicronZonesConfigLocal { - /// Returns the initial configuration for generation 1, which has no zones - pub fn initial() -> OmicronZonesConfigLocal { - OmicronZonesConfigLocal { - omicron_generation: Generation::new(), - ledger_generation: Generation::new(), - zones: vec![], - } - } - - pub fn to_omicron_zones_config(self) -> OmicronZonesConfig { - OmicronZonesConfig { - generation: self.omicron_generation, - zones: self.zones.into_iter().map(|z| z.zone).collect(), - } - } -} - -/// Combines the Nexus-provided `OmicronZoneConfig` (which describes what Nexus -/// wants for this zone) with any locally-determined configuration (like the -/// path to the root filesystem) -// -// NOTE: Although the path to the root filesystem is not exactly equal to the -// ZpoolName, it is derivable from it, and the ZpoolName for the root filesystem -// is now being supplied as a part of OmicronZoneConfig. Therefore, this struct -// is less necessary than it has been historically. -#[derive( - Clone, - Debug, - Eq, - PartialEq, - serde::Serialize, - serde::Deserialize, - schemars::JsonSchema, -)] -pub struct OmicronZoneConfigLocal { - pub zone: OmicronZoneConfig, - #[schemars(with = "String")] - pub root: Utf8PathBuf, -} - /// Describes how we want a switch zone to be configured /// /// This is analogous to `OmicronZoneConfig`, but for the switch zone (which is @@ -636,7 +476,7 @@ impl illumos_utils::smf_helper::Service for SwitchService { /// Describes either an Omicron-managed zone or the switch zone, used for /// functions that operate on either one or the other enum ZoneArgs<'a> { - Omicron(&'a OmicronZoneConfigLocal), + Omicron(&'a OmicronZoneConfig), Switch(&'a SwitchZoneConfig), } @@ -644,7 +484,7 @@ impl<'a> ZoneArgs<'a> { /// If this is an Omicron zone, return its type pub fn omicron_type(&self) -> Option<&'a OmicronZoneType> { match self { - ZoneArgs::Omicron(zone_config) => Some(&zone_config.zone.zone_type), + ZoneArgs::Omicron(zone_config) => Some(&zone_config.zone_type), ZoneArgs::Switch(_) => None, } } @@ -1498,12 +1338,11 @@ impl ServiceManager { // dataset into the zone. Additionally, construct a "unique enough" name // so we can create multiple zones of this type without collision. let unique_name = match &request { - ZoneArgs::Omicron(zone_config) => Some(zone_config.zone.id), + ZoneArgs::Omicron(zone_config) => Some(zone_config.id), ZoneArgs::Switch(_) => None, }; let datasets: Vec<_> = match &request { ZoneArgs::Omicron(zone_config) => zone_config - .zone .dataset_name() .map(|n| zone::Dataset { name: n.full_name() }) .into_iter() @@ -1523,7 +1362,7 @@ impl ServiceManager { // are falling back to searching `/opt/oxide` in addition to the install // datasets. 
let image_source = match &request { - ZoneArgs::Omicron(zone_config) => &zone_config.zone.image_source, + ZoneArgs::Omicron(zone_config) => &zone_config.image_source, ZoneArgs::Switch(_) => &OmicronZoneImageSource::InstallDataset, }; let file_source = self.inner.zone_image_resolver.file_source_for( @@ -1533,7 +1372,7 @@ impl ServiceManager { let zone_type_str = match &request { ZoneArgs::Omicron(zone_config) => { - zone_config.zone.zone_type.kind().zone_prefix() + zone_config.zone_type.kind().zone_prefix() } ZoneArgs::Switch(_) => "switch", }; @@ -1584,12 +1423,8 @@ impl ServiceManager { .add_instance(ServiceInstanceBuilder::new("default")); let running_zone = match &request { - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: OmicronZoneType::Clickhouse { address, .. }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::Clickhouse { address, .. }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -1673,13 +1508,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::ClickhouseServer { address, .. }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::ClickhouseServer { address, .. }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -1763,13 +1593,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::ClickhouseKeeper { address, .. }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::ClickhouseKeeper { address, .. }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -1846,13 +1671,9 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - id: zone_id, - zone_type: OmicronZoneType::CockroachDb { address, .. }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + id: zone_id, + zone_type: OmicronZoneType::CockroachDb { address, .. }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -1917,13 +1738,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::Crucible { address, dataset }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::Crucible { address, dataset }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -1975,12 +1791,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: OmicronZoneType::CruciblePantry { address }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::CruciblePantry { address }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -2022,13 +1834,9 @@ impl ServiceManager { .map_err(|err| Error::io("crucible pantry profile", err))?; RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - id, - zone_type: OmicronZoneType::Oximeter { address }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + id, + zone_type: OmicronZoneType::Oximeter { address }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -2063,16 +1871,12 @@ impl ServiceManager { })?; RunningZone::boot(installed_zone).await? 
} - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::ExternalDns { - http_address, - dns_address, - nic, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: + OmicronZoneType::ExternalDns { + http_address, + dns_address, + nic, .. }, .. @@ -2126,17 +1930,13 @@ impl ServiceManager { })?; RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::BoundaryNtp { - address, - dns_servers, - ntp_servers, - domain, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: + OmicronZoneType::BoundaryNtp { + address, + dns_servers, + ntp_servers, + domain, .. }, .. @@ -2214,12 +2014,8 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: OmicronZoneType::InternalNtp { address }, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: OmicronZoneType::InternalNtp { address }, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -2275,17 +2071,13 @@ impl ServiceManager { RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::InternalDns { - http_address, - dns_address, - gz_address, - gz_address_index, - .. - }, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: + OmicronZoneType::InternalDns { + http_address, + dns_address, + gz_address, + gz_address_index, .. }, .. @@ -2370,19 +2162,15 @@ impl ServiceManager { })?; RunningZone::boot(installed_zone).await? } - ZoneArgs::Omicron(OmicronZoneConfigLocal { - zone: - OmicronZoneConfig { - zone_type: - OmicronZoneType::Nexus { - internal_address, - external_tls, - external_dns_servers, - .. - }, - id, + ZoneArgs::Omicron(OmicronZoneConfig { + zone_type: + OmicronZoneType::Nexus { + internal_address, + external_tls, + external_dns_servers, .. }, + id, .. }) => { let Some(info) = self.inner.sled_info.get() else { @@ -3262,55 +3050,24 @@ impl ServiceManager { Ok(running_zone) } - // Ensures that a single Omicron zone is running. + // Attempt to start a single Omicron zone. // // This method is NOT idempotent. // - // - If the zone already exists, in any form, it is fully removed - // before being initialized. This is primarily intended to remove "partially - // stopped/started" zones with detritus from interfering with a new zone - // being launched. - // - If zones need time to be synchronized before they are initialized - // (e.g., this is a hard requirement for CockroachDb) they can check the - // `time_is_synchronized` argument. - // - `all_u2_pools` provides a snapshot into durable storage on this sled, - // which gives the storage manager an opportunity to validate the zone's - // storage configuration against the reality of the current sled. + // Callers must do any "pre-zone-start" validation, including: + // + // * No other zone of this same name is still running + // * Time is synchronized, if the zone requires it + // * Any datasets the zone depends on exist and have been configured and/or + // mounted appropriately pub(crate) async fn start_omicron_zone( &self, - mount_config: &MountConfig, zone: &OmicronZoneConfig, - time_is_synchronized: bool, - all_u2_pools: &[ZpoolName], + zone_root_path: PathInPool, ) -> Result { - // Ensure the zone has been fully removed before we try to boot it. 
- // - // This ensures that old "partially booted/stopped" zones do not - // interfere with our installation. - self.ensure_removed(&zone).await?; - - // If this zone requires timesync and we aren't ready, fail it early. - if zone.zone_type.requires_timesync() && !time_is_synchronized { - return Err(Error::TimeNotSynchronized); - } - - // Ensure that this zone's storage is ready. - let zone_root_path = self - .validate_storage_and_pick_mountpoint( - mount_config, - &zone, - &all_u2_pools, - ) - .await?; - - let config = OmicronZoneConfigLocal { - zone: zone.clone(), - root: zone_root_path.path.clone(), - }; - let runtime = self .initialize_zone( - ZoneArgs::Omicron(&config), + ZoneArgs::Omicron(zone), zone_root_path, // filesystems= &[], @@ -3322,188 +3079,6 @@ impl ServiceManager { Ok(runtime) } - // Ensures that if a zone is about to be installed, it does not exist. - async fn ensure_removed( - &self, - zone_config: &OmicronZoneConfig, - ) -> Result<(), Error> { - let zone_name = zone_config.zone_name(); - match self.inner.system_api.zones().find(&zone_name).await { - Ok(Some(zone)) => { - warn!( - self.inner.log, - "removing zone"; - "zone" => &zone_name, - "state" => ?zone.state(), - ); - // NOTE: We might want to tell the sled-agent's metrics task to - // stop tracking any links in this zone. However, we don't have - // very easy access to them, without running a command in the - // zone. These links are about to be deleted, and the metrics - // task will expire them after a while anyway, but it might be - // worth the trouble to do that in the future. - if let Err(e) = self - .inner - .system_api - .zones() - .halt_and_remove_logged(&self.inner.log, &zone_name) - .await - { - error!( - self.inner.log, - "Failed to remove zone"; - "zone" => &zone_name, - InlineErrorChain::new(&e), - ); - return Err(Error::ZoneRemoval { - zone_name: zone_name.to_string(), - err: e, - }); - } - if let Err(e) = - self.clean_up_after_zone_shutdown(zone_config).await - { - error!( - self.inner.log, - "Failed to clean up after removing zone"; - "zone" => &zone_name, - InlineErrorChain::new(&e), - ); - return Err(e); - } - Ok(()) - } - Ok(None) => Ok(()), - Err(err) => Err(Error::ZoneList(err)), - } - } - - // Perform any outside-the-zone cleanup required after shutting down a zone. - async fn clean_up_after_zone_shutdown( - &self, - zone: &OmicronZoneConfig, - ) -> Result<(), Error> { - // Special teardown for internal DNS zones: delete the global zone - // address we created for it, and tell DDM to stop advertising the - // prefix of that address. - if let OmicronZoneType::InternalDns { - gz_address, - gz_address_index, - .. - } = &zone.zone_type - { - let addrobj = AddrObject::new( - &self.inner.underlay_vnic.0, - &internal_dns_addrobj_name(*gz_address_index), - ) - .expect("internal DNS address object name is well-formed"); - Zones::delete_address(None, &addrobj).await.map_err(|err| { - Error::ZoneCleanup { - zone_name: zone.zone_name(), - err: Box::new(err), - } - })?; - - self.ddm_reconciler() - .remove_internal_dns_subnet(Ipv6Subnet::new(*gz_address)); - } - - Ok(()) - } - - // Returns a zone filesystem mountpoint, after ensuring that U.2 storage - // is valid. - async fn validate_storage_and_pick_mountpoint( - &self, - mount_config: &MountConfig, - zone: &OmicronZoneConfig, - all_u2_pools: &[ZpoolName], - ) -> Result { - let name = zone.zone_name(); - - // If the caller has requested a specific durable dataset, - // ensure that it is encrypted and that it exists. 
- // - // Typically, the transient filesystem pool will be placed on the same - // zpool as the durable dataset (to reduce the fault domain), but that - // decision belongs to Nexus, and is not enforced here. - if let Some(dataset) = zone.dataset_name() { - // Check that the dataset is actually ready to be used. - let [zoned, canmount, encryption] = - illumos_utils::zfs::Zfs::get_values( - &dataset.full_name(), - &["zoned", "canmount", "encryption"], - None, - ) - .await - .map_err(|err| Error::GetZfsValue { - zone: zone.zone_name(), - source: err, - })?; - - let check_property = |name, actual, expected| { - if actual != expected { - return Err(Error::DatasetNotReady { - zone: zone.zone_name(), - dataset: dataset.full_name(), - prop_name: String::from(name), - prop_value: actual, - prop_value_expected: String::from(expected), - }); - } - return Ok(()); - }; - check_property("zoned", zoned, "on")?; - check_property("canmount", canmount, "on")?; - if dataset.kind().dataset_should_be_encrypted() { - check_property("encryption", encryption, "aes-256-gcm")?; - } - - let data_pool = dataset.pool(); - if !all_u2_pools.contains(&data_pool) { - warn!( - self.inner.log, - "zone dataset requested on a zpool which doesn't exist"; - "zone" => &name, - "zpool" => %data_pool - ); - return Err(Error::MissingDevice { - device: format!("zpool: {data_pool}"), - }); - } - } - - let filesystem_pool = match (&zone.filesystem_pool, zone.dataset_name()) - { - // If a pool was explicitly requested, use it. - (Some(pool), _) => *pool, - // NOTE: The following cases are for backwards compatibility. - // - // If no pool was selected, prefer to use the same pool as the - // durable dataset. Otherwise, pick one randomly. - (None, Some(dataset)) => *dataset.pool(), - (None, None) => *all_u2_pools - .choose(&mut rand::thread_rng()) - .ok_or_else(|| Error::U2NotFound)?, - }; - - if !all_u2_pools.contains(&filesystem_pool) { - warn!( - self.inner.log, - "zone filesystem dataset requested on a zpool which doesn't exist"; - "zone" => &name, - "zpool" => %filesystem_pool - ); - return Err(Error::MissingDevice { - device: format!("zpool: {filesystem_pool}"), - }); - } - let path = filesystem_pool - .dataset_mountpoint(&mount_config.root, ZONE_DATASET); - let pool = ZpoolOrRamdisk::Zpool(filesystem_pool); - Ok(PathInPool { pool, path }) - } - /// Adjust the system boot time to the latest boot time of all zones. 
fn boottime_rewrite(&self) { // Call out to the 'tmpx' utility program which will rewrite the wtmpx @@ -4364,13 +3939,4 @@ mod test { &serde_json::to_string_pretty(&schema).unwrap(), ); } - - #[test] - fn test_all_zones_requests_schema() { - let schema = schemars::schema_for!(OmicronZonesConfigLocal); - expectorate::assert_contents( - "../schema/all-zones-requests.json", - &serde_json::to_string_pretty(&schema).unwrap(), - ); - } } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 93c0b9e4730..240d00d838b 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -31,7 +31,7 @@ use futures::StreamExt; use futures::stream::FuturesUnordered; use illumos_utils::opte::PortManager; use illumos_utils::running_zone::RunningZone; -use illumos_utils::zpool::ZpoolName; +use illumos_utils::zpool::PathInPool; use itertools::Itertools as _; use nexus_sled_agent_shared::inventory::{ Inventory, OmicronSledConfig, OmicronZoneConfig, SledRole, @@ -76,7 +76,6 @@ use sled_hardware::{ }; use sled_hardware_types::Baseboard; use sled_hardware_types::underlay::BootstrapInterface; -use sled_storage::config::MountConfig; use slog::Logger; use slog_error_chain::InlineErrorChain; use sprockets_tls::keys::SprocketsConfig; @@ -1328,18 +1327,11 @@ impl SledAgentFacilities for ReconcilerFacilities { async fn start_omicron_zone( &self, zone_config: &OmicronZoneConfig, - mount_config: &MountConfig, - is_time_synchronized: bool, - all_u2_pools: &[ZpoolName], + zone_root_path: PathInPool, ) -> anyhow::Result { let zone = self .service_manager - .start_omicron_zone( - mount_config, - zone_config, - is_time_synchronized, - all_u2_pools, - ) + .start_omicron_zone(zone_config, zone_root_path) .await?; Ok(zone) }
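Taken together, these changes narrow start_omicron_zone to "install and boot the zone" and move every precondition (time sync, dataset availability, no leftover zone with the same name) into the reconciler before the call is made. The sketch below is only an illustration of that caller-side contract, not the actual implementation; it uses simplified stand-in types (ZoneSpec, PreStartError, a plain PathBuf) rather than the real OmicronZoneConfig, PathInPool, and ZoneStartError.

// Illustrative sketch (not part of the patch): the pre-start checks that now
// happen before start_omicron_zone is invoked.
use std::path::PathBuf;

// Simplified stand-in for the reconciler's view of a zone.
struct ZoneSpec {
    name: String,
    requires_timesync: bool,
    // Some(_) once the zone's transient root dataset has been ensured.
    transient_root: Option<PathBuf>,
    // True once any durable dataset the zone needs has been ensured.
    durable_dataset_ok: bool,
}

#[derive(Debug)]
enum PreStartError {
    TimeNotSynchronized,
    DatasetNotAvailable(&'static str),
    PreexistingZone(String),
}

// All of the checks that used to live inside start_omicron_zone are performed
// up front; only on success is a zone root path handed to the start call.
fn pre_start_checks(
    zone: &ZoneSpec,
    is_time_synchronized: bool,
    zone_already_running: bool,
) -> Result<PathBuf, PreStartError> {
    if zone.requires_timesync && !is_time_synchronized {
        return Err(PreStartError::TimeNotSynchronized);
    }
    let root = zone
        .transient_root
        .clone()
        .ok_or(PreStartError::DatasetNotAvailable("transient zone root"))?;
    if !zone.durable_dataset_ok {
        return Err(PreStartError::DatasetNotAvailable("durable dataset"));
    }
    if zone_already_running {
        // In the real reconciler the preexisting zone is halted first; only a
        // failed halt leaves the zone in a "partially shut down" state.
        return Err(PreStartError::PreexistingZone(zone.name.clone()));
    }
    Ok(root)
}

fn main() {
    let zone = ZoneSpec {
        name: "oxz_example".to_string(),
        requires_timesync: true,
        transient_root: Some(PathBuf::from("/example/zone/root")),
        durable_dataset_ok: true,
    };

    // Time not yet synchronized: the zone is not started.
    match pre_start_checks(&zone, false, false) {
        Err(PreStartError::TimeNotSynchronized) => {
            println!("refusing to start {} until time is synchronized", zone.name)
        }
        other => println!("unexpected result: {other:?}"),
    }

    // All preconditions met: the root path would be passed as zone_root_path.
    if let Ok(root) = pre_start_checks(&zone, true, false) {
        println!("would start {} with zone root {}", zone.name, root.display());
    }
}

This is the behavior exercised by start_zone_fails_if_time_not_synced_when_required and the missing-root/durable-dataset tests added above.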