Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 22 additions & 24 deletions dev-tools/omdb/src/bin/omdb/db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7350,30 +7350,28 @@ fn inv_collection_print_sleds(collection: &Collection) {
"LAST RECONCILED CONFIG",
&last_reconciliation.last_reconciled_config,
);
let disk_errs = collect_config_reconciler_errors(
&last_reconciliation.external_disks,
);
let dataset_errs = collect_config_reconciler_errors(
&last_reconciliation.datasets,
);
let zone_errs = collect_config_reconciler_errors(
&last_reconciliation.zones,
);
for (label, errs) in [
("disk", disk_errs),
("dataset", dataset_errs),
("zone", zone_errs),
] {
if errs.is_empty() {
println!(" all {label}s reconciled successfully");
} else {
println!(
" {} {label} reconciliation errors:",
errs.len()
);
for err in errs {
println!(" {err}");
}
}
let disk_errs = collect_config_reconciler_errors(
&last_reconciliation.external_disks,
);
let dataset_errs =
collect_config_reconciler_errors(&last_reconciliation.datasets);
let zone_errs =
collect_config_reconciler_errors(&last_reconciliation.zones);
for (label, errs) in [
("disk", disk_errs),
("dataset", dataset_errs),
("zone", zone_errs),
] {
if errs.is_empty() {
println!(" all {label}s reconciled successfully");
} else {
println!(
" {} {label} reconciliation errors:",
errs.len()
);
for err in errs {
println!(" {err}");
}
}
}
Expand Down
40 changes: 40 additions & 0 deletions nexus-sled-agent-shared/src/inventory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use chrono::{DateTime, Utc};
use daft::Diffable;
use id_map::IdMap;
use id_map::IdMappable;
use omicron_common::disk::{DatasetKind, DatasetName};
use omicron_common::ledger::Ledgerable;
use omicron_common::{
api::{
Expand Down Expand Up @@ -327,6 +328,10 @@ impl OmicronZoneConfig {
Some(self.id),
)
}

/// Returns the name of the durable dataset associated with this zone, if
/// its zone type has one; delegates to [`OmicronZoneType::dataset_name`].
pub fn dataset_name(&self) -> Option<DatasetName> {
    self.zone_type.dataset_name()
}
}

/// Describes a persistent ZFS dataset associated with an Omicron zone
Expand Down Expand Up @@ -613,6 +618,41 @@ impl OmicronZoneType {
| OmicronZoneType::Oximeter { .. } => None,
}
}

/// If this kind of zone has an associated dataset, return the dataset's
/// name. Otherwise, return `None`.
pub fn dataset_name(&self) -> Option<DatasetName> {
let (dataset, dataset_kind) = match self {
OmicronZoneType::BoundaryNtp { .. }
| OmicronZoneType::InternalNtp { .. }
| OmicronZoneType::Nexus { .. }
| OmicronZoneType::Oximeter { .. }
| OmicronZoneType::CruciblePantry { .. } => None,
OmicronZoneType::Clickhouse { dataset, .. } => {
Some((dataset, DatasetKind::Clickhouse))
}
OmicronZoneType::ClickhouseKeeper { dataset, .. } => {
Some((dataset, DatasetKind::ClickhouseKeeper))
}
OmicronZoneType::ClickhouseServer { dataset, .. } => {
Some((dataset, DatasetKind::ClickhouseServer))
}
OmicronZoneType::CockroachDb { dataset, .. } => {
Some((dataset, DatasetKind::Cockroach))
}
OmicronZoneType::Crucible { dataset, .. } => {
Some((dataset, DatasetKind::Crucible))
}
OmicronZoneType::ExternalDns { dataset, .. } => {
Some((dataset, DatasetKind::ExternalDns))
}
OmicronZoneType::InternalDns { dataset, .. } => {
Some((dataset, DatasetKind::InternalDns))
}
}?;

Some(DatasetName::new(dataset.pool_name, dataset_kind))
}
}

/// Like [`OmicronZoneType`], but without any associated data.
Expand Down
17 changes: 3 additions & 14 deletions schema/all-zones-requests.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "OmicronZonesConfigLocal",
"description": "Combines the Nexus-provided `OmicronZonesConfig` (which describes what Nexus wants for all of its zones) with the locally-determined configuration for these zones.",
"description": "Legacy type of the ledgered zone config.",
"type": "object",
"required": [
"ledger_generation",
Expand All @@ -10,20 +10,10 @@
],
"properties": {
"ledger_generation": {
"description": "ledger-managed generation number\n\nThis generation is managed by the ledger facility itself. It's bumped whenever we write a new ledger. In practice, we don't currently have any reason to bump this _for a given Omicron generation_ so it's somewhat redundant. In principle, if we needed to modify the ledgered configuration due to some event that doesn't change the Omicron config (e.g., if we wanted to move the root filesystem to a different path), we could do that by bumping this generation.",
"allOf": [
{
"$ref": "#/definitions/Generation"
}
]
"$ref": "#/definitions/Generation"
},
"omicron_generation": {
"description": "generation of the Omicron-provided part of the configuration\n\nThis generation number is outside of Sled Agent's control. We store exactly what we were given and use this number to decide when to fail requests to establish an outdated configuration.\n\nYou can think of this as a major version number, with `ledger_generation` being a minor version number. See `is_newer_than()`.",
"allOf": [
{
"$ref": "#/definitions/Generation"
}
]
"$ref": "#/definitions/Generation"
},
"zones": {
"type": "array",
Expand Down Expand Up @@ -269,7 +259,6 @@
}
},
"OmicronZoneConfigLocal": {
"description": "Combines the Nexus-provided `OmicronZoneConfig` (which describes what Nexus wants for this zone) with any locally-determined configuration (like the path to the root filesystem)",
"type": "object",
"required": [
"root",
Expand Down
1 change: 1 addition & 0 deletions sled-agent/config-reconciler/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ expectorate.workspace = true
illumos-utils = { workspace = true, features = ["testing"] }
omicron-test-utils.workspace = true
proptest.workspace = true
schemars.workspace = true
scopeguard.workspace = true
serde_json.workspace = true
sled-storage = { workspace = true, features = ["testing"] }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,14 @@ impl IdMappable for DatasetEnsureResult {
pub(crate) struct DatasetTaskHandle(mpsc::Sender<DatasetTaskRequest>);

impl DatasetTaskHandle {
// For testing: create a handle on which requests will always fail with a
// `DatasetTaskError`, because the receiving side of the channel is dropped
// immediately.
#[cfg(any(test, feature = "testing"))]
pub(crate) fn spawn_noop() -> Self {
    let (tx, rx) = mpsc::channel(1);
    drop(rx);
    Self(tx)
}

pub fn spawn_dataset_task(
mount_config: Arc<MountConfig>,
currently_managed_zpools_rx: CurrentlyManagedZpoolsReceiver,
Expand Down
12 changes: 12 additions & 0 deletions sled-agent/config-reconciler/src/ledger/legacy_configs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ fn merge_old_configs(

/// Legacy type of the ledgered zone config.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[cfg_attr(test, derive(schemars::JsonSchema))]
struct OmicronZonesConfigLocal {
omicron_generation: Generation,
ledger_generation: Generation,
Expand All @@ -237,9 +238,11 @@ impl Ledgerable for OmicronZonesConfigLocal {
}

/// Legacy type of a single ledgered zone entry (see the sibling
/// `OmicronZonesConfigLocal`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[cfg_attr(test, derive(schemars::JsonSchema))]
struct OmicronZoneConfigLocal {
    // The Nexus-provided zone configuration.
    zone: OmicronZoneConfig,
    // Kept only so old ledgers still deserialize; the value itself is
    // unused (hence the `_` prefix). Serialized under the original field
    // name `root`; rendered in the JSON schema as a plain string.
    #[serde(rename = "root")]
    #[cfg_attr(test, schemars(with = "String"))]
    _root: Utf8PathBuf,
}

Expand All @@ -264,6 +267,15 @@ pub(super) mod tests {
const MERGED_CONFIG_PATH: &str =
"test-data/expectorate/merged-sled-config.json";

/// The legacy ledger format is frozen: assert that its generated JSON
/// schema still matches the checked-in `all-zones-requests.json`.
#[test]
fn test_old_config_schema() {
    let schema = schemars::schema_for!(OmicronZonesConfigLocal);
    let rendered = serde_json::to_string_pretty(&schema).unwrap();
    expectorate::assert_contents(
        "../../schema/all-zones-requests.json",
        &rendered,
    );
}

#[test]
fn test_merge_old_configs() {
let disks: OmicronPhysicalDisksConfig = {
Expand Down
4 changes: 1 addition & 3 deletions sled-agent/config-reconciler/src/reconciler_task.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,14 +458,12 @@ impl ReconcilerTask {
// the old instance.
match zone_shutdown_result {
Ok(()) => {
let currently_managed_zpools =
self.external_disks.currently_managed_zpools();
self.zones
.start_zones_if_needed(
&sled_config.zones,
sled_agent_facilities,
timesync_status.is_synchronized(),
&currently_managed_zpools,
&self.datasets,
&self.log,
)
.await;
Expand Down
103 changes: 103 additions & 0 deletions sled-agent/config-reconciler/src/reconciler_task/datasets.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,26 +12,129 @@ use crate::dataset_serialization_task::DatasetEnsureResult;
use crate::dataset_serialization_task::DatasetTaskHandle;
use id_map::IdMap;
use id_map::IdMappable;
use illumos_utils::zpool::PathInPool;
use illumos_utils::zpool::ZpoolOrRamdisk;
use nexus_sled_agent_shared::inventory::ConfigReconcilerInventoryResult;
use nexus_sled_agent_shared::inventory::OmicronZoneConfig;
use omicron_common::disk::DatasetConfig;
use omicron_common::disk::DatasetKind;
use omicron_common::disk::DatasetName;
use omicron_uuid_kinds::DatasetUuid;
use sled_storage::config::MountConfig;
use sled_storage::dataset::ZONE_DATASET;
use slog::Logger;
use slog::warn;
use slog_error_chain::InlineErrorChain;
use std::collections::BTreeMap;
use std::sync::Arc;

/// Errors preventing a zone from starting because one of its dataset
/// dependencies is missing or has not been successfully ensured.
#[derive(Debug, thiserror::Error)]
pub(super) enum ZoneDatasetDependencyError {
    /// The zone config has no `filesystem_pool`; should be impossible for
    /// Reconfigurator-produced configs (see omicron issue #8216).
    #[error("zone config is missing a filesystem pool")]
    MissingFilesystemPool,
    /// The transient-zone-root dataset on the zone's filesystem pool has
    /// not been successfully ensured.
    #[error(
        "zone's transient root dataset is not available: {}", .0.full_name(),
    )]
    TransientZoneDatasetNotAvailable(DatasetName),
    /// The zone's durable dataset has not been successfully ensured.
    #[error("zone's durable dataset is not available: {}", .0.full_name())]
    DurableDatasetNotAvailable(DatasetName),
}

/// Tracks the Omicron datasets known to the reconciler and the task handle
/// used to operate on them.
#[derive(Debug)]
pub(super) struct OmicronDatasets {
    // Known datasets (keyed via `IdMappable`), each carrying the outcome
    // of its most recent ensure attempt.
    datasets: IdMap<OmicronDataset>,
    // Handle to the dataset serialization task.
    dataset_task: DatasetTaskHandle,
}

impl OmicronDatasets {
// For testing: build an `OmicronDatasets` directly from dataset configs
// paired with the ensure-result each should report, backed by a no-op
// dataset task handle.
#[cfg(any(test, feature = "testing"))]
pub(super) fn with_datasets<I>(datasets: I) -> Self
where
    I: Iterator<Item = (DatasetConfig, Result<(), DatasetEnsureError>)>,
{
    let datasets = datasets
        .map(|(config, result)| {
            let state = result.map_or_else(
                |err| DatasetState::FailedToEnsure(Arc::new(err)),
                |()| DatasetState::Ensured,
            );
            OmicronDataset { config, state }
        })
        .collect();

    Self { datasets, dataset_task: DatasetTaskHandle::spawn_noop() }
}

/// Construct an empty `OmicronDatasets` that will use `dataset_task` for
/// dataset operations.
pub(super) fn new(dataset_task: DatasetTaskHandle) -> Self {
    Self { datasets: IdMap::default(), dataset_task }
}

/// Confirm that any dataset dependencies of `zone` have been ensured
/// successfully, returning the path for the zone's filesystem root.
pub(super) fn validate_zone_storage(
&self,
zone: &OmicronZoneConfig,
mount_config: &MountConfig,
) -> Result<PathInPool, ZoneDatasetDependencyError> {
// Confirm that there's an ensured `TransientZoneRoot` dataset on this
// zone's filesystem pool.
let Some(filesystem_pool) = zone.filesystem_pool else {
// This should never happen: Reconfigurator guarantees all zones
// have filesystem pools. `filesystem_pool` is non-optional in
// blueprints; we should make it non-optional in `OmicronZoneConfig`
// too: https://github.com/oxidecomputer/omicron/issues/8216
return Err(ZoneDatasetDependencyError::MissingFilesystemPool);
};

let transient_dataset_name = DatasetName::new(
filesystem_pool,
DatasetKind::TransientZone { name: zone.zone_name() },
);

// TODO-cleanup It would be nicer if the zone included the filesystem
// dataset ID, so we could just do a lookup here instead of searching.
// https://github.com/oxidecomputer/omicron/issues/7214
if !self.datasets.iter().any(|d| {
matches!(d.state, DatasetState::Ensured)
&& d.config.name == transient_dataset_name
}) {
return Err(
ZoneDatasetDependencyError::TransientZoneDatasetNotAvailable(
transient_dataset_name,
),
);
}

let zone_root_path = PathInPool {
pool: ZpoolOrRamdisk::Zpool(filesystem_pool),
// TODO-cleanup Should we get this path from the dataset we found
// above?
path: filesystem_pool
.dataset_mountpoint(&mount_config.root, ZONE_DATASET),
};

// Confirm that the durable dataset for this zone has been ensured, if
// it has one.
let Some(durable_dataset) = zone.dataset_name() else {
return Ok(zone_root_path);
};

// TODO-cleanup As above, if we had an ID we could look up instead of
// searching.
if !self.datasets.iter().any(|d| {
matches!(d.state, DatasetState::Ensured)
&& d.config.name == durable_dataset
}) {
return Err(
ZoneDatasetDependencyError::DurableDatasetNotAvailable(
durable_dataset,
),
);
}

Ok(zone_root_path)
}

pub(super) fn remove_datasets_if_needed(
&mut self,
datasets: &IdMap<DatasetConfig>,
Expand Down
Loading
Loading