Skip to content

Commit 42d86cd

Browse files
authored
[reconfigurator] Rework planner/builder expungement logic (#7495)
The high-level summary of this change is that it moves the logic for handling expunged sleds out of `BlueprintBuilder::expunge_zones_for_sled()` (which is now gone entirely) and into a new method on the planner (`do_plan_expunge_for_commissioned_sled()`, naming suggestions welcome). This fixes two problems; one stylistic, and one semantic: 1. `BlueprintBuilder::expunge_zones_for_sled()` was maybe the worst offender for "is this a builder API or a planner API", because a big chunk of its logic was actually delegated back to `zone_needs_expungement()`, a helper function defined in the planner. The builder now has more explicit and direct methods to expunge specific things, and it's the planner that decides based on the planning input which things to expunge. 2. `BlueprintBuilder::expunge_zones_for_sled()` only handled expunging _zones_. When a sled was expunged, at no point did the planner expunge the disks or datasets on that sled. We haven't been bitten by this yet because the builder currently omits expunged sleds from `blueprint_disks` and `blueprint_datasets` entirely, but that's a major roadblock in the path to joining the maps together (which is a prereq for fixing #7309 by merging sled-agent endpoints together). `do_plan_expunge_for_commissioned_sled()` expunges disks, datasets, and zones. For now we still omit the expunged sleds from `blueprint_disks` and `blueprint_datasets`, but when we stop doing that we'll at least have set the dispositions correctly.
1 parent 66b8423 commit 42d86cd

File tree

9 files changed

+361
-257
lines changed

9 files changed

+361
-257
lines changed

nexus/reconfigurator/planning/src/blueprint_builder/builder.rs

Lines changed: 190 additions & 140 deletions
Large diffs are not rendered by default.

nexus/reconfigurator/planning/src/blueprint_editor.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,5 +25,6 @@ pub use sled_editor::ZonesEditError;
2525
pub(crate) use allocators::BlueprintResourceAllocator;
2626
pub(crate) use allocators::ExternalNetworkingChoice;
2727
pub(crate) use allocators::ExternalSnatNetworkingChoice;
28+
pub(crate) use sled_editor::DiskExpungeDetails;
2829
pub(crate) use sled_editor::EditedSled;
2930
pub(crate) use sled_editor::SledEditor;

nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs

Lines changed: 58 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@ use illumos_utils::zpool::ZpoolName;
1010
use itertools::Either;
1111
use nexus_sled_agent_shared::inventory::ZoneKind;
1212
use nexus_types::deployment::blueprint_zone_type;
13+
use nexus_types::deployment::BlueprintDatasetConfig;
14+
use nexus_types::deployment::BlueprintDatasetFilter;
1315
use nexus_types::deployment::BlueprintDatasetsConfig;
1416
use nexus_types::deployment::BlueprintPhysicalDiskConfig;
1517
use nexus_types::deployment::BlueprintPhysicalDisksConfig;
@@ -68,6 +70,14 @@ pub enum SledInputError {
6870
MultipleDatasetsOfKind(#[from] MultipleDatasetsOfKind),
6971
}
7072

73+
#[derive(Debug, Clone, Eq, PartialEq)]
74+
pub struct DiskExpungeDetails {
75+
pub disk_id: PhysicalDiskUuid,
76+
pub did_expunge_disk: bool,
77+
pub num_datasets_expunged: usize,
78+
pub num_zones_expunged: usize,
79+
}
80+
7181
#[derive(Debug, thiserror::Error)]
7282
pub enum SledEditError {
7383
#[error("editing a decommissioned sled is not allowed")]
@@ -236,6 +246,24 @@ impl SledEditor {
236246
}
237247
}
238248

249+
pub fn datasets(
250+
&self,
251+
filter: BlueprintDatasetFilter,
252+
) -> impl Iterator<Item = &BlueprintDatasetConfig> {
253+
match &self.0 {
254+
InnerSledEditor::Active(editor) => {
255+
Either::Left(editor.datasets(filter))
256+
}
257+
InnerSledEditor::Decommissioned(edited) => Either::Right(
258+
edited
259+
.datasets
260+
.datasets
261+
.iter()
262+
.filter(move |disk| disk.disposition.matches(filter)),
263+
),
264+
}
265+
}
266+
239267
pub fn zones(
240268
&self,
241269
filter: BlueprintZoneFilter,
@@ -277,7 +305,7 @@ impl SledEditor {
277305
pub fn expunge_disk(
278306
&mut self,
279307
disk_id: &PhysicalDiskUuid,
280-
) -> Result<(), SledEditError> {
308+
) -> Result<DiskExpungeDetails, SledEditError> {
281309
self.as_active_mut()?.expunge_disk(disk_id)
282310
}
283311

@@ -292,7 +320,7 @@ impl SledEditor {
292320
pub fn expunge_zone(
293321
&mut self,
294322
zone_id: &OmicronZoneUuid,
295-
) -> Result<(), SledEditError> {
323+
) -> Result<bool, SledEditError> {
296324
self.as_active_mut()?.expunge_zone(zone_id)
297325
}
298326

@@ -437,6 +465,13 @@ impl ActiveSledEditor {
437465
self.disks.disks(filter)
438466
}
439467

468+
pub fn datasets(
469+
&self,
470+
filter: BlueprintDatasetFilter,
471+
) -> impl Iterator<Item = &BlueprintDatasetConfig> {
472+
self.datasets.datasets(filter)
473+
}
474+
440475
pub fn zones(
441476
&self,
442477
filter: BlueprintZoneFilter,
@@ -465,15 +500,28 @@ impl ActiveSledEditor {
465500
pub fn expunge_disk(
466501
&mut self,
467502
disk_id: &PhysicalDiskUuid,
468-
) -> Result<(), SledEditError> {
469-
let zpool_id = self.disks.expunge(disk_id)?;
503+
) -> Result<DiskExpungeDetails, SledEditError> {
504+
let (did_expunge_disk, zpool_id) = self.disks.expunge(disk_id)?;
470505

471506
// When we expunge a disk, we must also expunge any datasets on it, and
472507
// any zones that relied on those datasets.
473-
self.datasets.expunge_all_on_zpool(&zpool_id);
474-
self.zones.expunge_all_on_zpool(&zpool_id);
508+
let num_datasets_expunged =
509+
self.datasets.expunge_all_on_zpool(&zpool_id);
510+
let num_zones_expunged = self.zones.expunge_all_on_zpool(&zpool_id);
511+
512+
if !did_expunge_disk {
513+
// If we didn't expunge the disk, it was already expunged, so there
514+
// shouldn't have been any datasets or zones to expunge.
515+
debug_assert_eq!(num_datasets_expunged, 0);
516+
debug_assert_eq!(num_zones_expunged, 0);
517+
}
475518

476-
Ok(())
519+
Ok(DiskExpungeDetails {
520+
disk_id: *disk_id,
521+
did_expunge_disk,
522+
num_datasets_expunged,
523+
num_zones_expunged,
524+
})
477525
}
478526

479527
pub fn add_zone(
@@ -499,7 +547,7 @@ impl ActiveSledEditor {
499547
pub fn expunge_zone(
500548
&mut self,
501549
zone_id: &OmicronZoneUuid,
502-
) -> Result<(), SledEditError> {
550+
) -> Result<bool, SledEditError> {
503551
let (did_expunge, config) = self.zones.expunge(zone_id)?;
504552

505553
// If we didn't actually expunge the zone in this edit, we don't
@@ -513,7 +561,7 @@ impl ActiveSledEditor {
513561
// explicitly instead of only recording its zpool; once we fix that we
514562
// should be able to remove this check.
515563
if !did_expunge {
516-
return Ok(());
564+
return Ok(did_expunge);
517565
}
518566

519567
if let Some(dataset) = config.filesystem_dataset() {
@@ -524,7 +572,7 @@ impl ActiveSledEditor {
524572
.expunge(&dataset.dataset.pool_name.id(), &dataset.kind)?;
525573
}
526574

527-
Ok(())
575+
Ok(did_expunge)
528576
}
529577

530578
/// Backwards compatibility / test helper: If we're given a blueprint that

nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs

Lines changed: 13 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ use illumos_utils::zpool::ZpoolName;
88
use nexus_types::deployment::id_map::{self, IdMap};
99
use nexus_types::deployment::BlueprintDatasetConfig;
1010
use nexus_types::deployment::BlueprintDatasetDisposition;
11+
use nexus_types::deployment::BlueprintDatasetFilter;
1112
use nexus_types::deployment::BlueprintDatasetsConfig;
1213
use omicron_common::api::external::ByteCount;
1314
use omicron_common::api::external::Generation;
@@ -19,12 +20,8 @@ use omicron_uuid_kinds::DatasetUuid;
1920
use omicron_uuid_kinds::ZpoolUuid;
2021
use std::collections::btree_map::Entry;
2122
use std::collections::BTreeMap;
22-
use std::collections::BTreeSet;
2323
use std::net::SocketAddrV6;
2424

25-
#[cfg(test)]
26-
use nexus_types::deployment::BlueprintDatasetFilter;
27-
2825
#[derive(Debug, thiserror::Error)]
2926
#[error(
3027
"invalid blueprint input: multiple datasets with kind {kind:?} \
@@ -125,11 +122,6 @@ pub(super) struct DatasetsEditor {
125122
// Cache of _in service only_ datasets, identified by (zpool, kind).
126123
in_service_by_zpool_and_kind:
127124
BTreeMap<ZpoolUuid, BTreeMap<DatasetKind, DatasetUuid>>,
128-
// Cache of _expunged_ dataset IDs. This serves as a list of IDs from
129-
// `preexisting_dataset_ids` to ignore, as we shouldn't reuse old IDs if
130-
// they belong to expunged datasets. We should be able to remove this when
131-
// we remove `preexisting_dataset_ids`.
132-
expunged_datasets: BTreeSet<DatasetUuid>,
133125
counts: EditCounts,
134126
}
135127

@@ -138,7 +130,6 @@ impl DatasetsEditor {
138130
config: BlueprintDatasetsConfig,
139131
) -> Result<Self, MultipleDatasetsOfKind> {
140132
let mut in_service_by_zpool_and_kind = BTreeMap::new();
141-
let mut expunged_datasets = BTreeSet::new();
142133
for dataset in config.datasets.iter() {
143134
match dataset.disposition {
144135
BlueprintDatasetDisposition::InService => {
@@ -160,15 +151,12 @@ impl DatasetsEditor {
160151
}
161152
}
162153
}
163-
BlueprintDatasetDisposition::Expunged => {
164-
expunged_datasets.insert(dataset.id);
165-
}
154+
BlueprintDatasetDisposition::Expunged => (),
166155
}
167156
}
168157
Ok(Self {
169158
config,
170159
in_service_by_zpool_and_kind,
171-
expunged_datasets,
172160
counts: EditCounts::zeroes(),
173161
})
174162
}
@@ -180,7 +168,6 @@ impl DatasetsEditor {
180168
datasets: IdMap::new(),
181169
},
182170
in_service_by_zpool_and_kind: BTreeMap::new(),
183-
expunged_datasets: BTreeSet::new(),
184171
counts: EditCounts::zeroes(),
185172
}
186173
}
@@ -197,7 +184,6 @@ impl DatasetsEditor {
197184
self.counts
198185
}
199186

200-
#[cfg(test)]
201187
pub fn datasets(
202188
&self,
203189
filter: BlueprintDatasetFilter,
@@ -210,7 +196,7 @@ impl DatasetsEditor {
210196

211197
// Private method; panics if given an ID that isn't present in
212198
// `self.config.datasets`. Callers must ensure the ID is valid.
213-
fn expunge_by_known_valid_id(&mut self, id: DatasetUuid) {
199+
fn expunge_by_known_valid_id(&mut self, id: DatasetUuid) -> bool {
214200
let mut dataset = self
215201
.config
216202
.datasets
@@ -220,12 +206,13 @@ impl DatasetsEditor {
220206
BlueprintDatasetDisposition::InService => {
221207
dataset.disposition = BlueprintDatasetDisposition::Expunged;
222208
self.counts.expunged += 1;
209+
true
223210
}
224211
BlueprintDatasetDisposition::Expunged => {
225212
// already expunged; nothing to do
213+
false
226214
}
227215
}
228-
self.expunged_datasets.insert(dataset.id);
229216
}
230217

231218
/// Expunge a dataset identified by its zpool + kind combo.
@@ -252,15 +239,20 @@ impl DatasetsEditor {
252239
Ok(())
253240
}
254241

255-
pub fn expunge_all_on_zpool(&mut self, zpool: &ZpoolUuid) {
242+
pub fn expunge_all_on_zpool(&mut self, zpool: &ZpoolUuid) -> usize {
256243
let Some(by_kind) = self.in_service_by_zpool_and_kind.remove(zpool)
257244
else {
258-
return;
245+
return 0;
259246
};
260247

248+
let mut nexpunged = 0;
261249
for id in by_kind.into_values() {
262-
self.expunge_by_known_valid_id(id);
250+
if self.expunge_by_known_valid_id(id) {
251+
nexpunged += 1;
252+
}
263253
}
254+
255+
nexpunged
264256
}
265257

266258
pub fn ensure_in_service(
@@ -340,7 +332,6 @@ impl DatasetsEditor {
340332
mod tests {
341333
use super::*;
342334
use crate::planner::PlannerRng;
343-
use nexus_types::deployment::BlueprintDatasetFilter;
344335
use omicron_uuid_kinds::GenericUuid;
345336
use omicron_uuid_kinds::SledUuid;
346337
use proptest::prelude::*;

nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/disks.rs

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -96,22 +96,25 @@ impl DisksEditor {
9696
pub fn expunge(
9797
&mut self,
9898
disk_id: &PhysicalDiskUuid,
99-
) -> Result<ZpoolUuid, DisksEditError> {
99+
) -> Result<(bool, ZpoolUuid), DisksEditError> {
100100
let config = self.disks.get_mut(disk_id).ok_or_else(|| {
101101
DisksEditError::ExpungeNonexistentDisk { id: *disk_id }
102102
})?;
103103

104+
let did_expunge: bool;
104105
match config.disposition {
105106
BlueprintPhysicalDiskDisposition::InService => {
106107
config.disposition = BlueprintPhysicalDiskDisposition::Expunged;
107108
self.counts.expunged += 1;
109+
did_expunge = true;
108110
}
109111
BlueprintPhysicalDiskDisposition::Expunged => {
110112
// expunge is idempotent; do nothing
113+
did_expunge = false;
111114
}
112115
}
113116

114-
Ok(config.pool_id)
117+
Ok((did_expunge, config.pool_id))
115118
}
116119
}
117120

nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/zones.rs

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,8 @@ impl ZonesEditor {
155155
}
156156
}
157157

158-
pub fn expunge_all_on_zpool(&mut self, zpool: &ZpoolUuid) {
158+
pub fn expunge_all_on_zpool(&mut self, zpool: &ZpoolUuid) -> usize {
159+
let mut nexpunged = 0;
159160
for mut config in self.zones.iter_mut() {
160161
// Expunge this zone if its filesystem or durable dataset are on
161162
// this zpool. (If it has both, they should be on the _same_ zpool,
@@ -170,9 +171,12 @@ impl ZonesEditor {
170171
.durable_zpool()
171172
.map_or(false, |pool| pool.id() == *zpool);
172173
if fs_is_on_zpool || dd_is_on_zpool {
173-
Self::expunge_impl(&mut config, &mut self.counts);
174+
if Self::expunge_impl(&mut config, &mut self.counts) {
175+
nexpunged += 1;
176+
}
174177
}
175178
}
179+
nexpunged
176180
}
177181
}
178182

0 commit comments

Comments
 (0)