@@ -13,7 +13,9 @@ use crate::blueprint_builder::Error;
 use crate::blueprint_builder::Operation;
 use crate::blueprint_editor::DisksEditError;
 use crate::blueprint_editor::SledEditError;
+use crate::mgs_updates::plan_mgs_updates;
 use crate::planner::omicron_zone_placement::PlacementError;
+use gateway_client::types::SpType;
 use nexus_sled_agent_shared::inventory::OmicronZoneType;
 use nexus_sled_agent_shared::inventory::ZoneKind;
 use nexus_types::deployment::Blueprint;
@@ -50,6 +52,37 @@ pub use self::rng::SledPlannerRng;
 mod omicron_zone_placement;
 pub(crate) mod rng;
 
+/// Maximum number of MGS-managed updates (updates to SP, RoT, RoT bootloader,
+/// or host OS) that we allow to be pending across the whole system at one time
+///
+/// For now, we limit this to 1 for safety. That's for a few reasons:
+///
+/// - SP updates reboot the corresponding host. Thus, if we have one of these
+///   updates outstanding, we should assume that host may be offline. Most
+///   control plane services are designed to survive multiple failures (e.g.,
+///   the Cockroach cluster can sustain two failures and stay online), but
+///   having one sled offline eats into that margin. And some services like
+///   Crucible volumes can only sustain one failure. Taking down two sleds
+///   would render unavailable any Crucible volumes with regions on those two
+///   sleds.
+///
+/// - There is unfortunately some risk in updating the RoT bootloader, in that
+///   there's a window where a failure could render the device unbootable. See
+///   oxidecomputer/omicron#7819 for more on this. Updating only one at a time
+///   helps mitigate this risk.
+///
+/// More sophisticated schemes are certainly possible (e.g., allocate Crucible
+/// regions in such a way that there are at least pairs of sleds we could update
+/// concurrently without taking volumes down; and/or be willing to update
+/// multiple sleds as long as they don't have overlapping control plane
+/// services, etc.).
+const NUM_CONCURRENT_MGS_UPDATES: usize = 1;
+
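+/// Result of a single planning step: either the planner may proceed to the
+/// next step, or it should wait because this step still has outstanding work
+/// (e.g., pending MGS-managed updates).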
+enum UpdateStepResult {
+    ContinueToNextStep,
+    Waiting,
+}
+
 pub struct Planner<'a> {
     log: Logger,
     input: &'a PlanningInput,
@@ -115,7 +148,10 @@ impl<'a> Planner<'a> {
         self.do_plan_expunge()?;
         self.do_plan_add()?;
         self.do_plan_decommission()?;
-        self.do_plan_zone_updates()?;
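+        // Skip zone image updates until there are no outstanding MGS-managed
+        // updates (SP, RoT, RoT bootloader, host OS).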
+        if let UpdateStepResult::ContinueToNextStep = self.do_plan_mgs_updates()
+        {
+            self.do_plan_zone_updates()?;
+        }
         self.do_plan_cockroachdb_settings();
         Ok(())
     }
@@ -901,6 +937,63 @@ impl<'a> Planner<'a> {
         Ok(())
     }
 
+    /// Update at most one MGS-managed device (SP, RoT, etc.), if any are out of
+    /// date.
+    fn do_plan_mgs_updates(&mut self) -> UpdateStepResult {
+        // Determine which baseboards we will consider updating.
+        //
+        // Sleds may be present but not adopted as part of the control plane.
+        // In deployed systems, this would probably only happen if a sled was
+        // about to be added. In dev/test environments, it's common to leave
+        // some number of sleds out of the control plane for various reasons.
+        // Inventory will still report them, but we don't want to touch them.
+        //
+        // For better or worse, switches and PSCs do not have the same idea of
+        // being adopted into the control plane. If they're present, they're
+        // part of the system, and we will update them.
+        let included_sled_baseboards: BTreeSet<_> = self
+            .input
+            .all_sleds(SledFilter::SpsUpdatedByReconfigurator)
+            .map(|(_sled_id, details)| &details.baseboard_id)
+            .collect();
+        let included_baseboards =
+            self.inventory
+                .sps
+                .iter()
+                .filter_map(|(baseboard_id, sp_state)| {
+                    let do_include = match sp_state.sp_type {
+                        SpType::Sled => included_sled_baseboards
+                            .contains(baseboard_id.as_ref()),
+                        SpType::Power => true,
+                        SpType::Switch => true,
+                    };
+                    do_include.then_some(baseboard_id.clone())
+                })
+                .collect();
+
+        // Compute the new set of PendingMgsUpdates.
+        let current_updates =
+            &self.blueprint.parent_blueprint().pending_mgs_updates;
+        let current_artifacts = self.input.tuf_repo();
+        let next = plan_mgs_updates(
+            &self.log,
+            &self.inventory,
+            &included_baseboards,
+            &current_updates,
+            current_artifacts,
+            NUM_CONCURRENT_MGS_UPDATES,
+        );
+
+        // TODO This is not quite right. See oxidecomputer/omicron#8285.
+        let rv = if next.is_empty() {
+            UpdateStepResult::ContinueToNextStep
+        } else {
+            UpdateStepResult::Waiting
+        };
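+        // Record the new set of pending MGS updates in the blueprint we are
+        // building.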
+        self.blueprint.pending_mgs_updates_replace_all(next);
+        rv
+    }
+
     /// Update at most one existing zone to use a new image source.
     fn do_plan_zone_updates(&mut self) -> Result<(), Error> {
         // We are only interested in non-decommissioned sleds.