@@ -13,7 +13,9 @@ use crate::blueprint_builder::Error;
 use crate::blueprint_builder::Operation;
 use crate::blueprint_editor::DisksEditError;
 use crate::blueprint_editor::SledEditError;
+use crate::mgs_updates::plan_mgs_updates;
 use crate::planner::omicron_zone_placement::PlacementError;
+use gateway_client::types::SpType;
 use nexus_sled_agent_shared::inventory::OmicronZoneType;
 use nexus_sled_agent_shared::inventory::ZoneKind;
 use nexus_types::deployment::Blueprint;
@@ -50,6 +52,37 @@ pub use self::rng::SledPlannerRng;
 mod omicron_zone_placement;
 pub(crate) mod rng;
 
+/// Maximum number of MGS-managed updates (updates to SP, RoT, RoT bootloader,
+/// or host OS) that we allow to be pending across the whole system at one time
+///
+/// For now, we limit this to 1 for safety. That's for a few reasons:
+///
+/// - SP updates reboot the corresponding host. Thus, if we have one of these
+///   updates outstanding, we should assume that host may be offline. Most
+///   control plane services are designed to survive multiple failures (e.g.,
+///   the Cockroach cluster can sustain two failures and stay online), but
+///   having one sled offline eats into that margin. And some services like
+///   Crucible volumes can only sustain one failure. Taking down two sleds
+///   would render unavailable any Crucible volumes with regions on those two
+///   sleds.
+///
+/// - There is unfortunately some risk in updating the RoT bootloader, in that
+///   there's a window where a failure could render the device unbootable. See
+///   oxidecomputer/omicron#7819 for more on this. Updating only one at a time
+///   helps mitigate this risk.
+///
+/// More sophisticated schemes are certainly possible (e.g., allocate Crucible
+/// regions in such a way that there are at least pairs of sleds we could update
+/// concurrently without taking volumes down; and/or be willing to update
+/// multiple sleds as long as they don't have overlapping control plane
+/// services, etc.).
+const NUM_CONCURRENT_MGS_UPDATES: usize = 1;
+
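+/// Result of a single planning step: either the planner may proceed to the
+/// next step, or it should wait because this step still has outstanding work
+/// (e.g., pending MGS-managed updates).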
+enum UpdateStepResult {
+    ContinueToNextStep,
+    Waiting,
+}
+
 pub struct Planner<'a> {
     log: Logger,
     input: &'a PlanningInput,
@@ -115,7 +148,10 @@ impl<'a> Planner<'a> {
         self.do_plan_expunge()?;
         self.do_plan_add()?;
         self.do_plan_decommission()?;
-        self.do_plan_zone_updates()?;
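+        // Skip zone image updates until there are no outstanding MGS-managed
+        // updates (SP, RoT, RoT bootloader, host OS).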
+        if let UpdateStepResult::ContinueToNextStep = self.do_plan_mgs_updates()
+        {
+            self.do_plan_zone_updates()?;
+        }
         self.do_plan_cockroachdb_settings();
         Ok(())
     }
@@ -901,6 +937,63 @@ impl<'a> Planner<'a> {
         Ok(())
     }
 
+    /// Update at most one MGS-managed device (SP, RoT, etc.), if any are out of
+    /// date.
+    fn do_plan_mgs_updates(&mut self) -> UpdateStepResult {
+        // Determine which baseboards we will consider updating.
+        //
+        // Sleds may be present but not adopted as part of the control plane.
+        // In deployed systems, this would probably only happen if a sled was
+        // about to be added. In dev/test environments, it's common to leave
+        // some number of sleds out of the control plane for various reasons.
+        // Inventory will still report them, but we don't want to touch them.
+        //
+        // For better or worse, switches and PSCs do not have the same idea of
+        // being adopted into the control plane. If they're present, they're
+        // part of the system, and we will update them.
+        let included_sled_baseboards: BTreeSet<_> = self
+            .input
+            .all_sleds(SledFilter::SpsUpdatedByReconfigurator)
+            .map(|(_sled_id, details)| &details.baseboard_id)
+            .collect();
+        let included_baseboards =
+            self.inventory
+                .sps
+                .iter()
+                .filter_map(|(baseboard_id, sp_state)| {
+                    let do_include = match sp_state.sp_type {
+                        SpType::Sled => included_sled_baseboards
+                            .contains(baseboard_id.as_ref()),
+                        SpType::Power => true,
+                        SpType::Switch => true,
+                    };
+                    do_include.then_some(baseboard_id.clone())
+                })
+                .collect();
+
+        // Compute the new set of PendingMgsUpdates.
+        let current_updates =
+            &self.blueprint.parent_blueprint().pending_mgs_updates;
+        let current_artifacts = self.input.tuf_repo();
+        let next = plan_mgs_updates(
+            &self.log,
+            &self.inventory,
+            &included_baseboards,
+            &current_updates,
+            current_artifacts,
+            NUM_CONCURRENT_MGS_UPDATES,
+        );
+
+        // TODO This is not quite right. See oxidecomputer/omicron#8285.
+        let rv = if next.is_empty() {
+            UpdateStepResult::ContinueToNextStep
+        } else {
+            UpdateStepResult::Waiting
+        };
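+        // Record the new set of pending MGS updates in the blueprint we are
+        // building.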
+        self.blueprint.pending_mgs_updates_replace_all(next);
+        rv
+    }
+
     /// Update at most one existing zone to use a new image source.
     fn do_plan_zone_updates(&mut self) -> Result<(), Error> {
         // We are only interested in non-decommissioned sleds.