diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index df486ac5bf..bb20b38df7 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -343,13 +343,28 @@ impl<'a> BlueprintBuilder<'a> { // Prefer the sled state from our parent blueprint for sleds // that were in it; there may be new sleds in `input`, in which // case we'll use their current state as our starting point. - // - // TODO-correctness When can we drop decommissioned sleds from this map? let mut sled_state = parent_blueprint.sled_state.clone(); + let mut commissioned_sled_ids = BTreeSet::new(); for (sled_id, details) in input.all_sleds(SledFilter::Commissioned) { + commissioned_sled_ids.insert(sled_id); sled_state.entry(sled_id).or_insert(details.state); } + // Make a garbage collection pass through `sled_state`. We want to keep + // any sleds which either: + // + // 1. do not have a desired state of `Decommissioned` + // 2. do have a desired state of `Decommissioned` and are still included + // in our input's list of commissioned sleds + // + // Sleds that don't fall into either of these cases have reached the + // actual `Decommissioned` state, which means we no longer need to carry + // forward that desired state. + sled_state.retain(|sled_id, state| { + *state != SledState::Decommissioned + || commissioned_sled_ids.contains(sled_id) + }); + Ok(BlueprintBuilder { log, parent_blueprint, @@ -1197,6 +1212,7 @@ pub mod test { use crate::system::SledBuilder; use expectorate::assert_contents; use nexus_types::deployment::BlueprintZoneFilter; + use nexus_types::external_api::views::SledPolicy; use omicron_common::address::IpRange; use omicron_test_utils::dev::test_setup_log; use std::collections::BTreeSet; @@ -1403,6 +1419,83 @@ pub mod test { logctx.cleanup_successful(); } + #[test] + fn test_prune_decommissioned_sleds() { + static TEST_NAME: &str = + "blueprint_builder_test_prune_decommissioned_sleds"; + let logctx = test_setup_log(TEST_NAME); + let (_, input, mut blueprint1) = + example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + verify_blueprint(&blueprint1); + + // Mark one sled as having a desired state of decommissioned. + let decommision_sled_id = blueprint1 + .sled_state + .keys() + .copied() + .next() + .expect("at least one sled"); + *blueprint1.sled_state.get_mut(&decommision_sled_id).unwrap() = + SledState::Decommissioned; + + // Change the input to note that the sled is expunged, but still active. + let mut builder = input.into_builder(); + builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().policy = + SledPolicy::Expunged; + builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().state = + SledState::Active; + let input = builder.build(); + + // Generate a new blueprint. This sled should still be included: even + // though the desired state is decommissioned, the current state is + // still active, so we should carry it forward. + let blueprint2 = BlueprintBuilder::new_based_on( + &logctx.log, + &blueprint1, + &input, + "test_prune_decommissioned_sleds", + ) + .expect("created builder") + .build(); + verify_blueprint(&blueprint2); + + // We carried forward the desired state. + assert_eq!( + blueprint2.sled_state.get(&decommision_sled_id).copied(), + Some(SledState::Decommissioned) + ); + + // Change the input to mark the sled decommissioned. (Normally realizing + // blueprint2 would make this change.) + let mut builder = input.into_builder(); + builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().state = + SledState::Decommissioned; + let input = builder.build(); + + // Generate a new blueprint. This desired sled state should no longer be + // present: it has reached the terminal decommissioned state, so there's + // no more work to be done. + let blueprint3 = BlueprintBuilder::new_based_on( + &logctx.log, + &blueprint2, + &input, + "test_prune_decommissioned_sleds", + ) + .expect("created builder") + .build(); + verify_blueprint(&blueprint3); + + // Ensure we've dropped the decommissioned sled. (We may still have + // _zones_ for it that need cleanup work, but all state transitions for + // it are complete.) + assert_eq!( + blueprint3.sled_state.get(&decommision_sled_id).copied(), + None, + ); + + logctx.cleanup_successful(); + } + #[test] fn test_add_physical_disks() { static TEST_NAME: &str = "blueprint_builder_test_add_physical_disks";