From a0cc2ff07c746f94000916234720972dd8fea9b2 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 13 May 2024 17:24:49 -0400 Subject: [PATCH] [reconfigurator] Decommission sleds (#5698) This builds on (and is currently pointed at) #5663, and is on the path to fixing #5625, although there is still more work to do there. (Should be small, and I'll start on it while this is in review to make sure it really fixes it.) I don't plan on landing this before R8 is out the door, but wanted to go ahead and open it up to review. Much of this is fallout from our discussions about what it means to decommission a sled. Major changes are: * `SledFilter::All` has been renamed to `SledFilter::Commissioned`, and no longer returns sleds that are in `SledState::Decommissioned`. * The blueprint planner will now update the desired sled state to `Decommissioned` for sleds which satisfy our conditions. (See `do_plan_decommission()`.) * The blueprint planner will carry forward the Omicron zones of decommissioned sleds to child blueprints. Pruning these is #5552. * The blueprint planner will _not_ carry forward a desired sled state of `Decommissioned` once the inputs report that the sled has already been decommissioned. * The blueprint executor will decommission sleds that the planner said to. * Decommissioning a sled implicitly decommissions all its disks. (This matches what happens with sled expungement, and should not interfere with region replacement, which keys off of policy, not state.) --- dev-tools/omdb/src/bin/omdb/db.rs | 7 +- dev-tools/omdb/tests/env.out | 6 +- dev-tools/omdb/tests/successes.out | 2 +- dev-tools/omdb/tests/test_all_output.rs | 2 +- dev-tools/omdb/tests/usage_errors.out | 2 +- dev-tools/reconfigurator-cli/src/main.rs | 8 +- .../db-queries/src/db/datastore/deployment.rs | 2 +- nexus/db-queries/src/db/datastore/mod.rs | 2 + nexus/db-queries/src/db/datastore/sled.rs | 216 ++++++++++++----- nexus/reconfigurator/execution/src/dns.rs | 2 +- nexus/reconfigurator/execution/src/lib.rs | 15 +- .../execution/src/sled_state.rs | 146 +++++++++++ .../planning/src/blueprint_builder/builder.rs | 229 ++++++++++++++---- .../planning/src/blueprint_builder/zones.rs | 4 +- nexus/reconfigurator/planning/src/example.rs | 4 +- nexus/reconfigurator/planning/src/planner.rs | 211 +++++++++++++++- .../planner_decommissions_sleds_1_2.txt | 81 +++++++ .../planner_decommissions_sleds_bp2.txt | 56 +++++ .../output/planner_nonprovisionable_1_2.txt | 52 ++-- .../output/planner_nonprovisionable_2_2a.txt | 2 +- .../output/planner_nonprovisionable_bp2.txt | 12 +- nexus/reconfigurator/preparation/src/lib.rs | 2 +- nexus/src/app/deployment.rs | 5 +- nexus/types/src/deployment.rs | 15 +- nexus/types/src/deployment/planning_input.rs | 17 +- openapi/nexus-internal.json | 4 +- 26 files changed, 896 insertions(+), 208 deletions(-) create mode 100644 nexus/reconfigurator/execution/src/sled_state.rs create mode 100644 nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt create mode 100644 nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 4d36b4522e..5930788cd2 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -1455,8 +1455,11 @@ async fn cmd_db_sleds( let filter = match args.filter { Some(filter) => filter, None => { - eprintln!("note: listing all sleds (use -F to filter, e.g. 
-F in-service)"); - SledFilter::All + eprintln!( + "note: listing all commissioned sleds \ + (use -F to filter, e.g. -F in-service)" + ); + SledFilter::Commissioned } }; diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index a224155bf9..5716510602 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -9,7 +9,7 @@ stdout: stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () -note: listing all sleds (use -F to filter, e.g. -F in-service) +note: listing all commissioned sleds (use -F to filter, e.g. -F in-service) ============================================= EXECUTING COMMAND: omdb ["db", "--db-url", "junk", "sleds"] termination: Exited(2) @@ -341,7 +341,7 @@ note: database URL not specified. Will search DNS. note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () -note: listing all sleds (use -F to filter, e.g. -F in-service) +note: listing all commissioned sleds (use -F to filter, e.g. -F in-service) ============================================= EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "db", "sleds"] termination: Exited(0) @@ -356,5 +356,5 @@ note: database URL not specified. Will search DNS. note: (override with --db-url or OMDB_DB_URL) note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () -note: listing all sleds (use -F to filter, e.g. -F in-service) +note: listing all commissioned sleds (use -F to filter, e.g. -F in-service) ============================================= diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index d7711610bd..0aa47f2712 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -80,7 +80,7 @@ stdout: stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () -note: listing all sleds (use -F to filter, e.g. -F in-service) +note: listing all commissioned sleds (use -F to filter, e.g. -F in-service) ============================================= EXECUTING COMMAND: omdb ["db", "sleds", "-F", "discretionary"] termination: Exited(0) diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index a480683f04..19be33631d 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -165,7 +165,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { // collection? 
assert!(parsed .planning_input - .all_sled_ids(SledFilter::All) + .all_sled_ids(SledFilter::Commissioned) .next() .is_some()); assert!(!parsed.collections.is_empty()); diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 7b45d33700..ee32167290 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -226,7 +226,7 @@ Options: Show sleds that match the given filter Possible values: - - all: All sleds + - commissioned: All sleds that are currently part of the control plane cluster - discretionary: Sleds that are eligible for discretionary services - in-service: Sleds that are in service (even if they might not be eligible for discretionary services) diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index ccc35fd74c..72add6ce8c 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -559,7 +559,7 @@ fn cmd_sled_list( .to_planning_input_builder() .context("failed to generate planning input")? .build(); - let rows = planning_input.all_sled_resources(SledFilter::All).map( + let rows = planning_input.all_sled_resources(SledFilter::Commissioned).map( |(sled_id, sled_resources)| Sled { id: sled_id, subnet: sled_resources.subnet.net().to_string(), @@ -648,7 +648,7 @@ fn cmd_inventory_generate( // has no zones on it. let planning_input = sim.system.to_planning_input_builder().unwrap().build(); - for sled_id in planning_input.all_sled_ids(SledFilter::All) { + for sled_id in planning_input.all_sled_ids(SledFilter::Commissioned) { builder .found_sled_omicron_zones( "fake sled agent", @@ -1077,7 +1077,7 @@ fn cmd_load( .context("generating planning input")? .build(); for (sled_id, sled_details) in - loaded.planning_input.all_sleds(SledFilter::All) + loaded.planning_input.all_sleds(SledFilter::Commissioned) { if current_planning_input.sled_resources(&sled_id).is_some() { swriteln!( @@ -1202,7 +1202,7 @@ fn cmd_file_contents(args: FileContentsArgs) -> anyhow::Result> { let mut s = String::new(); for (sled_id, sled_resources) in - loaded.planning_input.all_sled_resources(SledFilter::All) + loaded.planning_input.all_sled_resources(SledFilter::Commissioned) { swriteln!( s, diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 6fdeed9ee5..7359f1725b 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1553,7 +1553,7 @@ mod tests { // Check the number of blueprint elements against our collection. 
assert_eq!( blueprint1.blueprint_zones.len(), - planning_input.all_sled_ids(SledFilter::All).count(), + planning_input.all_sled_ids(SledFilter::Commissioned).count(), ); assert_eq!( blueprint1.blueprint_zones.len(), diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 9ade7200d4..9f2d2d02db 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -110,6 +110,8 @@ pub use probe::ProbeInfo; pub use rack::RackInit; pub use rack::SledUnderlayAllocationResult; pub use silo::Discoverability; +pub use sled::SledTransition; +pub use sled::TransitionError; pub use switch_port::SwitchPortSettingsCombinedResult; pub use virtual_provisioning_collection::StorageType; pub use volume::read_only_resources_associated_with_volume; diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 76b42d0bf0..bf43b9182d 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -454,12 +454,13 @@ impl DataStore { UpdateStatus::NotUpdatedButExists, ) => { // Two reasons this can happen: - // 1. An idempotent update: this is treated as a success. + // 1. An idempotent update: this is treated as a + // success. // 2. Invalid state transition: a failure. // - // To differentiate between the two, check that the new policy - // is the same as the old policy, and that the old state is - // valid. + // To differentiate between the two, check that the + // new policy is the same as the old policy, and + // that the old state is valid. if result.found.policy() == new_sled_policy && valid_old_states .contains(&result.found.state()) @@ -533,7 +534,7 @@ impl DataStore { &self, opctx: &OpContext, authz_sled: &authz::Sled, - ) -> Result { + ) -> Result { self.sled_set_state_impl( opctx, authz_sled, @@ -541,14 +542,13 @@ impl DataStore { ValidateTransition::Yes, ) .await - .map_err(|error| error.into_external_error()) } pub(super) async fn sled_set_state_impl( &self, opctx: &OpContext, authz_sled: &authz::Sled, - new_state: SledState, + new_sled_state: SledState, check: ValidateTransition, ) -> Result { use db::schema::sled::dsl; @@ -556,62 +556,124 @@ impl DataStore { opctx.authorize(authz::Action::Modify, authz_sled).await?; let sled_id = authz_sled.id(); - let query = diesel::update(dsl::sled) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::id.eq(sled_id)); + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + let old_state = self + .transaction_retry_wrapper("sled_set_state") + .transaction(&conn, |conn| { + let err = err.clone(); - let t = SledTransition::State(new_state); - let valid_old_policies = t.valid_old_policies(); - let valid_old_states = t.valid_old_states(); + async move { + let query = diesel::update(dsl::sled) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(sled_id)); - let query = match check { - ValidateTransition::Yes => query - .filter(dsl::sled_policy.eq_any( - valid_old_policies.iter().copied().map(to_db_sled_policy), - )) - .filter(dsl::sled_state.eq_any(valid_old_states)) - .into_boxed(), - #[cfg(test)] - ValidateTransition::No => query.into_boxed(), - }; + let t = SledTransition::State(new_sled_state); + let valid_old_policies = t.valid_old_policies(); + let valid_old_states = t.valid_old_states(); - let query = query - .set(( - dsl::sled_state.eq(new_state), - dsl::time_modified.eq(Utc::now()), - )) - .check_if_exists::(sled_id); + let query = match check { + 
ValidateTransition::Yes => query + .filter( + dsl::sled_policy.eq_any( + valid_old_policies + .iter() + .copied() + .map(to_db_sled_policy), + ), + ) + .filter(dsl::sled_state.eq_any(valid_old_states)) + .into_boxed(), + #[cfg(test)] + ValidateTransition::No => query.into_boxed(), + }; - let result = query - .execute_and_check(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + let query = query + .set(( + dsl::sled_state.eq(new_sled_state), + dsl::time_modified.eq(Utc::now()), + )) + .check_if_exists::(sled_id); - match (check, result.status) { - (ValidateTransition::Yes, UpdateStatus::Updated) => { - Ok(result.found.state()) - } - (ValidateTransition::Yes, UpdateStatus::NotUpdatedButExists) => { - // Two reasons this can happen: - // 1. An idempotent update: this is treated as a success. - // 2. Invalid state transition: a failure. - // - // To differentiate between the two, check that the new state - // is the same as the old state, and the found policy is valid. - if result.found.state() == new_state - && valid_old_policies.contains(&result.found.policy()) - { - Ok(result.found.state()) - } else { - Err(TransitionError::InvalidTransition { - current: result.found, - transition: SledTransition::State(new_state), - }) + let result = query.execute_and_check(&conn).await?; + + let old_state = match (check, result.status) { + (ValidateTransition::Yes, UpdateStatus::Updated) => { + result.found.state() + } + ( + ValidateTransition::Yes, + UpdateStatus::NotUpdatedButExists, + ) => { + // Two reasons this can happen: + // 1. An idempotent update: this is treated as a + // success. + // 2. Invalid state transition: a failure. + // + // To differentiate between the two, check that the + // new state is the same as the old state, and the + // found policy is valid. + if result.found.state() == new_sled_state + && valid_old_policies + .contains(&result.found.policy()) + { + result.found.state() + } else { + return Err(err.bail( + TransitionError::InvalidTransition { + current: result.found, + transition: SledTransition::State( + new_sled_state, + ), + }, + )); + } + } + #[cfg(test)] + (ValidateTransition::No, _) => result.found.state(), + }; + + // When a sled is decommissioned, the associated disks with + // that sled should also be implicitly set to + // decommissioned. + // + // We use an explicit `match` to force ourselves to consider + // disk state if we add any addition sled states in the + // future. + let new_disk_state = match new_sled_state { + SledState::Active => None, + SledState::Decommissioned => Some( + nexus_db_model::PhysicalDiskState::Decommissioned, + ), + }; + if let Some(new_disk_state) = new_disk_state { + use db::schema::physical_disk::dsl as physical_disk_dsl; + diesel::update(physical_disk_dsl::physical_disk) + .filter(physical_disk_dsl::time_deleted.is_null()) + .filter(physical_disk_dsl::sled_id.eq(sled_id)) + .set( + physical_disk_dsl::disk_state + .eq(new_disk_state), + ) + .execute_async(&conn) + .await?; + } + + Ok(old_state) } - } - #[cfg(test)] - (ValidateTransition::No, _) => Ok(result.found.state()), - } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + return err; + } + TransitionError::from(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + })?; + + Ok(old_state) } } @@ -623,7 +685,7 @@ impl DataStore { // valid for a new policy or state, except idempotent transitions. 
#[derive(Clone, Copy, Debug, PartialEq, Eq)] -pub(super) enum SledTransition { +pub enum SledTransition { Policy(SledPolicy), State(SledState), } @@ -714,7 +776,7 @@ impl IntoEnumIterator for SledTransition { /// An error that occurred while setting a policy or state. #[derive(Debug, Error)] #[must_use] -pub(super) enum TransitionError { +pub enum TransitionError { /// The state transition check failed. /// /// The sled is returned. @@ -778,6 +840,7 @@ mod test { use nexus_db_model::PhysicalDisk; use nexus_db_model::PhysicalDiskKind; use nexus_db_model::PhysicalDiskPolicy; + use nexus_db_model::PhysicalDiskState; use nexus_test_utils::db::test_setup_database; use nexus_types::identity::Asset; use omicron_common::api::external; @@ -1108,7 +1171,7 @@ mod test { // Set up a sled to test against. let (sled, _) = datastore.sled_upsert(test_new_sled_update()).await.unwrap(); - let sled_id = sled.id(); + let sled_id = SledUuid::from_untyped_uuid(sled.id()); // Add a couple disks to this sled. // @@ -1121,7 +1184,7 @@ mod test { "serial1".to_string(), "model1".to_string(), PhysicalDiskKind::U2, - sled_id, + sled_id.into_untyped_uuid(), ); let disk2 = PhysicalDisk::new( Uuid::new_v4(), @@ -1129,7 +1192,7 @@ mod test { "serial2".to_string(), "model2".to_string(), PhysicalDiskKind::U2, - sled_id, + sled_id.into_untyped_uuid(), ); datastore @@ -1158,7 +1221,7 @@ mod test { sled_set_policy( &opctx, &datastore, - SledUuid::from_untyped_uuid(sled_id), + sled_id, SledPolicy::Expunged, ValidateTransition::Yes, Expected::Ok(SledPolicy::provisionable()), @@ -1166,7 +1229,7 @@ mod test { .await .expect("Could not expunge sled"); - // Observe that the disk state is now expunged + // Observe that the disk policy is now expunged assert_eq!( PhysicalDiskPolicy::Expunged, lookup_physical_disk(&datastore, disk1.id()).await.disk_policy @@ -1176,6 +1239,29 @@ mod test { lookup_physical_disk(&datastore, disk2.id()).await.disk_policy ); + // We can now decommission the sled, which should also decommission the + // disks. + sled_set_state( + &opctx, + &datastore, + sled_id, + SledState::Decommissioned, + ValidateTransition::Yes, + Expected::Ok(SledState::Active), + ) + .await + .expect("decommissioned sled"); + + // Observe that the disk state is now decommissioned + assert_eq!( + PhysicalDiskState::Decommissioned, + lookup_physical_disk(&datastore, disk1.id()).await.disk_state + ); + assert_eq!( + PhysicalDiskState::Decommissioned, + lookup_physical_disk(&datastore, disk2.id()).await.disk_state + ); + db.cleanup().await.unwrap(); logctx.cleanup_successful(); } @@ -1452,7 +1538,7 @@ mod test { assert_eq!(ninserted, size); let sleds = datastore - .sled_list_all_batched(&opctx, SledFilter::All) + .sled_list_all_batched(&opctx, SledFilter::Commissioned) .await .expect("failed to list all sleds"); diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 6a3c1755cf..1760421dee 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -1194,7 +1194,7 @@ mod test { // We do this directly with BlueprintBuilder to avoid the planner // deciding to make other unrelated changes. 
let sled_rows = datastore - .sled_list_all_batched(&opctx, SledFilter::All) + .sled_list_all_batched(&opctx, SledFilter::Commissioned) .await .unwrap(); let zpool_rows = diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 118907390b..8ac8bc4399 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -12,6 +12,7 @@ use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::SledFilter; +use nexus_types::external_api::views::SledState; use nexus_types::identity::Asset; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -29,6 +30,7 @@ mod external_networking; mod omicron_physical_disks; mod omicron_zones; mod overridables; +mod sled_state; pub use dns::blueprint_external_dns_config; pub use dns::blueprint_internal_dns_config; @@ -196,10 +198,21 @@ where String::from(nexus_label), blueprint, &sleds_by_id, - &overrides, + overrides, ) .await .map_err(|e| vec![anyhow!("{}", InlineErrorChain::new(&e))])?; + sled_state::decommission_sleds( + &opctx, + datastore, + blueprint + .sled_state + .iter() + .filter(|&(_, &state)| state == SledState::Decommissioned) + .map(|(&sled_id, _)| sled_id), + ) + .await?; + Ok(()) } diff --git a/nexus/reconfigurator/execution/src/sled_state.rs b/nexus/reconfigurator/execution/src/sled_state.rs new file mode 100644 index 0000000000..aaa5b6bc26 --- /dev/null +++ b/nexus/reconfigurator/execution/src/sled_state.rs @@ -0,0 +1,146 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Updates sled states required by a given blueprint + +use anyhow::Context; +use nexus_db_model::SledState; +use nexus_db_queries::authz::Action; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::TransitionError; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::DataStore; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; + +pub(crate) async fn decommission_sleds( + opctx: &OpContext, + datastore: &DataStore, + sled_ids_to_decommission: impl Iterator, +) -> Result<(), Vec> { + let mut errors = Vec::new(); + + for sled_id in sled_ids_to_decommission { + if let Err(err) = decommission_one_sled(opctx, datastore, sled_id).await + { + errors.push(err); + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } +} + +async fn decommission_one_sled( + opctx: &OpContext, + datastore: &DataStore, + sled_id: SledUuid, +) -> anyhow::Result<()> { + let (authz_sled,) = LookupPath::new(opctx, datastore) + .sled_id(sled_id.into_untyped_uuid()) + .lookup_for(Action::Modify) + .await + .with_context(|| { + format!("failed to look up sled {sled_id} for modification") + })?; + match datastore.sled_set_state_to_decommissioned(opctx, &authz_sled).await { + Ok(_) => Ok(()), + // `sled_set_state_to_decommissioned` is not idempotent. If we're racing + // another Nexus (or we're repeating realization of a blueprint we've + // already realized), this sled may already be decommissioned; that's + // fine. + Err(TransitionError::InvalidTransition { current, .. 
}) + if current.state() == SledState::Decommissioned => + { + Ok(()) + } + Err(err) => Err(anyhow::Error::new(err) + .context(format!("failed to decommission sled {sled_id}"))), + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nexus_test_utils_macros::nexus_test; + use nexus_types::deployment::SledFilter; + use nexus_types::identity::Asset; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + async fn list_all_commissioned_sled_ids( + opctx: &OpContext, + datastore: &DataStore, + ) -> Vec { + datastore + .sled_list_all_batched(&opctx, SledFilter::Commissioned) + .await + .expect("listing sleds") + .into_iter() + .map(|sled| SledUuid::from_untyped_uuid(sled.id())) + .collect() + } + + #[nexus_test] + async fn test_decommission_is_idempotent( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let mut commissioned_sled_ids = + list_all_commissioned_sled_ids(&opctx, datastore).await; + + // Pick a sled to decommission. + let decommissioned_sled_id = + commissioned_sled_ids.pop().expect("at least one sled"); + + // Expunge the sled (required prior to decommissioning). + let (authz_sled,) = LookupPath::new(&opctx, datastore) + .sled_id(decommissioned_sled_id.into_untyped_uuid()) + .lookup_for(Action::Modify) + .await + .expect("lookup authz_sled"); + datastore + .sled_set_policy_to_expunged(&opctx, &authz_sled) + .await + .expect("expunged sled"); + + // Decommission the sled. + decommission_sleds( + &opctx, + datastore, + std::iter::once(decommissioned_sled_id), + ) + .await + .expect("decommissioned sled"); + + // Ensure the sled was marked decommissioned in the db. + assert_eq!( + commissioned_sled_ids, + list_all_commissioned_sled_ids(&opctx, datastore).await + ); + + // Try to decommission the sled again; this should be fine. + decommission_sleds( + &opctx, + datastore, + std::iter::once(decommissioned_sled_id), + ) + .await + .expect("decommissioned sled"); + assert_eq!( + commissioned_sled_ids, + list_all_commissioned_sled_ids(&opctx, datastore).await + ); + } +} diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 0d187af50c..7e34bf9691 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -88,6 +88,14 @@ pub enum Error { NoSystemMacAddressAvailable, #[error("exhausted available Nexus IP addresses")] ExhaustedNexusIps, + #[error( + "invariant violation: found decommissioned sled with \ + {num_zones} non-expunged zones: {sled_id}" + )] + DecommissionedSledWithNonExpungedZones { + sled_id: SledUuid, + num_zones: usize, + }, #[error("programming error in planner")] Planner(#[from] anyhow::Error), } @@ -340,20 +348,30 @@ impl<'a> BlueprintBuilder<'a> { let available_system_macs = AvailableIterator::new(MacAddr::iter_system(), used_macs); - let sled_state = input - .all_sleds(SledFilter::All) - .map(|(sled_id, details)| { - // Prefer the sled state from our parent blueprint for sleds - // that were in it; there may be new sleds in `input`, in which - // case we'll use their current state as our starting point. 
- let state = parent_blueprint - .sled_state - .get(&sled_id) - .copied() - .unwrap_or(details.state); - (sled_id, state) - }) - .collect(); + // Prefer the sled state from our parent blueprint for sleds + // that were in it; there may be new sleds in `input`, in which + // case we'll use their current state as our starting point. + let mut sled_state = parent_blueprint.sled_state.clone(); + let mut commissioned_sled_ids = BTreeSet::new(); + for (sled_id, details) in input.all_sleds(SledFilter::Commissioned) { + commissioned_sled_ids.insert(sled_id); + sled_state.entry(sled_id).or_insert(details.state); + } + + // Make a garbage collection pass through `sled_state`. We want to keep + // any sleds which either: + // + // 1. do not have a desired state of `Decommissioned` + // 2. do have a desired state of `Decommissioned` and are still included + // in our input's list of commissioned sleds + // + // Sleds that don't fall into either of these cases have reached the + // actual `Decommissioned` state, which means we no longer need to carry + // forward that desired state. + sled_state.retain(|sled_id, state| { + *state != SledState::Decommissioned + || commissioned_sled_ids.contains(sled_id) + }); Ok(BlueprintBuilder { log, @@ -373,12 +391,27 @@ impl<'a> BlueprintBuilder<'a> { }) } + /// Iterates over the list of sled IDs for which we have zones. + /// + /// This may include decommissioned sleds. + pub fn sled_ids_with_zones(&self) -> impl Iterator { + self.zones.sled_ids_with_zones() + } + + pub fn current_sled_zones( + &self, + sled_id: SledUuid, + ) -> impl Iterator { + self.zones.current_sled_zones(sled_id).map(|(config, _)| config) + } + /// Assemble a final [`Blueprint`] based on the contents of the builder pub fn build(mut self) -> Blueprint { // Collect the Omicron zones config for all sleds, including sleds that // are no longer in service and need expungement work. - let blueprint_zones = - self.zones.into_zones_map(self.input.all_sled_ids(SledFilter::All)); + let blueprint_zones = self + .zones + .into_zones_map(self.input.all_sled_ids(SledFilter::Commissioned)); let blueprint_disks = self .disks .into_disks_map(self.input.all_sled_ids(SledFilter::InService)); @@ -396,6 +429,15 @@ impl<'a> BlueprintBuilder<'a> { } } + /// Set the desired state of the given sled. + pub fn set_sled_state( + &mut self, + sled_id: SledUuid, + desired_state: SledState, + ) { + self.sled_state.insert(sled_id, desired_state); + } + /// Within tests, set a seeded RNG for deterministic results. /// /// This will ensure that tests that use this builder will produce the same @@ -465,7 +507,7 @@ impl<'a> BlueprintBuilder<'a> { &log, "sled has state Decommissioned, yet has zones \ allocated to it; will expunge them \ - (sled policy is \"{policy}\")" + (sled policy is \"{policy:?}\")" ); } ZoneExpungeReason::SledExpunged => { @@ -1012,6 +1054,18 @@ impl<'a> BlueprintZonesBuilder<'a> { }) } + /// Iterates over the list of sled IDs for which we have zones. + /// + /// This may include decommissioned sleds. + pub fn sled_ids_with_zones(&self) -> impl Iterator { + let mut sled_ids = + self.changed_zones.keys().copied().collect::>(); + for &sled_id in self.parent_zones.keys() { + sled_ids.insert(sled_id); + } + sled_ids.into_iter() + } + /// Iterates over the list of Omicron zones currently configured for this /// sled in the blueprint that's being built, along with each zone's state /// in the builder. 
@@ -1034,38 +1088,35 @@ impl<'a> BlueprintZonesBuilder<'a> { } } - /// Produces an owned map of zones for the requested sleds + /// Produces an owned map of zones for the sleds recorded in this blueprint + /// plus any newly-added sleds pub fn into_zones_map( - mut self, - sled_ids: impl Iterator, + self, + added_sled_ids: impl Iterator, ) -> BTreeMap { - sled_ids - .map(|sled_id| { - // Start with self.changed_zones, which contains entries for any - // sled whose zones config is changing in this blueprint. - if let Some(zones) = self.changed_zones.remove(&sled_id) { - (sled_id, zones.build()) - } - // Next, check self.parent_zones, to represent an unchanged - // sled. - else if let Some(parent_zones) = - self.parent_zones.get(&sled_id) - { - (sled_id, parent_zones.clone()) - } else { - // If the sled is not in self.parent_zones, then it must be - // a new sled and we haven't added any zones to it yet. Use - // the standard initial config. - ( - sled_id, - BlueprintZonesConfig { - generation: Generation::new(), - zones: vec![], - }, - ) - } - }) - .collect() + // Start with self.changed_zones, which contains entries for any + // sled whose zones config is changing in this blueprint. + let mut zones = self + .changed_zones + .into_iter() + .map(|(sled_id, zones)| (sled_id, zones.build())) + .collect::>(); + + // Carry forward any zones from our parent blueprint. This may include + // zones for decommissioned sleds. + for (sled_id, parent_zones) in self.parent_zones { + zones.entry(*sled_id).or_insert_with(|| parent_zones.clone()); + } + + // Finally, insert any newly-added sleds. + for sled_id in added_sled_ids { + zones.entry(sled_id).or_insert_with(|| BlueprintZonesConfig { + generation: Generation::new(), + zones: vec![], + }); + } + + zones } } @@ -1170,6 +1221,7 @@ pub mod test { use crate::system::SledBuilder; use expectorate::assert_contents; use nexus_types::deployment::BlueprintZoneFilter; + use nexus_types::external_api::views::SledPolicy; use omicron_common::address::IpRange; use omicron_test_utils::dev::test_setup_log; use std::collections::BTreeSet; @@ -1269,7 +1321,7 @@ pub mod test { // existing sleds, plus Crucible zones on all pools. So if we ensure // all these zones exist, we should see no change. for (sled_id, sled_resources) in - example.input.all_sled_resources(SledFilter::All) + example.input.all_sled_resources(SledFilter::Commissioned) { builder.sled_ensure_zone_ntp(sled_id).unwrap(); for pool_id in sled_resources.zpools.keys() { @@ -1376,6 +1428,83 @@ pub mod test { logctx.cleanup_successful(); } + #[test] + fn test_prune_decommissioned_sleds() { + static TEST_NAME: &str = + "blueprint_builder_test_prune_decommissioned_sleds"; + let logctx = test_setup_log(TEST_NAME); + let (_, input, mut blueprint1) = + example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + verify_blueprint(&blueprint1); + + // Mark one sled as having a desired state of decommissioned. + let decommision_sled_id = blueprint1 + .sled_state + .keys() + .copied() + .next() + .expect("at least one sled"); + *blueprint1.sled_state.get_mut(&decommision_sled_id).unwrap() = + SledState::Decommissioned; + + // Change the input to note that the sled is expunged, but still active. + let mut builder = input.into_builder(); + builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().policy = + SledPolicy::Expunged; + builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().state = + SledState::Active; + let input = builder.build(); + + // Generate a new blueprint. 
This sled should still be included: even + // though the desired state is decommissioned, the current state is + // still active, so we should carry it forward. + let blueprint2 = BlueprintBuilder::new_based_on( + &logctx.log, + &blueprint1, + &input, + "test_prune_decommissioned_sleds", + ) + .expect("created builder") + .build(); + verify_blueprint(&blueprint2); + + // We carried forward the desired state. + assert_eq!( + blueprint2.sled_state.get(&decommision_sled_id).copied(), + Some(SledState::Decommissioned) + ); + + // Change the input to mark the sled decommissioned. (Normally realizing + // blueprint2 would make this change.) + let mut builder = input.into_builder(); + builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().state = + SledState::Decommissioned; + let input = builder.build(); + + // Generate a new blueprint. This desired sled state should no longer be + // present: it has reached the terminal decommissioned state, so there's + // no more work to be done. + let blueprint3 = BlueprintBuilder::new_based_on( + &logctx.log, + &blueprint2, + &input, + "test_prune_decommissioned_sleds", + ) + .expect("created builder") + .build(); + verify_blueprint(&blueprint3); + + // Ensure we've dropped the decommissioned sled. (We may still have + // _zones_ for it that need cleanup work, but all state transitions for + // it are complete.) + assert_eq!( + blueprint3.sled_state.get(&decommision_sled_id).copied(), + None, + ); + + logctx.cleanup_successful(); + } + #[test] fn test_add_physical_disks() { static TEST_NAME: &str = "blueprint_builder_test_add_physical_disks"; @@ -1384,7 +1513,7 @@ pub mod test { // Start with an empty blueprint (sleds with no zones). let parent = BlueprintBuilder::build_empty_with_sleds_seeded( - input.all_sled_ids(SledFilter::All), + input.all_sled_ids(SledFilter::Commissioned), "test", TEST_NAME, ); @@ -1430,7 +1559,7 @@ pub mod test { let (collection, input, _) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); let parent = BlueprintBuilder::build_empty_with_sleds_seeded( - input.all_sled_ids(SledFilter::All), + input.all_sled_ids(SledFilter::Commissioned), "test", TEST_NAME, ); diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs index c9015f8b27..a2e577f80c 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs @@ -274,7 +274,7 @@ mod tests { // existing sled. let existing_sled_id = example .input - .all_sled_ids(SledFilter::All) + .all_sled_ids(SledFilter::Commissioned) .next() .expect("at least one sled present"); let change = builder.zones.change_sled_zones(existing_sled_id); @@ -351,7 +351,7 @@ mod tests { // become smarter and not do so (in which case this test will break). let control_sled_id = example .input - .all_sled_ids(SledFilter::All) + .all_sled_ids(SledFilter::Commissioned) .nth(2) .expect("at least 2 sleds present"); _ = builder.zones.change_sled_zones(control_sled_id); diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 136302e7be..24dbbd15ac 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -51,7 +51,7 @@ impl ExampleSystem { // Start with an empty blueprint containing only our sleds, no zones. 
let initial_blueprint = BlueprintBuilder::build_empty_with_sleds_seeded( - base_input.all_sled_ids(SledFilter::All), + base_input.all_sled_ids(SledFilter::Commissioned), "test suite", (test_name, "ExampleSystem initial"), ); @@ -66,7 +66,7 @@ impl ExampleSystem { .unwrap(); builder.set_rng_seed((test_name, "ExampleSystem make_zones")); for (sled_id, sled_resources) in - base_input.all_sled_resources(SledFilter::All) + base_input.all_sled_resources(SledFilter::Commissioned) { let _ = builder.sled_ensure_zone_ntp(sled_id).unwrap(); let _ = builder diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 7fb8e49c31..5535b28910 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -11,6 +11,7 @@ use crate::blueprint_builder::Ensure; use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::deployment::ZpoolFilter; @@ -18,6 +19,7 @@ use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_uuid_kinds::SledUuid; +use slog::error; use slog::{info, warn, Logger}; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -80,15 +82,82 @@ impl<'a> Planner<'a> { self.do_plan_expunge()?; self.do_plan_add()?; + self.do_plan_decommission()?; + + Ok(()) + } + + fn do_plan_decommission(&mut self) -> Result<(), Error> { + // Check for any sleds that are currently commissioned but can be + // decommissioned. Our gates for decommissioning are: + // + // 1. The policy indicates the sled has been removed (i.e., the policy + // is "expunged"; we may have other policies that satisfy this + // requirement in the future). + // 2. All zones associated with the sled have been marked expunged. + // 3. There are no instances assigned to this sled. This is blocked by + // omicron#4872, so today we omit this check entirely, as any sled + // that could be otherwise decommissioned that still has instances + // assigned to it needs support intervention for cleanup. + // 4. All disks associated with the sled have been marked expunged. This + // happens implicitly when a sled is expunged, so is covered by our + // first check. + for (sled_id, sled_details) in + self.input.all_sleds(SledFilter::Commissioned) + { + // Check 1: look for sleds that are expunged. + match (sled_details.policy, sled_details.state) { + // If the sled is still in service, don't decommission it. + (SledPolicy::InService { .. }, _) => continue, + // If the sled is already decommissioned it... why is it showing + // up when we ask for commissioned sleds? Warn, but don't try to + // decommission it again. + (SledPolicy::Expunged, SledState::Decommissioned) => { + error!( + self.log, + "decommissioned sled returned by \ + SledFilter::Commissioned"; + "sled_id" => %sled_id, + ); + continue; + } + // The sled is expunged but not yet decommissioned; fall through + // to check the rest of the criteria. + (SledPolicy::Expunged, SledState::Active) => (), + } + + // Check 2: have all this sled's zones been expunged? It's possible + // we ourselves have made this change, which is fine. 
+ let all_zones_expunged = + self.blueprint.current_sled_zones(sled_id).all(|zone| { + zone.disposition == BlueprintZoneDisposition::Expunged + }); + + // Check 3: Are there any instances assigned to this sled? See + // comment above; while we wait for omicron#4872, we just assume + // there are no instances running. + let num_instances_assigned = 0; + + if all_zones_expunged && num_instances_assigned == 0 { + self.blueprint + .set_sled_state(sled_id, SledState::Decommissioned); + } + } Ok(()) } fn do_plan_expunge(&mut self) -> Result<(), Error> { - // Remove services from sleds marked expunged. We use `SledFilter::All` - // and have a custom `needs_zone_expungement` function that allows us - // to produce better errors. - for (sled_id, sled_details) in self.input.all_sleds(SledFilter::All) { + let mut commissioned_sled_ids = BTreeSet::new(); + + // Remove services from sleds marked expunged. We use + // `SledFilter::Commissioned` and have a custom `needs_zone_expungement` + // function that allows us to produce better errors. + for (sled_id, sled_details) in + self.input.all_sleds(SledFilter::Commissioned) + { + commissioned_sled_ids.insert(sled_id); + // Does this sled need zone expungement based on the details? let Some(reason) = needs_zone_expungement(sled_details.state, sled_details.policy) @@ -100,6 +169,31 @@ impl<'a> Planner<'a> { self.blueprint.expunge_all_zones_for_sled(sled_id, reason)?; } + // Check for any decommissioned sleds (i.e., sleds for which our + // blueprint has zones, but are not in the input sled list). Any zones + // for decommissioned sleds must have already be expunged for + // decommissioning to have happened; fail if we find non-expunged zones + // associated with a decommissioned sled. + for sled_id in self.blueprint.sled_ids_with_zones() { + if !commissioned_sled_ids.contains(&sled_id) { + let num_zones = self + .blueprint + .current_sled_zones(sled_id) + .filter(|zone| { + zone.disposition != BlueprintZoneDisposition::Expunged + }) + .count(); + if num_zones > 0 { + return Err( + Error::DecommissionedSledWithNonExpungedZones { + sled_id, + num_zones, + }, + ); + } + } + } + Ok(()) } @@ -852,7 +946,7 @@ mod test { // and decommissioned sleds. (When we add more kinds of // non-provisionable states in the future, we'll have to add more // sleds.) - let (collection, input, blueprint1) = + let (collection, input, mut blueprint1) = example(&logctx.log, TEST_NAME, 5); // This blueprint should only have 5 Nexus zones: one on each sled. @@ -890,6 +984,17 @@ mod test { let decommissioned_sled_id = { let (sled_id, details) = sleds_iter.next().expect("no sleds"); details.state = SledState::Decommissioned; + + // Decommissioned sleds can only occur if their zones have been + // expunged, so lie and pretend like that already happened + // (otherwise the planner will rightfully fail to generate a new + // blueprint, because we're feeding it invalid inputs). 
+ for zone in + &mut blueprint1.blueprint_zones.get_mut(sled_id).unwrap().zones + { + zone.disposition = BlueprintZoneDisposition::Expunged; + } + *sled_id }; println!("1 -> 2: decommissioned {decommissioned_sled_id}"); @@ -951,13 +1056,6 @@ mod test { let expunged_modified = sleds.remove(&expunged_sled_id).unwrap(); assert_all_zones_expunged(&expunged_modified, "expunged sled"); - let decommissioned_modified = - sleds.remove(&decommissioned_sled_id).unwrap(); - assert_all_zones_expunged( - &decommissioned_modified, - "decommissioned sled", - ); - // Only 2 of the 3 remaining sleds (not the non-provisionable sled) // should get additional Nexus zones. We expect a total of 6 new Nexus // zones, which should be split evenly between the two sleds, while the @@ -1110,4 +1208,93 @@ mod test { ); } } + + #[test] + fn planner_decommissions_sleds() { + static TEST_NAME: &str = "planner_decommissions_sleds"; + let logctx = test_setup_log(TEST_NAME); + + // Use our example system as a starting point. + let (collection, input, blueprint1) = + example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + + // Expunge one of the sleds. + let mut builder = input.into_builder(); + let expunged_sled_id = { + let mut iter = builder.sleds_mut().iter_mut(); + let (sled_id, details) = iter.next().expect("at least one sled"); + details.policy = SledPolicy::Expunged; + *sled_id + }; + + let input = builder.build(); + let mut blueprint2 = Planner::new_based_on( + logctx.log.clone(), + &blueprint1, + &input, + "test_blueprint2", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp2")) + .plan() + .expect("failed to plan"); + + // Define a time_created for consistent output across runs. + blueprint2.time_created = + Utc.from_utc_datetime(&NaiveDateTime::UNIX_EPOCH); + + assert_contents( + "tests/output/planner_decommissions_sleds_bp2.txt", + &blueprint2.display().to_string(), + ); + let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); + println!("1 -> 2 (expunged {expunged_sled_id}):\n{}", diff.display()); + assert_contents( + "tests/output/planner_decommissions_sleds_1_2.txt", + &diff.display().to_string(), + ); + + // All the zones of the expunged sled should be expunged, and the sled + // itself should be decommissioned. + assert!(blueprint2.blueprint_zones[&expunged_sled_id] + .are_all_zones_expunged()); + assert_eq!( + blueprint2.sled_state[&expunged_sled_id], + SledState::Decommissioned + ); + + // Remove the now-decommissioned sled from the planning input. + let mut builder = input.into_builder(); + builder.sleds_mut().remove(&expunged_sled_id); + let input = builder.build(); + + let blueprint3 = Planner::new_based_on( + logctx.log.clone(), + &blueprint2, + &input, + "test_blueprint3", + &collection, + ) + .expect("created planner") + .with_rng_seed((TEST_NAME, "bp3")) + .plan() + .expect("failed to plan"); + + // There should be no changes to the blueprint; we don't yet garbage + // collect zones, so we should still have the sled's expunged zones + // (even though the sled itself is no longer present in the list of + // commissioned sleds). 
+ let diff = blueprint3.diff_since_blueprint(&blueprint2).unwrap(); + println!( + "2 -> 3 (decommissioned {expunged_sled_id}):\n{}", + diff.display() + ); + assert_eq!(diff.sleds_added().count(), 0); + assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_modified().count(), 0); + assert_eq!(diff.sleds_unchanged().count(), DEFAULT_N_SLEDS); + + logctx.cleanup_successful(); + } } diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt new file mode 100644 index 0000000000..28f08c9c78 --- /dev/null +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_1_2.txt @@ -0,0 +1,81 @@ +from: blueprint 516e80a3-b362-4fac-bd3c-4559717120dd +to: blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e + + -------------------------------------------------------------------------------------------------------- + zone type zone ID disposition underlay IP status + -------------------------------------------------------------------------------------------------------- + + UNCHANGED SLEDS: + + sled d67ce8f0-a691-4010-b414-420d82e80527: blueprint zones at generation 2 + crucible 15dbaa30-1539-49d6-970d-ba5962960f33 in service fd00:1122:3344:101::27 + crucible 1ec4cc7b-2f00-4d13-8176-3b9815533ae9 in service fd00:1122:3344:101::24 + crucible 2e65b765-5c41-4519-bf4e-e2a68569afc1 in service fd00:1122:3344:101::23 + crucible 3d4143df-e212-4774-9258-7d9b421fac2e in service fd00:1122:3344:101::25 + crucible 5d9d8fa7-8379-470b-90ba-fe84a3c45512 in service fd00:1122:3344:101::2a + crucible 70232a6d-6c9d-4fa6-a34d-9c73d940db33 in service fd00:1122:3344:101::28 + crucible 8567a616-a709-4c8c-a323-4474675dad5c in service fd00:1122:3344:101::2c + crucible 8b0b8623-930a-41af-9f9b-ca28b1b11139 in service fd00:1122:3344:101::29 + crucible cf87d2a3-d323-44a3-a87e-adc4ef6c75f4 in service fd00:1122:3344:101::2b + crucible eac6c0a0-baa5-4490-9cee-65198b7fbd9c in service fd00:1122:3344:101::26 + internal_ntp ad76d200-5675-444b-b19c-684689ff421f in service fd00:1122:3344:101::21 + nexus e9bf2525-5fa0-4c1b-b52d-481225083845 in service fd00:1122:3344:101::22 + + MODIFIED SLEDS: + +* sled a1b477db-b629-48eb-911d-1ccdafca75b9: blueprint zones at generation: 2 -> 3 +- crucible 1e1ed0cc-1adc-410f-943a-d1a3107de619 in service fd00:1122:3344:103::27 modified ++ ├─ expunged fd00:1122:3344:103::27 +* └─ changed: disposition +- crucible 2307bbed-02ba-493b-89e3-46585c74c8fc in service fd00:1122:3344:103::28 modified ++ ├─ expunged fd00:1122:3344:103::28 +* └─ changed: disposition +- crucible 4e36b7ef-5684-4304-b7c3-3c31aaf83d4f in service fd00:1122:3344:103::23 modified ++ ├─ expunged fd00:1122:3344:103::23 +* └─ changed: disposition +- crucible 603e629d-2599-400e-b879-4134d4cc426e in service fd00:1122:3344:103::2c modified ++ ├─ expunged fd00:1122:3344:103::2c +* └─ changed: disposition +- crucible 9179d6dc-387d-424e-8d62-ed59b2c728f6 in service fd00:1122:3344:103::2a modified ++ ├─ expunged fd00:1122:3344:103::2a +* └─ changed: disposition +- crucible c28d7b4b-a259-45ad-945d-f19ca3c6964c in service fd00:1122:3344:103::29 modified ++ ├─ expunged fd00:1122:3344:103::29 +* └─ changed: disposition +- crucible e29998e7-9ed2-46b6-bb70-4118159fe07f in service fd00:1122:3344:103::26 modified ++ ├─ expunged fd00:1122:3344:103::26 +* └─ changed: disposition +- crucible f06e91a1-0c17-4cca-adbc-1c9b67bdb11d in service fd00:1122:3344:103::2b modified ++ ├─ expunged fd00:1122:3344:103::2b +* └─ changed: 
disposition +- crucible f11f5c60-1ac7-4630-9a3a-a9bc85c75203 in service fd00:1122:3344:103::25 modified ++ ├─ expunged fd00:1122:3344:103::25 +* └─ changed: disposition +- crucible f231e4eb-3fc9-4964-9d71-2c41644852d9 in service fd00:1122:3344:103::24 modified ++ ├─ expunged fd00:1122:3344:103::24 +* └─ changed: disposition +- internal_ntp c62b87b6-b98d-4d22-ba4f-cee4499e2ba8 in service fd00:1122:3344:103::21 modified ++ ├─ expunged fd00:1122:3344:103::21 +* └─ changed: disposition +- nexus 6a70a233-1900-43c0-9c00-aa9d1f7adfbc in service fd00:1122:3344:103::22 modified ++ ├─ expunged fd00:1122:3344:103::22 +* └─ changed: disposition + +* sled fefcf4cf-f7e7-46b3-b629-058526ce440e: blueprint zones at generation: 2 -> 3 + crucible 0e2b035e-1de1-48af-8ac0-5316418e3de1 in service fd00:1122:3344:102::2a + crucible 4f8ce495-21dd-48a1-859c-80d34ce394ed in service fd00:1122:3344:102::23 + crucible 5c78756d-6182-4c27-a507-3419e8dbe76b in service fd00:1122:3344:102::28 + crucible a1ae92ac-e1f1-4654-ab54-5b75ba7c44d6 in service fd00:1122:3344:102::24 + crucible a308d3e1-118c-440a-947a-8b6ab7d833ab in service fd00:1122:3344:102::25 + crucible b7402110-d88f-4ca4-8391-4a2fda6ad271 in service fd00:1122:3344:102::29 + crucible b7ae596e-0c85-40b2-bb47-df9f76db3cca in service fd00:1122:3344:102::2b + crucible c552280f-ba02-4f8d-9049-bd269e6b7845 in service fd00:1122:3344:102::26 + crucible cf13b878-47f1-4ba0-b8c2-9f3e15f2ee87 in service fd00:1122:3344:102::2c + crucible e6d0df1f-9f98-4c5a-9540-8444d1185c7d in service fd00:1122:3344:102::27 + internal_ntp f68846ad-4619-4747-8293-a2b4aeeafc5b in service fd00:1122:3344:102::21 + nexus 99c6401d-9796-4ae1-bf0c-9a097cf21c33 in service fd00:1122:3344:102::22 ++ nexus c8851a11-a4f7-4b21-9281-6182fd15dc8d in service fd00:1122:3344:102::2d added + + METADATA: + internal DNS version: 1 (unchanged) + external DNS version: 1 (unchanged) diff --git a/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt new file mode 100644 index 0000000000..ca08bd5c33 --- /dev/null +++ b/nexus/reconfigurator/planning/tests/output/planner_decommissions_sleds_bp2.txt @@ -0,0 +1,56 @@ +blueprint 1ac2d88f-27dd-4506-8585-6b2be832528e +parent: 516e80a3-b362-4fac-bd3c-4559717120dd + + -------------------------------------------------------------------------------------------- + zone type zone ID disposition underlay IP + -------------------------------------------------------------------------------------------- + + sled a1b477db-b629-48eb-911d-1ccdafca75b9: blueprint zones at generation 3 + crucible 1e1ed0cc-1adc-410f-943a-d1a3107de619 expunged fd00:1122:3344:103::27 + crucible 2307bbed-02ba-493b-89e3-46585c74c8fc expunged fd00:1122:3344:103::28 + crucible 4e36b7ef-5684-4304-b7c3-3c31aaf83d4f expunged fd00:1122:3344:103::23 + crucible 603e629d-2599-400e-b879-4134d4cc426e expunged fd00:1122:3344:103::2c + crucible 9179d6dc-387d-424e-8d62-ed59b2c728f6 expunged fd00:1122:3344:103::2a + crucible c28d7b4b-a259-45ad-945d-f19ca3c6964c expunged fd00:1122:3344:103::29 + crucible e29998e7-9ed2-46b6-bb70-4118159fe07f expunged fd00:1122:3344:103::26 + crucible f06e91a1-0c17-4cca-adbc-1c9b67bdb11d expunged fd00:1122:3344:103::2b + crucible f11f5c60-1ac7-4630-9a3a-a9bc85c75203 expunged fd00:1122:3344:103::25 + crucible f231e4eb-3fc9-4964-9d71-2c41644852d9 expunged fd00:1122:3344:103::24 + internal_ntp c62b87b6-b98d-4d22-ba4f-cee4499e2ba8 expunged fd00:1122:3344:103::21 + nexus 
6a70a233-1900-43c0-9c00-aa9d1f7adfbc expunged fd00:1122:3344:103::22 + + sled d67ce8f0-a691-4010-b414-420d82e80527: blueprint zones at generation 2 + crucible 15dbaa30-1539-49d6-970d-ba5962960f33 in service fd00:1122:3344:101::27 + crucible 1ec4cc7b-2f00-4d13-8176-3b9815533ae9 in service fd00:1122:3344:101::24 + crucible 2e65b765-5c41-4519-bf4e-e2a68569afc1 in service fd00:1122:3344:101::23 + crucible 3d4143df-e212-4774-9258-7d9b421fac2e in service fd00:1122:3344:101::25 + crucible 5d9d8fa7-8379-470b-90ba-fe84a3c45512 in service fd00:1122:3344:101::2a + crucible 70232a6d-6c9d-4fa6-a34d-9c73d940db33 in service fd00:1122:3344:101::28 + crucible 8567a616-a709-4c8c-a323-4474675dad5c in service fd00:1122:3344:101::2c + crucible 8b0b8623-930a-41af-9f9b-ca28b1b11139 in service fd00:1122:3344:101::29 + crucible cf87d2a3-d323-44a3-a87e-adc4ef6c75f4 in service fd00:1122:3344:101::2b + crucible eac6c0a0-baa5-4490-9cee-65198b7fbd9c in service fd00:1122:3344:101::26 + internal_ntp ad76d200-5675-444b-b19c-684689ff421f in service fd00:1122:3344:101::21 + nexus e9bf2525-5fa0-4c1b-b52d-481225083845 in service fd00:1122:3344:101::22 + + sled fefcf4cf-f7e7-46b3-b629-058526ce440e: blueprint zones at generation 3 + crucible 0e2b035e-1de1-48af-8ac0-5316418e3de1 in service fd00:1122:3344:102::2a + crucible 4f8ce495-21dd-48a1-859c-80d34ce394ed in service fd00:1122:3344:102::23 + crucible 5c78756d-6182-4c27-a507-3419e8dbe76b in service fd00:1122:3344:102::28 + crucible a1ae92ac-e1f1-4654-ab54-5b75ba7c44d6 in service fd00:1122:3344:102::24 + crucible a308d3e1-118c-440a-947a-8b6ab7d833ab in service fd00:1122:3344:102::25 + crucible b7402110-d88f-4ca4-8391-4a2fda6ad271 in service fd00:1122:3344:102::29 + crucible b7ae596e-0c85-40b2-bb47-df9f76db3cca in service fd00:1122:3344:102::2b + crucible c552280f-ba02-4f8d-9049-bd269e6b7845 in service fd00:1122:3344:102::26 + crucible cf13b878-47f1-4ba0-b8c2-9f3e15f2ee87 in service fd00:1122:3344:102::2c + crucible e6d0df1f-9f98-4c5a-9540-8444d1185c7d in service fd00:1122:3344:102::27 + internal_ntp f68846ad-4619-4747-8293-a2b4aeeafc5b in service fd00:1122:3344:102::21 + nexus 99c6401d-9796-4ae1-bf0c-9a097cf21c33 in service fd00:1122:3344:102::22 + nexus c8851a11-a4f7-4b21-9281-6182fd15dc8d in service fd00:1122:3344:102::2d + +METADATA: + created by: test_blueprint2 + created at: 1970-01-01T00:00:00.000Z + comment: sled a1b477db-b629-48eb-911d-1ccdafca75b9 (sled policy is expunged): 12 zones expunged, sled d67ce8f0-a691-4010-b414-420d82e80527: altered disks, sled fefcf4cf-f7e7-46b3-b629-058526ce440e: altered disks + internal DNS version: 1 + external DNS version: 1 diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index ecc5b125d9..a87243733f 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -21,6 +21,20 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 + sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation 2 + crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::2c + crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::23 + crucible 6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::24 + crucible 
6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::29 + crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:102::25 + crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged fd00:1122:3344:102::2b + crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::26 + crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::27 + crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::28 + crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::2a + internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d expunged fd00:1122:3344:102::21 + nexus 01d58626-e1b0-480f-96be-ac784863c7dc expunged fd00:1122:3344:102::22 + MODIFIED SLEDS: * sled 48d95fef-bc9f-4f50-9a53-1e075836291d: blueprint zones at generation: 2 -> 3 @@ -61,44 +75,6 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 + ├─ expunged fd00:1122:3344:103::22 * └─ changed: disposition -* sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation: 2 -> 3 -- crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 in service fd00:1122:3344:102::2c modified -+ ├─ expunged fd00:1122:3344:102::2c -* └─ changed: disposition -- crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in service fd00:1122:3344:102::23 modified -+ ├─ expunged fd00:1122:3344:102::23 -* └─ changed: disposition -- crucible 6464d025-4652-4948-919e-740bec5699b1 in service fd00:1122:3344:102::24 modified -+ ├─ expunged fd00:1122:3344:102::24 -* └─ changed: disposition -- crucible 6939ce48-b17c-4616-b176-8a419a7697be in service fd00:1122:3344:102::29 modified -+ ├─ expunged fd00:1122:3344:102::29 -* └─ changed: disposition -- crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service fd00:1122:3344:102::25 modified -+ ├─ expunged fd00:1122:3344:102::25 -* └─ changed: disposition -- crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service fd00:1122:3344:102::2b modified -+ ├─ expunged fd00:1122:3344:102::2b -* └─ changed: disposition -- crucible 9fd52961-426f-4e62-a644-b70871103fca in service fd00:1122:3344:102::26 modified -+ ├─ expunged fd00:1122:3344:102::26 -* └─ changed: disposition -- crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service fd00:1122:3344:102::27 modified -+ ├─ expunged fd00:1122:3344:102::27 -* └─ changed: disposition -- crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service fd00:1122:3344:102::28 modified -+ ├─ expunged fd00:1122:3344:102::28 -* └─ changed: disposition -- crucible c407795c-6c8b-428e-8ab8-b962913c447f in service fd00:1122:3344:102::2a modified -+ ├─ expunged fd00:1122:3344:102::2a -* └─ changed: disposition -- internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d in service fd00:1122:3344:102::21 modified -+ ├─ expunged fd00:1122:3344:102::21 -* └─ changed: disposition -- nexus 01d58626-e1b0-480f-96be-ac784863c7dc in service fd00:1122:3344:102::22 modified -+ ├─ expunged fd00:1122:3344:102::22 -* └─ changed: disposition - * sled 75bc286f-2b4b-482c-9431-59272af529da: blueprint zones at generation: 2 -> 3 crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 00ca05b4b8..ec6c505c87 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -43,7 +43,7 @@ to: 
 
 REMOVED SLEDS:
 
-- sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation 3
+- sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation 2
 -   crucible      3b3c14b6-a8e2-4054-a577-8d96cb576230  expunged  fd00:1122:3344:102::2c  removed
 -   crucible      47a87c6e-ef45-4d52-9a3e-69cdd96737cc  expunged  fd00:1122:3344:102::23  removed
 -   crucible      6464d025-4652-4948-919e-740bec5699b1  expunged  fd00:1122:3344:102::24  removed
diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt
index 623bf0a756..294a12f77a 100644
--- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt
+++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt
@@ -33,7 +33,7 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b
     internal_ntp  67d913e0-0005-4599-9b28-0abbf6cc2916  expunged    fd00:1122:3344:103::21
     nexus         2aa0ea4f-3561-4989-a98c-9ab7d9a240fb  expunged    fd00:1122:3344:103::22
 
-  sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation 3
+  sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation 2
     crucible      3b3c14b6-a8e2-4054-a577-8d96cb576230  expunged    fd00:1122:3344:102::2c
     crucible      47a87c6e-ef45-4d52-9a3e-69cdd96737cc  expunged    fd00:1122:3344:102::23
     crucible      6464d025-4652-4948-919e-740bec5699b1  expunged    fd00:1122:3344:102::24
@@ -82,8 +82,8 @@ parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b
     nexus         c26b3bda-5561-44a1-a69f-22103fe209a1  in service  fd00:1122:3344:101::2f
 
 METADATA:
-  created by:           test_blueprint2
-  created at:           1970-01-01T00:00:00.000Z
-  comment:              sled 48d95fef-bc9f-4f50-9a53-1e075836291d (sled policy is expunged): 12 zones expunged, sled 68d24ac5-f341-49ea-a92a-0381b52ab387 (sled state is decommissioned): 12 zones expunged, sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: altered disks, sled 75bc286f-2b4b-482c-9431-59272af529da: altered disks, sled affab35f-600a-4109-8ea0-34a067a4e0bc: altered disks
-  internal DNS version: 1
-  external DNS version: 1
+  created by:           test_blueprint2
+  created at:           1970-01-01T00:00:00.000Z
+  comment:              sled 48d95fef-bc9f-4f50-9a53-1e075836291d (sled policy is expunged): 12 zones expunged, sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: altered disks, sled 75bc286f-2b4b-482c-9431-59272af529da: altered disks, sled affab35f-600a-4109-8ea0-34a067a4e0bc: altered disks
+  internal DNS version: 1
+  external DNS version: 1
diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs
index 6346db47bd..305644bc93 100644
--- a/nexus/reconfigurator/preparation/src/lib.rs
+++ b/nexus/reconfigurator/preparation/src/lib.rs
@@ -182,7 +182,7 @@ pub async fn reconfigurator_state_load(
 ) -> Result {
     opctx.check_complex_operations_allowed()?;
     let sled_rows = datastore
-        .sled_list_all_batched(opctx, SledFilter::All)
+        .sled_list_all_batched(opctx, SledFilter::Commissioned)
         .await
         .context("listing sleds")?;
     let zpool_rows = datastore
diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs
index b89fdb5a9c..98f1f84744 100644
--- a/nexus/src/app/deployment.rs
+++ b/nexus/src/app/deployment.rs
@@ -130,8 +130,9 @@ impl super::Nexus {
         let creator = self.id.to_string();
         let datastore = self.datastore();
 
-        let sled_rows =
-            datastore.sled_list_all_batched(opctx, SledFilter::All).await?;
+        let sled_rows = datastore
+            .sled_list_all_batched(opctx, SledFilter::Commissioned)
+            .await?;
         let zpool_rows =
             datastore.zpool_list_all_external_batched(opctx).await?;
         let ip_pool_range_rows = {
diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs
index fe013b8ca4..b7b5bf6aac 100644
--- a/nexus/types/src/deployment.rs
+++ b/nexus/types/src/deployment.rs
@@ -106,19 +106,24 @@ pub struct Blueprint {
     /// unique identifier for this blueprint
     pub id: Uuid,
 
-    /// A map of sled id -> zones deployed on each sled, along with the
-    /// [`BlueprintZoneDisposition`] for each zone.
+    /// A map of sled id -> desired state of the sled.
     ///
     /// A sled is considered part of the control plane cluster iff it has an
     /// entry in this map.
+    pub sled_state: BTreeMap,
+
+    /// A map of sled id -> zones deployed on each sled, along with the
+    /// [`BlueprintZoneDisposition`] for each zone.
+    ///
+    /// Unlike `sled_state`, this map may contain entries for sleds that are no
+    /// longer a part of the control plane cluster (e.g., sleds that have been
+    /// decommissioned, but still have expunged zones where cleanup has not yet
+    /// completed).
     pub blueprint_zones: BTreeMap,
 
     /// A map of sled id -> disks in use on each sled.
     pub blueprint_disks: BTreeMap,
 
-    /// A map of sled id -> desired state of the sled.
-    pub sled_state: BTreeMap,
-
     /// which blueprint this blueprint is based on
     pub parent_blueprint_id: Option,
 
diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs
index b932b6000e..1975cfaae0 100644
--- a/nexus/types/src/deployment/planning_input.rs
+++ b/nexus/types/src/deployment/planning_input.rs
@@ -244,8 +244,11 @@ pub enum SledFilter {
     // ---
     // Prefer to keep this list in alphabetical order.
     // ---
-    /// All sleds.
-    All,
+    /// All sleds that are currently part of the control plane cluster.
+    ///
+    /// Intentionally omits decommissioned sleds, but is otherwise the filter to
+    /// fetch "all sleds regardless of current policy or state".
+    Commissioned,
 
     /// Sleds that are eligible for discretionary services.
     Discretionary,
@@ -308,7 +311,7 @@ impl SledPolicy {
             SledPolicy::InService {
                 provision_policy: SledProvisionPolicy::Provisionable,
             } => match filter {
-                SledFilter::All => true,
+                SledFilter::Commissioned => true,
                 SledFilter::Discretionary => true,
                 SledFilter::InService => true,
                 SledFilter::QueryDuringInventory => true,
@@ -318,7 +321,7 @@ impl SledPolicy {
             SledPolicy::InService {
                 provision_policy: SledProvisionPolicy::NonProvisionable,
             } => match filter {
-                SledFilter::All => true,
+                SledFilter::Commissioned => true,
                 SledFilter::Discretionary => false,
                 SledFilter::InService => true,
                 SledFilter::QueryDuringInventory => true,
@@ -326,7 +329,7 @@
                 SledFilter::VpcFirewall => true,
             },
             SledPolicy::Expunged => match filter {
-                SledFilter::All => true,
+                SledFilter::Commissioned => true,
                 SledFilter::Discretionary => false,
                 SledFilter::InService => false,
                 SledFilter::QueryDuringInventory => false,
@@ -356,7 +359,7 @@ impl SledState {
         // See `SledFilter::matches` above for some notes.
         match self {
             SledState::Active => match filter {
-                SledFilter::All => true,
+                SledFilter::Commissioned => true,
                 SledFilter::Discretionary => true,
                 SledFilter::InService => true,
                 SledFilter::QueryDuringInventory => true,
@@ -364,7 +367,7 @@ impl SledState {
                 SledFilter::VpcFirewall => true,
             },
             SledState::Decommissioned => match filter {
-                SledFilter::All => true,
+                SledFilter::Commissioned => false,
                 SledFilter::Discretionary => false,
                 SledFilter::InService => false,
                 SledFilter::QueryDuringInventory => false,
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index a5e0d708eb..d694e5ee0d 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -1744,7 +1744,7 @@
           }
         },
         "blueprint_zones": {
-          "description": "A map of sled id -> zones deployed on each sled, along with the [`BlueprintZoneDisposition`] for each zone.\n\nA sled is considered part of the control plane cluster iff it has an entry in this map.",
+          "description": "A map of sled id -> zones deployed on each sled, along with the [`BlueprintZoneDisposition`] for each zone.\n\nUnlike `sled_state`, this map may contain entries for sleds that are no longer a part of the control plane cluster (e.g., sleds that have been decommissioned, but still have expunged zones where cleanup has not yet completed).",
           "type": "object",
           "additionalProperties": {
            "$ref": "#/components/schemas/BlueprintZonesConfig"
@@ -1786,7 +1786,7 @@
          "format": "uuid"
        },
        "sled_state": {
-          "description": "A map of sled id -> desired state of the sled.",
+          "description": "A map of sled id -> desired state of the sled.\n\nA sled is considered part of the control plane cluster iff it has an entry in this map.",
          "type": "object",
          "additionalProperties": {
            "$ref": "#/components/schemas/SledState"