diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs
index 9fb94f043e7..b55243321db 100644
--- a/nexus/db-queries/src/db/datastore/instance.rs
+++ b/nexus/db-queries/src/db/datastore/instance.rs
@@ -563,10 +563,10 @@ impl DataStore {
         })
     }
 
-    /// Lists all instances on in-service sleds with active Propolis VMM
-    /// processes, returning the instance along with the VMM on which it's
-    /// running, the sled on which the VMM is running, and the project that owns
-    /// the instance.
+    /// Lists all instances on sleds with active Propolis VMM processes,
+    /// returning the instance along with the VMM on which it's running, the
+    /// sled on which the VMM is running, and the project that owns the
+    /// instance.
    ///
     /// The query performed by this function is paginated by the sled's UUID.
     pub async fn instance_and_vmm_list_by_sled_agent(
@@ -583,7 +583,7 @@ impl DataStore {
 
         let result = paginated(sled_dsl::sled, sled_dsl::id, pagparams)
             .filter(sled_dsl::time_deleted.is_null())
-            .sled_filter(SledFilter::InService)
+            .sled_filter(SledFilter::CouldBeRunningInstances)
             .inner_join(
                 vmm_dsl::vmm
                     .on(vmm_dsl::sled_id
@@ -601,7 +601,7 @@ impl DataStore {
                     ),
                 ),
             )
-            .sled_filter(SledFilter::InService)
+            .sled_filter(SledFilter::CouldBeRunningInstances)
             .select((
                 Sled::as_select(),
                 Instance::as_select(),
diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs
index e51d59075e8..1a9ca254f58 100644
--- a/nexus/db-queries/src/db/datastore/physical_disk.rs
+++ b/nexus/db-queries/src/db/datastore/physical_disk.rs
@@ -56,18 +56,28 @@ impl DataStore {
             let zpool = zpool.clone();
             async move {
                 // Verify that the sled into which we are inserting the disk
-                // and zpool pair is still in-service.
-                //
-                // Although the "physical_disk_insert" and "zpool_insert"
+                // and zpool pair is still eligible to host zpools; i.e.,
+                // although the "physical_disk_insert" and "zpool_insert"
                 // functions below check that the Sled hasn't been deleted,
                 // they do not currently check that the Sled has not been
                 // expunged.
-                Self::check_sled_in_service_on_connection(
-                    &conn,
-                    disk.sled_id,
-                )
-                .await
-                .map_err(|txn_error| txn_error.into_diesel(&err))?;
+                let valid_sled =
+                    Self::check_sled_matches_filter_on_connection(
+                        &conn,
+                        disk.sled_id,
+                        SledFilter::EligibleForZpools,
+                    )
+                    .await
+                    .map_err(|txn_error| txn_error.into_diesel(&err))?;
+                if !valid_sled {
+                    let txn_error = TransactionError::CustomError(
+                        Error::internal_error(&format!(
+                            "Sled {} is not in service",
+                            disk.sled_id
+                        )),
+                    );
+                    return Err(txn_error.into_diesel(&err));
+                }
 
                 Self::physical_disk_insert_on_connection(
                     &conn, opctx, disk,
@@ -202,9 +212,9 @@ impl DataStore {
         use db::schema::sled::dsl as sled_dsl;
 
         sled_dsl::sled
-            // If the sled is not in-service, drop the list immediately.
+            // If the sled is not eligible for zpools, drop the list immediately.
             .filter(sled_dsl::time_deleted.is_null())
-            .sled_filter(SledFilter::InService)
+            .sled_filter(SledFilter::EligibleForZpools)
             // Look up all inventory physical disks that could match this sled
             .inner_join(
                 inv_physical_disk_dsl::inv_physical_disk.on(
diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs
index 381b25dc17d..2e777348b9e 100644
--- a/nexus/db-queries/src/db/datastore/sled.rs
+++ b/nexus/db-queries/src/db/datastore/sled.rs
@@ -38,7 +38,6 @@ use omicron_common::api::external::DeleteResult;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::ListResultVec;
 use omicron_common::api::external::ResourceType;
-use omicron_common::bail_unless;
 use std::fmt;
 use strum::IntoEnumIterator;
 use thiserror::Error;
@@ -99,30 +98,25 @@ impl DataStore {
         Ok((sled, was_modified))
     }
 
-    /// Confirms that a sled exists and is in-service.
+    /// Confirms that a sled exists and matches the given [`SledFilter`].
     ///
     /// This function may be called from a transaction context.
-    pub async fn check_sled_in_service_on_connection(
+    pub async fn check_sled_matches_filter_on_connection(
         conn: &async_bb8_diesel::Connection<DbConnection>,
         sled_id: Uuid,
-    ) -> Result<(), TransactionError<Error>> {
+        sled_filter: SledFilter,
+    ) -> Result<bool, TransactionError<Error>> {
         use db::schema::sled::dsl;
-        let sled_exists_and_in_service = diesel::select(diesel::dsl::exists(
+        let sled_exists_and_matches = diesel::select(diesel::dsl::exists(
            dsl::sled
                 .filter(dsl::time_deleted.is_null())
                 .filter(dsl::id.eq(sled_id))
-                .sled_filter(SledFilter::InService),
+                .sled_filter(sled_filter),
         ))
         .get_result_async::<bool>(conn)
         .await?;
 
-        bail_unless!(
-            sled_exists_and_in_service,
-            "Sled {} is not in service",
-            sled_id,
-        );
-
-        Ok(())
+        Ok(sled_exists_and_matches)
     }
 
     pub async fn sled_list(
@@ -1285,9 +1279,7 @@ mod test {
             (
                 // In-service and active sleds can be marked as expunged.
                 Before::new(
-                    predicate::in_iter(SledPolicy::all_matching(
-                        SledFilter::InService,
-                    )),
+                    predicate::in_iter(SledPolicy::all_in_service()),
                     predicate::eq(SledState::Active),
                 ),
                 SledTransition::Policy(SledPolicy::Expunged),
@@ -1296,9 +1288,7 @@ mod test {
                 // The provision policy of in-service sleds can be changed, or
                 // kept the same (1 of 2).
                 Before::new(
-                    predicate::in_iter(SledPolicy::all_matching(
-                        SledFilter::InService,
-                    )),
+                    predicate::in_iter(SledPolicy::all_in_service()),
                     predicate::eq(SledState::Active),
                 ),
                 SledTransition::Policy(SledPolicy::InService {
@@ -1308,9 +1298,7 @@ mod test {
             (
                 // (2 of 2)
                 Before::new(
-                    predicate::in_iter(SledPolicy::all_matching(
-                        SledFilter::InService,
-                    )),
+                    predicate::in_iter(SledPolicy::all_in_service()),
                     predicate::eq(SledState::Active),
                 ),
                 SledTransition::Policy(SledPolicy::InService {
diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs
index 0e9ab394f18..2d31090dd00 100644
--- a/nexus/reconfigurator/execution/src/lib.rs
+++ b/nexus/reconfigurator/execution/src/lib.rs
@@ -144,7 +144,7 @@ where
         .map_err(|err| vec![err])?;
 
     let sleds_by_id: BTreeMap<SledUuid, _> = datastore
-        .sled_list_all_batched(&opctx, SledFilter::InService)
+        .sled_list_all_batched(&opctx, SledFilter::BlueprintExecutionTarget)
         .await
         .context("listing all sleds")
         .map_err(|e| vec![e])?
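
Reviewer note: the datastore helper now reports "no matching sled" as `Ok(false)` instead of bailing internally, so each call site picks its own error (see the physical_disk.rs call site above). Relatedly, the sled.rs tests switch from `SledPolicy::all_matching(SledFilter::InService)` to the new `SledPolicy::all_in_service()` helper (added in planning_input.rs below), since the `InService` filter goes away. A minimal sketch of the property those tests rely on, assuming only what the helper's definition guarantees:

```rust
// Sketch: `all_in_service()` should yield only `InService` sub-policies,
// never `Expunged`, regardless of how many provision policies exist.
let policies: Vec<SledPolicy> = SledPolicy::all_in_service().collect();
assert!(!policies.is_empty());
assert!(policies
    .iter()
    .all(|policy| matches!(policy, SledPolicy::InService { .. })));
```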
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
index 4177d4884ff..012cfde1894 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs
@@ -354,7 +354,7 @@ impl<'a> BlueprintBuilder<'a> {
             .into_zones_map(self.input.all_sled_ids(SledFilter::Commissioned));
         let blueprint_disks = self
             .disks
-            .into_disks_map(self.input.all_sled_ids(SledFilter::InService));
+            .into_disks_map(self.input.all_sled_ids(SledFilter::Commissioned));
         Blueprint {
             id: self.rng.blueprint_rng.next(),
             blueprint_zones,
@@ -1641,7 +1641,7 @@ pub mod test {
         assert!(builder.disks.parent_disks.is_empty());
 
         for (sled_id, sled_resources) in
-            input.all_sled_resources(SledFilter::InService)
+            input.all_sled_resources(SledFilter::Commissioned)
         {
             assert_eq!(
                 builder
@@ -2067,7 +2067,7 @@ pub mod test {
         // Pick an arbitrary sled.
         let (target_sled_id, sled_resources) = input
-            .all_sled_resources(SledFilter::InService)
+            .all_sled_resources(SledFilter::Commissioned)
             .next()
             .expect("at least one sled");
diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs
index 08c25c20fd9..ba2d84c9e05 100644
--- a/nexus/reconfigurator/planning/src/planner.rs
+++ b/nexus/reconfigurator/planning/src/planner.rs
@@ -226,7 +226,7 @@ impl<'a> Planner<'a> {
         let mut sleds_waiting_for_ntp_zone = BTreeSet::new();
 
         for (sled_id, sled_resources) in
-            self.input.all_sled_resources(SledFilter::InService)
+            self.input.all_sled_resources(SledFilter::MandatoryServices)
         {
             // First, we need to ensure that sleds are using their expected
             // disks. This is necessary before we can allocate any zones.
@@ -418,7 +418,10 @@ impl<'a> Planner<'a> {
         // services, but will not include sleds that have been expunged or
         // decommissioned.
         let mut num_existing_kind_zones = 0;
-        for sled_id in self.input.all_sled_ids(SledFilter::InService) {
+        for sled_id in self
+            .input
+            .all_sled_ids(SledFilter::CouldBeRunningDiscretionaryServices)
+        {
             let num_zones_of_kind = self
                 .blueprint
                 .sled_num_zones_of_kind(sled_id, zone_kind.into());
diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs
index ee3818f40c8..be2d82ad3f0 100644
--- a/nexus/src/app/rack.rs
+++ b/nexus/src/app/rack.rs
@@ -763,10 +763,10 @@ impl super::Nexus {
             limit: NonZeroU32::new(32).unwrap(),
         };
 
-        debug!(self.log, "Listing sleds");
+        debug!(self.log, "Listing commissioned sleds");
         let sleds = self
             .db_datastore
-            .sled_list(opctx, &pagparams, SledFilter::InService)
+            .sled_list(opctx, &pagparams, SledFilter::Commissioned)
             .await?;
 
         let mut uninitialized_sleds: Vec<UninitializedSled> = collection
@@ -897,7 +897,15 @@ impl super::Nexus {
         opctx: &OpContext,
     ) -> Result<String, Error> {
         let addr = self
-            .sled_list(opctx, &DataPageParams::max_page())
+            .sled_list(
+                opctx,
+                &DataPageParams {
+                    marker: None,
+                    direction: dropshot::PaginationOrder::Ascending,
+                    limit: NonZeroU32::new(1).expect("1 is nonzero"),
+                },
+                SledFilter::SledAgentAvailable,
+            )
             .await?
             .get(0)
             .ok_or(Error::InternalError {
diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs
index fd5341ae80c..d592635be9f 100644
--- a/nexus/src/app/sled.rs
+++ b/nexus/src/app/sled.rs
@@ -116,10 +116,9 @@ impl super::Nexus {
         &self,
         opctx: &OpContext,
         pagparams: &DataPageParams<'_, Uuid>,
+        sled_filter: SledFilter,
     ) -> ListResultVec<db::model::Sled> {
-        self.db_datastore
-            .sled_list(&opctx, pagparams, SledFilter::InService)
-            .await
+        self.db_datastore.sled_list(&opctx, pagparams, sled_filter).await
     }
 
     pub async fn sled_client(
diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs
index 2678768b482..a912f4f67a3 100644
--- a/nexus/src/external_api/http_entrypoints.rs
+++ b/nexus/src/external_api/http_entrypoints.rs
@@ -40,6 +40,7 @@ use nexus_db_queries::db::lookup::ImageLookup;
 use nexus_db_queries::db::lookup::ImageParentLookup;
 use nexus_db_queries::db::model::Name;
 use nexus_db_queries::{authz, db::datastore::ProbeInfo};
+use nexus_types::deployment::SledFilter;
 use nexus_types::external_api::shared::BfdStatus;
 use omicron_common::api::external::http_pagination::marker_for_name;
 use omicron_common::api::external::http_pagination::marker_for_name_or_id;
@@ -5961,7 +5962,11 @@ async fn sled_list(
         let query = query_params.into_inner();
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
         let sleds = nexus
-            .sled_list(&opctx, &data_page_params_for(&rqctx, &query)?)
+            .sled_list(
+                &opctx,
+                &data_page_params_for(&rqctx, &query)?,
+                SledFilter::OperatorVisible,
+            )
             .await?
             .into_iter()
             .map(|s| s.into())
diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs
index c5e377aeb9b..7a5ad0aa48b 100644
--- a/nexus/types/src/deployment/planning_input.rs
+++ b/nexus/types/src/deployment/planning_input.rs
@@ -457,12 +457,31 @@ pub enum SledFilter {
     // ---
     // Prefer to keep this list in alphabetical order.
     // ---
+    /// All sleds that the blueprint execution background task should attempt to
+    /// configure.
+    ///
+    /// In general, this is "all commissioned, non-expunged sleds", as blueprint
+    /// execution will attempt to manage any commissioned sleds present in the
+    /// rack.
+    BlueprintExecutionTarget,
+
     /// All sleds that are currently part of the control plane cluster.
     ///
     /// Intentionally omits decommissioned sleds, but is otherwise the filter to
     /// fetch "all sleds regardless of current policy or state".
     Commissioned,
 
+    /// Sleds on which discretionary services may be running.
+    ///
+    /// Note that this is a superset of `Discretionary` (eligible for
+    /// discretionary control plane services): it includes all sleds eligible
+    /// for discretionary services _and_ any sleds that are not eligible for new
+    /// services but might still be running existing services.
+    CouldBeRunningDiscretionaryServices,
+
+    /// Sleds on which instances may be running.
+    CouldBeRunningInstances,
+
     /// All sleds that were previously part of the control plane cluster but
     /// have been decommissioned.
     ///
@@ -474,9 +493,19 @@ pub enum SledFilter {
     /// Sleds that are eligible for discretionary services.
     Discretionary,
 
-    /// Sleds that are in service (even if they might not be eligible for
-    /// discretionary services).
-    InService,
+    /// Sleds that are eligible to host zpools.
+    EligibleForZpools,
+
+    /// Sleds that are eligible for mandatory services (e.g., NTP and a Crucible
+    /// zone for each U.2).
+    MandatoryServices,
+
+    /// Sleds that should be reported to an operator when they request the list
+    /// of sleds.
+    ///
+    /// This will not include uninitialized sleds, which must be listed
+    /// separately, nor will it include expunged sleds.
+    OperatorVisible,
 
     /// Sleds whose sled agents should be queried for inventory
     QueryDuringInventory,
@@ -484,6 +513,10 @@ pub enum SledFilter {
     /// Sleds on which reservations can be created.
     ReservationCreate,
 
+    /// Sleds on which we expect to be able to reach sled-agent on the underlay
+    /// network.
+    SledAgentAvailable,
+
     /// Sleds which should be sent OPTE V2P mappings.
     VpcRouting,
 
@@ -535,34 +568,52 @@ impl SledPolicy {
             SledPolicy::InService {
                 provision_policy: SledProvisionPolicy::Provisionable,
             } => match filter {
+                SledFilter::BlueprintExecutionTarget => true,
                 SledFilter::Commissioned => true,
+                SledFilter::CouldBeRunningDiscretionaryServices => true,
+                SledFilter::CouldBeRunningInstances => true,
                 SledFilter::Decommissioned => false,
                 SledFilter::Discretionary => true,
-                SledFilter::InService => true,
+                SledFilter::EligibleForZpools => true,
+                SledFilter::MandatoryServices => true,
+                SledFilter::OperatorVisible => true,
                 SledFilter::QueryDuringInventory => true,
                 SledFilter::ReservationCreate => true,
+                SledFilter::SledAgentAvailable => true,
                 SledFilter::VpcRouting => true,
                 SledFilter::VpcFirewall => true,
             },
             SledPolicy::InService {
                 provision_policy: SledProvisionPolicy::NonProvisionable,
             } => match filter {
+                SledFilter::BlueprintExecutionTarget => true,
                 SledFilter::Commissioned => true,
+                SledFilter::CouldBeRunningDiscretionaryServices => true,
+                SledFilter::CouldBeRunningInstances => true,
                 SledFilter::Decommissioned => false,
                 SledFilter::Discretionary => false,
-                SledFilter::InService => true,
+                SledFilter::EligibleForZpools => true,
+                SledFilter::MandatoryServices => true,
+                SledFilter::OperatorVisible => true,
                 SledFilter::QueryDuringInventory => true,
                 SledFilter::ReservationCreate => false,
+                SledFilter::SledAgentAvailable => true,
                 SledFilter::VpcRouting => true,
                 SledFilter::VpcFirewall => true,
             },
             SledPolicy::Expunged => match filter {
+                SledFilter::BlueprintExecutionTarget => false,
                 SledFilter::Commissioned => true,
+                SledFilter::CouldBeRunningDiscretionaryServices => false,
+                SledFilter::CouldBeRunningInstances => false,
                 SledFilter::Decommissioned => true,
                 SledFilter::Discretionary => false,
-                SledFilter::InService => false,
+                SledFilter::EligibleForZpools => false,
+                SledFilter::MandatoryServices => false,
+                SledFilter::OperatorVisible => false,
                 SledFilter::QueryDuringInventory => false,
                 SledFilter::ReservationCreate => false,
+                SledFilter::SledAgentAvailable => false,
                 SledFilter::VpcRouting => false,
                 SledFilter::VpcFirewall => false,
             },
@@ -577,6 +628,14 @@ impl SledPolicy {
     pub fn all_matching(filter: SledFilter) -> impl Iterator<Item = Self> {
         Self::iter().filter(move |policy| policy.matches(filter))
     }
+
+    /// Returns all sub-policies of `SledPolicy::InService { .. }`.
+    pub fn all_in_service() -> impl Iterator<Item = Self> {
+        Self::iter().filter(|policy| match policy {
+            SledPolicy::InService { .. } => true,
+            SledPolicy::Expunged => false,
+        })
+    }
 }
 
 impl SledState {
@@ -589,22 +648,34 @@ impl SledState {
         // See `SledFilter::matches` above for some notes.
         match self {
             SledState::Active => match filter {
+                SledFilter::BlueprintExecutionTarget => true,
                 SledFilter::Commissioned => true,
+                SledFilter::CouldBeRunningDiscretionaryServices => true,
+                SledFilter::CouldBeRunningInstances => true,
                 SledFilter::Decommissioned => false,
                 SledFilter::Discretionary => true,
-                SledFilter::InService => true,
+                SledFilter::EligibleForZpools => true,
+                SledFilter::MandatoryServices => true,
+                SledFilter::OperatorVisible => true,
                 SledFilter::QueryDuringInventory => true,
                 SledFilter::ReservationCreate => true,
+                SledFilter::SledAgentAvailable => true,
                 SledFilter::VpcRouting => true,
                 SledFilter::VpcFirewall => true,
             },
             SledState::Decommissioned => match filter {
+                SledFilter::BlueprintExecutionTarget => false,
                 SledFilter::Commissioned => false,
+                SledFilter::CouldBeRunningDiscretionaryServices => false,
+                SledFilter::CouldBeRunningInstances => false,
                 SledFilter::Decommissioned => true,
                 SledFilter::Discretionary => false,
-                SledFilter::InService => false,
+                SledFilter::EligibleForZpools => false,
+                SledFilter::MandatoryServices => false,
+                SledFilter::OperatorVisible => false,
                 SledFilter::QueryDuringInventory => false,
                 SledFilter::ReservationCreate => false,
+                SledFilter::SledAgentAvailable => false,
                 SledFilter::VpcRouting => false,
                 SledFilter::VpcFirewall => false,
             },
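
Reviewer note: a sled is selected by a filter only when both tables agree, i.e. its policy and its state each match the filter. A hedged sketch of that conjunction, with spot-checks of the new variants against the tables above (the `sled_selected` helper is illustrative, not part of this change; import paths assume the current `nexus_types` layout):

```rust
use nexus_types::deployment::SledFilter;
use nexus_types::external_api::views::{
    SledPolicy, SledProvisionPolicy, SledState,
};

// Illustrative helper mirroring how the two `matches` tables combine.
fn sled_selected(
    policy: SledPolicy,
    state: SledState,
    filter: SledFilter,
) -> bool {
    policy.matches(filter) && state.matches(filter)
}

fn main() {
    // An active, non-provisionable sled may still be running instances...
    assert!(sled_selected(
        SledPolicy::InService {
            provision_policy: SledProvisionPolicy::NonProvisionable,
        },
        SledState::Active,
        SledFilter::CouldBeRunningInstances,
    ));
    // ...but an expunged sled is never a blueprint execution target...
    assert!(!sled_selected(
        SledPolicy::Expunged,
        SledState::Active,
        SledFilter::BlueprintExecutionTarget,
    ));
    // ...and a decommissioned sled matches only `Decommissioned`.
    assert!(sled_selected(
        SledPolicy::Expunged,
        SledState::Decommissioned,
        SledFilter::Decommissioned,
    ));
}
```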