Skip to content

Commit

Permalink
blueprint planner: skip non-provisionable sleds (#5003)
Browse files Browse the repository at this point in the history
  • Loading branch information
jgallagher authored Feb 8, 2024
1 parent 7ae7461 commit b941323
Show file tree
Hide file tree
Showing 5 changed files with 129 additions and 3 deletions.
7 changes: 6 additions & 1 deletion nexus/db-queries/src/db/datastore/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,7 @@ mod tests {
use nexus_test_utils::db::test_setup_database;
use nexus_types::deployment::Policy;
use nexus_types::deployment::SledResources;
use nexus_types::external_api::views::SledProvisionState;
use nexus_types::inventory::Collection;
use omicron_common::address::Ipv6Subnet;
use omicron_test_utils::dev;
Expand Down Expand Up @@ -1115,7 +1116,11 @@ mod tests {
})
.collect();
let ip = ip.unwrap_or_else(|| thread_rng().gen::<u128>().into());
SledResources { zpools, subnet: Ipv6Subnet::new(ip) }
SledResources {
provision_state: SledProvisionState::Provisionable,
zpools,
subnet: Ipv6Subnet::new(ip),
}
}

// Create a `Policy` that contains all the sleds found in `collection`
Expand Down
10 changes: 9 additions & 1 deletion nexus/deployment/src/blueprint_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ impl<'a> BlueprintZones<'a> {
#[cfg(test)]
pub mod test {
use super::*;
use nexus_types::external_api::views::SledProvisionState;
use omicron_common::address::IpRange;
use omicron_common::address::Ipv4Range;
use omicron_common::address::Ipv6Subnet;
Expand Down Expand Up @@ -890,7 +891,14 @@ pub mod test {
.collect();

let subnet = Ipv6Subnet::<SLED_PREFIX>::new(sled_ip);
policy.sleds.insert(sled_id, SledResources { zpools, subnet });
policy.sleds.insert(
sled_id,
SledResources {
provision_state: SledProvisionState::Provisionable,
zpools,
subnet,
},
);
sled_ip
}

Expand Down
105 changes: 105 additions & 0 deletions nexus/deployment/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::blueprint_builder::EnsureMultiple;
use crate::blueprint_builder::Error;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::Policy;
use nexus_types::external_api::views::SledProvisionState;
use nexus_types::inventory::Collection;
use slog::warn;
use slog::{info, Logger};
Expand Down Expand Up @@ -162,6 +163,20 @@ impl<'a> Planner<'a> {
}
}

// We've now placed all the services that should always exist on all
// sleds. Before moving on to make decisions about placing services that
// are _not_ present on all sleds, check the provision state of all our
// sleds so we can avoid any non-provisionable sleds under the
// assumption that there is something amiss with them.
sleds_ineligible_for_services.extend(
self.policy.sleds.iter().filter_map(|(sled_id, sled_info)| {
match sled_info.provision_state {
SledProvisionState::Provisionable => None,
SledProvisionState::NonProvisionable => Some(*sled_id),
}
}),
);

self.ensure_correct_number_of_nexus_zones(
&sleds_ineligible_for_services,
)?;
Expand Down Expand Up @@ -293,6 +308,7 @@ mod test {
use crate::blueprint_builder::test::verify_blueprint;
use crate::blueprint_builder::BlueprintBuilder;
use nexus_inventory::now_db_precision;
use nexus_types::external_api::views::SledProvisionState;
use nexus_types::inventory::OmicronZoneType;
use nexus_types::inventory::OmicronZonesFound;
use omicron_common::api::external::Generation;
Expand Down Expand Up @@ -627,4 +643,93 @@ mod test {

logctx.cleanup_successful();
}

/// Check that the planner will skip non-provisionable sleds when allocating
/// extra Nexus zones
#[test]
fn test_nexus_allocation_skips_nonprovisionable_sleds() {
let logctx = test_setup_log(
"planner_nexus_allocation_skips_nonprovisionable_sleds",
);

// Use our example inventory collection as a starting point.
let (collection, mut policy) = example();

// Build the initial blueprint.
let blueprint1 = BlueprintBuilder::build_initial_from_collection(
&collection,
&policy,
"the_test",
)
.expect("failed to create initial blueprint");

// This blueprint should only have 3 Nexus zones: one on each sled.
assert_eq!(blueprint1.omicron_zones.len(), 3);
for sled_config in blueprint1.omicron_zones.values() {
assert_eq!(
sled_config
.zones
.iter()
.filter(|z| z.zone_type.is_nexus())
.count(),
1
);
}

// Arbitrarily choose one of the sleds and mark it non-provisionable.
let nonprovisionable_sled_id = {
let (sled_id, resources) =
policy.sleds.iter_mut().next().expect("no sleds");
resources.provision_state = SledProvisionState::NonProvisionable;
*sled_id
};

// Now run the planner with a high number of target Nexus zones.
policy.target_nexus_zone_count = 14;
let blueprint2 = Planner::new_based_on(
logctx.log.clone(),
&blueprint1,
&policy,
"add more Nexus",
&collection,
)
.expect("failed to create planner")
.plan()
.expect("failed to plan");

let diff = blueprint1.diff(&blueprint2);
println!("1 -> 2 (added additional Nexus zones):\n{}", diff);
assert_eq!(diff.sleds_added().count(), 0);
assert_eq!(diff.sleds_removed().count(), 0);
let sleds = diff.sleds_changed().collect::<Vec<_>>();

// Only 2 of the 3 sleds should get additional Nexus zones. We expect a
// total of 11 new Nexus zones, which should be spread evenly across the
// two sleds (one gets 6 and the other gets 5), while the
// non-provisionable sled should be unchanged.
assert_eq!(sleds.len(), 2);
let mut total_new_nexus_zones = 0;
for (sled_id, sled_changes) in sleds {
assert!(sled_id != nonprovisionable_sled_id);
assert_eq!(sled_changes.zones_removed().count(), 0);
assert_eq!(sled_changes.zones_changed().count(), 0);
let zones = sled_changes.zones_added().collect::<Vec<_>>();
match zones.len() {
n @ (5 | 6) => {
total_new_nexus_zones += n;
}
n => {
panic!("unexpected number of zones added to {sled_id}: {n}")
}
}
for zone in &zones {
let OmicronZoneType::Nexus { .. } = zone.zone_type else {
panic!("unexpectedly added a non-Crucible zone: {zone:?}");
};
}
}
assert_eq!(total_new_nexus_zones, 11);

logctx.cleanup_successful();
}
}
6 changes: 5 additions & 1 deletion nexus/src/app/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,11 @@ impl super::Nexus {
let zpools = zpools_by_sled_id
.remove(&sled_id)
.unwrap_or_else(BTreeSet::new);
let sled_info = SledResources { subnet, zpools };
let sled_info = SledResources {
provision_state: sled_row.provision_state().into(),
subnet,
zpools,
};
(sled_id, sled_info)
})
.collect();
Expand Down
4 changes: 4 additions & 0 deletions nexus/types/src/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//! nexus/deployment does not currently know about nexus/db-model and it's
//! convenient to separate these concerns.)
use crate::external_api::views::SledProvisionState;
use crate::inventory::Collection;
pub use crate::inventory::NetworkInterface;
pub use crate::inventory::NetworkInterfaceKind;
Expand Down Expand Up @@ -63,6 +64,9 @@ pub struct Policy {
/// Describes the resources available on each sled for the planner
#[derive(Debug, Clone)]
pub struct SledResources {
/// provision state of this sled
pub provision_state: SledProvisionState,

/// zpools on this sled
///
/// (used to allocate storage for control plane zones with persistent
Expand Down

0 comments on commit b941323

Please sign in to comment.