Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

blueprint planner: skip non-provisionable sleds #5003

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion nexus/db-queries/src/db/datastore/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1053,6 +1053,7 @@ mod tests {
use nexus_test_utils::db::test_setup_database;
use nexus_types::deployment::Policy;
use nexus_types::deployment::SledResources;
use nexus_types::external_api::views::SledProvisionState;
use nexus_types::inventory::Collection;
use omicron_common::address::Ipv6Subnet;
use omicron_test_utils::dev;
Expand Down Expand Up @@ -1115,7 +1116,11 @@ mod tests {
})
.collect();
let ip = ip.unwrap_or_else(|| thread_rng().gen::<u128>().into());
SledResources { zpools, subnet: Ipv6Subnet::new(ip) }
SledResources {
provision_state: SledProvisionState::Provisionable,
zpools,
subnet: Ipv6Subnet::new(ip),
}
}

// Create a `Policy` that contains all the sleds found in `collection`
Expand Down
10 changes: 9 additions & 1 deletion nexus/deployment/src/blueprint_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -718,6 +718,7 @@ impl<'a> BlueprintZones<'a> {
#[cfg(test)]
pub mod test {
use super::*;
use nexus_types::external_api::views::SledProvisionState;
use omicron_common::address::IpRange;
use omicron_common::address::Ipv4Range;
use omicron_common::address::Ipv6Subnet;
Expand Down Expand Up @@ -890,7 +891,14 @@ pub mod test {
.collect();

let subnet = Ipv6Subnet::<SLED_PREFIX>::new(sled_ip);
policy.sleds.insert(sled_id, SledResources { zpools, subnet });
policy.sleds.insert(
sled_id,
SledResources {
provision_state: SledProvisionState::Provisionable,
zpools,
subnet,
},
);
sled_ip
}

Expand Down
105 changes: 105 additions & 0 deletions nexus/deployment/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use crate::blueprint_builder::EnsureMultiple;
use crate::blueprint_builder::Error;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::Policy;
use nexus_types::external_api::views::SledProvisionState;
use nexus_types::inventory::Collection;
use slog::warn;
use slog::{info, Logger};
Expand Down Expand Up @@ -162,6 +163,20 @@ impl<'a> Planner<'a> {
}
}

// We've now placed all the services that should always exist on all
// sleds. Before moving on to make decisions about placing services that
// are _not_ present on all sleds, check the provision state of all our
// sleds so we can avoid any non-provisionable sleds under the
// assumption that there is something amiss with them.
sleds_ineligible_for_services.extend(
self.policy.sleds.iter().filter_map(|(sled_id, sled_info)| {
match sled_info.provision_state {
SledProvisionState::Provisionable => None,
SledProvisionState::NonProvisionable => Some(*sled_id),
}
}),
);

self.ensure_correct_number_of_nexus_zones(
&sleds_ineligible_for_services,
)?;
Expand Down Expand Up @@ -293,6 +308,7 @@ mod test {
use crate::blueprint_builder::test::verify_blueprint;
use crate::blueprint_builder::BlueprintBuilder;
use nexus_inventory::now_db_precision;
use nexus_types::external_api::views::SledProvisionState;
use nexus_types::inventory::OmicronZoneType;
use nexus_types::inventory::OmicronZonesFound;
use omicron_common::api::external::Generation;
Expand Down Expand Up @@ -627,4 +643,93 @@ mod test {

logctx.cleanup_successful();
}

/// Check that the planner will skip non-provisionable sleds when allocating
/// extra Nexus zones
#[test]
fn test_nexus_allocation_skips_nonprovisionable_sleds() {
let logctx = test_setup_log(
"planner_nexus_allocation_skips_nonprovisionable_sleds",
);

// Use our example inventory collection as a starting point.
let (collection, mut policy) = example();

// Build the initial blueprint.
let blueprint1 = BlueprintBuilder::build_initial_from_collection(
&collection,
&policy,
"the_test",
)
.expect("failed to create initial blueprint");

// This blueprint should only have 3 Nexus zones: one on each sled.
assert_eq!(blueprint1.omicron_zones.len(), 3);
for sled_config in blueprint1.omicron_zones.values() {
assert_eq!(
sled_config
.zones
.iter()
.filter(|z| z.zone_type.is_nexus())
.count(),
1
);
}

// Arbitrarily choose one of the sleds and mark it non-provisionable.
let nonprovisionable_sled_id = {
let (sled_id, resources) =
policy.sleds.iter_mut().next().expect("no sleds");
resources.provision_state = SledProvisionState::NonProvisionable;
*sled_id
};

// Now run the planner with a high number of target Nexus zones.
policy.target_nexus_zone_count = 14;
let blueprint2 = Planner::new_based_on(
logctx.log.clone(),
&blueprint1,
&policy,
"add more Nexus",
&collection,
)
.expect("failed to create planner")
.plan()
.expect("failed to plan");

let diff = blueprint1.diff(&blueprint2);
println!("1 -> 2 (added additional Nexus zones):\n{}", diff);
assert_eq!(diff.sleds_added().count(), 0);
assert_eq!(diff.sleds_removed().count(), 0);
let sleds = diff.sleds_changed().collect::<Vec<_>>();

// Only 2 of the 3 sleds should get additional Nexus zones. We expect a
// total of 11 new Nexus zones, which should be spread evenly across the
// two sleds (one gets 6 and the other gets 5), while the
// non-provisionable sled should be unchanged.
assert_eq!(sleds.len(), 2);
let mut total_new_nexus_zones = 0;
for (sled_id, sled_changes) in sleds {
assert!(sled_id != nonprovisionable_sled_id);
assert_eq!(sled_changes.zones_removed().count(), 0);
assert_eq!(sled_changes.zones_changed().count(), 0);
let zones = sled_changes.zones_added().collect::<Vec<_>>();
match zones.len() {
n @ (5 | 6) => {
total_new_nexus_zones += n;
}
n => {
panic!("unexpected number of zones added to {sled_id}: {n}")
}
}
for zone in &zones {
let OmicronZoneType::Nexus { .. } = zone.zone_type else {
panic!("unexpectedly added a non-Crucible zone: {zone:?}");
};
}
}
assert_eq!(total_new_nexus_zones, 11);

logctx.cleanup_successful();
}
}
6 changes: 5 additions & 1 deletion nexus/src/app/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,11 @@ impl super::Nexus {
let zpools = zpools_by_sled_id
.remove(&sled_id)
.unwrap_or_else(BTreeSet::new);
let sled_info = SledResources { subnet, zpools };
let sled_info = SledResources {
provision_state: sled_row.provision_state().into(),
subnet,
zpools,
};
(sled_id, sled_info)
})
.collect();
Expand Down
4 changes: 4 additions & 0 deletions nexus/types/src/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//! nexus/deployment does not currently know about nexus/db-model and it's
//! convenient to separate these concerns.)

use crate::external_api::views::SledProvisionState;
use crate::inventory::Collection;
pub use crate::inventory::NetworkInterface;
pub use crate::inventory::NetworkInterfaceKind;
Expand Down Expand Up @@ -63,6 +64,9 @@ pub struct Policy {
/// Describes the resources available on each sled for the planner
#[derive(Debug, Clone)]
pub struct SledResources {
/// provision state of this sled
pub provision_state: SledProvisionState,

/// zpools on this sled
///
/// (used to allocate storage for control plane zones with persistent
Expand Down