Skip to content

Commit

Permalink
allocate external IPs previously used by expunged zones (#6483)
Browse files Browse the repository at this point in the history
Closes #6260.

The test I've written successfully fails when the change to
`BuilderExternalNetworking::new` is backed out.
  • Loading branch information
iliana authored and hawkw committed Aug 31, 2024
1 parent 73d5290 commit 748788d
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 33 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ impl<'a> BuilderExternalNetworking<'a> {
ExternalIpAllocator::new(input.service_ip_pool_ranges());
let mut used_macs: HashSet<MacAddr> = HashSet::new();

for (_, z) in
parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All)
for (_, z) in parent_blueprint
.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
{
let zone_type = &z.zone_type;
match zone_type {
Expand Down
37 changes: 6 additions & 31 deletions nexus/reconfigurator/planning/src/example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,10 @@ use crate::system::SledBuilder;
use crate::system::SystemDescription;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintZoneFilter;
use nexus_types::deployment::OmicronZoneNic;
use nexus_types::deployment::PlanningInput;
use nexus_types::deployment::SledFilter;
use nexus_types::inventory::Collection;
use omicron_uuid_kinds::GenericUuid;
use omicron_uuid_kinds::SledKind;
use omicron_uuid_kinds::VnicUuid;
use typed_rng::TypedUuidRng;

pub struct ExampleSystem {
Expand Down Expand Up @@ -95,37 +92,15 @@ impl ExampleSystem {
system.to_collection_builder().expect("failed to build collection");
builder.set_rng_seed((test_name, "ExampleSystem collection"));

for sled_id in blueprint.sleds() {
let Some(zones) = blueprint.blueprint_zones.get(&sled_id) else {
continue;
};
for zone in zones.zones.iter() {
let service_id = zone.id;
if let Some((external_ip, nic)) =
zone.zone_type.external_networking()
{
input_builder
.add_omicron_zone_external_ip(service_id, external_ip)
.expect("failed to add Omicron zone external IP");
input_builder
.add_omicron_zone_nic(
service_id,
OmicronZoneNic {
// TODO-cleanup use `TypedUuid` everywhere
id: VnicUuid::from_untyped_uuid(nic.id),
mac: nic.mac,
ip: nic.ip,
slot: nic.slot,
primary: nic.primary,
},
)
.expect("failed to add Omicron zone NIC");
}
}
input_builder
.update_network_resources_from_blueprint(&blueprint)
.expect("failed to add network resources from blueprint");

for (sled_id, zones) in &blueprint.blueprint_zones {
builder
.found_sled_omicron_zones(
"fake sled agent",
sled_id,
*sled_id,
zones.to_omicron_zones_config(
BlueprintZoneFilter::ShouldBeRunning,
),
Expand Down
98 changes: 98 additions & 0 deletions nexus/reconfigurator/planning/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,99 @@ mod test {
logctx.cleanup_successful();
}

/// Check that the planner will reuse external IPs that were previously
/// assigned to expunged zones
#[test]
fn test_reuse_external_ips_from_expunged_zones() {
static TEST_NAME: &str =
"planner_reuse_external_ips_from_expunged_zones";
let logctx = test_setup_log(TEST_NAME);

// Use our example system as a starting point.
let (collection, input, blueprint1) =
example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS);

// Expunge the first sled we see, which will result in a Nexus external
// IP no longer being associated with a running zone, and a new Nexus
// zone being added to one of the two remaining sleds.
let mut builder = input.into_builder();
let (sled_id, details) =
builder.sleds_mut().iter_mut().next().expect("no sleds");
let sled_id = *sled_id;
details.policy = SledPolicy::Expunged;
let input = builder.build();
let blueprint2 = Planner::new_based_on(
logctx.log.clone(),
&blueprint1,
&input,
"test_blueprint2",
&collection,
)
.expect("failed to create planner")
.with_rng_seed((TEST_NAME, "bp2"))
.plan()
.expect("failed to plan");

let diff = blueprint2.diff_since_blueprint(&blueprint1);
println!("1 -> 2 (expunged sled):\n{}", diff.display());

// The expunged sled should have an expunged Nexus zone.
let zone = blueprint2.blueprint_zones[&sled_id]
.zones
.iter()
.find(|zone| matches!(zone.zone_type, BlueprintZoneType::Nexus(_)))
.expect("no nexus zone found");
assert_eq!(zone.disposition, BlueprintZoneDisposition::Expunged);

// Set the target Nexus zone count to one that will completely exhaust
// the service IP pool. This will force reuse of the IP that was
// allocated to the expunged Nexus zone.
let mut builder = input.into_builder();
builder.update_network_resources_from_blueprint(&blueprint2).unwrap();
assert_eq!(builder.policy_mut().service_ip_pool_ranges.len(), 1);
builder.policy_mut().target_nexus_zone_count =
builder.policy_mut().service_ip_pool_ranges[0]
.len()
.try_into()
.unwrap();
let input = builder.build();
let blueprint3 = Planner::new_based_on(
logctx.log.clone(),
&blueprint2,
&input,
"test_blueprint3",
&collection,
)
.expect("failed to create planner")
.with_rng_seed((TEST_NAME, "bp3"))
.plan()
.expect("failed to plan");

let diff = blueprint3.diff_since_blueprint(&blueprint2);
println!("2 -> 3 (maximum Nexus):\n{}", diff.display());

// Planning succeeded, but let's prove that we reused the IP address!
let expunged_ip = zone.zone_type.external_networking().unwrap().0.ip();
let new_zone = blueprint3
.blueprint_zones
.values()
.flat_map(|c| &c.zones)
.find(|zone| {
zone.disposition == BlueprintZoneDisposition::InService
&& zone
.zone_type
.external_networking()
.map_or(false, |(ip, _)| expunged_ip == ip.ip())
})
.expect("couldn't find that the external IP was reused");
println!(
"zone {} reused external IP {} from expunged zone {}",
new_zone.id, expunged_ip, zone.id
);

logctx.cleanup_successful();
}

#[test]
fn test_crucible_allocation_skips_nonprovisionable_disks() {
static TEST_NAME: &str =
Expand Down Expand Up @@ -1604,6 +1697,10 @@ mod test {
};
println!("1 -> 2: decommissioned {decommissioned_sled_id}");

// Because we marked zones as expunged, we need to update the networking
// config in the planning input.
builder.update_network_resources_from_blueprint(&blueprint1).unwrap();

// Now run the planner with a high number of target Nexus zones. The
// number (9) is chosen such that:
//
Expand Down Expand Up @@ -1877,6 +1974,7 @@ mod test {

// Remove the now-decommissioned sled from the planning input.
let mut builder = input.into_builder();
builder.update_network_resources_from_blueprint(&blueprint2).unwrap();
builder.sleds_mut().remove(&expunged_sled_id);
let input = builder.build();

Expand Down
33 changes: 33 additions & 0 deletions nexus/types/src/deployment/planning_input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
//! blueprints.
use super::AddNetworkResourceError;
use super::Blueprint;
use super::BlueprintZoneFilter;
use super::OmicronZoneExternalIp;
use super::OmicronZoneNetworkResources;
use super::OmicronZoneNic;
Expand All @@ -16,6 +18,7 @@ use crate::external_api::views::SledProvisionPolicy;
use crate::external_api::views::SledState;
use clap::ValueEnum;
use ipnetwork::IpNetwork;
use newtype_uuid::GenericUuid;
use omicron_common::address::IpRange;
use omicron_common::address::Ipv6Subnet;
use omicron_common::address::SLED_PREFIX;
Expand All @@ -25,6 +28,7 @@ use omicron_common::disk::DiskIdentity;
use omicron_uuid_kinds::OmicronZoneUuid;
use omicron_uuid_kinds::PhysicalDiskUuid;
use omicron_uuid_kinds::SledUuid;
use omicron_uuid_kinds::VnicUuid;
use omicron_uuid_kinds::ZpoolUuid;
use schemars::JsonSchema;
use serde::Deserialize;
Expand Down Expand Up @@ -859,6 +863,35 @@ impl PlanningInputBuilder {
&mut self.network_resources
}

pub fn update_network_resources_from_blueprint(
&mut self,
blueprint: &Blueprint,
) -> Result<(), PlanningInputBuildError> {
self.network_resources = OmicronZoneNetworkResources::new();
for (_, zone) in
blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
{
let service_id = zone.id;
if let Some((external_ip, nic)) =
zone.zone_type.external_networking()
{
self.add_omicron_zone_external_ip(service_id, external_ip)?;
self.add_omicron_zone_nic(
service_id,
OmicronZoneNic {
// TODO-cleanup use `TypedUuid` everywhere
id: VnicUuid::from_untyped_uuid(nic.id),
mac: nic.mac,
ip: nic.ip,
slot: nic.slot,
primary: nic.primary,
},
)?;
}
}
Ok(())
}

pub fn policy_mut(&mut self) -> &mut Policy {
&mut self.policy
}
Expand Down

0 comments on commit 748788d

Please sign in to comment.