Skip to content

Commit

Permalink
Fix blueprint planner's validation of PlanningInput external networking
Browse files Browse the repository at this point in the history
The "Check the planning input" block of code needs to consider _all_
zones in the parent blueprint, including expunged zones. #6483 fixed the
planner's ability to reuse external IPs from expunged zones, but
accidentally made these checks incorrect, because it's certainly valid
for the planning input to still have records for expunged zones. See
#6581 (comment)
for more context.

We also get to remove the somewhat-awkward
update_network_resources_from_blueprint() test helper that was needed in #6483
to make some tests pass (because we didn't realize the checks being
performed here were wrong).
  • Loading branch information
jgallagher committed Sep 18, 2024
1 parent 178c3f4 commit 5f629d7
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 115 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -152,74 +152,17 @@ impl<'a> BuilderExternalNetworking<'a> {
// Check the planning input: there shouldn't be any external networking
// resources in the database (the source of `input`) that we don't know
// about from the parent blueprint.
for external_ip_entry in
input.network_resources().omicron_zone_external_ips()
{
// As above, ignore localhost (used by the test suite).
if external_ip_entry.ip.ip().is_loopback() {
continue;
}
if !external_ip_alloc.contains(&external_ip_entry.ip)? {
bail!(
"planning input contains unexpected external IP \
(IP not found in parent blueprint): {external_ip_entry:?}"
);
}
}
for nic_entry in input.network_resources().omicron_zone_nics() {
if !used_macs.contains(&nic_entry.nic.mac) {
bail!(
"planning input contains unexpected NIC \
(MAC not found in parent blueprint): {nic_entry:?}"
);
}
match nic_entry.nic.ip {
IpAddr::V4(ip) if NEXUS_OPTE_IPV4_SUBNET.contains(ip) => {
if !existing_nexus_v4_ips.contains(&ip) {
bail!(
"planning input contains unexpected NIC \
(IP not found in parent blueprint): {nic_entry:?}"
);
}
}
IpAddr::V4(ip) if NTP_OPTE_IPV4_SUBNET.contains(ip) => {
if !existing_boundary_ntp_v4_ips.contains(&ip) {
bail!(
"planning input contains unexpected NIC \
(IP not found in parent blueprint): {nic_entry:?}"
);
}
}
IpAddr::V4(ip) if DNS_OPTE_IPV4_SUBNET.contains(ip) => {
// TODO check existing_dns_v4_ips, once it exists
}
IpAddr::V6(ip) if NEXUS_OPTE_IPV6_SUBNET.contains(ip) => {
if !existing_nexus_v6_ips.contains(&ip) {
bail!(
"planning input contains unexpected NIC \
(IP not found in parent blueprint): {nic_entry:?}"
);
}
}
IpAddr::V6(ip) if NTP_OPTE_IPV6_SUBNET.contains(ip) => {
if !existing_boundary_ntp_v6_ips.contains(&ip) {
bail!(
"planning input contains unexpected NIC \
(IP not found in parent blueprint): {nic_entry:?}"
);
}
}
IpAddr::V6(ip) if DNS_OPTE_IPV6_SUBNET.contains(ip) => {
// TODO check existing_dns_v6_ips, once it exists
}
_ => {
bail!(
"planning input contains unexpected NIC \
(IP not contained in known OPTE subnet): {nic_entry:?}"
)
}
}
}
//
// Logically this could be the first thing we do in this function, but
// we have some tests that check error cases in the above block that
// would also fail these checks (so reordering the checks would require
// those tests to do more work to construct valid `input`s), and we
// never expect this to fail in practice, so there's no use in "failing
// fast".
ensure_input_records_appear_in_parent_blueprint(
parent_blueprint,
input,
)?;

// TODO-performance Building these iterators as "walk through the list
// and skip anything we've used already" is fine as long as we're
Expand Down Expand Up @@ -333,6 +276,139 @@ impl<'a> BuilderExternalNetworking<'a> {
}
}

// Helper to validate that the system hasn't gone off the rails. There should
// never be any external networking resources in the planning input (which is
// derived from the contents of CRDB) that we don't know about from the parent
// blueprint.
//
// There may still be database records corresponding to _expunged_ zones, but
// that's okay: it just means we haven't yet realized a blueprint where those
// zones are expunged. And those should should still be in the blueprint (not
// pruned) until their database records are cleaned up.
//
// It's also possible that there may be networking records in the database
// assigned to zones that have been expunged, and our parent blueprint uses
// those same records for new zones. This is also fine and expected, and is a
// similar case to the previous paragraph: a zone with networking resources was
// expunged, the database doesn't realize it yet, but can still move forward and
// make planning decisions that reuse those resources for new zones.
fn ensure_input_records_appear_in_parent_blueprint(
parent_blueprint: &Blueprint,
input: &PlanningInput,
) -> anyhow::Result<()> {
let mut all_macs: HashSet<MacAddr> = HashSet::new();
let mut all_nexus_v4_ips: HashSet<Ipv4Addr> = HashSet::new();
let mut all_nexus_v6_ips: HashSet<Ipv6Addr> = HashSet::new();
let mut all_boundary_ntp_v4_ips: HashSet<Ipv4Addr> = HashSet::new();
let mut all_boundary_ntp_v6_ips: HashSet<Ipv6Addr> = HashSet::new();
let mut all_external_ips: HashSet<OmicronZoneExternalIp> = HashSet::new();

// Unlike the construction of the external IP allocator and existing IPs
// constructed above in `BuilderExternalNetworking::new()`, we do not
// check for duplicates here: we could very well see reuse of IPs
// between expunged zones or between expunged -> running zones.
for (_, z) in parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) {
let zone_type = &z.zone_type;
match zone_type {
BlueprintZoneType::BoundaryNtp(ntp) => match ntp.nic.ip {
IpAddr::V4(ip) => {
all_boundary_ntp_v4_ips.insert(ip);
}
IpAddr::V6(ip) => {
all_boundary_ntp_v6_ips.insert(ip);
}
},
BlueprintZoneType::Nexus(nexus) => match nexus.nic.ip {
IpAddr::V4(ip) => {
all_nexus_v4_ips.insert(ip);
}
IpAddr::V6(ip) => {
all_nexus_v6_ips.insert(ip);
}
},
// TODO: external-dns
_ => (),
}

if let Some((external_ip, nic)) = zone_type.external_networking() {
// As above, ignore localhost (used by the test suite).
if !external_ip.ip().is_loopback() {
all_external_ips.insert(external_ip);
}
all_macs.insert(nic.mac);
}
}
for external_ip_entry in
input.network_resources().omicron_zone_external_ips()
{
// As above, ignore localhost (used by the test suite).
if external_ip_entry.ip.ip().is_loopback() {
continue;
}
if !all_external_ips.contains(&external_ip_entry.ip) {
bail!(
"planning input contains unexpected external IP \
(IP not found in parent blueprint): {external_ip_entry:?}"
);
}
}
for nic_entry in input.network_resources().omicron_zone_nics() {
if !all_macs.contains(&nic_entry.nic.mac) {
bail!(
"planning input contains unexpected NIC \
(MAC not found in parent blueprint): {nic_entry:?}"
);
}
match nic_entry.nic.ip {
IpAddr::V4(ip) if NEXUS_OPTE_IPV4_SUBNET.contains(ip) => {
if !all_nexus_v4_ips.contains(&ip) {
bail!(
"planning input contains unexpected NIC \
(IP not found in parent blueprint): {nic_entry:?}"
);
}
}
IpAddr::V4(ip) if NTP_OPTE_IPV4_SUBNET.contains(ip) => {
if !all_boundary_ntp_v4_ips.contains(&ip) {
bail!(
"planning input contains unexpected NIC \
(IP not found in parent blueprint): {nic_entry:?}"
);
}
}
IpAddr::V4(ip) if DNS_OPTE_IPV4_SUBNET.contains(ip) => {
// TODO check all_dns_v4_ips, once it exists
}
IpAddr::V6(ip) if NEXUS_OPTE_IPV6_SUBNET.contains(ip) => {
if !all_nexus_v6_ips.contains(&ip) {
bail!(
"planning input contains unexpected NIC \
(IP not found in parent blueprint): {nic_entry:?}"
);
}
}
IpAddr::V6(ip) if NTP_OPTE_IPV6_SUBNET.contains(ip) => {
if !all_boundary_ntp_v6_ips.contains(&ip) {
bail!(
"planning input contains unexpected NIC \
(IP not found in parent blueprint): {nic_entry:?}"
);
}
}
IpAddr::V6(ip) if DNS_OPTE_IPV6_SUBNET.contains(ip) => {
// TODO check all_dns_v6_ips, once it exists
}
_ => {
bail!(
"planning input contains unexpected NIC \
(IP not contained in known OPTE subnet): {nic_entry:?}"
)
}
}
}
Ok(())
}

#[derive(Debug, Clone, Copy)]
pub(super) struct ExternalNetworkingChoice {
pub(super) external_ip: IpAddr,
Expand Down Expand Up @@ -440,6 +516,7 @@ impl<'a> ExternalIpAllocator<'a> {
// Returns `Ok(true)` if we contain this IP exactly, `Ok(false)` if we do
// not contain this IP, and an error if we contain a matching IP address but
// a mismatched exclusiveness / SNAT-ness.
#[cfg(test)]
fn contains(
&self,
external_ip: &OmicronZoneExternalIp,
Expand Down
6 changes: 1 addition & 5 deletions nexus/reconfigurator/planning/src/example.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ impl ExampleSystem {
let _ = system.sled(SledBuilder::new().id(*sled_id)).unwrap();
}

let mut input_builder = system
let input_builder = system
.to_planning_input_builder()
.expect("failed to make planning input builder");
let base_input = input_builder.clone().build();
Expand Down Expand Up @@ -92,10 +92,6 @@ impl ExampleSystem {
system.to_collection_builder().expect("failed to build collection");
builder.set_rng_seed((test_name, "ExampleSystem collection"));

input_builder
.update_network_resources_from_blueprint(&blueprint)
.expect("failed to add network resources from blueprint");

for (sled_id, zones) in &blueprint.blueprint_zones {
builder
.found_sled_omicron_zones(
Expand Down
6 changes: 0 additions & 6 deletions nexus/reconfigurator/planning/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1297,7 +1297,6 @@ mod test {
// the service IP pool. This will force reuse of the IP that was
// allocated to the expunged Nexus zone.
let mut builder = input.into_builder();
builder.update_network_resources_from_blueprint(&blueprint2).unwrap();
assert_eq!(builder.policy_mut().service_ip_pool_ranges.len(), 1);
builder.policy_mut().target_nexus_zone_count =
builder.policy_mut().service_ip_pool_ranges[0]
Expand Down Expand Up @@ -1697,10 +1696,6 @@ mod test {
};
println!("1 -> 2: decommissioned {decommissioned_sled_id}");

// Because we marked zones as expunged, we need to update the networking
// config in the planning input.
builder.update_network_resources_from_blueprint(&blueprint1).unwrap();

// Now run the planner with a high number of target Nexus zones. The
// number (9) is chosen such that:
//
Expand Down Expand Up @@ -1975,7 +1970,6 @@ mod test {

// Remove the now-decommissioned sled from the planning input.
let mut builder = input.into_builder();
builder.update_network_resources_from_blueprint(&blueprint2).unwrap();
builder.sleds_mut().remove(&expunged_sled_id);
let input = builder.build();

Expand Down
6 changes: 3 additions & 3 deletions nexus/types/src/deployment/network_resources.rs
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ impl OmicronZoneNetworkResources {
}

/// External IP variants possible for Omicron-managed zones.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[derive(Debug, Clone, Copy, Hash, PartialEq, Eq, Serialize, Deserialize)]
pub enum OmicronZoneExternalIp {
Floating(OmicronZoneExternalFloatingIp),
Snat(OmicronZoneExternalSnatIp),
Expand Down Expand Up @@ -194,7 +194,7 @@ pub enum OmicronZoneExternalIpKey {
/// necessary for blueprint planning, and requires that the zone have a single
/// IP.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, JsonSchema, Serialize, Deserialize,
Debug, Clone, Copy, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize,
)]
pub struct OmicronZoneExternalFloatingIp {
pub id: ExternalIpUuid,
Expand Down Expand Up @@ -222,7 +222,7 @@ impl OmicronZoneExternalFloatingAddr {
/// necessary for blueprint planning, and requires that the zone have a single
/// IP.
#[derive(
Debug, Clone, Copy, PartialEq, Eq, JsonSchema, Serialize, Deserialize,
Debug, Clone, Copy, Hash, PartialEq, Eq, JsonSchema, Serialize, Deserialize,
)]
pub struct OmicronZoneExternalSnatIp {
pub id: ExternalIpUuid,
Expand Down
33 changes: 0 additions & 33 deletions nexus/types/src/deployment/planning_input.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,6 @@
//! blueprints.
use super::AddNetworkResourceError;
use super::Blueprint;
use super::BlueprintZoneFilter;
use super::OmicronZoneExternalIp;
use super::OmicronZoneNetworkResources;
use super::OmicronZoneNic;
Expand All @@ -18,7 +16,6 @@ use crate::external_api::views::SledProvisionPolicy;
use crate::external_api::views::SledState;
use clap::ValueEnum;
use ipnetwork::IpNetwork;
use newtype_uuid::GenericUuid;
use omicron_common::address::IpRange;
use omicron_common::address::Ipv6Subnet;
use omicron_common::address::SLED_PREFIX;
Expand All @@ -28,7 +25,6 @@ use omicron_common::disk::DiskIdentity;
use omicron_uuid_kinds::OmicronZoneUuid;
use omicron_uuid_kinds::PhysicalDiskUuid;
use omicron_uuid_kinds::SledUuid;
use omicron_uuid_kinds::VnicUuid;
use omicron_uuid_kinds::ZpoolUuid;
use schemars::JsonSchema;
use serde::Deserialize;
Expand Down Expand Up @@ -863,35 +859,6 @@ impl PlanningInputBuilder {
&mut self.network_resources
}

pub fn update_network_resources_from_blueprint(
&mut self,
blueprint: &Blueprint,
) -> Result<(), PlanningInputBuildError> {
self.network_resources = OmicronZoneNetworkResources::new();
for (_, zone) in
blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning)
{
let service_id = zone.id;
if let Some((external_ip, nic)) =
zone.zone_type.external_networking()
{
self.add_omicron_zone_external_ip(service_id, external_ip)?;
self.add_omicron_zone_nic(
service_id,
OmicronZoneNic {
// TODO-cleanup use `TypedUuid` everywhere
id: VnicUuid::from_untyped_uuid(nic.id),
mac: nic.mac,
ip: nic.ip,
slot: nic.slot,
primary: nic.primary,
},
)?;
}
}
Ok(())
}

pub fn policy_mut(&mut self) -> &mut Policy {
&mut self.policy
}
Expand Down

0 comments on commit 5f629d7

Please sign in to comment.