From f433b38574e50b6759d132c9d27587c4e955b897 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 22 May 2024 18:49:50 +0100 Subject: [PATCH] Unsubscribe routes from sled when ports are removed. --- dev-tools/omdb/tests/successes.out | 2 +- illumos-utils/src/opte/port.rs | 12 +++++ illumos-utils/src/opte/port_manager.rs | 72 ++++++++++++++++++-------- 3 files changed, 62 insertions(+), 24 deletions(-) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 07960112d6..b1476b5f37 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -430,7 +430,7 @@ task: "metrics_producer_gc" currently executing: no last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms -warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String(""), "pruned": Array []}) +warning: unknown background task: "metrics_producer_gc" (don't know how to interpret details: Object {"expiration": String(""), "pruned": Array []}) task: "phantom_disks" configured period: every 30s diff --git a/illumos-utils/src/opte/port.rs b/illumos-utils/src/opte/port.rs index 38ba1b8c1c..832335891c 100644 --- a/illumos-utils/src/opte/port.rs +++ b/illumos-utils/src/opte/port.rs @@ -7,7 +7,9 @@ use crate::opte::Gateway; use crate::opte::Vni; use macaddr::MacAddr6; +use omicron_common::api::external; use omicron_common::api::external::IpNet; +use omicron_common::api::internal::shared::RouterId; use std::net::IpAddr; use std::sync::Arc; @@ -134,4 +136,14 @@ impl Port { pub fn slot(&self) -> u8 { self.inner.slot } + + pub fn system_router_key(&self) -> RouterId { + // Unwrap safety: both of these VNI types represent validated u24s. + let vni = external::Vni::try_from(self.vni().as_u32()).unwrap(); + RouterId { vni, subnet: None } + } + + pub fn custom_router_key(&self) -> RouterId { + RouterId { subnet: Some(*self.subnet()), ..self.system_router_key() } + } } diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index d206273c48..943e818832 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -60,6 +60,7 @@ const XDE_LINK_PREFIX: &str = "opte"; struct RouteSet { version: Option, routes: HashSet, + active_ports: usize, } #[derive(Debug)] @@ -76,9 +77,6 @@ struct PortManagerInner { // (which includes the Uuid of the parent instance or service) ports: Mutex>, - // XX: Should this be the UUID of the VPC? The rulesets are - // arguably shared v4+v6, although today we don't yet - // allow dual-stack, let alone v6. // Map of all current resolved routes. routes: Mutex>, } @@ -380,12 +378,9 @@ impl PortManager { (port, ticket) }; - // XX: need to delete safely after all subnet-holders leave - // to not get flooded with useless rules. let mut routes = self.inner.routes.lock().unwrap(); - let system_routes = routes - .entry(RouterId { vni: nic.vni, subnet: None }) - .or_insert_with(|| { + let system_routes = + routes.entry(port.system_router_key()).or_insert_with(|| { let mut routes = HashSet::new(); // Services do not talk to one another via OPTE, but do need @@ -402,16 +397,20 @@ impl PortManager { }); } - RouteSet { version: None, routes } - }) - .clone(); + RouteSet { version: None, routes, active_ports: 0 } + }); + system_routes.active_ports += 1; + // Needed to get borrowck on our side, sadly. + let system_routes = system_routes.clone(); let custom_routes = routes - .entry(RouterId { vni: nic.vni, subnet: Some(nic.subnet) }) + .entry(port.custom_router_key()) .or_insert_with(|| RouteSet { version: None, routes: HashSet::default(), + active_ports: 0, }); + custom_routes.active_ports += 1; for (class, routes) in [ (RouterClass::System, &system_routes), @@ -460,11 +459,16 @@ impl PortManager { let mut routes = self.inner.routes.lock().unwrap(); let mut deltas = HashMap::new(); for set in new_routes { + // Disregard any route information for a subnet we don't have. + let Some(old) = routes.get(&set.id) else { + continue; + }; + // We have to handle subnet router changes, as well as // spurious updates from multiple Nexus instances. // If there's a UUID match, only update if vers increased, // otherwise take the update verbatim (including loss of version). - let (to_add, to_delete) = if let Some(old) = routes.get(&set.id) { + let (to_add, to_delete): (HashSet<_>, HashSet<_>) = match (old.version, set.version) { (Some(old_vers), Some(new_vers)) if !old_vers.is_replaced_by(&new_vers) => @@ -475,30 +479,29 @@ impl PortManager { set.routes.difference(&old.routes).cloned().collect(), old.routes.difference(&set.routes).cloned().collect(), ), - } - } else { - (set.routes.clone(), HashSet::new()) - }; + }; deltas.insert(set.id, (to_add, to_delete)); + let active_ports = old.active_ports; routes.insert( set.id, - RouteSet { version: set.version, routes: set.routes }, + RouteSet { + version: set.version, + routes: set.routes, + active_ports, + }, ); } - drop(routes); let ports = self.inner.ports.lock().unwrap(); #[cfg(target_os = "illumos")] let hdl = opte_ioctl::OpteHdl::open(opte_ioctl::OpteHdl::XDE_CTL)?; for port in ports.values() { - let vni = external::Vni::try_from(port.vni().as_u32()).unwrap(); - let system_id = RouterId { vni, subnet: None }; + let system_id = port.system_router_key(); let system_delta = deltas.get(&system_id); - let custom_id = RouterId { vni, subnet: Some(*port.subnet()) }; - + let custom_id = port.custom_router_key(); let custom_delta = deltas.get(&custom_id); #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))] @@ -821,6 +824,29 @@ impl PortTicket { ); return Err(Error::ReleaseMissingPort(self.id, self.kind)); }; + drop(ports); + + // Cleanup the set of subnets we want to receive routes for. + let mut routes = self.manager.routes.lock().unwrap(); + for key in [port.system_router_key(), port.custom_router_key()] { + let should_remove = routes + .get_mut(&key) + .map(|v| { + v.active_ports = v.active_ports.saturating_sub(1); + v.active_ports == 0 + }) + .unwrap_or_default(); + + if should_remove { + routes.remove(&key); + info!( + self.manager.log, + "Removed route set for subnet"; + "id" => ?&key, + ); + } + } + debug!( self.manager.log, "Removed OPTE port from manager";