diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index b14486b651..e4f59aff5f 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.31" +#: target = "lab-2.0-opte-0.32" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/zone/oxz_*/root/var/svc/log/oxide-*.log*", diff --git a/Cargo.lock b/Cargo.lock index 734a32e8bd..38b8f310c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1604,8 +1604,9 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=5630887d0373857f77cb264f84aa19bdec720ce3#5630887d0373857f77cb264f84aa19bdec720ce3" +source = "git+https://github.com/oxidecomputer/maghemite?rev=3c3fa8482fe09a01da62fbd35efe124ea9cac9e7#3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" dependencies = [ + "oxnet", "percent-encoding", "progenitor", "reqwest", @@ -3468,7 +3469,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" [[package]] name = "illumos-utils" @@ -3882,7 +3883,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "quote", "syn 2.0.68", @@ -4011,7 +4012,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" 
dependencies = [ "cfg-if", - "windows-targets 0.52.5", + "windows-targets 0.48.5", ] [[package]] @@ -4294,7 +4295,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=5630887d0373857f77cb264f84aa19bdec720ce3#5630887d0373857f77cb264f84aa19bdec720ce3" +source = "git+https://github.com/oxidecomputer/maghemite?rev=3c3fa8482fe09a01da62fbd35efe124ea9cac9e7#3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" dependencies = [ "anyhow", "chrono", @@ -6104,7 +6105,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "cfg-if", "dyn-clone", @@ -6121,7 +6122,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -6133,7 +6134,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -6207,7 +6208,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "cfg-if", "illumos-sys-hdrs", @@ -6454,7 +6455,7 @@ dependencies = [ [[package]] name = "oxnet" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/oxnet?branch=main#42b4d3c77c7f5f2636cd6c4bbf37ac3eada047e0" +source = "git+https://github.com/oxidecomputer/oxnet#2612d2203effcfdcbf83778a77f1bfd03fe6ed24" dependencies = [ "ipnetwork", "schemars", @@ -8536,9 +8537,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index 60425a4239..640e76e59a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -333,8 +333,8 @@ macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.12" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" } multimap = "0.10.0" nexus-auth = { path = "nexus/auth" } nexus-client = { path = "clients/nexus-client" } @@ -354,7 +354,7 @@ omicron-certificates = { path = "certificates" } omicron-passwords = { path = "passwords" } omicron-workspace-hack = 
"0.1.0" oxlog = { path = "dev-tools/oxlog" } -oxnet = { git = "https://github.com/oxidecomputer/oxnet", branch = "main" } +oxnet = { git = "https://github.com/oxidecomputer/oxnet" } nexus-test-interface = { path = "nexus/test-interface" } nexus-test-utils-macros = { path = "nexus/test-utils-macros" } nexus-test-utils = { path = "nexus/test-utils" } @@ -372,14 +372,14 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "417f74e94978c23f3892ac328c3387f3ecd9bb29", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "915975f6d1729db95619f752148974016912412f", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" # must match samael's crate! openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "417f74e94978c23f3892ac328c3387f3ecd9bb29" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "915975f6d1729db95619f752148974016912412f" } oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs index b926ee2971..8cd9781e1d 100644 --- a/clients/ddm-admin-client/src/lib.rs +++ b/clients/ddm-admin-client/src/lib.rs @@ -12,7 +12,7 @@ pub use ddm_admin_client::types; pub use ddm_admin_client::Error; -use ddm_admin_client::types::{Ipv6Prefix, TunnelOrigin}; +use ddm_admin_client::types::TunnelOrigin; use ddm_admin_client::Client as InnerClient; use either::Either; use omicron_common::address::Ipv6Subnet; @@ -81,8 +81,7 @@ impl Client { pub fn advertise_prefix(&self, address: Ipv6Subnet) { let me = self.clone(); tokio::spawn(async move { - let prefix = - Ipv6Prefix { addr: 
address.net().prefix(), len: SLED_PREFIX }; + let prefix = address.net(); retry_notify(retry_policy_internal_service_aggressive(), || async { info!( me.log, "Sending prefix to ddmd for advertisement"; @@ -130,8 +129,8 @@ impl Client { let prefixes = self.inner.get_prefixes().await?.into_inner(); Ok(prefixes.into_iter().flat_map(|(_, prefixes)| { prefixes.into_iter().flat_map(|prefix| { - let mut segments = prefix.destination.addr.segments(); - if prefix.destination.len == BOOTSTRAP_MASK + let mut segments = prefix.destination.addr().segments(); + if prefix.destination.width() == BOOTSTRAP_MASK && segments[0] == BOOTSTRAP_PREFIX { Either::Left(interfaces.iter().map(move |interface| { diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 81b225b035..42eefaf8b5 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -51,6 +51,11 @@ progenitor::generate_api!( NetworkInterface = omicron_common::api::internal::shared::NetworkInterface, PortFec = omicron_common::api::internal::shared::PortFec, PortSpeed = omicron_common::api::internal::shared::PortSpeed, + RouterId = omicron_common::api::internal::shared::RouterId, + ResolvedVpcRoute = omicron_common::api::internal::shared::ResolvedVpcRoute, + ResolvedVpcRouteSet = omicron_common::api::internal::shared::ResolvedVpcRouteSet, + RouterTarget = omicron_common::api::internal::shared::RouterTarget, + RouterVersion = omicron_common::api::internal::shared::RouterVersion, SourceNatConfig = omicron_common::api::internal::shared::SourceNatConfig, SwitchLocation = omicron_common::api::external::SwitchLocation, TypedUuidForInstanceKind = omicron_uuid_kinds::InstanceUuid, diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 0af437bd99..58d45fecef 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1331,6 +1331,9 @@ pub enum RouteTarget { #[display("inetgw:{0}")] /// Forward traffic to an 
internet gateway InternetGateway(Name), + #[display("drop")] + /// Drop matching traffic + Drop, } /// A `RouteDestination` is used to match traffic with a routing rule, on the diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 3d710fc952..090b3c3058 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -6,13 +6,13 @@ use crate::{ address::NUM_SOURCE_NAT_PORTS, - api::external::{self, BfdMode, ImportExportPolicy, Name}, + api::external::{self, BfdMode, ImportExportPolicy, Name, Vni}, }; use oxnet::{IpNet, Ipv4Net, Ipv6Net}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fmt, net::{IpAddr, Ipv4Addr, Ipv6Addr}, str::FromStr, @@ -50,11 +50,11 @@ pub enum NetworkInterfaceKind { pub struct NetworkInterface { pub id: Uuid, pub kind: NetworkInterfaceKind, - pub name: external::Name, + pub name: Name, pub ip: IpAddr, pub mac: external::MacAddr, pub subnet: IpNet, - pub vni: external::Vni, + pub vni: Vni, pub primary: bool, pub slot: u8, } @@ -624,6 +624,82 @@ impl TryFrom<&[ipnetwork::IpNetwork]> for IpAllowList { } } +/// A VPC route resolved into a concrete target. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct ResolvedVpcRoute { + pub dest: IpNet, + pub target: RouterTarget, +} + +/// The target for a given router entry. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case", content = "value")] +pub enum RouterTarget { + Drop, + InternetGateway, + Ip(IpAddr), + VpcSubnet(IpNet), +} + +/// Information on the current parent router (and version) of a route set +/// according to the control plane. 
+#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct RouterVersion { + pub router_id: Uuid, + pub version: u64, +} + +impl RouterVersion { + /// Return whether a new route set should be applied over the current + /// values. + /// + /// This will occur when seeing a new version and a matching parent, + /// or a new parent router on the control plane. + pub fn is_replaced_by(&self, other: &Self) -> bool { + (self.router_id != other.router_id) || self.version < other.version + } +} + +/// Identifier for a VPC and/or subnet. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct RouterId { + pub vni: Vni, + pub kind: RouterKind, +} + +/// The scope of a set of VPC router rules. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case", content = "subnet")] +pub enum RouterKind { + System, + Custom(IpNet), +} + +/// Version information for routes on a given VPC subnet. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] +pub struct ResolvedVpcRouteState { + pub id: RouterId, + pub version: Option, +} + +/// An updated set of routes for a given VPC and/or subnet. 
+#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] +pub struct ResolvedVpcRouteSet { + pub id: RouterId, + pub version: Option, + pub routes: HashSet, +} + #[cfg(test)] mod tests { use crate::api::internal::shared::AllowedSourceIps; diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 23ab034f1c..252313e6c8 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -131,6 +131,10 @@ task: "v2p_manager" manages opte v2p mappings for vpc networking +task: "vpc_route_manager" + propagates updated VPC routes to all OPTE ports + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -259,6 +263,10 @@ task: "v2p_manager" manages opte v2p mappings for vpc networking +task: "vpc_route_manager" + propagates updated VPC routes to all OPTE ports + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. @@ -374,6 +382,10 @@ task: "v2p_manager" manages opte v2p mappings for vpc networking +task: "vpc_route_manager" + propagates updated VPC routes to all OPTE ports + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. 
diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index ebebb657e5..032a574c8e 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -332,6 +332,10 @@ task: "v2p_manager" manages opte v2p mappings for vpc networking +task: "vpc_route_manager" + propagates updated VPC routes to all OPTE ports + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -545,6 +549,13 @@ task: "v2p_manager" started at (s ago) and ran for ms warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {}) +task: "vpc_route_manager" + configured period: every 30s + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms +warning: unknown background task: "vpc_route_manager" (don't know how to interpret details: Object {}) + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/illumos-utils/src/opte/firewall_rules.rs b/illumos-utils/src/opte/firewall_rules.rs index 1df0e7421a..4dcb390e9e 100644 --- a/illumos-utils/src/opte/firewall_rules.rs +++ b/illumos-utils/src/opte/firewall_rules.rs @@ -4,6 +4,7 @@ //! Convert Omicron VPC firewall rules to OPTE firewall rules. 
+use super::net_to_cidr; use crate::opte::params::VpcFirewallRule; use crate::opte::Vni; use macaddr::MacAddr6; @@ -18,11 +19,6 @@ use oxide_vpc::api::Filters; use oxide_vpc::api::FirewallAction; use oxide_vpc::api::FirewallRule; use oxide_vpc::api::IpAddr; -use oxide_vpc::api::IpCidr; -use oxide_vpc::api::Ipv4Cidr; -use oxide_vpc::api::Ipv4PrefixLen; -use oxide_vpc::api::Ipv6Cidr; -use oxide_vpc::api::Ipv6PrefixLen; use oxide_vpc::api::Ports; use oxide_vpc::api::ProtoFilter; use oxide_vpc::api::Protocol; @@ -68,21 +64,10 @@ impl FromVpcFirewallRule for VpcFirewallRule { HostIdentifier::Ip(IpNet::V4(net)) if net.is_host_net() => { Address::Ip(IpAddr::Ip4(net.addr().into())) } - HostIdentifier::Ip(IpNet::V4(net)) => { - Address::Subnet(IpCidr::Ip4(Ipv4Cidr::new( - net.addr().into(), - Ipv4PrefixLen::new(net.width()).unwrap(), - ))) - } HostIdentifier::Ip(IpNet::V6(net)) if net.is_host_net() => { Address::Ip(IpAddr::Ip6(net.addr().into())) } - HostIdentifier::Ip(IpNet::V6(net)) => { - Address::Subnet(IpCidr::Ip6(Ipv6Cidr::new( - net.addr().into(), - Ipv6PrefixLen::new(net.width()).unwrap(), - ))) - } + HostIdentifier::Ip(ip) => Address::Subnet(net_to_cidr(*ip)), HostIdentifier::Vpc(vni) => { Address::Vni(Vni::new(u32::from(*vni)).unwrap()) } diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index d06b6b26e5..d7fd96b0c0 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -18,15 +18,23 @@ mod port; mod port_manager; pub use firewall_rules::opte_firewall_rules; -pub use port::Port; -pub use port_manager::PortManager; -pub use port_manager::PortTicket; - use ipnetwork::IpNetwork; use macaddr::MacAddr6; +use omicron_common::api::internal::shared; pub use oxide_vpc::api::BoundaryServices; pub use oxide_vpc::api::DhcpCfg; +use oxide_vpc::api::IpCidr; +use oxide_vpc::api::Ipv4Cidr; +use oxide_vpc::api::Ipv4PrefixLen; +use oxide_vpc::api::Ipv6Cidr; +use oxide_vpc::api::Ipv6PrefixLen; +use oxide_vpc::api::RouterTarget; pub 
use oxide_vpc::api::Vni; +use oxnet::IpNet; +pub use port::Port; +pub use port_manager::PortCreateParams; +pub use port_manager::PortManager; +pub use port_manager::PortTicket; use std::net::IpAddr; /// Information about the gateway for an OPTE port @@ -63,3 +71,28 @@ impl Gateway { &self.ip } } + +/// Convert a nexus `IpNet` to an OPTE `IpCidr`. +fn net_to_cidr(net: IpNet) -> IpCidr { + match net { + IpNet::V4(net) => IpCidr::Ip4(Ipv4Cidr::new( + net.addr().into(), + Ipv4PrefixLen::new(net.width()).unwrap(), + )), + IpNet::V6(net) => IpCidr::Ip6(Ipv6Cidr::new( + net.addr().into(), + Ipv6PrefixLen::new(net.width()).unwrap(), + )), + } +} + +/// Convert a nexus `RouterTarget` to an OPTE `RouterTarget`. +fn router_target_opte(target: &shared::RouterTarget) -> RouterTarget { + use shared::RouterTarget::*; + match target { + Drop => RouterTarget::Drop, + InternetGateway => RouterTarget::InternetGateway, + Ip(ip) => RouterTarget::Ip((*ip).into()), + VpcSubnet(net) => RouterTarget::VpcSubnet(net_to_cidr(*net)), + } +} diff --git a/illumos-utils/src/opte/port.rs b/illumos-utils/src/opte/port.rs index 6fbb89c450..a692a02304 100644 --- a/illumos-utils/src/opte/port.rs +++ b/illumos-utils/src/opte/port.rs @@ -7,23 +7,30 @@ use crate::opte::Gateway; use crate::opte::Vni; use macaddr::MacAddr6; +use omicron_common::api::external; +use omicron_common::api::internal::shared::RouterId; +use omicron_common::api::internal::shared::RouterKind; +use oxnet::IpNet; use std::net::IpAddr; use std::sync::Arc; #[derive(Debug)] -struct PortInner { - // Name of the port as identified by OPTE - name: String, - // IP address within the VPC Subnet - ip: IpAddr, - // VPC-private MAC address - mac: MacAddr6, - // Emulated PCI slot for the guest NIC, passed to Propolis - slot: u8, - // Geneve VNI for the VPC - vni: Vni, - // Information about the virtual gateway, aka OPTE - gateway: Gateway, +pub struct PortData { + /// Name of the port as identified by OPTE + pub(crate) name: String, + /// IP 
address within the VPC Subnet + pub(crate) ip: IpAddr, + /// VPC-private MAC address + pub(crate) mac: MacAddr6, + /// Emulated PCI slot for the guest NIC, passed to Propolis + pub(crate) slot: u8, + /// Geneve VNI for the VPC + pub(crate) vni: Vni, + /// Subnet the port belong to within the VPC. + pub(crate) subnet: IpNet, + /// Information about the virtual gateway, aka OPTE + pub(crate) gateway: Gateway, + /// Name of the VNIC the OPTE port is bound to. // TODO-remove(#2932): Remove this once we can put Viona directly on top of an // OPTE port device. // @@ -33,7 +40,18 @@ struct PortInner { // https://github.com/oxidecomputer/opte/issues/178 for more details. This // can be changed back to a real VNIC when that is resolved, and the Drop // impl below can simplify to just call `drop(self.vnic)`. - vnic: String, + pub(crate) vnic: String, +} + +#[derive(Debug)] +struct PortInner(PortData); + +impl core::ops::Deref for PortInner { + type Target = PortData; + + fn deref(&self) -> &Self::Target { + &self.0 + } } #[cfg(target_os = "illumos")] @@ -83,26 +101,8 @@ pub struct Port { } impl Port { - pub fn new( - name: String, - ip: IpAddr, - mac: MacAddr6, - slot: u8, - vni: Vni, - gateway: Gateway, - vnic: String, - ) -> Self { - Self { - inner: Arc::new(PortInner { - name, - ip, - mac, - slot, - vni, - gateway, - vnic, - }), - } + pub fn new(data: PortData) -> Self { + Self { inner: Arc::new(PortInner(data)) } } pub fn ip(&self) -> &IpAddr { @@ -126,6 +126,10 @@ impl Port { &self.inner.vni } + pub fn subnet(&self) -> &IpNet { + &self.inner.subnet + } + pub fn vnic_name(&self) -> &str { &self.inner.vnic } @@ -133,4 +137,17 @@ impl Port { pub fn slot(&self) -> u8 { self.inner.slot } + + pub fn system_router_key(&self) -> RouterId { + // Unwrap safety: both of these VNI types represent validated u24s. 
+ let vni = external::Vni::try_from(self.vni().as_u32()).unwrap(); + RouterId { vni, kind: RouterKind::System } + } + + pub fn custom_router_key(&self) -> RouterId { + RouterId { + kind: RouterKind::Custom(*self.subnet()), + ..self.system_router_key() + } + } } diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 726aa01a2a..caeda81217 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -7,6 +7,7 @@ use crate::opte::opte_firewall_rules; use crate::opte::params::VirtualNetworkInterfaceHost; use crate::opte::params::VpcFirewallRule; +use crate::opte::port::PortData; use crate::opte::Error; use crate::opte::Gateway; use crate::opte::Port; @@ -15,8 +16,15 @@ use ipnetwork::IpNetwork; use omicron_common::api::external; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::api::internal::shared::ResolvedVpcRoute; +use omicron_common::api::internal::shared::ResolvedVpcRouteSet; +use omicron_common::api::internal::shared::ResolvedVpcRouteState; +use omicron_common::api::internal::shared::RouterId; +use omicron_common::api::internal::shared::RouterTarget as ApiRouterTarget; +use omicron_common::api::internal::shared::RouterVersion; use omicron_common::api::internal::shared::SourceNatConfig; use oxide_vpc::api::AddRouterEntryReq; +use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DhcpCfg; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::IpCfg; @@ -24,7 +32,7 @@ use oxide_vpc::api::IpCidr; use oxide_vpc::api::Ipv4Cfg; use oxide_vpc::api::Ipv6Cfg; use oxide_vpc::api::MacAddr; -use oxide_vpc::api::RouterTarget; +use oxide_vpc::api::RouterClass; use oxide_vpc::api::SNat4Cfg; use oxide_vpc::api::SNat6Cfg; use oxide_vpc::api::SetExternalIpsReq; @@ -34,6 +42,8 @@ use slog::error; use slog::info; use slog::Logger; use std::collections::BTreeMap; +use std::collections::HashMap; +use 
std::collections::HashSet; use std::net::IpAddr; use std::net::Ipv6Addr; use std::sync::atomic::AtomicU64; @@ -45,19 +55,30 @@ use uuid::Uuid; // Prefix used to identify xde data links. const XDE_LINK_PREFIX: &str = "opte"; +/// Stored routes (and usage count) for a given VPC/subnet. +#[derive(Debug, Clone)] +struct RouteSet { + version: Option, + routes: HashSet, + active_ports: usize, +} + #[derive(Debug)] struct PortManagerInner { log: Logger, - // Sequential identifier for each port on the system. + /// Sequential identifier for each port on the system. next_port_id: AtomicU64, - // IP address of the hosting sled on the underlay. + /// IP address of the hosting sled on the underlay. underlay_ip: Ipv6Addr, - // Map of all ports, keyed on the interface Uuid and its kind - // (which includes the Uuid of the parent instance or service) + /// Map of all ports, keyed on the interface Uuid and its kind + /// (which includes the Uuid of the parent instance or service) ports: Mutex>, + + /// Map of all current resolved routes. + routes: Mutex>, } impl PortManagerInner { @@ -70,6 +91,18 @@ impl PortManagerInner { } } +#[derive(Debug)] +/// Parameters needed to create and configure an OPTE port. +pub struct PortCreateParams<'a> { + pub nic: &'a NetworkInterface, + pub source_nat: Option, + pub ephemeral_ip: Option, + pub floating_ips: &'a [IpAddr], + pub firewall_rules: &'a [VpcFirewallRule], + pub dhcp_config: DhcpCfg, + pub is_service: bool, +} + /// The port manager controls all OPTE ports on a single host. 
#[derive(Debug, Clone)] pub struct PortManager { @@ -84,6 +117,7 @@ impl PortManager { next_port_id: AtomicU64::new(0), underlay_ip, ports: Mutex::new(BTreeMap::new()), + routes: Mutex::new(Default::default()), }); Self { inner } @@ -97,13 +131,18 @@ impl PortManager { #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))] pub fn create_port( &self, - nic: &NetworkInterface, - source_nat: Option, - ephemeral_ip: Option, - floating_ips: &[IpAddr], - firewall_rules: &[VpcFirewallRule], - dhcp_config: DhcpCfg, + params: PortCreateParams, ) -> Result<(Port, PortTicket), Error> { + let PortCreateParams { + nic, + source_nat, + ephemeral_ip, + floating_ips, + firewall_rules, + dhcp_config, + is_service, + } = params; + let mac = *nic.mac; let vni = Vni::new(nic.vni).unwrap(); let subnet = IpNetwork::from(nic.subnet); @@ -319,15 +358,16 @@ impl PortManager { let (port, ticket) = { let mut ports = self.inner.ports.lock().unwrap(); let ticket = PortTicket::new(nic.id, nic.kind, self.inner.clone()); - let port = Port::new( - port_name.clone(), - nic.ip, + let port = Port::new(PortData { + name: port_name.clone(), + ip: nic.ip, mac, - nic.slot, + slot: nic.slot, vni, + subnet: nic.subnet, gateway, vnic, - ); + }); let old = ports.insert((nic.id, nic.kind), port.clone()); assert!( old.is_none(), @@ -338,57 +378,67 @@ impl PortManager { (port, ticket) }; - // Add a router entry for this interface's subnet, directing traffic to the - // VPC subnet. - let route = AddRouterEntryReq { - port_name: port_name.clone(), - dest: vpc_subnet, - target: RouterTarget::VpcSubnet(vpc_subnet), - }; - #[cfg(target_os = "illumos")] - hdl.add_router_entry(&route)?; - debug!( - self.inner.log, - "Added VPC Subnet router entry"; - "port_name" => &port_name, - "route" => ?route, - ); + // Check locally to see whether we have any routes from the + // control plane for this port already installed. If not, + // create a record to show that we're interested in receiving + // those routes. 
+ let mut routes = self.inner.routes.lock().unwrap(); + let system_routes = + routes.entry(port.system_router_key()).or_insert_with(|| { + let mut routes = HashSet::new(); + + // Services do not talk to one another via OPTE, but do need + // to reach out over the Internet *before* nexus is up to give + // us real rules. The easiest bet is to instantiate these here. + if is_service { + routes.insert(ResolvedVpcRoute { + dest: "0.0.0.0/0".parse().unwrap(), + target: ApiRouterTarget::InternetGateway, + }); + routes.insert(ResolvedVpcRoute { + dest: "::/0".parse().unwrap(), + target: ApiRouterTarget::InternetGateway, + }); + } - // TODO-remove - // - // See https://github.com/oxidecomputer/omicron/issues/1336 - // - // This is another part of the workaround, allowing reply traffic from - // the guest back out. Normally, OPTE would drop such traffic at the - // router layer, as it has no route for that external IP address. This - // allows such traffic through. - // - // Note that this exact rule will eventually be included, since it's one - // of the default routing rules in the VPC System Router. However, that - // will likely be communicated in a different way, or could be modified, - // and this specific call should be removed in favor of sending the - // routing rules the control plane provides. - // - // This rule sends all traffic that has no better match to the gateway. - let dest = match vpc_subnet { - IpCidr::Ip4(_) => "0.0.0.0/0", - IpCidr::Ip6(_) => "::/0", + RouteSet { version: None, routes, active_ports: 0 } + }); + system_routes.active_ports += 1; + // Clone is needed to get borrowck on our side, sadly. 
+ let system_routes = system_routes.clone(); + + let custom_routes = routes + .entry(port.custom_router_key()) + .or_insert_with(|| RouteSet { + version: None, + routes: HashSet::default(), + active_ports: 0, + }); + custom_routes.active_ports += 1; + + for (class, routes) in [ + (RouterClass::System, &system_routes), + (RouterClass::Custom, custom_routes), + ] { + for route in &routes.routes { + let route = AddRouterEntryReq { + class, + port_name: port_name.clone(), + dest: super::net_to_cidr(route.dest), + target: super::router_target_opte(&route.target), + }; + + #[cfg(target_os = "illumos")] + hdl.add_router_entry(&route)?; + + debug!( + self.inner.log, + "Added router entry"; + "port_name" => &port_name, + "route" => ?route, + ); + } } - .parse() - .unwrap(); - let route = AddRouterEntryReq { - port_name: port_name.clone(), - dest, - target: RouterTarget::InternetGateway, - }; - #[cfg(target_os = "illumos")] - hdl.add_router_entry(&route)?; - debug!( - self.inner.log, - "Added default internet gateway route entry"; - "port_name" => &port_name, - "route" => ?route, - ); info!( self.inner.log, @@ -398,6 +448,122 @@ impl PortManager { Ok((port, ticket)) } + pub fn vpc_routes_list(&self) -> Vec { + let routes = self.inner.routes.lock().unwrap(); + routes + .iter() + .map(|(k, v)| ResolvedVpcRouteState { id: *k, version: v.version }) + .collect() + } + + pub fn vpc_routes_ensure( + &self, + new_routes: Vec, + ) -> Result<(), Error> { + let mut routes = self.inner.routes.lock().unwrap(); + let mut deltas = HashMap::new(); + for new in new_routes { + // Disregard any route information for a subnet we don't have. + let Some(old) = routes.get(&new.id) else { + continue; + }; + + // We have to handle subnet router changes, as well as + // spurious updates from multiple Nexus instances. + // If there's a UUID match, only update if vers increased, + // otherwise take the update verbatim (including loss of version). 
+ let (to_add, to_delete): (HashSet<_>, HashSet<_>) = + match (old.version, new.version) { + (Some(old_vers), Some(new_vers)) + if !old_vers.is_replaced_by(&new_vers) => + { + continue; + } + _ => ( + new.routes.difference(&old.routes).cloned().collect(), + old.routes.difference(&new.routes).cloned().collect(), + ), + }; + deltas.insert(new.id, (to_add, to_delete)); + + let active_ports = old.active_ports; + routes.insert( + new.id, + RouteSet { + version: new.version, + routes: new.routes, + active_ports, + }, + ); + } + + // Note: We're deliberately holding both locks here + // to prevent several nexuses computng and applying deltas + // out of order. + let ports = self.inner.ports.lock().unwrap(); + #[cfg(target_os = "illumos")] + let hdl = opte_ioctl::OpteHdl::open(opte_ioctl::OpteHdl::XDE_CTL)?; + + // Propagate deltas out to all ports. + for port in ports.values() { + let system_id = port.system_router_key(); + let system_delta = deltas.get(&system_id); + + let custom_id = port.custom_router_key(); + let custom_delta = deltas.get(&custom_id); + + #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))] + for (class, delta) in [ + (RouterClass::System, system_delta), + (RouterClass::Custom, custom_delta), + ] { + let Some((to_add, to_delete)) = delta else { + continue; + }; + + for route in to_delete { + let route = DelRouterEntryReq { + class, + port_name: port.name().into(), + dest: super::net_to_cidr(route.dest), + target: super::router_target_opte(&route.target), + }; + + #[cfg(target_os = "illumos")] + hdl.del_router_entry(&route)?; + + debug!( + self.inner.log, + "Removed router entry"; + "port_name" => &port.name(), + "route" => ?route, + ); + } + + for route in to_add { + let route = AddRouterEntryReq { + class, + port_name: port.name().into(), + dest: super::net_to_cidr(route.dest), + target: super::router_target_opte(&route.target), + }; + + #[cfg(target_os = "illumos")] + hdl.add_router_entry(&route)?; + + debug!( + self.inner.log, + "Added 
router entry"; + "port_name" => &port.name(), + "route" => ?route, + ); + } + } + } + + Ok(()) + } + /// Ensure external IPs for an OPTE port are up to date. #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))] pub fn external_ips_ensure( @@ -739,6 +905,29 @@ impl PortTicket { ); return Err(Error::ReleaseMissingPort(self.id, self.kind)); }; + drop(ports); + + // Cleanup the set of subnets we want to receive routes for. + let mut routes = self.manager.routes.lock().unwrap(); + for key in [port.system_router_key(), port.custom_router_key()] { + let should_remove = routes + .get_mut(&key) + .map(|v| { + v.active_ports = v.active_ports.saturating_sub(1); + v.active_ports == 0 + }) + .unwrap_or_default(); + + if should_remove { + routes.remove(&key); + info!( + self.manager.log, + "Removed route set for subnet"; + "id" => ?&key, + ); + } + } + debug!( self.manager.log, "Removed OPTE port from manager"; diff --git a/nexus/db-fixed-data/src/vpc.rs b/nexus/db-fixed-data/src/vpc.rs index 25628a83b5..d5940a976e 100644 --- a/nexus/db-fixed-data/src/vpc.rs +++ b/nexus/db-fixed-data/src/vpc.rs @@ -23,12 +23,21 @@ pub static SERVICES_VPC_ROUTER_ID: Lazy = Lazy::new(|| { .expect("invalid uuid for builtin services vpc router id") }); -/// UUID of default route for built-in Services VPC. -pub static SERVICES_VPC_DEFAULT_ROUTE_ID: Lazy = Lazy::new(|| { - "001de000-074c-4000-8000-000000000002" - .parse() - .expect("invalid uuid for builtin services vpc default route id") -}); +/// UUID of default IPv4 route for built-in Services VPC. +pub static SERVICES_VPC_DEFAULT_V4_ROUTE_ID: Lazy = + Lazy::new(|| { + "001de000-074c-4000-8000-000000000002" + .parse() + .expect("invalid uuid for builtin services vpc default route id") + }); + +/// UUID of default IPv6 route for built-in Services VPC. 
+pub static SERVICES_VPC_DEFAULT_V6_ROUTE_ID: Lazy = + Lazy::new(|| { + "001de000-074c-4000-8000-000000000003" + .parse() + .expect("invalid uuid for builtin services vpc default route id") + }); /// Built-in VPC for internal services on the rack. pub static SERVICES_VPC: Lazy = Lazy::new(|| { diff --git a/nexus/db-fixed-data/src/vpc_subnet.rs b/nexus/db-fixed-data/src/vpc_subnet.rs index 622799b000..c91581ac13 100644 --- a/nexus/db-fixed-data/src/vpc_subnet.rs +++ b/nexus/db-fixed-data/src/vpc_subnet.rs @@ -31,6 +31,27 @@ pub static NTP_VPC_SUBNET_ID: Lazy = Lazy::new(|| { .expect("invalid uuid for builtin boundary ntp vpc subnet id") }); +/// UUID of the built-in VPC Subnet route for External DNS. +pub static DNS_VPC_SUBNET_ROUTE_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000004" + .parse() + .expect("invalid uuid for builtin services vpc default route id") +}); + +/// UUID of the built-in VPC Subnet route for Nexus. +pub static NEXUS_VPC_SUBNET_ROUTE_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000005" + .parse() + .expect("invalid uuid for builtin services vpc default route id") +}); + +/// UUID of the built-in VPC Subnet route for Boundary NTP. +pub static NTP_VPC_SUBNET_ROUTE_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000006" + .parse() + .expect("invalid uuid for builtin services vpc default route id") +}); + /// Built-in VPC Subnet for External DNS. pub static DNS_VPC_SUBNET: Lazy = Lazy::new(|| { VpcSubnet::new( diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index d08a51edd4..72d01f094b 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1106,6 +1106,7 @@ table! { rcgen -> Int8, ipv4_block -> Inet, ipv6_block -> Inet, + custom_router_id -> Nullable, } } @@ -1120,6 +1121,7 @@ table!
{ kind -> crate::VpcRouterKindEnum, vpc_id -> Uuid, rcgen -> Int8, + resolved_version -> Int8, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 04fafe4f93..ec94221496 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(77, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(78, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(78, "vpc-subnet-routing"), KnownVersion::new(77, "remove-view-for-v2p-mappings"), KnownVersion::new(76, "lookup-region-snapshot-by-snapshot-id"), KnownVersion::new(75, "add-cockroach-zone-id-to-node-id"), diff --git a/nexus/db-model/src/vpc_route.rs b/nexus/db-model/src/vpc_route.rs index 168ed41cef..3015df691f 100644 --- a/nexus/db-model/src/vpc_route.rs +++ b/nexus/db-model/src/vpc_route.rs @@ -18,7 +18,7 @@ use std::io::Write; use uuid::Uuid; impl_enum_wrapper!( - #[derive(SqlType, Debug)] + #[derive(SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "router_route_kind", schema = "public"))] pub struct RouterRouteKindEnum; @@ -127,6 +127,46 @@ impl RouterRoute { destination: RouteDestination::new(params.destination), } } + + /// Create a subnet routing rule for a VPC's system router. + /// + /// This defaults to use the same name as the subnet. If this would conflict + /// with the internet gateway rules, then the UUID is used instead (alongside + /// notice that a name conflict has occurred). 
+ pub fn for_subnet( + route_id: Uuid, + system_router_id: Uuid, + subnet: Name, + ) -> Self { + let forbidden_names = ["default-v4", "default-v6"]; + + let name = if forbidden_names.contains(&subnet.as_str()) { + // unwrap safety: a uuid is not by itself a valid name + // so prepend it with another string. + // - length constraint is <63 chars, + // - a UUID is 36 chars including hyphens, + // - "{subnet}-" is 11 chars + // - "conflict-" is 9 chars + // = 56 chars + format!("conflict-{subnet}-{route_id}").parse().unwrap() + } else { + subnet.0.clone() + }; + + Self::new( + route_id, + system_router_id, + external::RouterRouteKind::VpcSubnet, + params::RouterRouteCreate { + identity: external::IdentityMetadataCreateParams { + name, + description: format!("VPC Subnet route for '{subnet}'"), + }, + target: external::RouteTarget::Subnet(subnet.0.clone()), + destination: external::RouteDestination::Subnet(subnet.0), + }, + ) + } } impl Into for RouterRoute { diff --git a/nexus/db-model/src/vpc_router.rs b/nexus/db-model/src/vpc_router.rs index 71c753e6aa..51409c38d5 100644 --- a/nexus/db-model/src/vpc_router.rs +++ b/nexus/db-model/src/vpc_router.rs @@ -44,6 +44,7 @@ pub struct VpcRouter { pub vpc_id: Uuid, pub kind: VpcRouterKind, pub rcgen: Generation, + pub resolved_version: i64, } impl VpcRouter { @@ -54,7 +55,13 @@ impl VpcRouter { params: params::VpcRouterCreate, ) -> Self { let identity = VpcRouterIdentity::new(router_id, params.identity); - Self { identity, vpc_id, kind, rcgen: Generation::new() } + Self { + identity, + vpc_id, + kind, + rcgen: Generation::new(), + resolved_version: 0, + } } } diff --git a/nexus/db-model/src/vpc_subnet.rs b/nexus/db-model/src/vpc_subnet.rs index f3c90a908e..eaa7c6e87d 100644 --- a/nexus/db-model/src/vpc_subnet.rs +++ b/nexus/db-model/src/vpc_subnet.rs @@ -39,6 +39,7 @@ pub struct VpcSubnet { pub rcgen: Generation, pub ipv4_block: Ipv4Net, pub ipv6_block: Ipv6Net, + pub custom_router_id: Option, } impl VpcSubnet { @@ -60,6 +61,7 
@@ impl VpcSubnet { rcgen: Generation::new(), ipv4_block: Ipv4Net(ipv4_block), ipv6_block: Ipv6Net(ipv6_block), + custom_router_id: None, } } @@ -102,6 +104,7 @@ impl From for views::VpcSubnet { vpc_id: subnet.vpc_id, ipv4_block: subnet.ipv4_block.0, ipv6_block: subnet.ipv6_block.0, + custom_router_id: subnet.custom_router_id, } } } diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 3076afa39f..a9b406becf 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -136,11 +136,27 @@ impl DataStore { ), )); } - self.create_network_interface_raw(opctx, interface) + + let out = self + .create_network_interface_raw(opctx, interface) .await // Convert to `InstanceNetworkInterface` before returning; we know // this is valid as we've checked the condition on-entry. - .map(NetworkInterface::as_instance) + .map(NetworkInterface::as_instance)?; + + // `instance:xxx` targets in router rules resolve to the primary + // NIC of that instance. Accordingly, NIC create may cause dangling + // entries to re-resolve to a valid instance (even if it is not yet + // started). + // This will not trigger the route RPW directly, we still need to do + // so in e.g. the instance watcher task. + if out.primary { + self.vpc_increment_rpw_version(opctx, out.vpc_id) + .await + .map_err(|e| network_interface::InsertError::External(e))?; + } + + Ok(out) } /// List network interfaces associated with a given service. @@ -608,6 +624,28 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Retrieve the primary network interface for a given instance. 
+ pub async fn instance_get_primary_network_interface( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + ) -> LookupResult { + opctx.authorize(authz::Action::ListChildren, authz_instance).await?; + + use db::schema::instance_network_interface::dsl; + dsl::instance_network_interface + .filter(dsl::time_deleted.is_null()) + .filter(dsl::instance_id.eq(authz_instance.id())) + .filter(dsl::is_primary.eq(true)) + .select(InstanceNetworkInterface::as_select()) + .limit(1) + .first_async::( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Get network interface associated with a given probe. pub async fn probe_get_network_interface( &self, diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 5322e20dbf..89ee1c468e 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods on [`Vpc`]s. 
use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; @@ -20,6 +21,7 @@ use crate::db::model::InstanceNetworkInterface; use crate::db::model::Name; use crate::db::model::Project; use crate::db::model::RouterRoute; +use crate::db::model::RouterRouteKind; use crate::db::model::RouterRouteUpdate; use crate::db::model::Sled; use crate::db::model::Vni; @@ -33,6 +35,7 @@ use crate::db::model::VpcSubnetUpdate; use crate::db::model::VpcUpdate; use crate::db::model::{Ipv4Net, Ipv6Net}; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::queries::vpc::InsertVpcQuery; use crate::db::queries::vpc::VniSearchIter; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; @@ -43,6 +46,7 @@ use chrono::Utc; use diesel::prelude::*; use diesel::result::DatabaseErrorKind; use diesel::result::Error as DieselError; +use futures::stream::{self, StreamExt}; use ipnetwork::IpNetwork; use nexus_db_fixed_data::vpc::SERVICES_VPC_ID; use nexus_types::deployment::BlueprintZoneFilter; @@ -59,11 +63,16 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::api::external::RouteDestination; use omicron_common::api::external::RouteTarget; -use omicron_common::api::external::RouterRouteKind; +use omicron_common::api::external::RouterRouteKind as ExternalRouteKind; use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni as ExternalVni; +use omicron_common::api::internal::shared::RouterTarget; +use oxnet::IpNet; use ref_cast::RefCast; use std::collections::BTreeMap; +use std::collections::HashMap; +use std::collections::HashSet; +use std::net::IpAddr; use uuid::Uuid; impl DataStore { @@ -74,7 +83,8 @@ impl DataStore { ) -> Result<(), Error> { use nexus_db_fixed_data::project::SERVICES_PROJECT_ID; use nexus_db_fixed_data::vpc::SERVICES_VPC; - use 
nexus_db_fixed_data::vpc::SERVICES_VPC_DEFAULT_ROUTE_ID; + use nexus_db_fixed_data::vpc::SERVICES_VPC_DEFAULT_V4_ROUTE_ID; + use nexus_db_fixed_data::vpc::SERVICES_VPC_DEFAULT_V6_ROUTE_ID; opctx.authorize(authz::Action::Modify, &authz::DATABASE).await?; @@ -135,35 +145,49 @@ impl DataStore { .map(|(authz_router, _)| authz_router)? }; - let route = RouterRoute::new( - *SERVICES_VPC_DEFAULT_ROUTE_ID, - SERVICES_VPC.system_router_id, - RouterRouteKind::Default, - nexus_types::external_api::params::RouterRouteCreate { - identity: IdentityMetadataCreateParams { - name: "default".parse().unwrap(), - description: - "Default internet gateway route for Oxide Services" - .to_string(), + // Unwrap safety: these are known valid CIDR blocks. + let default_ips = [ + ( + "default-v4", + "0.0.0.0/0".parse().unwrap(), + *SERVICES_VPC_DEFAULT_V4_ROUTE_ID, + ), + ( + "default-v6", + "::/0".parse().unwrap(), + *SERVICES_VPC_DEFAULT_V6_ROUTE_ID, + ), + ]; + + for (name, default, uuid) in default_ips { + let route = RouterRoute::new( + uuid, + SERVICES_VPC.system_router_id, + ExternalRouteKind::Default, + nexus_types::external_api::params::RouterRouteCreate { + identity: IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: + "Default internet gateway route for Oxide Services" + .to_string(), + }, + target: RouteTarget::InternetGateway( + "outbound".parse().unwrap(), + ), + destination: RouteDestination::IpNet(default), }, - target: RouteTarget::InternetGateway( - "outbound".parse().unwrap(), - ), - destination: RouteDestination::Vpc( - SERVICES_VPC.identity.name.clone().into(), - ), - }, - ); - self.router_create_route(opctx, &authz_router, route) - .await - .map(|_| ()) - .or_else(|e| match e { - Error::ObjectAlreadyExists { .. } => Ok(()), - _ => Err(e), - })?; + ); + self.router_create_route(opctx, &authz_router, route) + .await + .map(|_| ()) + .or_else(|e| match e { + Error::ObjectAlreadyExists { .. 
} => Ok(()), + _ => Err(e), + })?; + } self.load_builtin_vpc_fw_rules(opctx).await?; - self.load_builtin_vpc_subnets(opctx).await?; + self.load_builtin_vpc_subnets(opctx, &authz_router).await?; info!(opctx.log, "created built-in services vpc"); @@ -228,10 +252,15 @@ impl DataStore { async fn load_builtin_vpc_subnets( &self, opctx: &OpContext, + authz_router: &authz::VpcRouter, ) -> Result<(), Error> { + use nexus_db_fixed_data::vpc::SERVICES_VPC; use nexus_db_fixed_data::vpc_subnet::DNS_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::DNS_VPC_SUBNET_ROUTE_ID; use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET_ROUTE_ID; use nexus_db_fixed_data::vpc_subnet::NTP_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::NTP_VPC_SUBNET_ROUTE_ID; debug!(opctx.log, "attempting to create built-in VPC Subnets"); @@ -242,9 +271,11 @@ impl DataStore { .lookup_for(authz::Action::CreateChild) .await .internal_context("lookup built-in services vpc")?; - for vpc_subnet in - [&*DNS_VPC_SUBNET, &*NEXUS_VPC_SUBNET, &*NTP_VPC_SUBNET] - { + for (vpc_subnet, route_id) in [ + (&*DNS_VPC_SUBNET, *DNS_VPC_SUBNET_ROUTE_ID), + (&*NEXUS_VPC_SUBNET, *NEXUS_VPC_SUBNET_ROUTE_ID), + (&*NTP_VPC_SUBNET, *NTP_VPC_SUBNET_ROUTE_ID), + ] { if let Ok(_) = db::lookup::LookupPath::new(opctx, self) .vpc_subnet_id(vpc_subnet.id()) .fetch() @@ -260,6 +291,20 @@ impl DataStore { Error::ObjectAlreadyExists { .. } => Ok(()), _ => Err(e), })?; + + let route = RouterRoute::for_subnet( + route_id, + SERVICES_VPC.system_router_id, + vpc_subnet.name().clone().into(), + ); + + self.router_create_route(opctx, &authz_router, route) + .await + .map(|_| ()) + .or_else(|e| match e { + Error::ObjectAlreadyExists { .. 
} => Ok(()), + _ => Err(e), + })?; } info!(opctx.log, "created built-in services vpc subnets"); @@ -770,6 +815,9 @@ impl DataStore { assert_eq!(authz_vpc.id(), subnet.vpc_id); let db_subnet = self.vpc_create_subnet_raw(subnet).await?; + self.vpc_system_router_ensure_subnet_routes(opctx, authz_vpc.id()) + .await + .map_err(SubnetError::External)?; Ok(( authz::VpcSubnet::new( authz_vpc.clone(), @@ -850,6 +898,12 @@ impl DataStore { "deletion failed due to concurrent modification", )); } else { + self.vpc_system_router_ensure_subnet_routes( + opctx, + db_subnet.vpc_id, + ) + .await?; + Ok(()) } } @@ -863,7 +917,7 @@ impl DataStore { opctx.authorize(authz::Action::Modify, authz_subnet).await?; use db::schema::vpc_subnet::dsl; - diesel::update(dsl::vpc_subnet) + let out = diesel::update(dsl::vpc_subnet) .filter(dsl::time_deleted.is_null()) .filter(dsl::id.eq(authz_subnet.id())) .set(updates) @@ -875,7 +929,11 @@ impl DataStore { e, ErrorHandler::NotFoundByResource(authz_subnet), ) - }) + })?; + + self.vpc_system_router_ensure_subnet_routes(opctx, out.vpc_id).await?; + + Ok(out) } pub async fn subnet_list_instance_network_interfaces( @@ -994,6 +1052,29 @@ impl DataStore { ErrorHandler::NotFoundByResource(authz_router), ) })?; + + // All child routes are deleted. + use db::schema::router_route::dsl as rr; + let now = Utc::now(); + diesel::update(rr::router_route) + .filter(rr::time_deleted.is_null()) + .filter(rr::vpc_router_id.eq(authz_router.id())) + .set(rr::time_deleted.eq(now)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // Unlink all subnets from this router. + // XXX: We might want this to error out before the delete fires.
+ use db::schema::vpc_subnet::dsl as vpc; + diesel::update(vpc::vpc_subnet) + .filter(vpc::time_deleted.is_null()) + .filter(vpc::custom_router_id.eq(authz_router.id())) + .set(vpc::custom_router_id.eq(Option::::None)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(()) } @@ -1059,6 +1140,17 @@ impl DataStore { assert_eq!(authz_router.id(), route.vpc_router_id); opctx.authorize(authz::Action::CreateChild, authz_router).await?; + Self::router_create_route_on_connection( + route, + &*self.pool_connection_authorized(opctx).await?, + ) + .await + } + + pub async fn router_create_route_on_connection( + route: RouterRoute, + conn: &async_bb8_diesel::Connection, + ) -> CreateResult { use db::schema::router_route::dsl; let router_id = route.vpc_router_id; let name = route.name().clone(); @@ -1067,9 +1159,7 @@ impl DataStore { router_id, diesel::insert_into(dsl::router_route).values(route), ) - .insert_and_get_result_async( - &*self.pool_connection_authorized(opctx).await?, - ) + .insert_and_get_result_async(conn) .await .map_err(|e| match e { AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { @@ -1221,6 +1311,487 @@ impl DataStore { ) }) } + + /// Ensure the system router for a VPC has the correct set of subnet + /// routing rules, after any changes to a subnet. + pub async fn vpc_system_router_ensure_subnet_routes( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> Result<(), Error> { + // These rules are immutable from a user's perspective, and + // aren't something which they can meaningfully interact with, + // so uuid stability on e.g. VPC rename is not a primary concern. + // We make sure only to alter VPC subnet rules here: users may + // modify other system routes like internet gateways (which are + // `RouteKind::Default`). 
+ let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("vpc_subnet_route_reconcile") + .transaction(&conn, |conn| async move { + use db::schema::router_route::dsl; + use db::schema::vpc::dsl as vpc; + use db::schema::vpc_subnet::dsl as subnet; + + let system_router_id = vpc::vpc + .filter(vpc::id.eq(vpc_id)) + .filter(vpc::time_deleted.is_null()) + .select(vpc::system_router_id) + .limit(1) + .get_result_async(&conn) + .await?; + + let valid_subnets: Vec = subnet::vpc_subnet + .filter(subnet::vpc_id.eq(vpc_id)) + .filter(subnet::time_deleted.is_null()) + .select(VpcSubnet::as_select()) + .load_async(&conn) + .await?; + + let current_rules: Vec = dsl::router_route + .filter( + dsl::kind + .eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)), + ) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::vpc_router_id.eq(system_router_id)) + .select(RouterRoute::as_select()) + .load_async(&conn) + .await?; + + // Build the add/delete sets. + let expected_names: HashSet = valid_subnets + .iter() + .map(|v| v.identity.name.clone()) + .collect(); + + // This checks that we have rules which *point to* the named + // subnets, rather than working with rule names (even if these + // are set to match the subnet where possible). + // Rule names are effectively randomised when someone, e.g., + // names a subnet "default-v4"/"-v6", and this prevents us + // from repeatedly adding/deleting that route. + let mut found_names = HashSet::new(); + let mut invalid = Vec::new(); + for rule in current_rules { + let id = rule.id(); + match (rule.kind.0, rule.target.0) { + ( + ExternalRouteKind::VpcSubnet, + RouteTarget::Subnet(n), + ) if expected_names.contains(Name::ref_cast(&n)) => { + let _ = found_names.insert(n.into()); + } + _ => invalid.push(id), + } + } + + // Add/Remove routes. Retry if number is incorrect due to + // concurrent modification. 
+ let now = Utc::now(); + let to_update = invalid.len(); + let updated_rows = diesel::update(dsl::router_route) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq_any(invalid)) + .set(dsl::time_deleted.eq(now)) + .execute_async(&conn) + .await?; + + if updated_rows != to_update { + return Err(DieselError::RollbackTransaction); + } + + // Duplicate rules are caught here using the UNIQUE constraint + // on names in a router. Only nexus can alter the system router, + // so there is no risk of collision with user-specified names. + // + // Subnets named "default-v4" or "default-v6" have their rules renamed + // to include the rule UUID. + for subnet in expected_names.difference(&found_names) { + let route_id = Uuid::new_v4(); + let route = db::model::RouterRoute::for_subnet( + route_id, + system_router_id, + subnet.clone(), + ); + + match Self::router_create_route_on_connection(route, &conn) + .await + { + Err(Error::Conflict { .. }) => { + return Err(DieselError::RollbackTransaction) + } + Err(_) => return Err(DieselError::NotFound), + _ => {} + } + } + + // Verify that route set is exactly as intended, and rollback otherwise. + let current_rules: Vec = dsl::router_route + .filter( + dsl::kind + .eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)), + ) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::vpc_router_id.eq(system_router_id)) + .select(RouterRoute::as_select()) + .load_async(&conn) + .await?; + + if current_rules.len() != expected_names.len() { + return Err(DieselError::RollbackTransaction); + } + + for rule in current_rules { + match (rule.kind.0, rule.target.0) { + ( + ExternalRouteKind::VpcSubnet, + RouteTarget::Subnet(n), + ) if expected_names.contains(Name::ref_cast(&n)) => {} + _ => return Err(DieselError::RollbackTransaction), + } + } + + Ok(()) + }) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + self.vpc_increment_rpw_version(opctx, vpc_id).await + } + + /// Look up the system router for a VPC.
+ pub async fn vpc_get_system_router( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> LookupResult { + use db::schema::vpc::dsl as vpc_dsl; + use db::schema::vpc_router::dsl as router_dsl; + + vpc_dsl::vpc + .inner_join( + router_dsl::vpc_router + .on(router_dsl::id.eq(vpc_dsl::system_router_id)), + ) + .filter(vpc_dsl::time_deleted.is_null()) + .filter(vpc_dsl::id.eq(vpc_id)) + .filter(router_dsl::time_deleted.is_null()) + .filter(router_dsl::vpc_id.eq(vpc_id)) + .select(VpcRouter::as_select()) + .limit(1) + .first_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Vpc, + LookupType::ById(vpc_id), + ), + ) + }) + } + + /// Fetch all active custom routers (and their parent subnets) + /// in a VPC. + pub async fn vpc_get_active_custom_routers( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> ListResultVec<(VpcSubnet, VpcRouter)> { + use db::schema::vpc_router::dsl as router_dsl; + use db::schema::vpc_subnet::dsl as subnet_dsl; + + subnet_dsl::vpc_subnet + .inner_join( + router_dsl::vpc_router.on(router_dsl::id + .nullable() + .eq(subnet_dsl::custom_router_id)), + ) + .filter(subnet_dsl::time_deleted.is_null()) + .filter(subnet_dsl::vpc_id.eq(vpc_id)) + .filter(router_dsl::time_deleted.is_null()) + .filter(router_dsl::vpc_id.eq(vpc_id)) + .select((VpcSubnet::as_select(), VpcRouter::as_select())) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Vpc, + LookupType::ById(vpc_id), + ), + ) + }) + } + + /// Resolve all targets in a router into concrete details. + pub async fn vpc_resolve_router_rules( + &self, + opctx: &OpContext, + vpc_router_id: Uuid, + ) -> Result, Error> { + // Get all rules in target router. 
+ opctx.check_complex_operations_allowed()?; + + let (.., authz_project, authz_vpc, authz_router) = + db::lookup::LookupPath::new(opctx, self) + .vpc_router_id(vpc_router_id) + .lookup_for(authz::Action::Read) + .await + .internal_context("lookup router by id for rules")?; + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + let mut all_rules = vec![]; + while let Some(p) = paginator.next() { + let batch = self + .vpc_router_route_list( + opctx, + &authz_router, + &PaginatedBy::Id(p.current_pagparams()), + ) + .await?; + paginator = p + .found_batch(&batch, &|s: &nexus_db_model::RouterRoute| s.id()); + all_rules.extend(batch); + } + + // This is not in a transaction, because... + // We're not necessarily too concerned about getting partially + // updated state when resolving these names. See the header discussion + // in `nexus/src/app/background/vpc_routes.rs`: any state updates + // are followed by a version bump/notify, so we will be eventually + // consistent with route resolution. + let mut subnet_names = HashSet::new(); + let mut vpc_names = HashSet::new(); + let mut inetgw_names = HashSet::new(); + let mut instance_names = HashSet::new(); + for rule in &all_rules { + match &rule.target.0 { + RouteTarget::Vpc(n) => { + vpc_names.insert(n.clone()); + } + RouteTarget::Subnet(n) => { + subnet_names.insert(n.clone()); + } + RouteTarget::Instance(n) => { + instance_names.insert(n.clone()); + } + RouteTarget::InternetGateway(n) => { + inetgw_names.insert(n.clone()); + } + _ => {} + } + + match &rule.destination.0 { + RouteDestination::Vpc(n) => { + vpc_names.insert(n.clone()); + } + RouteDestination::Subnet(n) => { + subnet_names.insert(n.clone()); + } + _ => {} + } + } + + // TODO: This would be nice to solve in fewer queries. 
+ let subnets = stream::iter(subnet_names) + .filter_map(|name| async { + db::lookup::LookupPath::new(opctx, self) + .vpc_id(authz_vpc.id()) + .vpc_subnet_name(Name::ref_cast(&name)) + .fetch() + .await + .ok() + .map(|(.., subnet)| (name, subnet)) + }) + .collect::>() + .await; + + // TODO: unused until VPC peering. + let _vpcs = stream::iter(vpc_names) + .filter_map(|name| async { + db::lookup::LookupPath::new(opctx, self) + .project_id(authz_project.id()) + .vpc_name(Name::ref_cast(&name)) + .fetch() + .await + .ok() + .map(|(.., vpc)| (name, vpc)) + }) + .collect::>() + .await; + + let instances = stream::iter(instance_names) + .filter_map(|name| async { + db::lookup::LookupPath::new(opctx, self) + .project_id(authz_project.id()) + .instance_name(Name::ref_cast(&name)) + .fetch() + .await + .ok() + .map(|(.., auth, inst)| (name, auth, inst)) + }) + .filter_map(|(name, authz_instance, instance)| async move { + // XXX: currently an instance can have one primary NIC, + // and it is not dual-stack (v4 + v6). We need + // to clarify what should be resolved in the v6 case. + self.instance_get_primary_network_interface( + opctx, + &authz_instance, + ) + .await + .ok() + .map(|primary_nic| (name, (instance, primary_nic))) + }) + .collect::>() + .await; + + // TODO: validate names of Internet Gateways. + + // See the discussion in `resolve_firewall_rules_for_sled_agent` on + // how we should resolve name misses in route resolution. + // This method adopts the same strategy: a lookup failure corresponds + // to a NO-OP rule. + let mut out = HashMap::new(); + for rule in all_rules { + // Some dests/targets (e.g., subnet) resolve to *several* specifiers + // to handle both v4 and v6. The user-facing API will prevent severe + // mistakes on naked IPs/CIDRs (mixed v4/6), but we need to be smarter + // around named entities here. 
+ let (v4_dest, v6_dest) = match rule.destination.0 { + RouteDestination::Ip(ip @ IpAddr::V4(_)) => { + (Some(IpNet::host_net(ip)), None) + } + RouteDestination::Ip(ip @ IpAddr::V6(_)) => { + (None, Some(IpNet::host_net(ip))) + } + RouteDestination::IpNet(ip @ IpNet::V4(_)) => (Some(ip), None), + RouteDestination::IpNet(ip @ IpNet::V6(_)) => (None, Some(ip)), + RouteDestination::Subnet(n) => subnets + .get(&n) + .map(|s| { + ( + Some(s.ipv4_block.0.into()), + Some(s.ipv6_block.0.into()), + ) + }) + .unwrap_or_default(), + + // TODO: VPC peering. + RouteDestination::Vpc(_) => (None, None), + }; + + let (v4_target, v6_target) = match rule.target.0 { + RouteTarget::Ip(ip @ IpAddr::V4(_)) => { + (Some(RouterTarget::Ip(ip)), None) + } + RouteTarget::Ip(ip @ IpAddr::V6(_)) => { + (None, Some(RouterTarget::Ip(ip))) + } + RouteTarget::Subnet(n) => subnets + .get(&n) + .map(|s| { + ( + Some(RouterTarget::VpcSubnet( + s.ipv4_block.0.into(), + )), + Some(RouterTarget::VpcSubnet( + s.ipv6_block.0.into(), + )), + ) + }) + .unwrap_or_default(), + RouteTarget::Instance(n) => instances + .get(&n) + .map(|i| match i.1.ip { + // TODO: update for dual-stack v4/6. + ip @ IpNetwork::V4(_) => { + (Some(RouterTarget::Ip(ip.ip())), None) + } + ip @ IpNetwork::V6(_) => { + (None, Some(RouterTarget::Ip(ip.ip()))) + } + }) + .unwrap_or_default(), + RouteTarget::Drop => { + (Some(RouterTarget::Drop), Some(RouterTarget::Drop)) + } + + // TODO: Internet Gateways. + // The semantic here is 'name match => allow', + // as the other aspect they will control is SNAT + // IP allocation. Today, presence of this rule + // allows upstream regardless of name. + RouteTarget::InternetGateway(_n) => ( + Some(RouterTarget::InternetGateway), + Some(RouterTarget::InternetGateway), + ), + + // TODO: VPC Peering. + RouteTarget::Vpc(_) => (None, None), + }; + + // XXX: Is there another way we should be handling destination + // collisions within a router? 
'first/last wins' is fairly + // arbitrary when lookups are sorted on UUID, but it's + // unpredictable. + // It would be really useful to raise collisions and + // misses to users, somehow. + if let (Some(dest), Some(target)) = (v4_dest, v4_target) { + out.insert(dest, target); + } + + if let (Some(dest), Some(target)) = (v6_dest, v6_target) { + out.insert(dest, target); + } + } + + Ok(out) + } + + /// Trigger an RPW version bump on a single VPC router in response + /// to CRUD operations on individual routes. + pub async fn vpc_router_increment_rpw_version( + &self, + opctx: &OpContext, + router_id: Uuid, + ) -> UpdateResult<()> { + // NOTE: this operation and `vpc_increment_rpw_version` do not + // have auth checks, as these can occur in connection with unrelated + // resources -- the current user may have access to those, but be unable + // to modify the entire set of VPC routers in a project. + + use db::schema::vpc_router::dsl; + diesel::update(dsl::vpc_router) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(router_id)) + .set(dsl::resolved_version.eq(dsl::resolved_version + 1)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } + + /// Trigger an RPW version bump on *all* routers within a VPC in + /// response to changes to named entities (e.g., subnets, instances). + pub async fn vpc_increment_rpw_version( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> UpdateResult<()> { + use db::schema::vpc_router::dsl; + diesel::update(dsl::vpc_router) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::vpc_id.eq(vpc_id)) + .set(dsl::resolved_version.eq(dsl::resolved_version + 1)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } } #[cfg(test)] @@ -1232,6 +1803,7 @@ mod tests { use crate::db::datastore::test_utils::IneligibleSleds; use crate::db::model::Project; use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; + use nexus_db_fixed_data::silo::DEFAULT_SILO; use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use nexus_db_model::IncompleteNetworkInterface; use nexus_db_model::SledUpdate; @@ -1249,7 +1821,10 @@ mod tests { use omicron_common::api::external::Generation; use omicron_test_utils::dev; use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::SledUuid; + use oxnet::IpNet; + use oxnet::Ipv4Net; use slog::info; // Test that we detect the right error condition and return None when we @@ -1748,4 +2323,487 @@ mod tests { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + async fn create_initial_vpc( + log: &slog::Logger, + opctx: &OpContext, + datastore: &DataStore, + ) -> (authz::Project, authz::Vpc, Vpc, authz::VpcRouter, VpcRouter) { + // Create a project and VPC. 
+ let project_params = params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: String::from("test project"), + }, + }; + let project = Project::new(DEFAULT_SILO.id(), project_params); + let (authz_project, _) = datastore + .project_create(&opctx, project) + .await + .expect("failed to create project"); + + let vpc_name: external::Name = "my-vpc".parse().unwrap(); + let description = String::from("test vpc"); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: vpc_name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: vpc_name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = Vni(external::Vni::try_from(2048).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating initial VPC"; + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let (authz_vpc, db_vpc) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + .expect("failed to create initial set of VPCs") + .expect("expected an actual VPC"); + info!( + log, + "created VPC"; + "vpc" => ?db_vpc, + ); + + // Now create the system router for this VPC. Subnet CRUD + // operations need this defined to succeed. 
+ let router = VpcRouter::new( + db_vpc.system_router_id, + db_vpc.id(), + VpcRouterKind::System, + nexus_types::external_api::params::VpcRouterCreate { + identity: IdentityMetadataCreateParams { + name: "system".parse().unwrap(), + description: description.clone(), + }, + }, + ); + + let (authz_router, db_router) = datastore + .vpc_create_router(&opctx, &authz_vpc, router) + .await + .unwrap(); + + (authz_project, authz_vpc, db_vpc, authz_router, db_router) + } + + async fn new_subnet_ez( + opctx: &OpContext, + datastore: &DataStore, + db_vpc: &Vpc, + authz_vpc: &authz::Vpc, + name: &str, + ip: [u8; 4], + prefix_len: u8, + ) -> (authz::VpcSubnet, VpcSubnet) { + let ipv6_block = db_vpc + .ipv6_prefix + .random_subnet( + omicron_common::address::VPC_SUBNET_IPV6_PREFIX_LENGTH, + ) + .map(|block| block.0) + .unwrap(); + + datastore + .vpc_create_subnet( + &opctx, + &authz_vpc, + db::model::VpcSubnet::new( + Uuid::new_v4(), + db_vpc.id(), + IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: "A subnet...".into(), + }, + Ipv4Net::new(core::net::Ipv4Addr::from(ip), prefix_len) + .unwrap(), + ipv6_block, + ), + ) + .await + .unwrap() + } + + // Test to verify that subnet CRUD operations are correctly + // reflected in the nexus-managed system router attached to a VPC, + // and that these resolve to the v4/6 subnets of each. + #[tokio::test] + async fn test_vpc_system_router_sync_to_subnets() { + usdt::register_probes().unwrap(); + let logctx = + dev::test_setup_log("test_vpc_system_router_sync_to_subnets"); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let (_, authz_vpc, db_vpc, _, db_router) = + create_initial_vpc(log, &opctx, &datastore).await; + + // InternetGateway route creation is handled by the saga proper, + // so we'll only have subnet routes here. 
Initially, we start with none: + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[], + ) + .await; + + // Add a new subnet and we should get a new route. + let (authz_sub0, sub0) = new_subnet_ez( + &opctx, + &datastore, + &db_vpc, + &authz_vpc, + "s0", + [172, 30, 0, 0], + 22, + ) + .await; + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub0], + ) + .await; + + // Add another, and get another route. + let (authz_sub1, sub1) = new_subnet_ez( + &opctx, + &datastore, + &db_vpc, + &authz_vpc, + "s1", + [172, 31, 0, 0], + 22, + ) + .await; + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub0, &sub1], + ) + .await; + + // Rename one subnet, and our invariants should hold. + let sub0 = datastore + .vpc_update_subnet( + &opctx, + &authz_sub0, + VpcSubnetUpdate { + name: Some( + "a-new-name".parse::().unwrap().into(), + ), + description: None, + time_modified: Utc::now(), + }, + ) + .await + .unwrap(); + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub0, &sub1], + ) + .await; + + // Delete one, and routes should stay in sync. + datastore.vpc_delete_subnet(&opctx, &sub0, &authz_sub0).await.unwrap(); + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub1], + ) + .await; + + // If we use a reserved name, we should be able to update the table. + let sub1 = datastore + .vpc_update_subnet( + &opctx, + &authz_sub1, + VpcSubnetUpdate { + name: Some( + "default-v4".parse::().unwrap().into(), + ), + description: None, + time_modified: Utc::now(), + }, + ) + .await + .unwrap(); + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub1], + ) + .await; + + // Ditto for adding such a route. 
+ let (_, sub0) = new_subnet_ez( + &opctx, + &datastore, + &db_vpc, + &authz_vpc, + "default-v6", + [172, 30, 0, 0], + 22, + ) + .await; + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub0, &sub1], + ) + .await; + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + async fn verify_all_subnet_routes_in_router( + opctx: &OpContext, + datastore: &DataStore, + router_id: Uuid, + subnets: &[&VpcSubnet], + ) -> Vec { + let conn = datastore.pool_connection_authorized(opctx).await.unwrap(); + + use db::schema::router_route::dsl; + let routes = dsl::router_route + .filter(dsl::time_deleted.is_null()) + .filter(dsl::vpc_router_id.eq(router_id)) + .filter(dsl::kind.eq(RouterRouteKind(ExternalRouteKind::VpcSubnet))) + .select(RouterRoute::as_select()) + .load_async(&*conn) + .await + .unwrap(); + + // We should have exactly as many subnet routes as subnets. + assert_eq!(routes.len(), subnets.len()); + + let mut names: HashMap<_, _> = + subnets.iter().map(|s| (s.name().clone(), 0usize)).collect(); + + // Each should have a target+dest bound to a subnet by name. + for route in &routes { + let found_name = match &route.target.0 { + RouteTarget::Subnet(name) => name, + e => panic!("found target {e:?} instead of Subnet({{name}})"), + }; + + match &route.destination.0 { + RouteDestination::Subnet(name) => assert_eq!(name, found_name), + e => panic!("found dest {e:?} instead of Subnet({{name}})"), + } + + *names.get_mut(found_name).unwrap() += 1; + } + + // Each name should be used exactly once. + for (name, count) in names { + assert_eq!(count, 1, "subnet {name} should appear exactly once") + } + + // Resolve the routes: we should have two for each entry: + let resolved = datastore + .vpc_resolve_router_rules(&opctx, router_id) + .await + .unwrap(); + assert_eq!(resolved.len(), 2 * subnets.len()); + + // And each subnet generates a v4->v4 and v6->v6. 
+ for subnet in subnets { + assert!(resolved.iter().any(|(k, v)| { + *k == subnet.ipv4_block.0.into() + && match v { + RouterTarget::VpcSubnet(ip) => { + *ip == subnet.ipv4_block.0.into() + } + _ => false, + } + })); + assert!(resolved.iter().any(|(k, v)| { + *k == subnet.ipv6_block.0.into() + && match v { + RouterTarget::VpcSubnet(ip) => { + *ip == subnet.ipv6_block.0.into() + } + _ => false, + } + })); + } + + routes + } + + // Test to verify that VPC routers resolve to the primary addr + // of an instance NIC. + #[tokio::test] + async fn test_vpc_router_rule_instance_resolve() { + usdt::register_probes().unwrap(); + let logctx = + dev::test_setup_log("test_vpc_router_rule_instance_resolve"); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let (authz_project, authz_vpc, db_vpc, authz_router, _) = + create_initial_vpc(log, &opctx, &datastore).await; + + // Create a subnet for an instance to live in. + let (authz_sub0, sub0) = new_subnet_ez( + &opctx, + &datastore, + &db_vpc, + &authz_vpc, + "s0", + [172, 30, 0, 0], + 22, + ) + .await; + + // Add a rule pointing to the instance before it is created. + // We're committing some minor data integrity sins by putting + // these into a system router, but that's irrelevant to resolution. 
+ let inst_name = "insty".parse::().unwrap(); + let _ = datastore + .router_create_route( + &opctx, + &authz_router, + RouterRoute::new( + Uuid::new_v4(), + authz_router.id(), + external::RouterRouteKind::Custom, + params::RouterRouteCreate { + identity: IdentityMetadataCreateParams { + name: "to-vpn".parse().unwrap(), + description: "A rule...".into(), + }, + target: external::RouteTarget::Instance( + inst_name.clone(), + ), + destination: external::RouteDestination::IpNet( + "192.168.0.0/16".parse().unwrap(), + ), + }, + ), + ) + .await + .unwrap(); + + // Resolve the rules: we will have two entries generated by the + // VPC subnet (v4, v6). + let routes = datastore + .vpc_resolve_router_rules(&opctx, authz_router.id()) + .await + .unwrap(); + + assert_eq!(routes.len(), 2); + + // Create an instance, this will have no effect for now as + // the instance lacks a NIC. + let db_inst = datastore + .project_create_instance( + &opctx, + &authz_project, + db::model::Instance::new( + InstanceUuid::new_v4(), + authz_project.id(), + ¶ms::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: inst_name.clone(), + description: "An instance...".into(), + }, + ncpus: external::InstanceCpuCount(1), + memory: 10.into(), + hostname: "insty".parse().unwrap(), + user_data: vec![], + network_interfaces: + params::InstanceNetworkInterfaceAttachment::None, + external_ips: vec![], + disks: vec![], + ssh_public_keys: None, + start: false, + }, + ), + ) + .await + .unwrap(); + let (.., authz_instance) = + db::lookup::LookupPath::new(&opctx, &datastore) + .instance_id(db_inst.id()) + .lookup_for(authz::Action::CreateChild) + .await + .unwrap(); + + let routes = datastore + .vpc_resolve_router_rules(&opctx, authz_router.id()) + .await + .unwrap(); + + assert_eq!(routes.len(), 2); + + // Create a primary NIC on the instance; the route can now resolve + // to the instance's IP. 
+ let nic = datastore + .instance_create_network_interface( + &opctx, + &authz_sub0, + &authz_instance, + IncompleteNetworkInterface::new_instance( + Uuid::new_v4(), + InstanceUuid::from_untyped_uuid(db_inst.id()), + sub0, + IdentityMetadataCreateParams { + name: "nic".parse().unwrap(), + description: "A NIC...".into(), + }, + None, + ) + .unwrap(), + ) + .await + .unwrap(); + + let routes = datastore + .vpc_resolve_router_rules(&opctx, authz_router.id()) + .await + .unwrap(); + + // Verify we now have a route pointing at this instance. + assert_eq!(routes.len(), 3); + assert!(routes.iter().any(|(k, v)| (*k + == "192.168.0.0/16".parse::().unwrap()) + && match v { + RouterTarget::Ip(ip) => *ip == nic.ip.ip(), + _ => false, + })); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index c8e40c80a1..3f569afc85 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -25,6 +25,7 @@ use super::tasks::service_firewall_rules; use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker; use super::tasks::sync_switch_configuration::SwitchPortSettingsManager; use super::tasks::v2p_mappings::V2PManager; +use super::tasks::vpc_routes; use super::Driver; use super::TaskHandle; use crate::app::oximeter::PRODUCER_LEASE_DURATION; @@ -118,6 +119,9 @@ pub struct BackgroundTasks { /// task handle for deletion of database records for VMMs abandoned by their /// instances. 
pub task_abandoned_vmm_reaper: TaskHandle, + + /// task handle for propagation of VPC router rules to all OPTE ports + pub task_vpc_route_manager: TaskHandle, } impl BackgroundTasks { @@ -457,6 +461,19 @@ impl BackgroundTasks { vec![], ); + // Background task: OPTE port route propagation + let task_vpc_route_manager = { + let watcher = vpc_routes::VpcRouteManager::new(datastore.clone()); + driver.register( + "vpc_route_manager".to_string(), + "propagates updated VPC routes to all OPTE ports".into(), + config.switch_port_settings_manager.period_secs, + Box::new(watcher), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + // Background task: abandoned VMM reaping let task_abandoned_vmm_reaper = driver.register( String::from("abandoned_vmm_reaper"), @@ -495,6 +512,7 @@ impl BackgroundTasks { task_instance_watcher, task_service_firewall_propagation, task_abandoned_vmm_reaper, + task_vpc_route_manager, } } diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index 3886b43a30..cb2ab46c2a 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -26,3 +26,4 @@ pub mod service_firewall_rules; pub mod sync_service_zone_nat; pub mod sync_switch_configuration; pub mod v2p_mappings; +pub mod vpc_routes; diff --git a/nexus/src/app/background/tasks/vpc_routes.rs b/nexus/src/app/background/tasks/vpc_routes.rs new file mode 100644 index 0000000000..5ba428308b --- /dev/null +++ b/nexus/src/app/background/tasks/vpc_routes.rs @@ -0,0 +1,283 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for propagating VPC routes (system and custom) to sleds. 
+ +use crate::app::background::BackgroundTask; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::{Sled, SledState, Vni}; +use nexus_db_queries::{context::OpContext, db::DataStore}; +use nexus_networking::sled_client_from_address; +use nexus_types::{ + deployment::SledFilter, external_api::views::SledPolicy, identity::Asset, + identity::Resource, +}; +use omicron_common::api::internal::shared::{ + ResolvedVpcRoute, ResolvedVpcRouteSet, RouterId, RouterKind, RouterVersion, +}; +use serde_json::json; +use std::collections::hash_map::Entry; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; +use uuid::Uuid; + +pub struct VpcRouteManager { + datastore: Arc, +} + +impl VpcRouteManager { + pub fn new(datastore: Arc) -> Self { + Self { datastore } + } +} + +// This RPW doesn't concern itself overly much with resolved router targets +// and destinations being partial wrt. the current generation, in the same +// vein as how firewall rules are handled. Gating *pushing* this update on a +// generation number can be a bit more risky, but there's a sort of eventual +// consistency happening here that keeps this safe. +// +// Any location which updates name-resolvable state follows the pattern: +// * Update state. +// * Update (VPC-wide) router generation numbers. +// * Awaken this task. This might happen indirectly via e.g. instance start. +// +// As a result, any update which accidentally sees partial state will be followed +// by re-triggering this RPW with a higher generation number, giving us a re-resolved +// route set and pushing to any relevant sleds. 
+impl BackgroundTask for VpcRouteManager { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let log = &opctx.log; + + let sleds = match self + .datastore + .sled_list_all_batched(opctx, SledFilter::InService) + .await + { + Ok(v) => v, + Err(e) => { + let msg = format!("failed to enumerate sleds: {:#}", e); + error!(&log, "{msg}"); + return json!({"error": msg}); + } + } + .into_iter() + .filter(|sled| { + matches!(sled.state(), SledState::Active) + && matches!(sled.policy(), SledPolicy::InService { .. }) + }); + + // Map sled db records to sled-agent clients + let sled_clients: Vec<(Sled, sled_agent_client::Client)> = sleds + .map(|sled| { + let client = sled_client_from_address( + sled.id(), + sled.address(), + &log, + ); + (sled, client) + }) + .collect(); + + let mut known_rules: HashMap> = + HashMap::new(); + let mut db_routers = HashMap::new(); + let mut vni_to_vpc = HashMap::new(); + + for (sled, client) in sled_clients { + let Ok(route_sets) = client.list_vpc_routes().await else { + warn!( + log, + "failed to fetch current VPC route state from sled"; + "sled" => sled.serial_number(), + ); + continue; + }; + + let route_sets = route_sets.into_inner(); + + // Lookup all VPC<->Subnet<->Router associations we might need, + // based on the set of VNIs reported by this sled. + // These provide the versions we'll stick with -- in the worst + // case we push newer state to a sled with an older generation + // number, which will be fixed up on the next activation. 
+ for set in &route_sets { + let db_vni = Vni(set.id.vni); + let maybe_vpc = vni_to_vpc.entry(set.id.vni); + let vpc = match maybe_vpc { + Entry::Occupied(_) => { + continue; + } + Entry::Vacant(v) => { + let Ok(vpc) = self + .datastore + .resolve_vni_to_vpc(opctx, db_vni) + .await + else { + error!( + log, + "failed to fetch VPC from VNI"; + "sled" => sled.serial_number(), + "vni" => ?db_vni + ); + continue; + }; + + v.insert(vpc) + } + }; + + let vpc_id = vpc.identity().id; + + let Ok(system_router) = self + .datastore + .vpc_get_system_router(opctx, vpc_id) + .await + else { + error!( + log, + "failed to fetch system router for VPC"; + "vpc" => vpc_id.to_string() + ); + continue; + }; + + let Ok(custom_routers) = self + .datastore + .vpc_get_active_custom_routers(opctx, vpc_id) + .await + else { + error!( + log, + "failed to fetch custom routers for VPC"; + "vpc" => vpc_id.to_string() + ); + continue; + }; + + db_routers.insert( + RouterId { vni: set.id.vni, kind: RouterKind::System }, + system_router, + ); + db_routers.extend(custom_routers.iter().map( + |(subnet, router)| { + ( + RouterId { + vni: set.id.vni, + kind: RouterKind::Custom( + subnet.ipv4_block.0.into(), + ), + }, + router.clone(), + ) + }, + )); + db_routers.extend(custom_routers.into_iter().map( + |(subnet, router)| { + ( + RouterId { + vni: set.id.vni, + kind: RouterKind::Custom( + subnet.ipv6_block.0.into(), + ), + }, + router, + ) + }, + )); + } + + let mut to_push = Vec::new(); + let mut set_rules = |id, version, routes| { + to_push.push(ResolvedVpcRouteSet { id, routes, version }); + }; + + // resolve into known_rules on an as-needed basis. + for set in &route_sets { + let Some(db_router) = db_routers.get(&set.id) else { + // The sled wants to know about rules for a VPC + // subnet with no custom router set. Send them + // the empty list, and unset its table version. 
+ set_rules(set.id, None, HashSet::new()); + continue; + }; + + let router_id = db_router.id(); + let version = RouterVersion { + version: db_router.resolved_version as u64, + router_id, + }; + + // Only attempt to resolve/push a ruleset if we have a + // different router ID than the sled, or a higher version + // number. + match &set.version { + Some(v) if !v.is_replaced_by(&version) => { + continue; + } + _ => {} + } + + // We may have already resolved the rules for this + // router in a previous iteration. + if let Some(rules) = known_rules.get(&router_id) { + set_rules(set.id, Some(version), rules.clone()); + continue; + } + + match self + .datastore + .vpc_resolve_router_rules( + opctx, + db_router.identity().id, + ) + .await + { + Ok(rules) => { + let collapsed: HashSet<_> = rules + .into_iter() + .map(|(dest, target)| ResolvedVpcRoute { + dest, + target, + }) + .collect(); + set_rules(set.id, Some(version), collapsed.clone()); + known_rules.insert(router_id, collapsed); + } + Err(e) => { + error!( + &log, + "failed to compute subnet routes"; + "router" => router_id.to_string(), + "err" => e.to_string() + ); + } + } + } + + if !to_push.is_empty() { + if let Err(e) = client.set_vpc_routes(&to_push).await { + error!( + log, + "failed to push new VPC route state from sled"; + "sled" => sled.serial_number(), + "err" => ?e + ); + continue; + }; + } + } + + json!({}) + } + .boxed() + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index ee70b8bb06..6b4d87063a 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1551,6 +1551,7 @@ impl super::Nexus { self.v2p_notification_tx.clone(), ) .await?; + self.vpc_needed_notify_sleds(); Ok(()) } diff --git a/nexus/src/app/sagas/vpc_create.rs b/nexus/src/app/sagas/vpc_create.rs index cc40a8d43a..2b6615ad40 100644 --- a/nexus/src/app/sagas/vpc_create.rs +++ b/nexus/src/app/sagas/vpc_create.rs @@ -17,6 +17,7 @@ use omicron_common::api::external::LookupType; use 
omicron_common::api::external::RouteDestination; use omicron_common::api::external::RouteTarget; use omicron_common::api::external::RouterRouteKind; +use oxnet::IpNet; use serde::Deserialize; use serde::Serialize; use steno::ActionError; @@ -44,9 +45,13 @@ declare_saga_actions! { + svc_create_router - svc_create_router_undo } - VPC_CREATE_ROUTE -> "route" { - + svc_create_route - - svc_create_route_undo + VPC_CREATE_V4_ROUTE -> "route4" { + + svc_create_v4_route + - svc_create_v4_route_undo + } + VPC_CREATE_V6_ROUTE -> "route6" { + + svc_create_v6_route + - svc_create_v6_route_undo } VPC_CREATE_SUBNET -> "subnet" { + svc_create_subnet @@ -79,8 +84,13 @@ pub fn create_dag( ACTION_GENERATE_ID.as_ref(), )); builder.append(Node::action( - "default_route_id", - "GenerateDefaultRouteId", + "default_v4_route_id", + "GenerateDefaultV4RouteId", + ACTION_GENERATE_ID.as_ref(), + )); + builder.append(Node::action( + "default_v6_route_id", + "GenerateDefaultV6RouteId", ACTION_GENERATE_ID.as_ref(), )); builder.append(Node::action( @@ -90,7 +100,8 @@ pub fn create_dag( )); builder.append(vpc_create_vpc_action()); builder.append(vpc_create_router_action()); - builder.append(vpc_create_route_action()); + builder.append(vpc_create_v4_route_action()); + builder.append(vpc_create_v6_route_action()); builder.append(vpc_create_subnet_action()); builder.append(vpc_update_firewall_action()); builder.append(vpc_notify_sleds_action()); @@ -217,8 +228,45 @@ async fn svc_create_router_undo( Ok(()) } +async fn svc_create_v4_route( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let default_route_id = sagactx.lookup::("default_v4_route_id")?; + let default_route = + "0.0.0.0/0".parse().expect("known-valid specifier for a default route"); + svc_create_route(sagactx, default_route_id, default_route, "default-v4") + .await +} + +async fn svc_create_v4_route_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let route_id = 
sagactx.lookup::("default_v4_route_id")?; + svc_create_route_undo(sagactx, route_id).await +} + +async fn svc_create_v6_route( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let default_route_id = sagactx.lookup::("default_v6_route_id")?; + let default_route = + "::/0".parse().expect("known-valid specifier for a default route"); + svc_create_route(sagactx, default_route_id, default_route, "default-v6") + .await +} + +async fn svc_create_v6_route_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let route_id = sagactx.lookup::("default_v6_route_id")?; + svc_create_route_undo(sagactx, route_id).await +} + async fn svc_create_route( sagactx: NexusActionContext, + route_id: Uuid, + default_net: IpNet, + name: &str, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -226,23 +274,20 @@ async fn svc_create_route( &sagactx, ¶ms.serialized_authn, ); - let default_route_id = sagactx.lookup::("default_route_id")?; let system_router_id = sagactx.lookup::("system_router_id")?; let authz_router = sagactx.lookup::("router")?; let route = db::model::RouterRoute::new( - default_route_id, + route_id, system_router_id, RouterRouteKind::Default, params::RouterRouteCreate { identity: IdentityMetadataCreateParams { - name: "default".parse().unwrap(), + name: name.parse().unwrap(), description: "The default route of a vpc".to_string(), }, target: RouteTarget::InternetGateway("outbound".parse().unwrap()), - destination: RouteDestination::Vpc( - params.vpc_create.identity.name.clone(), - ), + destination: RouteDestination::IpNet(default_net), }, ); @@ -256,6 +301,7 @@ async fn svc_create_route( async fn svc_create_route_undo( sagactx: NexusActionContext, + route_id: Uuid, ) -> Result<(), anyhow::Error> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -264,7 +310,6 @@ async fn svc_create_route_undo( ¶ms.serialized_authn, ); let authz_router = 
sagactx.lookup::("router")?; - let route_id = sagactx.lookup::("default_route_id")?; let authz_route = authz::RouterRoute::new( authz_router, route_id, @@ -538,12 +583,25 @@ pub(crate) mod test { .await .expect("Failed to delete default Subnet"); - // Default route + // Default gateway routes + let (.., authz_route, _route) = LookupPath::new(&opctx, &datastore) + .project_id(project_id) + .vpc_name(&default_name.clone().into()) + .vpc_router_name(&system_name.clone().into()) + .router_route_name(&"default-v4".parse::().unwrap().into()) + .fetch() + .await + .expect("Failed to fetch default route"); + datastore + .router_delete_route(&opctx, &authz_route) + .await + .expect("Failed to delete default route"); + let (.., authz_route, _route) = LookupPath::new(&opctx, &datastore) .project_id(project_id) .vpc_name(&default_name.clone().into()) .vpc_router_name(&system_name.clone().into()) - .router_route_name(&default_name.clone().into()) + .router_route_name(&"default-v6".parse::().unwrap().into()) .fetch() .await .expect("Failed to fetch default route"); diff --git a/nexus/src/app/vpc_router.rs b/nexus/src/app/vpc_router.rs index 523a450bbd..40b4c1de0f 100644 --- a/nexus/src/app/vpc_router.rs +++ b/nexus/src/app/vpc_router.rs @@ -83,6 +83,10 @@ impl super::Nexus { .db_datastore .vpc_create_router(&opctx, &authz_vpc, router) .await?; + + // Note: we don't trigger the route RPW here as it's impossible + // for the router to be bound to a subnet at this point. 
+ Ok(router) } @@ -114,9 +118,6 @@ impl super::Nexus { .await } - // TODO: When a router is deleted all its routes should be deleted - // TODO: When a router is deleted it should be unassociated w/ any subnets it may be associated with - // or trigger an error pub(crate) async fn vpc_delete_router( &self, opctx: &OpContext, @@ -131,7 +132,12 @@ impl super::Nexus { if db_router.kind == VpcRouterKind::System { return Err(Error::invalid_request("Cannot delete system router")); } - self.db_datastore.vpc_delete_router(opctx, &authz_router).await + let out = + self.db_datastore.vpc_delete_router(opctx, &authz_router).await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } // Routes @@ -198,6 +204,9 @@ impl super::Nexus { .db_datastore .router_create_route(&opctx, &authz_router, route) .await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + Ok(route) } @@ -220,7 +229,7 @@ impl super::Nexus { route_lookup: &lookup::RouterRoute<'_>, params: ¶ms::RouterRouteUpdate, ) -> UpdateResult { - let (.., vpc, _, authz_route, db_route) = + let (.., vpc, authz_router, authz_route, db_route) = route_lookup.fetch_for(authz::Action::Modify).await?; // TODO: Write a test for this once there's a way to test it (i.e. 
// subnets automatically register to the system router table) @@ -235,9 +244,14 @@ impl super::Nexus { ))); } } - self.db_datastore + let out = self + .db_datastore .router_update_route(&opctx, &authz_route, params.clone().into()) - .await + .await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + + Ok(out) } pub(crate) async fn router_delete_route( @@ -245,7 +259,7 @@ impl super::Nexus { opctx: &OpContext, route_lookup: &lookup::RouterRoute<'_>, ) -> DeleteResult { - let (.., authz_route, db_route) = + let (.., authz_router, authz_route, db_route) = route_lookup.fetch_for(authz::Action::Delete).await?; // Only custom routes can be deleted @@ -255,6 +269,37 @@ impl super::Nexus { "DELETE not allowed on system routes", )); } - self.db_datastore.router_delete_route(opctx, &authz_route).await + let out = + self.db_datastore.router_delete_route(opctx, &authz_route).await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + + Ok(out) + } + + /// Trigger the VPC routing RPW in response to a state change + /// or a new possible listener (e.g., instance/probe start, NIC + /// create). + pub(crate) fn vpc_needed_notify_sleds(&self) { + self.background_tasks + .activate(&self.background_tasks.task_vpc_route_manager) + } + + /// Trigger an RPW version bump on a single VPC router in response + /// to CRUD operations on individual routes. + /// + /// This will also awaken the VPC Router RPW.
+ pub(crate) async fn vpc_router_increment_rpw_version( + &self, + opctx: &OpContext, + authz_router: &authz::VpcRouter, + ) -> UpdateResult<()> { + self.datastore() + .vpc_router_increment_rpw_version(opctx, authz_router.id()) + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(()) } } diff --git a/nexus/src/app/vpc_subnet.rs b/nexus/src/app/vpc_subnet.rs index f081f351db..478e1af9f9 100644 --- a/nexus/src/app/vpc_subnet.rs +++ b/nexus/src/app/vpc_subnet.rs @@ -64,8 +64,7 @@ impl super::Nexus { )), } } - // TODO: When a subnet is created it should add a route entry into the VPC's - // system router + pub(crate) async fn vpc_create_subnet( &self, opctx: &OpContext, @@ -109,7 +108,7 @@ impl super::Nexus { // See for // details. let subnet_id = Uuid::new_v4(); - match params.ipv6_block { + let out = match params.ipv6_block { None => { const NUM_RETRIES: usize = 2; let mut retry = 0; @@ -213,7 +212,11 @@ impl super::Nexus { .map(|(.., subnet)| subnet) .map_err(SubnetError::into_external) } - } + }?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } pub(crate) async fn vpc_subnet_list( @@ -235,13 +238,16 @@ impl super::Nexus { ) -> UpdateResult { let (.., authz_subnet) = vpc_subnet_lookup.lookup_for(authz::Action::Modify).await?; - self.db_datastore + let out = self + .db_datastore .vpc_update_subnet(&opctx, &authz_subnet, params.clone().into()) - .await + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } - // TODO: When a subnet is deleted it should remove its entry from the VPC's - // system router. 
pub(crate) async fn vpc_delete_subnet( &self, opctx: &OpContext, @@ -249,9 +255,14 @@ impl super::Nexus { ) -> DeleteResult { let (.., authz_subnet, db_subnet) = vpc_subnet_lookup.fetch_for(authz::Action::Delete).await?; - self.db_datastore + let out = self + .db_datastore .vpc_delete_subnet(opctx, &db_subnet, &authz_subnet) - .await + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } pub(crate) async fn subnet_list_instance_network_interfaces( diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 350836441e..e814df2b61 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5704,7 +5704,7 @@ async fn vpc_router_route_view( .await } -/// Create router +/// Create route #[endpoint { method = POST, path = "/v1/vpc-router-routes", diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 4f7a1d1b77..75ddf847bf 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -18,6 +18,7 @@ use nexus_db_queries::context::OpContext; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO_ID; use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::DataStore; use nexus_test_interface::NexusServer; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; @@ -59,6 +60,9 @@ use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::Vni; +use omicron_common::api::internal::shared::ResolvedVpcRoute; +use omicron_common::api::internal::shared::RouterId; +use omicron_common::api::internal::shared::RouterKind; use omicron_nexus::app::MAX_MEMORY_BYTES_PER_INSTANCE; use omicron_nexus::app::MAX_VCPU_PER_INSTANCE; use 
omicron_nexus::app::MIN_MEMORY_BYTES_PER_INSTANCE; @@ -70,6 +74,7 @@ use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use sled_agent_client::TestInterfaces as _; +use std::collections::HashSet; use std::convert::TryFrom; use std::net::Ipv4Addr; use std::sync::Arc; @@ -672,6 +677,30 @@ async fn test_instance_start_creates_networking_state( for agent in &sled_agents { assert_sled_v2p_mappings(agent, &nics[0], guest_nics[0].vni).await; } + + // Ensure that the target sled agent for our instance has received + // up-to-date VPC routes. + let with_vmm = datastore + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .unwrap(); + + let mut checked = false; + for agent in &sled_agents { + if Some(agent.id) == with_vmm.sled_id().map(SledUuid::into_untyped_uuid) + { + assert_sled_vpc_routes( + agent, + &opctx, + datastore, + nics[0].subnet_id, + guest_nics[0].vni, + ) + .await; + checked = true; + } + } + assert!(checked); } #[nexus_test] @@ -836,7 +865,9 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { } #[nexus_test] -async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { +async fn test_instance_migrate_v2p_and_routes( + cptestctx: &ControlPlaneTestContext, +) { let client = &cptestctx.external_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; @@ -965,6 +996,15 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { if sled_agent.id != dst_sled_id.into_untyped_uuid() { assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) .await; + } else { + assert_sled_vpc_routes( + sled_agent, + &opctx, + datastore, + nics[0].subnet_id, + guest_nics[0].vni, + ) + .await; } } } @@ -4764,6 +4804,78 @@ async fn assert_sled_v2p_mappings( .expect("matching v2p mapping should be present"); } +/// Asserts that supplied sled agent's most recent VPC route sets +/// contain up-to-date routes for a known 
subnet. +pub async fn assert_sled_vpc_routes( + sled_agent: &Arc, + opctx: &OpContext, + datastore: &DataStore, + subnet_id: Uuid, + vni: Vni, +) { + let (.., authz_vpc, _, db_subnet) = LookupPath::new(opctx, datastore) + .vpc_subnet_id(subnet_id) + .fetch() + .await + .unwrap(); + + let custom_routes: HashSet<_> = + if let Some(router_id) = db_subnet.custom_router_id { + datastore + .vpc_resolve_router_rules(opctx, router_id) + .await + .unwrap() + .into_iter() + .map(|(dest, target)| ResolvedVpcRoute { dest, target }) + .collect() + } else { + Default::default() + }; + + let (.., vpc) = LookupPath::new(opctx, datastore) + .vpc_id(authz_vpc.id()) + .fetch() + .await + .unwrap(); + + let system_routes: HashSet<_> = datastore + .vpc_resolve_router_rules(opctx, vpc.system_router_id) + .await + .unwrap() + .into_iter() + .map(|(dest, target)| ResolvedVpcRoute { dest, target }) + .collect(); + + assert!(!system_routes.is_empty()); + + let condition = || async { + let vpc_routes = sled_agent.vpc_routes.lock().await; + let sys_routes_found = vpc_routes.iter().any(|(id, set)| { + *id == RouterId { vni, kind: RouterKind::System } + && set.routes == system_routes + }); + let custom_routes_found = vpc_routes.iter().any(|(id, set)| { + *id == RouterId { + vni, + kind: RouterKind::Custom(db_subnet.ipv4_block.0.into()), + } && set.routes == custom_routes + }); + + if sys_routes_found && custom_routes_found { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }; + wait_for_condition( + condition, + &Duration::from_secs(1), + &Duration::from_secs(30), + ) + .await + .expect("matching vpc routes should be present"); +} + /// Simulate completion of an ongoing instance state transition. 
To do this, we /// have to look up the instance, then get the sled agent associated with that /// instance, and then tell it to finish simulating whatever async transition is diff --git a/nexus/tests/integration_tests/router_routes.rs b/nexus/tests/integration_tests/router_routes.rs index 10c594bba9..79a5db8eaf 100644 --- a/nexus/tests/integration_tests/router_routes.rs +++ b/nexus/tests/integration_tests/router_routes.rs @@ -10,10 +10,12 @@ use nexus_test_utils::identity_eq; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; +use omicron_common::api::external::SimpleIdentity; use omicron_common::api::external::{ IdentityMetadataCreateParams, IdentityMetadataUpdateParams, RouteDestination, RouteTarget, RouterRoute, RouterRouteKind, }; +use oxnet::IpNet; use std::net::IpAddr; use std::net::Ipv4Addr; @@ -59,27 +61,48 @@ async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { .await .items; - // The system should start with a single, pre-configured route - assert_eq!(system_router_routes.len(), 1); - - // That route should be the default route - let default_route = &system_router_routes[0]; - assert_eq!(default_route.kind, RouterRouteKind::Default); - - // It errors if you try to delete the default route - let error: dropshot::HttpErrorResponseBody = NexusRequest::expect_failure( - client, - StatusCode::BAD_REQUEST, - Method::DELETE, - get_route_url("system", "default").as_str(), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - assert_eq!(error.message, "DELETE not allowed on system routes"); + // The system should start with three preconfigured routes: + // - a default v4 gateway route + // - a default v6 gateway route + // - a managed subnet route for the 'default' subnet + assert_eq!(system_router_routes.len(), 3); + + let mut v4_route = None; + let mut v6_route = None; + let mut subnet_route = None; + 
for route in system_router_routes { + match (&route.kind, &route.destination, &route.target) { + (RouterRouteKind::Default, RouteDestination::IpNet(IpNet::V4(_)), RouteTarget::InternetGateway(_)) => {v4_route = Some(route);}, + (RouterRouteKind::Default, RouteDestination::IpNet(IpNet::V6(_)), RouteTarget::InternetGateway(_)) => {v6_route = Some(route);}, + (RouterRouteKind::VpcSubnet, RouteDestination::Subnet(n0), RouteTarget::Subnet(n1)) if n0 == n1 && n0.as_str() == "default" => {subnet_route = Some(route);}, + _ => panic!("unexpected system route {route:?} -- wanted gateway and subnet"), + } + } + + let v4_route = + v4_route.expect("no v4 gateway route found in system router"); + let v6_route = + v6_route.expect("no v6 gateway route found in system router"); + let subnet_route = + subnet_route.expect("no default subnet route found in system router"); + + // Deleting any default system route is disallowed. + for route in &[&v4_route, &v6_route, &subnet_route] { + let error: dropshot::HttpErrorResponseBody = + NexusRequest::expect_failure( + client, + StatusCode::BAD_REQUEST, + Method::DELETE, + get_route_url("system", route.name().as_str()).as_str(), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(error.message, "DELETE not allowed on system routes"); + } // Create a custom router create_router(&client, project_name, vpc_name, router_name).await; diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 394bef5d2f..8e2ee39c21 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -262,7 +262,7 @@ pub struct Vpc { } /// A VPC subnet represents a logical grouping for instances that allows network traffic between -/// them, within a IPv4 subnetwork or optionall an IPv6 subnetwork. +/// them, within a IPv4 subnetwork or optionally an IPv6 subnetwork. 
#[derive(ObjectIdentity, Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct VpcSubnet { /// common identifying metadata @@ -277,6 +277,9 @@ pub struct VpcSubnet { /// The IPv6 subnet CIDR block. pub ipv6_block: Ipv6Net, + + /// ID for an attached custom router. + pub custom_router_id: Option, } #[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] diff --git a/openapi/nexus.json b/openapi/nexus.json index a985a3e42c..f4cd1ce89e 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -19760,9 +19760,15 @@ ] }, "VpcSubnet": { - "description": "A VPC subnet represents a logical grouping for instances that allows network traffic between them, within a IPv4 subnetwork or optionall an IPv6 subnetwork.", + "description": "A VPC subnet represents a logical grouping for instances that allows network traffic between them, within a IPv4 subnetwork or optionally an IPv6 subnetwork.", "type": "object", "properties": { + "custom_router_id": { + "nullable": true, + "description": "ID for an attached custom router.", + "type": "string", + "format": "uuid" + }, "description": { "description": "human-readable free-form text about a resource", "type": "string" diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 05081d8298..3ac130c565 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -950,6 +950,63 @@ } } }, + "/vpc-routes": { + "get": { + "summary": "Get the current versions of VPC routing rules.", + "operationId": "list_vpc_routes", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_ResolvedVpcRouteState", + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRouteState" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Update VPC routing rules.", + "operationId": "set_vpc_routes", 
+ "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Array_of_ResolvedVpcRouteSet", + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRouteSet" + } + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/zones": { "get": { "summary": "List the zones that are currently managed by the sled agent.", @@ -4236,6 +4293,70 @@ "rack_subnet" ] }, + "ResolvedVpcRoute": { + "description": "A VPC route resolved into a concrete target.", + "type": "object", + "properties": { + "dest": { + "$ref": "#/components/schemas/IpNet" + }, + "target": { + "$ref": "#/components/schemas/RouterTarget" + } + }, + "required": [ + "dest", + "target" + ] + }, + "ResolvedVpcRouteSet": { + "description": "An updated set of routes for a given VPC and/or subnet.", + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/RouterId" + }, + "routes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRoute" + }, + "uniqueItems": true + }, + "version": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RouterVersion" + } + ] + } + }, + "required": [ + "id", + "routes" + ] + }, + "ResolvedVpcRouteState": { + "description": "Version information for routes on a given VPC subnet.", + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/RouterId" + }, + "version": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RouterVersion" + } + ] + } + }, + "required": [ + "id" + ] + }, "RouteConfig": { "type": "object", "properties": { @@ -4266,6 +4387,148 @@ "nexthop" ] }, + "RouterId": { + "description": "Identifier for a VPC and/or subnet.", + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/RouterKind" + }, + "vni": { + "$ref": 
"#/components/schemas/Vni" + } + }, + "required": [ + "kind", + "vni" + ] + }, + "RouterKind": { + "description": "The scope of a set of VPC router rules.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "system" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "subnet": { + "$ref": "#/components/schemas/IpNet" + }, + "type": { + "type": "string", + "enum": [ + "custom" + ] + } + }, + "required": [ + "subnet", + "type" + ] + } + ] + }, + "RouterTarget": { + "description": "The target for a given router entry.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "drop" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "internet_gateway" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "vpc_subnet" + ] + }, + "value": { + "$ref": "#/components/schemas/IpNet" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, + "RouterVersion": { + "description": "Information on the current parent router (and version) of a route set according to the control plane.", + "type": "object", + "properties": { + "router_id": { + "type": "string", + "format": "uuid" + }, + "version": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "router_id", + "version" + ] + }, "SemverVersion": { "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" diff --git 
a/package-manifest.toml b/package-manifest.toml index d653f9d999..027ca52b48 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -563,10 +563,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" +source.commit = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "33e3b09408551be860debac08de50a840909d4e6c6bed9aecaef63fe8bef2d69" +source.sha256 = "63b6c74584e32f52893730e3a567da29c7f93934c38882614aad59034bdd980d" output.type = "tarball" [package.mg-ddm] @@ -579,10 +579,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" +source.commit = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "81674afa17873f84bb49a800c8511938d1c2e871026cbb17e5eed2b645b1eb55" +source.sha256 = "b9908b81fee00d71b750f5b9a0f866c807adb0f924ab635295d28753538836f5" output.type = "zone" output.intermediate_only = true @@ -594,10 +594,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" +source.commit = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" # The SHA256 digest is automatically posted to: -# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "5e8bdd6774ef6041189621306577d0e0d174d596d216e53740ce6f035316c5af" +# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt +source.sha256 = "51f446933f0d8c426b15ea0845b66664da9b9a129893d12b25d7912b52f07362" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 905fd111c1..b6102c3a64 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1406,7 +1406,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.vpc_subnet ( /* Child resource creation generation number */ rcgen INT8 NOT NULL, ipv4_block INET NOT NULL, - ipv6_block INET NOT NULL + ipv6_block INET NOT NULL, + /* nullable FK to the `vpc_router` table. */ + custom_router_id UUID ); /* Subnet and network interface names are unique per VPC, not project */ @@ -1636,7 +1638,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.vpc_router ( time_deleted TIMESTAMPTZ, kind omicron.public.vpc_router_kind NOT NULL, vpc_id UUID NOT NULL, - rcgen INT NOT NULL + rcgen INT NOT NULL, + /* + * version information used to trigger VPC router RPW. + * this is sensitive to CRUD on named resources beyond + * routers e.g. instances, subnets, ... + */ + resolved_version INT NOT NULL DEFAULT 0 ); CREATE UNIQUE INDEX IF NOT EXISTS lookup_router_by_vpc ON omicron.public.vpc_router ( @@ -1662,6 +1670,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.router_route ( /* Indicates that the object has been deleted */ time_deleted TIMESTAMPTZ, + /* FK to the `vpc_router` table. 
*/ vpc_router_id UUID NOT NULL, kind omicron.public.router_route_kind NOT NULL, target STRING(128) NOT NULL, @@ -4098,7 +4107,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '77.0.0', NULL) + (TRUE, NOW(), NOW(), '78.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/vpc-subnet-routing/up01.sql b/schema/crdb/vpc-subnet-routing/up01.sql new file mode 100644 index 0000000000..d1869dd010 --- /dev/null +++ b/schema/crdb/vpc-subnet-routing/up01.sql @@ -0,0 +1,3 @@ +-- Each subnet may have a custom router attached. +ALTER TABLE omicron.public.vpc_subnet +ADD COLUMN IF NOT EXISTS custom_router_id UUID; diff --git a/schema/crdb/vpc-subnet-routing/up02.sql b/schema/crdb/vpc-subnet-routing/up02.sql new file mode 100644 index 0000000000..77e72961a3 --- /dev/null +++ b/schema/crdb/vpc-subnet-routing/up02.sql @@ -0,0 +1,7 @@ +/* + * version information used to trigger VPC router RPW. + * this is sensitive to CRUD on named resources beyond + * routers e.g. instances, subnets, ... + */ +ALTER TABLE omicron.public.vpc_router +ADD COLUMN IF NOT EXISTS resolved_version INT NOT NULL DEFAULT 0; diff --git a/schema/crdb/vpc-subnet-routing/up03.sql b/schema/crdb/vpc-subnet-routing/up03.sql new file mode 100644 index 0000000000..fb4fd2324a --- /dev/null +++ b/schema/crdb/vpc-subnet-routing/up03.sql @@ -0,0 +1,96 @@ +set local disallow_full_table_scans = off; + +-- We need to manually rebuild a compliant set of routes. +-- Remove everything that exists today. +DELETE FROM omicron.public.router_route WHERE 1=1; + +-- Insert gateway routes for all VPCs. 
+INSERT INTO omicron.public.router_route + ( + id, name, + description, + time_created, time_modified, + vpc_router_id, kind, + target, destination + ) +SELECT + gen_random_uuid(), 'default-v4', + 'The default route of a vpc', + now(), now(), + omicron.public.vpc_router.id, 'default', + 'inetgw:outbound', 'ipnet:0.0.0.0/0' +FROM + omicron.public.vpc_router +ON CONFLICT DO NOTHING; + +INSERT INTO omicron.public.router_route + ( + id, name, + description, + time_created, time_modified, + vpc_router_id, kind, + target, destination + ) +SELECT + gen_random_uuid(), 'default-v6', + 'The default route of a vpc', + now(), now(), + omicron.public.vpc_router.id, 'default', + 'inetgw:outbound', 'ipnet:::/0' +FROM + omicron.public.vpc_router +ON CONFLICT DO NOTHING; + +-- Insert subnet routes for every defined VPC subnet. +INSERT INTO omicron.public.router_route + ( + id, name, + description, + time_created, time_modified, + vpc_router_id, kind, + target, destination + ) +SELECT + gen_random_uuid(), vpc_subnet.name, + 'VPC Subnet route for ''' || vpc_subnet.name || '''', + now(), now(), + omicron.public.vpc_router.id, 'vpc_subnet', + 'subnet:' || vpc_subnet.name, 'subnet:' || vpc_subnet.name +FROM + (omicron.public.vpc_subnet JOIN omicron.public.vpc + ON vpc_subnet.vpc_id = vpc.id) JOIN omicron.public.vpc_router + ON vpc_router.vpc_id = vpc.id +ON CONFLICT DO NOTHING; + +-- Replace IDs of fixed_data routes for the services VPC. +-- This is done instead of an insert to match the initial +-- empty state of dbinit.sql. 
+WITH known_ids (new_id, new_name, new_description) AS ( + VALUES + ( + '001de000-074c-4000-8000-000000000002', 'default-v4', + 'Default internet gateway route for Oxide Services' + ), + ( + '001de000-074c-4000-8000-000000000003', 'default-v6', + 'Default internet gateway route for Oxide Services' + ), + ( + '001de000-c470-4000-8000-000000000004', 'external-dns', + 'Built-in VPC Subnet for Oxide service (external-dns)' + ), + ( + '001de000-c470-4000-8000-000000000005', 'nexus', + 'Built-in VPC Subnet for Oxide service (nexus)' + ), + ( + '001de000-c470-4000-8000-000000000006', 'boundary-ntp', + 'Built-in VPC Subnet for Oxide service (boundary-ntp)' + ) +) +UPDATE omicron.public.router_route +SET + id = CAST(new_id AS UUID), + description = new_description +FROM known_ids +WHERE vpc_router_id = '001de000-074c-4000-8000-000000000001' AND new_name = router_route.name; diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 6defd18a95..2d41e2860a 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -31,7 +31,9 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{ DiskRuntimeState, SledInstanceState, UpdateArtifactId, }; -use omicron_common::api::internal::shared::SwitchPorts; +use omicron_common::api::internal::shared::{ + ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, +}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -86,6 +88,8 @@ pub fn api() -> SledApiDescription { api.register(host_os_write_status_delete)?; api.register(inventory)?; api.register(bootstore_status)?; + api.register(list_vpc_routes)?; + api.register(set_vpc_routes)?; Ok(()) } @@ -1025,3 +1029,29 @@ async fn bootstore_status( .into(); Ok(HttpResponseOk(status)) } + +/// Get the current versions of VPC routing rules. 
+#[endpoint { + method = GET, + path = "/vpc-routes", +}] +async fn list_vpc_routes( + request_context: RequestContext, +) -> Result>, HttpError> { + let sa = request_context.context(); + Ok(HttpResponseOk(sa.list_vpc_routes())) +} + +/// Update VPC routing rules. +#[endpoint { + method = PUT, + path = "/vpc-routes", +}] +async fn set_vpc_routes( + request_context: RequestContext, + body: TypedBody>, +) -> Result { + let sa = request_context.context(); + sa.set_vpc_routes(body.into_inner())?; + Ok(HttpResponseUpdatedNoContent()) +} diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 04b68ef752..ec4d503e7b 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -27,7 +27,7 @@ use backoff::BackoffError; use chrono::Utc; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; -use illumos_utils::opte::{DhcpCfg, PortManager}; +use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::svc::wait_for_service; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; @@ -1327,14 +1327,15 @@ impl InstanceRunner { } else { (None, None, &[][..]) }; - let port = self.port_manager.create_port( + let port = self.port_manager.create_port(PortCreateParams { nic, - snat, + source_nat: snat, ephemeral_ip, floating_ips, - &self.firewall_rules, - self.dhcp_config.clone(), - )?; + firewall_rules: &self.firewall_rules, + dhcp_config: self.dhcp_config.clone(), + is_service: false, + })?; opte_ports.push(port); } diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 16559039a2..40af604645 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -3,10 +3,12 @@ use anyhow::{anyhow, Result}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::params::VpcFirewallRule; -use illumos_utils::opte::{DhcpCfg, PortManager}; +use 
illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::zone::Zones; -use nexus_client::types::{ProbeExternalIp, ProbeInfo}; +use nexus_client::types::{ + BackgroundTasksActivateRequest, ProbeExternalIp, ProbeInfo, +}; use omicron_common::api::external::{ VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, VpcFirewallRuleStatus, @@ -179,24 +181,44 @@ impl ProbeManagerInner { } }; - self.add(target.difference(&current)).await; + let n_added = self.add(target.difference(&current)).await; self.remove(current.difference(&target)).await; self.check(current.intersection(&target)).await; + + // If we have created some new probes, we may need the control plane + // to provide us with valid routes for the VPC the probe belongs to. + if n_added > 0 { + if let Err(e) = self + .nexus_client + .client() + .bgtask_activate(&BackgroundTasksActivateRequest { + bgtask_names: vec!["vpc_route_manager".into()], + }) + .await + { + error!(self.log, "get routes for probe: {e}"); + } + } } }) } /// Add a set of probes to this sled. - async fn add<'a, I>(self: &Arc, probes: I) + /// + /// Returns the number of inserted probes. + async fn add<'a, I>(self: &Arc, probes: I) -> usize where I: Iterator, { + let mut i = 0; for probe in probes { info!(self.log, "adding probe {}", probe.id); if let Err(e) = self.add_probe(probe).await { error!(self.log, "add probe: {e}"); } + i += 1; } + i } /// Add a probe to this sled.
This sets up resources for the probe zone @@ -223,12 +245,12 @@ impl ProbeManagerInner { .get(0) .ok_or(anyhow!("expected an external ip"))?; - let port = self.port_manager.create_port( - &nic, - None, - Some(eip.ip), - &[], // floating ips - &[VpcFirewallRule { + let port = self.port_manager.create_port(PortCreateParams { + nic, + source_nat: None, + ephemeral_ip: Some(eip.ip), + floating_ips: &[], + firewall_rules: &[VpcFirewallRule { status: VpcFirewallRuleStatus::Enabled, direction: VpcFirewallRuleDirection::Inbound, targets: vec![nic.clone()], @@ -238,8 +260,9 @@ impl ProbeManagerInner { action: VpcFirewallRuleAction::Allow, priority: VpcFirewallRulePriority(100), }], - DhcpCfg::default(), - )?; + dhcp_config: DhcpCfg::default(), + is_service: false, + })?; let installed_zone = ZoneBuilderFactory::default() .builder() diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 5f53601681..fb57990f1b 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -48,7 +48,9 @@ use illumos_utils::dladm::{ Dladm, Etherstub, EtherstubVnic, GetSimnetError, PhysicalLink, }; use illumos_utils::link::{Link, VnicAllocator}; -use illumos_utils::opte::{DhcpCfg, Port, PortManager, PortTicket}; +use illumos_utils::opte::{ + DhcpCfg, Port, PortCreateParams, PortManager, PortTicket, +}; use illumos_utils::running_zone::{ EnsureAddressError, InstalledZone, RunCommandError, RunningZone, ZoneBuilderFactory, @@ -1162,11 +1164,19 @@ impl ServiceManager { // Create the OPTE port for the service. // Note we don't plumb any firewall rules at this point, - // Nexus will plumb them down later but the default OPTE + // Nexus will plumb them down later but services' default OPTE // config allows outbound access which is enough for // Boundary NTP which needs to come up before Nexus. 
let port = port_manager - .create_port(nic, snat, None, floating_ips, &[], DhcpCfg::default()) + .create_port(PortCreateParams { + nic, + source_nat: snat, + ephemeral_ip: None, + floating_ips, + firewall_rules: &[], + dhcp_config: DhcpCfg::default(), + is_service: true, + }) .map_err(|err| Error::ServicePortCreation { service: zone_type_str.clone(), err: Box::new(err), diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 012889c664..cfafaeea22 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -24,7 +24,9 @@ use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; -use omicron_common::api::internal::shared::SwitchPorts; +use omicron_common::api::internal::shared::{ + ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, +}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -64,6 +66,8 @@ pub fn api() -> SledApiDescription { api.register(omicron_zones_get)?; api.register(omicron_zones_put)?; api.register(sled_add)?; + api.register(list_vpc_routes)?; + api.register(set_vpc_routes)?; Ok(()) } @@ -508,3 +512,27 @@ async fn sled_add( ) -> Result { Ok(HttpResponseUpdatedNoContent()) } + +#[endpoint { + method = GET, + path = "/vpc-routes", +}] +async fn list_vpc_routes( + rqctx: RequestContext>, +) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.list_vpc_routes().await)) +} + +#[endpoint { + method = PUT, + path = "/vpc-routes", +}] +async fn set_vpc_routes( + rqctx: RequestContext>, + body: TypedBody>, +) -> Result { + let sa = rqctx.context(); + sa.set_vpc_routes(body.into_inner()).await; + Ok(HttpResponseUpdatedNoContent()) +} diff --git a/sled-agent/src/sim/sled_agent.rs 
b/sled-agent/src/sim/sled_agent.rs index f47d8a9100..9cb146531b 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -36,7 +36,10 @@ use omicron_common::api::internal::nexus::{ use omicron_common::api::internal::nexus::{ InstanceRuntimeState, VmmRuntimeState, }; -use omicron_common::api::internal::shared::RackNetworkConfig; +use omicron_common::api::internal::shared::{ + RackNetworkConfig, ResolvedVpcRoute, ResolvedVpcRouteSet, + ResolvedVpcRouteState, RouterId, RouterKind, RouterVersion, +}; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, ZpoolUuid}; use oxnet::Ipv6Net; @@ -77,6 +80,7 @@ pub struct SledAgent { Mutex>, PropolisClient)>>, /// lists of external IPs assigned to instances pub external_ips: Mutex>>, + pub vpc_routes: Mutex>, config: Config, fake_zones: Mutex, instance_ensure_state_error: Mutex>, @@ -182,6 +186,7 @@ impl SledAgent { disk_id_to_region_ids: Mutex::new(HashMap::new()), v2p_mappings: Mutex::new(HashSet::new()), external_ips: Mutex::new(HashMap::new()), + vpc_routes: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), config: config.clone(), fake_zones: Mutex::new(OmicronZonesConfig { @@ -360,6 +365,18 @@ impl SledAgent { self.map_disk_ids_to_region_ids(&vcr).await?; } + let mut routes = self.vpc_routes.lock().await; + for nic in &hardware.nics { + let my_routers = [ + RouterId { vni: nic.vni, kind: RouterKind::System }, + RouterId { vni: nic.vni, kind: RouterKind::Custom(nic.subnet) }, + ]; + + for router in my_routers { + routes.entry(router).or_default(); + } + } + Ok(instance_run_time_state) } @@ -879,4 +896,49 @@ impl SledAgent { pub async fn drop_dataset(&self, zpool_id: ZpoolUuid, dataset_id: Uuid) { self.storage.lock().await.drop_dataset(zpool_id, dataset_id) } + + pub async fn list_vpc_routes(&self) -> Vec { + let routes = self.vpc_routes.lock().await; + routes + .iter() + .map(|(k, v)| ResolvedVpcRouteState { id: *k, version: 
v.version }) + .collect() + } + + pub async fn set_vpc_routes(&self, new_routes: Vec) { + let mut routes = self.vpc_routes.lock().await; + for new in new_routes { + // Disregard any route information for a subnet we don't have. + let Some(old) = routes.get(&new.id) else { + continue; + }; + + // We have to handle subnet router changes, as well as + // spurious updates from multiple Nexus instances. + // If there's a UUID match, only update if vers increased, + // otherwise take the update verbatim (including loss of version). + match (old.version, new.version) { + (Some(old_vers), Some(new_vers)) + if !old_vers.is_replaced_by(&new_vers) => + { + continue; + } + _ => {} + }; + + routes.insert( + new.id, + RouteSet { version: new.version, routes: new.routes }, + ); + } + } +} + +/// Stored routes (and usage count) for a given VPC/subnet. +// NB: We aren't doing post count tracking here to unsubscribe +// from (VNI, subnet) pairs. +#[derive(Debug, Clone, Default)] +pub struct RouteSet { + pub version: Option, + pub routes: HashSet, } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 993e5f6a94..7f05d55e60 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -49,7 +49,8 @@ use omicron_common::api::internal::nexus::{ SledInstanceState, VmmRuntimeState, }; use omicron_common::api::internal::shared::{ - HostPortConfig, RackNetworkConfig, + HostPortConfig, RackNetworkConfig, ResolvedVpcRouteSet, + ResolvedVpcRouteState, }; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -1096,6 +1097,17 @@ impl SledAgent { self.inner.bootstore.clone() } + pub fn list_vpc_routes(&self) -> Vec { + self.inner.port_manager.vpc_routes_list() + } + + pub fn set_vpc_routes( + &self, + routes: Vec, + ) -> Result<(), Error> { + self.inner.port_manager.vpc_routes_ensure(routes).map_err(Error::from) + } + /// Return the metric producer registry. 
pub fn metrics_registry(&self) -> &ProducerRegistry { self.inner.metrics_manager.registry() diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 9a93e6b556..569d3d7813 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="5630887d0373857f77cb264f84aa19bdec720ce3" -SHA2="004e873e4120aa26460271368485266b75b7f964e5ed4dbee8fb5db4519470d7" +COMMIT="3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" +SHA2="007bfb717ccbc077c0250dee3121aeb0c5bb0d1c16795429a514fa4f8635a5ef" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 22918c581a..de64133971 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="5630887d0373857f77cb264f84aa19bdec720ce3" -SHA2="fdb33ee7425923560534672264008ef8948d227afce948ab704de092ad72157c" +COMMIT="3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" +SHA2="e4b42ab9daad90f0c561a830b62a9d17e294b4d0da0a6d44b4030929b0c37b7e" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index d2ad05383d..f9d4fd4491 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="6ae4bc3b332e91706c1c6633a7fc218aac65b7feff5643ee2dbbe79b841e0df3" -MGD_LINUX_SHA256="7930008cf8ce535a8b31043fc3edde0e825bd54d75f73234929bd0037ecc3a41" +CIDL_SHA256="51f446933f0d8c426b15ea0845b66664da9b9a129893d12b25d7912b52f07362" +MGD_LINUX_SHA256="736067394778cc4c38fecb1ca8647db3ca7ab1b5c4446f3ce2b5350379ba95b7" diff --git a/tools/opte_version b/tools/opte_version index 529b93110f..ff992938ae 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.31.262 +0.32.265 diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 0dca1a904e..7dfc9a1402 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -93,7 +93,7 @@ schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } 
scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } serde = { version = "1.0.203", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.117", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.118", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -197,7 +197,7 @@ schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } serde = { version = "1.0.203", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.117", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.118", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] }