diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 9df5230779..d4fb36004f 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -35,7 +35,7 @@ progenitor::generate_api!( PortConfigV1 = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, RouteConfig = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, IpNet = { derives = [PartialEq, Eq, Hash, Serialize, Deserialize] }, - OmicronPhysicalDiskConfig = { derives = [Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord] } + OmicronPhysicalDiskConfig = { derives = [Clone, Debug, Serialize, Deserialize, PartialEq, Eq, Hash, PartialOrd, Ord] }, RouterId = { derives = [PartialEq, Eq, Hash, Debug, Deserialize, Serialize] }, }, //TODO trade the manual transformations later in this file for the diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index b426c8b472..2ac1f531a3 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -137,11 +137,27 @@ impl DataStore { ), )); } - self.create_network_interface_raw(opctx, interface) + + let out = self + .create_network_interface_raw(opctx, interface) .await // Convert to `InstanceNetworkInterface` before returning; we know // this is valid as we've checked the condition on-entry. - .map(NetworkInterface::as_instance) + .map(NetworkInterface::as_instance)?; + + // `instance:xxx` targets in router rules resolve to the primary + // NIC of that instance. Accordingly, NIC create may cause dangling + // entries to re-resolve to a valid instance (even if it is not yet + // started). + // This will not trigger the RPW directly, we still need to do so + // in e.g. the instance watcher task. + if out.primary { + self.vpc_increment_rpw_version(opctx, out.vpc_id) + .await + .map_err(|e| network_interface::InsertError::External(e))?; + } + + Ok(out) } /// List network interfaces associated with a given service. diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index d6b9645f93..fd90c8a7e6 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -1409,7 +1409,9 @@ impl DataStore { Ok(()) }}).await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + self.vpc_increment_rpw_version(opctx, vpc_id).await } /// Look up a VPC by VNI. @@ -1690,49 +1692,40 @@ impl DataStore { pub async fn vpc_router_increment_rpw_version( &self, opctx: &OpContext, - authz_router: &authz::VpcRouter, + router_id: Uuid, ) -> UpdateResult<()> { - opctx.authorize(authz::Action::Modify, authz_router).await?; + // NOTE: this operation and `vpc_increment_rpw_version` do not + // have auth checks, as these can occur in connection with unrelated + // resources -- the current user may have access to those, but be unable + // to modify the entire set of VPC routers in a project. use db::schema::vpc_router::dsl; diesel::update(dsl::vpc_router) .filter(dsl::time_deleted.is_null()) - .filter(dsl::id.eq(authz_router.id())) + .filter(dsl::id.eq(router_id)) .set(dsl::resolved_version.eq(dsl::resolved_version + 1)) .execute_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_router), - ) - })?; + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; Ok(()) } - /// Trigger an RPW version bump on all routers within a VPC in + /// Trigger an RPW version bump on *all* routers within a VPC in /// response to changes to named entities (e.g., subnets, instances). pub async fn vpc_increment_rpw_version( &self, opctx: &OpContext, - authz_vpc: &authz::Vpc, + vpc_id: Uuid, ) -> UpdateResult<()> { - opctx.authorize(authz::Action::CreateChild, authz_vpc).await?; - use db::schema::vpc_router::dsl; diesel::update(dsl::vpc_router) .filter(dsl::time_deleted.is_null()) - .filter(dsl::vpc_id.eq(authz_vpc.id())) + .filter(dsl::vpc_id.eq(vpc_id)) .set(dsl::resolved_version.eq(dsl::resolved_version + 1)) .execute_async(&*self.pool_connection_authorized(opctx).await?) .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_vpc), - ) - })?; + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; Ok(()) } diff --git a/nexus/src/app/background/vpc_routes.rs b/nexus/src/app/background/vpc_routes.rs index eb19753c27..6cdf720ce2 100644 --- a/nexus/src/app/background/vpc_routes.rs +++ b/nexus/src/app/background/vpc_routes.rs @@ -207,8 +207,7 @@ impl BackgroundTask for VpcRouteManager { // different router ID than the sled, or a higher version // number. match &set.version { - Some(v) if !v.is_replaced_by(&version) => - { + Some(v) if !v.is_replaced_by(&version) => { continue; } _ => {} @@ -244,15 +243,17 @@ impl BackgroundTask for VpcRouteManager { } } - if let Err(e) = client.set_vpc_routes(&to_push).await { - error!( - log, - "failed to push new VPC route state from sled"; - "sled" => sled.serial_number(), - "err" => ?e - ); - continue; - }; + if !to_push.is_empty() { + if let Err(e) = client.set_vpc_routes(&to_push).await { + error!( + log, + "failed to push new VPC route state from sled"; + "sled" => sled.serial_number(), + "err" => ?e + ); + continue; + }; + } } json!({}) diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 50b46c8e8d..090caddf18 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1524,6 +1524,7 @@ impl super::Nexus { new_runtime_state, ) .await?; + self.vpc_needed_notify_sleds(); Ok(()) } diff --git a/nexus/src/app/vpc_router.rs b/nexus/src/app/vpc_router.rs index e65b2a8605..ae2fdffeeb 100644 --- a/nexus/src/app/vpc_router.rs +++ b/nexus/src/app/vpc_router.rs @@ -83,6 +83,10 @@ impl super::Nexus { .db_datastore .vpc_create_router(&opctx, &authz_vpc, router) .await?; + + // Note: we don't trigger the route RPW here as it's impossible + // for the router to be bound to a subnet at this point. + Ok(router) } @@ -114,8 +118,8 @@ impl super::Nexus { .await } - // TODO: When a router is deleted it should be unassociated w/ any subnets it may be associated with - // or trigger an error + // TODO(now): When a router is deleted it should be unassociated w/ any subnets it may be associated with + // or trigger an error. pub(crate) async fn vpc_delete_router( &self, opctx: &OpContext, @@ -130,7 +134,12 @@ impl super::Nexus { if db_router.kind == VpcRouterKind::System { return Err(Error::invalid_request("Cannot delete system router")); } - self.db_datastore.vpc_delete_router(opctx, &authz_router).await + let out = + self.db_datastore.vpc_delete_router(opctx, &authz_router).await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } // Routes @@ -197,6 +206,9 @@ impl super::Nexus { .db_datastore .router_create_route(&opctx, &authz_router, route) .await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + Ok(route) } @@ -219,7 +231,7 @@ impl super::Nexus { route_lookup: &lookup::RouterRoute<'_>, params: ¶ms::RouterRouteUpdate, ) -> UpdateResult { - let (.., vpc, _, authz_route, db_route) = + let (.., vpc, authz_router, authz_route, db_route) = route_lookup.fetch_for(authz::Action::Modify).await?; // TODO: Write a test for this once there's a way to test it (i.e. // subnets automatically register to the system router table) @@ -234,9 +246,14 @@ impl super::Nexus { ))); } } - self.db_datastore + let out = self + .db_datastore .router_update_route(&opctx, &authz_route, params.clone().into()) - .await + .await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + + Ok(out) } pub(crate) async fn router_delete_route( @@ -244,7 +261,7 @@ impl super::Nexus { opctx: &OpContext, route_lookup: &lookup::RouterRoute<'_>, ) -> DeleteResult { - let (.., authz_route, db_route) = + let (.., authz_router, authz_route, db_route) = route_lookup.fetch_for(authz::Action::Delete).await?; // Only custom routes can be deleted @@ -254,6 +271,37 @@ impl super::Nexus { "DELETE not allowed on system routes", )); } - self.db_datastore.router_delete_route(opctx, &authz_route).await + let out = + self.db_datastore.router_delete_route(opctx, &authz_route).await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + + Ok(out) + } + + /// Trigger the VPC routing RPW in repsonse to a state change + /// or a new possible listener (e.g., instance/probe start, NIC + /// create). + pub(crate) fn vpc_needed_notify_sleds(&self) { + self.background_tasks + .activate(&self.background_tasks.task_vpc_route_manager) + } + + /// Trigger an RPW version bump on a single VPC router in response + /// to CRUD operations on individual routes. + /// + /// This will also awaken the VPC Router RPW. + pub(crate) async fn vpc_router_increment_rpw_version( + &self, + opctx: &OpContext, + authz_router: &authz::VpcRouter, + ) -> UpdateResult<()> { + self.datastore() + .vpc_router_increment_rpw_version(opctx, authz_router.id()) + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(()) } } diff --git a/nexus/src/app/vpc_subnet.rs b/nexus/src/app/vpc_subnet.rs index 4c5a569201..0e3affb470 100644 --- a/nexus/src/app/vpc_subnet.rs +++ b/nexus/src/app/vpc_subnet.rs @@ -63,8 +63,7 @@ impl super::Nexus { )), } } - // TODO: When a subnet is created it should add a route entry into the VPC's - // system router + pub(crate) async fn vpc_create_subnet( &self, opctx: &OpContext, @@ -108,7 +107,7 @@ impl super::Nexus { // See for // details. let subnet_id = Uuid::new_v4(); - match params.ipv6_block { + let out = match params.ipv6_block { None => { const NUM_RETRIES: usize = 2; let mut retry = 0; @@ -212,7 +211,11 @@ impl super::Nexus { .map(|(.., subnet)| subnet) .map_err(SubnetError::into_external) } - } + }?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } pub(crate) async fn vpc_subnet_list( @@ -234,13 +237,16 @@ impl super::Nexus { ) -> UpdateResult { let (.., authz_subnet) = vpc_subnet_lookup.lookup_for(authz::Action::Modify).await?; - self.db_datastore + let out = self + .db_datastore .vpc_update_subnet(&opctx, &authz_subnet, params.clone().into()) - .await + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } - // TODO: When a subnet is deleted it should remove its entry from the VPC's - // system router. pub(crate) async fn vpc_delete_subnet( &self, opctx: &OpContext, @@ -248,9 +254,14 @@ impl super::Nexus { ) -> DeleteResult { let (.., authz_subnet, db_subnet) = vpc_subnet_lookup.fetch_for(authz::Action::Delete).await?; - self.db_datastore + let out = self + .db_datastore .vpc_delete_subnet(opctx, &db_subnet, &authz_subnet) - .await + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } pub(crate) async fn subnet_list_instance_network_interfaces( diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index e4778bf06a..8c71d9eeb1 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -4340,13 +4340,17 @@ "uniqueItems": true }, "version": { - "$ref": "#/components/schemas/RouterVersion" + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RouterVersion" + } + ] } }, "required": [ "id", - "routes", - "version" + "routes" ] }, "ReifiedVpcRouteState": { diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index cc38725b61..eabf3850af 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -6,7 +6,9 @@ use illumos_utils::opte::params::VpcFirewallRule; use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::zone::Zones; -use nexus_client::types::{ProbeExternalIp, ProbeInfo}; +use nexus_client::types::{ + BackgroundTasksActivateRequest, ProbeExternalIp, ProbeInfo, +}; use omicron_common::api::external::{ VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, VpcFirewallRuleStatus, @@ -179,24 +181,44 @@ impl ProbeManagerInner { } }; - self.add(target.difference(¤t)).await; + let n_added = self.add(target.difference(¤t)).await; self.remove(current.difference(&target)).await; self.check(current.intersection(&target)).await; + + // If we have created some new probes, we may (in future) need the control plane + // to provide us with valid routes for the VPC the probe belongs to. + if n_added > 0 { + if let Err(e) = self + .nexus_client + .client() + .bgtask_activate(&BackgroundTasksActivateRequest { + bgtask_names: vec!["vpc_route_manager".into()], + }) + .await + { + error!(self.log, "get routes for probe: {e}"); + } + } } }) } /// Add a set of probes to this sled. - async fn add<'a, I>(self: &Arc, probes: I) + /// + /// Returns the number of inserted probes. + async fn add<'a, I>(self: &Arc, probes: I) -> usize where I: Iterator, { + let mut i = 0; for probe in probes { info!(self.log, "adding probe {}", probe.id); if let Err(e) = self.add_probe(probe).await { error!(self.log, "add probe: {e}"); } + i += 1; } + i } /// Add a probe to this sled. This sets up resources for the probe zone