From 70263a2f3c662f5c699f40e473148f0102feecd2 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 26 Jun 2024 12:40:06 +0100 Subject: [PATCH] Better conflict resolution on Nexus-managed subnet route names --- nexus/db-model/src/vpc_route.rs | 27 ++++++-- nexus/db-queries/src/db/datastore/vpc.rs | 80 ++++++++++++++---------- schema/crdb/vpc-subnet-routing/up03.sql | 8 +-- 3 files changed, 73 insertions(+), 42 deletions(-) diff --git a/nexus/db-model/src/vpc_route.rs b/nexus/db-model/src/vpc_route.rs index dda7f0b785..3015df691f 100644 --- a/nexus/db-model/src/vpc_route.rs +++ b/nexus/db-model/src/vpc_route.rs @@ -128,13 +128,32 @@ impl RouterRoute { } } + /// Create a subnet routing rule for a VPC's system router. + /// + /// This defaults to use the same name as the subnet. If this would conflict + /// with the internet gateway rules, then the UUID is used instead (alongside + /// notice that a name conflict has occurred). pub fn for_subnet( route_id: Uuid, system_router_id: Uuid, subnet: Name, - ) -> Result { - let name = format!("sn-{}", subnet).parse().map_err(|_| ())?; - Ok(Self::new( + ) -> Self { + let forbidden_names = ["default-v4", "default-v6"]; + + let name = if forbidden_names.contains(&subnet.as_str()) { + // unwrap safety: a uuid is not by itself a valid name + // so prepend it with another string. + // - length constraint is <63 chars, + // - a UUID is 36 chars including hyphens, + // - "{subnet}-" is 11 chars + // - "conflict-" is 9 chars + // = 56 chars + format!("conflict-{subnet}-{route_id}").parse().unwrap() + } else { + subnet.0.clone() + }; + + Self::new( route_id, system_router_id, external::RouterRouteKind::VpcSubnet, @@ -146,7 +165,7 @@ impl RouterRoute { target: external::RouteTarget::Subnet(subnet.0.clone()), destination: external::RouteDestination::Subnet(subnet.0), }, - )) + ) } } diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index a265853cb5..988f40e770 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -296,8 +296,8 @@ impl DataStore { route_id, SERVICES_VPC.system_router_id, vpc_subnet.name().clone().into(), - ) - .expect("builtin service names are short enough for route naming"); + ); + self.router_create_route(opctx, &authz_router, route) .await .map(|_| ()) @@ -1326,15 +1326,11 @@ impl DataStore { // modify other system routes like internet gateways (which are // `RouteKind::Default`). let conn = self.pool_connection_authorized(opctx).await?; - let log = opctx.log.clone(); self.transaction_retry_wrapper("vpc_subnet_route_reconcile") - .transaction(&conn, |conn| { - let log = log.clone(); - async move { - + .transaction(&conn, |conn| async move { use db::schema::router_route::dsl; - use db::schema::vpc_subnet::dsl as subnet; use db::schema::vpc::dsl as vpc; + use db::schema::vpc_subnet::dsl as subnet; let system_router_id = vpc::vpc .filter(vpc::id.eq(vpc_id)) @@ -1352,7 +1348,10 @@ impl DataStore { .await?; let current_rules: Vec = dsl::router_route - .filter(dsl::kind.eq(RouterRouteKind(ExternalRouteKind::VpcSubnet))) + .filter( + dsl::kind + .eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)), + ) .filter(dsl::time_deleted.is_null()) .filter(dsl::vpc_router_id.eq(system_router_id)) .select(RouterRoute::as_select()) @@ -1360,18 +1359,28 @@ impl DataStore { .await?; // Build the add/delete sets. - let expected_names: HashSet = valid_subnets.iter() + let expected_names: HashSet = valid_subnets + .iter() .map(|v| v.identity.name.clone()) .collect(); + // This checks that we have rules which *point to* the named + // subnets, rather than working with rule names (even if these + // are set to match the subnet where possible). + // Rule names are effectively randomised when someone, e.g., + // names a subnet "default-v4"/"-v6", and this prevents us + // from repeatedly adding/deleting that route. let mut found_names = HashSet::new(); let mut invalid = Vec::new(); for rule in current_rules { let id = rule.id(); match (rule.kind.0, rule.target.0) { - (ExternalRouteKind::VpcSubnet, RouteTarget::Subnet(n)) - if expected_names.contains(Name::ref_cast(&n)) => - {let _ = found_names.insert(n.into());}, + ( + ExternalRouteKind::VpcSubnet, + RouteTarget::Subnet(n), + ) if expected_names.contains(Name::ref_cast(&n)) => { + let _ = found_names.insert(n.into()); + } _ => invalid.push(id), } } @@ -1394,34 +1403,34 @@ impl DataStore { // Duplicate rules are caught here using the UNIQUE constraint // on names in a router. Only nexus can alter the system router, // so there is no risk of collision with user-specified names. + // + // Subnets named "default-v4" or "default-v6" have their rules renamed + // to include the rule UUID. for subnet in expected_names.difference(&found_names) { let route_id = Uuid::new_v4(); - // XXX this is fallible as it is based on subnet name. - // need to control this somewhere sane. - let Ok(route) = db::model::RouterRoute::for_subnet( + let route = db::model::RouterRoute::for_subnet( route_id, system_router_id, subnet.clone(), - ) else { - error!( - log, - "Reconciling VPC routes: name {} in vpc {} is too long", - subnet, - vpc_id, - ); - continue; - }; - - match Self::router_create_route_on_connection(route, &conn).await { - Err(Error::Conflict { .. }) => return Err(DieselError::RollbackTransaction), + ); + + match Self::router_create_route_on_connection(route, &conn) + .await + { + Err(Error::Conflict { .. }) => { + return Err(DieselError::RollbackTransaction) + } Err(_) => return Err(DieselError::NotFound), - _ => {}, + _ => {} } } // Verify that route set is exactly as intended, and rollback otherwise. let current_rules: Vec = dsl::router_route - .filter(dsl::kind.eq(RouterRouteKind(ExternalRouteKind::VpcSubnet))) + .filter( + dsl::kind + .eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)), + ) .filter(dsl::time_deleted.is_null()) .filter(dsl::vpc_router_id.eq(system_router_id)) .select(RouterRoute::as_select()) @@ -1429,19 +1438,22 @@ impl DataStore { .await?; if current_rules.len() != expected_names.len() { - return Err(DieselError::RollbackTransaction) + return Err(DieselError::RollbackTransaction); } for rule in current_rules { match (rule.kind.0, rule.target.0) { - (ExternalRouteKind::VpcSubnet, RouteTarget::Subnet(n)) - if expected_names.contains(Name::ref_cast(&n)) => {}, + ( + ExternalRouteKind::VpcSubnet, + RouteTarget::Subnet(n), + ) if expected_names.contains(Name::ref_cast(&n)) => {} _ => return Err(DieselError::RollbackTransaction), } } Ok(()) - }}).await + }) + .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; self.vpc_increment_rpw_version(opctx, vpc_id).await diff --git a/schema/crdb/vpc-subnet-routing/up03.sql b/schema/crdb/vpc-subnet-routing/up03.sql index d256921d34..7c4cc97a80 100644 --- a/schema/crdb/vpc-subnet-routing/up03.sql +++ b/schema/crdb/vpc-subnet-routing/up03.sql @@ -51,7 +51,7 @@ INSERT INTO omicron.public.router_route target, destination ) SELECT - gen_random_uuid(), 'sn-' || vpc_subnet.name, + gen_random_uuid(), vpc_subnet.name, 'VPC Subnet route for ''' || vpc_subnet.name || '''', now(), now(), omicron.public.vpc_router.id, 'default', @@ -76,15 +76,15 @@ WITH known_ids (new_id, new_name, new_description) AS ( 'Default internet gateway route for Oxide Services' ), ( - '001de000-c470-4000-8000-000000000004', 'sn-external-dns', + '001de000-c470-4000-8000-000000000004', 'external-dns', 'Built-in VPC Subnet for Oxide service (external-dns)' ), ( - '001de000-c470-4000-8000-000000000005', 'sn-nexus', + '001de000-c470-4000-8000-000000000005', 'nexus', 'Built-in VPC Subnet for Oxide service (nexus)' ), ( - '001de000-c470-4000-8000-000000000006', 'sn-boundary-ntp', + '001de000-c470-4000-8000-000000000006', 'boundary-ntp', 'Built-in VPC Subnet for Oxide service (boundary-ntp)' ) )