Skip to content

Commit

Permalink
Better conflict resolution on Nexus-managed subnet route names
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixMcFelix committed Jun 26, 2024
1 parent 899d367 commit 70263a2
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 42 deletions.
27 changes: 23 additions & 4 deletions nexus/db-model/src/vpc_route.rs
Original file line number Diff line number Diff line change
Expand Up @@ -128,13 +128,32 @@ impl RouterRoute {
}
}

/// Create a subnet routing rule for a VPC's system router.
///
/// This defaults to use the same name as the subnet. If this would conflict
/// with the internet gateway rules, then the UUID is used instead (alongside
/// notice that a name conflict has occurred).
pub fn for_subnet(
route_id: Uuid,
system_router_id: Uuid,
subnet: Name,
) -> Result<Self, ()> {
let name = format!("sn-{}", subnet).parse().map_err(|_| ())?;
Ok(Self::new(
) -> Self {
let forbidden_names = ["default-v4", "default-v6"];

let name = if forbidden_names.contains(&subnet.as_str()) {
// unwrap safety: a uuid is not by itself a valid name
// so prepend it with another string.
// - length constraint is <63 chars,
// - a UUID is 36 chars including hyphens,
// - "{subnet}-" is 11 chars
// - "conflict-" is 9 chars
// = 56 chars
format!("conflict-{subnet}-{route_id}").parse().unwrap()
} else {
subnet.0.clone()
};

Self::new(
route_id,
system_router_id,
external::RouterRouteKind::VpcSubnet,
Expand All @@ -146,7 +165,7 @@ impl RouterRoute {
target: external::RouteTarget::Subnet(subnet.0.clone()),
destination: external::RouteDestination::Subnet(subnet.0),
},
))
)
}
}

Expand Down
80 changes: 46 additions & 34 deletions nexus/db-queries/src/db/datastore/vpc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,8 @@ impl DataStore {
route_id,
SERVICES_VPC.system_router_id,
vpc_subnet.name().clone().into(),
)
.expect("builtin service names are short enough for route naming");
);

self.router_create_route(opctx, &authz_router, route)
.await
.map(|_| ())
Expand Down Expand Up @@ -1326,15 +1326,11 @@ impl DataStore {
// modify other system routes like internet gateways (which are
// `RouteKind::Default`).
let conn = self.pool_connection_authorized(opctx).await?;
let log = opctx.log.clone();
self.transaction_retry_wrapper("vpc_subnet_route_reconcile")
.transaction(&conn, |conn| {
let log = log.clone();
async move {

.transaction(&conn, |conn| async move {
use db::schema::router_route::dsl;
use db::schema::vpc_subnet::dsl as subnet;
use db::schema::vpc::dsl as vpc;
use db::schema::vpc_subnet::dsl as subnet;

let system_router_id = vpc::vpc
.filter(vpc::id.eq(vpc_id))
Expand All @@ -1352,26 +1348,39 @@ impl DataStore {
.await?;

let current_rules: Vec<RouterRoute> = dsl::router_route
.filter(dsl::kind.eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)))
.filter(
dsl::kind
.eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)),
)
.filter(dsl::time_deleted.is_null())
.filter(dsl::vpc_router_id.eq(system_router_id))
.select(RouterRoute::as_select())
.load_async(&conn)
.await?;

// Build the add/delete sets.
let expected_names: HashSet<Name> = valid_subnets.iter()
let expected_names: HashSet<Name> = valid_subnets
.iter()
.map(|v| v.identity.name.clone())
.collect();

// This checks that we have rules which *point to* the named
// subnets, rather than working with rule names (even if these
// are set to match the subnet where possible).
// Rule names are effectively randomised when someone, e.g.,
// names a subnet "default-v4"/"-v6", and this prevents us
// from repeatedly adding/deleting that route.
let mut found_names = HashSet::new();
let mut invalid = Vec::new();
for rule in current_rules {
let id = rule.id();
match (rule.kind.0, rule.target.0) {
(ExternalRouteKind::VpcSubnet, RouteTarget::Subnet(n))
if expected_names.contains(Name::ref_cast(&n)) =>
{let _ = found_names.insert(n.into());},
(
ExternalRouteKind::VpcSubnet,
RouteTarget::Subnet(n),
) if expected_names.contains(Name::ref_cast(&n)) => {
let _ = found_names.insert(n.into());
}
_ => invalid.push(id),
}
}
Expand All @@ -1394,54 +1403,57 @@ impl DataStore {
// Duplicate rules are caught here using the UNIQUE constraint
// on names in a router. Only nexus can alter the system router,
// so there is no risk of collision with user-specified names.
//
// Subnets named "default-v4" or "default-v6" have their rules renamed
// to include the rule UUID.
for subnet in expected_names.difference(&found_names) {
let route_id = Uuid::new_v4();
// XXX this is fallible as it is based on subnet name.
// need to control this somewhere sane.
let Ok(route) = db::model::RouterRoute::for_subnet(
let route = db::model::RouterRoute::for_subnet(
route_id,
system_router_id,
subnet.clone(),
) else {
error!(
log,
"Reconciling VPC routes: name {} in vpc {} is too long",
subnet,
vpc_id,
);
continue;
};

match Self::router_create_route_on_connection(route, &conn).await {
Err(Error::Conflict { .. }) => return Err(DieselError::RollbackTransaction),
);

match Self::router_create_route_on_connection(route, &conn)
.await
{
Err(Error::Conflict { .. }) => {
return Err(DieselError::RollbackTransaction)
}
Err(_) => return Err(DieselError::NotFound),
_ => {},
_ => {}
}
}

// Verify that route set is exactly as intended, and rollback otherwise.
let current_rules: Vec<RouterRoute> = dsl::router_route
.filter(dsl::kind.eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)))
.filter(
dsl::kind
.eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)),
)
.filter(dsl::time_deleted.is_null())
.filter(dsl::vpc_router_id.eq(system_router_id))
.select(RouterRoute::as_select())
.load_async(&conn)
.await?;

if current_rules.len() != expected_names.len() {
return Err(DieselError::RollbackTransaction)
return Err(DieselError::RollbackTransaction);
}

for rule in current_rules {
match (rule.kind.0, rule.target.0) {
(ExternalRouteKind::VpcSubnet, RouteTarget::Subnet(n))
if expected_names.contains(Name::ref_cast(&n)) => {},
(
ExternalRouteKind::VpcSubnet,
RouteTarget::Subnet(n),
) if expected_names.contains(Name::ref_cast(&n)) => {}
_ => return Err(DieselError::RollbackTransaction),
}
}

Ok(())
}}).await
})
.await
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

self.vpc_increment_rpw_version(opctx, vpc_id).await
Expand Down
8 changes: 4 additions & 4 deletions schema/crdb/vpc-subnet-routing/up03.sql
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ INSERT INTO omicron.public.router_route
target, destination
)
SELECT
gen_random_uuid(), 'sn-' || vpc_subnet.name,
gen_random_uuid(), vpc_subnet.name,
'VPC Subnet route for ''' || vpc_subnet.name || '''',
now(), now(),
omicron.public.vpc_router.id, 'default',
Expand All @@ -76,15 +76,15 @@ WITH known_ids (new_id, new_name, new_description) AS (
'Default internet gateway route for Oxide Services'
),
(
'001de000-c470-4000-8000-000000000004', 'sn-external-dns',
'001de000-c470-4000-8000-000000000004', 'external-dns',
'Built-in VPC Subnet for Oxide service (external-dns)'
),
(
'001de000-c470-4000-8000-000000000005', 'sn-nexus',
'001de000-c470-4000-8000-000000000005', 'nexus',
'Built-in VPC Subnet for Oxide service (nexus)'
),
(
'001de000-c470-4000-8000-000000000006', 'sn-boundary-ntp',
'001de000-c470-4000-8000-000000000006', 'boundary-ntp',
'Built-in VPC Subnet for Oxide service (boundary-ntp)'
)
)
Expand Down

0 comments on commit 70263a2

Please sign in to comment.