From faa518c131b5811d5d4010dde847a1589babeb03 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Tue, 25 Jun 2024 16:25:04 -0400 Subject: [PATCH 01/13] [#3886 4/4] Drive and finish a region replacement (#5885) When a disk is expunged, any region that was on that disk is assumed to be gone. A single disk expungement can put many Volumes into degraded states, as one of the three mirrors of a region set is now gone. Volumes that are degraded in this way remain degraded until a new region is swapped in, and an Upstairs performs the necessary repair operation (either through a Live Repair or Reconciliation). Nexus can only initiate these repairs - it does not participate in them, instead requesting that an Upstairs perform the repair. These repair operations can only be done by an Upstairs running as part of an activated Volume: either Nexus has to send this Volume to a Pantry and repair it there, or Nexus has to talk to a propolis that has that active Volume. Further complicating things is that the Volumes in question can be activated and deactivated as a result of user action, namely starting and stopping Instances. This will interrupt any on-going repair. This is ok! Both operations support being interrupted, but as a result it's then Nexus' job to continually monitor these repair operations and initiate further operations if the current one is interrupted. A single saga invocation is not enough to continually make sure a Volume is being repaired, so driving one of the repair operations forward happens as a saga that is triggered from a background task: this is called the _region replacement drive_ saga. It wll: - transition a region replacement request to state Driving, again blocking out other invocations of the same saga - check if Nexus has taken an action to initiate a repair yet. if not, then one is needed. if it has previously initiated a repair operation, the state of the system is examined: is that operation still running? has something changed? further action may be required depending on this observation. - if an action is required, Nexus will prepare an action that will initiate either Live Repair or Reconciliation based on the current observed state of the system. - that action is then executed. if there was an error, then the saga unwinds. if it was successful, it is recorded as a "repair step" in CRDB and will be checked the next time the saga runs. - if Nexus observed an Upstairs telling it that a repair was completed or not necessary, then the request is placed into the ReplacementDone state, otherwise it is placed back into the Running state. if the saga unwinds, it unwinds back to the Running state. The background task responsible for triggering the drive saga will also scan for notifications of a successful live repair or reconciliation, and transition region replacement requests to ReplacementDone if it sees one. If a region replacement request is in state ReplacementDone, _region replacement finish_ saga is triggered, which will: - transition a request into Completing - delete the old region by deleting a transient Volume that refers to it - transition the request to the Complete state For the entire region replacement work, testing was done manually using the Canada region using the following test cases: - a disk needing repair is attached to a instance for the duration of the repair - a disk needing repair is attached to a instance that is migrated mid-repair - a disk needing repair is attached to a instance that is stopped mid-repair - a disk needing repair is attached to a instance that is stopped mid-repair, then started in the middle of the pantry's repair - a detached disk needs repair - a detached disk needs repair, and is then attached to an instance that is then started - a sled is expunged, causing region replacement requests for all regions on it Fixes #3886 Fixes #5191 --- dev-tools/omdb/src/bin/omdb/nexus.rs | 33 + dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 13 + nexus-config/src/nexus_config.rs | 18 +- nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 48 +- nexus/src/app/background/mod.rs | 1 + .../background/region_replacement_driver.rs | 736 +++++++ nexus/src/app/instance.rs | 26 +- nexus/src/app/mod.rs | 50 +- nexus/src/app/sagas/mod.rs | 16 + .../src/app/sagas/region_replacement_drive.rs | 1754 +++++++++++++++++ .../app/sagas/region_replacement_finish.rs | 345 ++++ nexus/src/app/volume.rs | 25 +- nexus/tests/config.test.toml | 5 + .../integration_tests/volume_management.rs | 2 +- nexus/types/src/internal_api/background.rs | 14 + nexus/types/src/internal_api/mod.rs | 1 + smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 20 files changed, 3077 insertions(+), 25 deletions(-) create mode 100644 nexus/src/app/background/region_replacement_driver.rs create mode 100644 nexus/src/app/sagas/region_replacement_drive.rs create mode 100644 nexus/src/app/sagas/region_replacement_finish.rs create mode 100644 nexus/types/src/internal_api/background.rs diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 09ae82b5d9..0b1c6c77f4 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -28,6 +28,7 @@ use nexus_client::types::SledSelector; use nexus_client::types::UninitializedSledId; use nexus_db_queries::db::lookup::LookupPath; use nexus_types::deployment::Blueprint; +use nexus_types::internal_api::background::RegionReplacementDriverStatus; use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; @@ -1049,6 +1050,38 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { ); } }; + } else if name == "region_replacement_driver" { + match serde_json::from_value::( + details.clone(), + ) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(status) => { + println!( + " number of region replacement drive sagas started ok: {}", + status.drive_invoked_ok.len() + ); + for line in &status.drive_invoked_ok { + println!(" > {line}"); + } + + println!( + " number of region replacement finish sagas started ok: {}", + status.finish_invoked_ok.len() + ); + for line in &status.finish_invoked_ok { + println!(" > {line}"); + } + + println!(" number of errors: {}", status.errors.len()); + for line in &status.errors { + println!(" > {line}"); + } + } + }; } else { println!( "warning: unknown background task: {:?} \ diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 174ffe5e3e..23ab034f1c 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -110,6 +110,10 @@ task: "region_replacement" detects if a region requires replacing and begins the process +task: "region_replacement_driver" + drive region replacements forward to completion + + task: "service_firewall_rule_propagation" propagates VPC firewall rules for Omicron services with external network connectivity @@ -234,6 +238,10 @@ task: "region_replacement" detects if a region requires replacing and begins the process +task: "region_replacement_driver" + drive region replacements forward to completion + + task: "service_firewall_rule_propagation" propagates VPC firewall rules for Omicron services with external network connectivity @@ -345,6 +353,10 @@ task: "region_replacement" detects if a region requires replacing and begins the process +task: "region_replacement_driver" + drive region replacements forward to completion + + task: "service_firewall_rule_propagation" propagates VPC firewall rules for Omicron services with external network connectivity diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 9f16c6026c..ebebb657e5 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -311,6 +311,10 @@ task: "region_replacement" detects if a region requires replacing and begins the process +task: "region_replacement_driver" + drive region replacements forward to completion + + task: "service_firewall_rule_propagation" propagates VPC firewall rules for Omicron services with external network connectivity @@ -505,6 +509,15 @@ task: "region_replacement" number of region replacements started ok: 0 number of region replacement start errors: 0 +task: "region_replacement_driver" + configured period: every 30s + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + number of region replacement drive sagas started ok: 0 + number of region replacement finish sagas started ok: 0 + number of errors: 0 + task: "service_firewall_rule_propagation" configured period: every 5m currently executing: no diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 67acb5ec1b..a8c863298e 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -373,8 +373,10 @@ pub struct BackgroundTaskConfig { pub bfd_manager: BfdManagerConfig, /// configuration for the switch port settings manager task pub switch_port_settings_manager: SwitchPortSettingsManagerConfig, - /// configuration for region replacement task + /// configuration for region replacement starter task pub region_replacement: RegionReplacementConfig, + /// configuration for region replacement driver task + pub region_replacement_driver: RegionReplacementDriverConfig, /// configuration for instance watcher task pub instance_watcher: InstanceWatcherConfig, /// configuration for service VPC firewall propagation task @@ -564,6 +566,14 @@ pub struct AbandonedVmmReaperConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct RegionReplacementDriverConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -801,6 +811,7 @@ mod test { sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 + region_replacement_driver.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 @@ -935,6 +946,10 @@ mod test { region_replacement: RegionReplacementConfig { period_secs: Duration::from_secs(30), }, + region_replacement_driver: + RegionReplacementDriverConfig { + period_secs: Duration::from_secs(30), + }, instance_watcher: InstanceWatcherConfig { period_secs: Duration::from_secs(30), }, @@ -1015,6 +1030,7 @@ mod test { sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 + region_replacement_driver.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 3162cf212b..407f5479d5 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -114,6 +114,7 @@ blueprints.period_secs_collect_crdb_node_ids = 180 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 +region_replacement_driver.period_secs = 10 # How frequently to query the status of active instances. instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index f78cb69d76..1fc1e858b7 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -21,6 +21,7 @@ use super::nat_cleanup; use super::phantom_disks; use super::physical_disk_adoption; use super::region_replacement; +use super::region_replacement_driver; use super::service_firewall_rules; use super::sync_service_zone_nat::ServiceZoneNatTracker; use super::sync_switch_configuration::SwitchPortSettingsManager; @@ -103,6 +104,9 @@ pub struct BackgroundTasks { /// begins the process pub task_region_replacement: common::TaskHandle, + /// task handle for the task that drives region replacements forward + pub task_region_replacement_driver: common::TaskHandle, + /// task handle for the task that polls sled agents for instance states. pub task_instance_watcher: common::TaskHandle, @@ -395,6 +399,26 @@ impl BackgroundTasks { task }; + // Background task: drive region replacements forward to completion + let task_region_replacement_driver = { + let detector = + region_replacement_driver::RegionReplacementDriver::new( + datastore.clone(), + saga_request.clone(), + ); + + let task = driver.register( + String::from("region_replacement_driver"), + String::from("drive region replacements forward to completion"), + config.region_replacement_driver.period_secs, + Box::new(detector), + opctx.child(BTreeMap::new()), + vec![], + ); + + task + }; + let task_instance_watcher = { let watcher = instance_watcher::InstanceWatcher::new( datastore.clone(), @@ -412,6 +436,7 @@ impl BackgroundTasks { vec![], ) }; + // Background task: service firewall rule propagation let task_service_firewall_propagation = driver.register( String::from("service_firewall_rule_propagation"), @@ -429,17 +454,17 @@ impl BackgroundTasks { // Background task: abandoned VMM reaping let task_abandoned_vmm_reaper = driver.register( - String::from("abandoned_vmm_reaper"), - String::from( - "deletes sled reservations for VMMs that have been abandoned by their instances", - ), - config.abandoned_vmm_reaper.period_secs, - Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new( - datastore, - )), - opctx.child(BTreeMap::new()), - vec![], - ); + String::from("abandoned_vmm_reaper"), + String::from( + "deletes sled reservations for VMMs that have been abandoned by their instances", + ), + config.abandoned_vmm_reaper.period_secs, + Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new( + datastore, + )), + opctx.child(BTreeMap::new()), + vec![], + ); BackgroundTasks { driver, @@ -462,6 +487,7 @@ impl BackgroundTasks { task_switch_port_settings_manager, task_v2p_manager, task_region_replacement, + task_region_replacement_driver, task_instance_watcher, task_service_firewall_propagation, task_abandoned_vmm_reaper, diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 7d1fc43d69..4d5a63e69f 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -23,6 +23,7 @@ mod networking; mod phantom_disks; mod physical_disk_adoption; mod region_replacement; +mod region_replacement_driver; mod service_firewall_rules; mod status; mod sync_service_zone_nat; diff --git a/nexus/src/app/background/region_replacement_driver.rs b/nexus/src/app/background/region_replacement_driver.rs new file mode 100644 index 0000000000..a02fa4e2e4 --- /dev/null +++ b/nexus/src/app/background/region_replacement_driver.rs @@ -0,0 +1,736 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for driving region replacement progress +//! +//! Region replacements will have been requested by the +//! `region_replacement_start` saga, but that will not trigger the necessary +//! live repair or reconciliation required on its own: the Volume is left in a +//! degraded state (less than a three way mirror) until either of those complete +//! successfully. +//! +//! For each region replacement request that is in state `Running`, this +//! background task will call a saga that drives that forward: namely, get an +//! Upstairs working on either the repair or reconcilation. If an Upstairs *was* +//! running one of these and for some reason was stopped, start it again. +//! +//! Basically, keep starting either repair or reconcilation until they complete +//! successfully, then "finish" the region replacement. + +use super::common::BackgroundTask; +use crate::app::authn; +use crate::app::sagas; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::internal_api::background::RegionReplacementDriverStatus; +use serde_json::json; +use std::sync::Arc; +use tokio::sync::mpsc::Sender; + +pub struct RegionReplacementDriver { + datastore: Arc, + saga_request: Sender, +} + +impl RegionReplacementDriver { + pub fn new( + datastore: Arc, + saga_request: Sender, + ) -> Self { + RegionReplacementDriver { datastore, saga_request } + } + + /// Drive running region replacements forward + pub async fn drive_running_replacements_forward( + &self, + opctx: &OpContext, + status: &mut RegionReplacementDriverStatus, + ) { + let log = &opctx.log; + + let running_replacements = + match self.datastore.get_running_region_replacements(opctx).await { + Ok(requests) => requests, + + Err(e) => { + let s = format!( + "query for running region replacement requests \ + failed: {e}" + ); + + error!(&log, "{s}"); + status.errors.push(s); + + return; + } + }; + + for request in running_replacements { + // If a successful finish notification was received, change the + // state here: don't drive requests forward where the replacement is + // done. + + let has_matching_finish_notification = match self + .datastore + .request_has_matching_successful_finish_notification( + opctx, &request, + ) + .await + { + Ok(has_matching_finish_notification) => { + has_matching_finish_notification + } + + Err(e) => { + let s = format!( + "checking for a finish notification for {} failed: {e}", + request.id + ); + + error!(&log, "{s}"); + status.errors.push(s); + + // Nexus may determine the request is `ReplacementDone` via + // the drive saga polling an Upstairs, so return false here + // to invoke that saga. + false + } + }; + + if has_matching_finish_notification { + if let Err(e) = self + .datastore + .mark_region_replacement_as_done(opctx, request.id) + .await + { + let s = format!( + "error marking {} as ReplacementDone: {e}", + request.id + ); + + error!(&log, "{s}"); + status.errors.push(s); + } + } else { + // Otherwise attempt to drive the replacement's progress forward + // (or determine if it is complete). + + let request_id = request.id; + + let result = self + .saga_request + .send(sagas::SagaRequest::RegionReplacementDrive { + params: sagas::region_replacement_drive::Params { + serialized_authn: + authn::saga::Serialized::for_opctx(opctx), + request, + }, + }) + .await; + + match result { + Ok(()) => { + let s = format!("{request_id}: drive invoked ok"); + + info!(&log, "{s}"); + status.drive_invoked_ok.push(s); + } + + Err(e) => { + let s = format!( + "sending region replacement drive request for \ + {request_id} failed: {e}", + ); + + error!(&log, "{s}"); + status.errors.push(s); + } + }; + } + } + } + + /// Complete region replacements that are done + pub async fn complete_done_replacements( + &self, + opctx: &OpContext, + status: &mut RegionReplacementDriverStatus, + ) { + let log = &opctx.log; + + let done_replacements = + match self.datastore.get_done_region_replacements(opctx).await { + Ok(requests) => requests, + + Err(e) => { + let s = format!( + "query for done region replacement requests failed: {e}" + ); + + error!(&log, "{s}"); + status.errors.push(s); + + return; + } + }; + + for request in done_replacements { + let Some(old_region_volume_id) = request.old_region_volume_id + else { + // This state is illegal! + let s = format!( + "request {} old region volume id is None!", + request.id, + ); + + error!(&log, "{s}"); + status.errors.push(s); + + continue; + }; + + let request_id = request.id; + + let result = + self.saga_request + .send(sagas::SagaRequest::RegionReplacementFinish { + params: sagas::region_replacement_finish::Params { + serialized_authn: + authn::saga::Serialized::for_opctx(opctx), + region_volume_id: old_region_volume_id, + request, + }, + }) + .await; + + match result { + Ok(()) => { + let s = format!("{request_id}: finish invoked ok"); + + info!(&log, "{s}"); + status.finish_invoked_ok.push(s); + } + + Err(e) => { + let s = format!( + "sending region replacement finish request for \ + {request_id} failed: {e}" + ); + + error!(&log, "{s}"); + status.errors.push(s); + } + }; + } + } +} + +impl BackgroundTask for RegionReplacementDriver { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let log = &opctx.log; + info!(&log, "region replacement driver task started"); + + let mut status = RegionReplacementDriverStatus::default(); + + self.drive_running_replacements_forward(opctx, &mut status).await; + self.complete_done_replacements(opctx, &mut status).await; + + info!(&log, "region replacement driver task done"); + + json!(status) + } + .boxed() + } +} + +#[cfg(test)] +mod test { + use super::*; + use async_bb8_diesel::AsyncRunQueryDsl; + use chrono::Utc; + use nexus_db_model::Region; + use nexus_db_model::RegionReplacement; + use nexus_db_model::RegionReplacementState; + use nexus_db_model::UpstairsRepairNotification; + use nexus_db_model::UpstairsRepairNotificationType; + use nexus_db_model::UpstairsRepairType; + use nexus_test_utils_macros::nexus_test; + use omicron_uuid_kinds::DownstairsRegionKind; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::TypedUuid; + use omicron_uuid_kinds::UpstairsKind; + use omicron_uuid_kinds::UpstairsRepairKind; + use omicron_uuid_kinds::UpstairsSessionKind; + use tokio::sync::mpsc; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_running_region_replacement_causes_drive( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let mut task = + RegionReplacementDriver::new(datastore.clone(), saga_request_tx); + + // Noop test + let result = task.activate(&opctx).await; + assert_eq!(result, json!(RegionReplacementDriverStatus::default())); + + // Add a region replacement request for a fake region, and change it to + // state Running. + let region_id = Uuid::new_v4(); + let new_region_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + + let request = { + let mut request = RegionReplacement::new(region_id, volume_id); + request.replacement_state = RegionReplacementState::Running; + request.new_region_id = Some(new_region_id); + request + }; + + let request_id = request.id; + + datastore + .insert_region_replacement_request(&opctx, request) + .await + .unwrap(); + + // Activate the task - it should pick that up and try to run the region + // replacement drive saga + let result: RegionReplacementDriverStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result.drive_invoked_ok, + vec![format!("{request_id}: drive invoked ok")] + ); + assert!(result.finish_invoked_ok.is_empty()); + assert!(result.errors.is_empty()); + + let request = saga_request_rx.try_recv().unwrap(); + + assert!(matches!( + request, + sagas::SagaRequest::RegionReplacementDrive { .. } + )); + } + + #[nexus_test(server = crate::Server)] + async fn test_done_region_replacement_causes_finish( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let mut task = + RegionReplacementDriver::new(datastore.clone(), saga_request_tx); + + // Noop test + let result = task.activate(&opctx).await; + assert_eq!(result, json!(RegionReplacementDriverStatus::default())); + + // Insert some region records + let old_region = { + let dataset_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + Region::new( + dataset_id, + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + ) + }; + + let new_region = { + let dataset_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + Region::new( + dataset_id, + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + ) + }; + + { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use nexus_db_model::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(old_region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + + diesel::insert_into(dsl::region) + .values(new_region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + } + + // Add a region replacement request for that region, and change it to + // state ReplacementDone. Set the new_region_id to the region created + // above. + let request = { + let mut request = + RegionReplacement::new(old_region.id(), old_region.volume_id()); + request.replacement_state = RegionReplacementState::ReplacementDone; + request.new_region_id = Some(new_region.id()); + request.old_region_volume_id = Some(Uuid::new_v4()); + request + }; + + let request_id = request.id; + + datastore + .insert_region_replacement_request(&opctx, request) + .await + .unwrap(); + + // Activate the task - it should pick that up and try to run the region + // replacement finish saga + let result: RegionReplacementDriverStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert!(result.drive_invoked_ok.is_empty()); + assert_eq!( + result.finish_invoked_ok, + vec![format!("{request_id}: finish invoked ok")] + ); + assert!(result.errors.is_empty()); + + let request = saga_request_rx.try_recv().unwrap(); + + assert!(matches!( + request, + sagas::SagaRequest::RegionReplacementFinish { .. } + )); + } + + #[nexus_test(server = crate::Server)] + async fn test_mark_region_replacement_done_after_notification( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let mut task = + RegionReplacementDriver::new(datastore.clone(), saga_request_tx); + + // Noop test + let result = task.activate(&opctx).await; + assert_eq!(result, json!(RegionReplacementDriverStatus::default())); + + // Insert some region records + let old_region = { + let dataset_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + Region::new( + dataset_id, + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + ) + }; + + let new_region = { + let dataset_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + Region::new( + dataset_id, + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + ) + }; + + { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use nexus_db_model::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(old_region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + + diesel::insert_into(dsl::region) + .values(new_region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + } + + // Add a region replacement request for that region, and change it to + // state Running. Set the new_region_id to the region created above. + let request = { + let mut request = + RegionReplacement::new(old_region.id(), old_region.volume_id()); + request.replacement_state = RegionReplacementState::Running; + request.new_region_id = Some(new_region.id()); + request.old_region_volume_id = Some(Uuid::new_v4()); + request + }; + + let request_id = request.id; + + datastore + .insert_region_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + // Activate the task - it should pick that up and try to run the region + // replacement drive saga + let result: RegionReplacementDriverStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result.drive_invoked_ok, + vec![format!("{request_id}: drive invoked ok")] + ); + assert!(result.finish_invoked_ok.is_empty()); + assert!(result.errors.is_empty()); + + let saga_request = saga_request_rx.try_recv().unwrap(); + + assert!(matches!( + saga_request, + sagas::SagaRequest::RegionReplacementDrive { .. } + )); + + // Now, pretend that an Upstairs sent a notification that it + // successfully finished a repair + + { + datastore + .upstairs_repair_notification( + &opctx, + UpstairsRepairNotification::new( + Utc::now(), // client time + TypedUuid::::from_untyped_uuid( + Uuid::new_v4(), + ), + UpstairsRepairType::Live, + TypedUuid::::from_untyped_uuid( + Uuid::new_v4(), + ), + TypedUuid::::from_untyped_uuid( + Uuid::new_v4(), + ), + TypedUuid::::from_untyped_uuid( + new_region.id(), + ), // downstairs that was repaired + "[fd00:1122:3344:101::2]:12345".parse().unwrap(), + UpstairsRepairNotificationType::Succeeded, + ), + ) + .await + .unwrap(); + } + + // Activating the task now should + // 1) switch the state to ReplacementDone + // 2) start the finish saga + let result: RegionReplacementDriverStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!(result.finish_invoked_ok.len(), 1); + + { + let request_in_db = datastore + .get_region_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + assert_eq!( + request_in_db.replacement_state, + RegionReplacementState::ReplacementDone + ); + } + + let saga_request = saga_request_rx.try_recv().unwrap(); + + assert!(matches!( + saga_request, + sagas::SagaRequest::RegionReplacementFinish { .. } + )); + } + + #[nexus_test(server = crate::Server)] + async fn test_no_mark_region_replacement_done_after_failed_notification( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let (saga_request_tx, mut saga_request_rx) = mpsc::channel(1); + let mut task = + RegionReplacementDriver::new(datastore.clone(), saga_request_tx); + + // Noop test + let result = task.activate(&opctx).await; + assert_eq!(result, json!(RegionReplacementDriverStatus::default())); + + // Insert some region records + let old_region = { + let dataset_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + Region::new( + dataset_id, + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + ) + }; + + let new_region = { + let dataset_id = Uuid::new_v4(); + let volume_id = Uuid::new_v4(); + Region::new( + dataset_id, + volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + ) + }; + + { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use nexus_db_model::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(old_region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + + diesel::insert_into(dsl::region) + .values(new_region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + } + + // Add a region replacement request for that region, and change it to + // state Running. Set the new_region_id to the region created above. + let request = { + let mut request = + RegionReplacement::new(old_region.id(), old_region.volume_id()); + request.replacement_state = RegionReplacementState::Running; + request.new_region_id = Some(new_region.id()); + request + }; + + let request_id = request.id; + + datastore + .insert_region_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + // Activate the task - it should pick that up and try to run the region + // replacement drive saga + let result: RegionReplacementDriverStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result.drive_invoked_ok, + vec![format!("{request_id}: drive invoked ok")] + ); + assert!(result.finish_invoked_ok.is_empty()); + assert!(result.errors.is_empty()); + + let saga_request = saga_request_rx.try_recv().unwrap(); + + assert!(matches!( + saga_request, + sagas::SagaRequest::RegionReplacementDrive { .. } + )); + + // Now, pretend that an Upstairs sent a notification that it failed to + // finish a repair + + { + datastore + .upstairs_repair_notification( + &opctx, + UpstairsRepairNotification::new( + Utc::now(), // client time + TypedUuid::::from_untyped_uuid( + Uuid::new_v4(), + ), + UpstairsRepairType::Live, + TypedUuid::::from_untyped_uuid( + Uuid::new_v4(), + ), + TypedUuid::::from_untyped_uuid( + Uuid::new_v4(), + ), + TypedUuid::::from_untyped_uuid( + new_region.id(), + ), // downstairs that was repaired + "[fd00:1122:3344:101::2]:12345".parse().unwrap(), + UpstairsRepairNotificationType::Failed, + ), + ) + .await + .unwrap(); + } + + // Activating the task now should start the drive saga + let result: RegionReplacementDriverStatus = + serde_json::from_value(task.activate(&opctx).await).unwrap(); + + assert_eq!( + result.drive_invoked_ok, + vec![format!("{request_id}: drive invoked ok")] + ); + assert!(result.finish_invoked_ok.is_empty()); + assert!(result.errors.is_empty()); + + let saga_request = saga_request_rx.try_recv().unwrap(); + + assert!(matches!( + saga_request, + sagas::SagaRequest::RegionReplacementDrive { .. } + )); + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 517fbf218a..ee70b8bb06 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -20,6 +20,7 @@ use futures::future::Fuse; use futures::{FutureExt, SinkExt, StreamExt}; use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; +use nexus_db_model::Vmm; use nexus_db_model::VmmState as DbVmmState; use nexus_db_queries::authn; use nexus_db_queries::authz; @@ -1561,7 +1562,7 @@ impl super::Nexus { instance_lookup: &lookup::Instance<'_>, params: ¶ms::InstanceSerialConsoleRequest, ) -> Result { - let client = self + let (_, client) = self .propolis_client_for_instance( opctx, instance_lookup, @@ -1602,7 +1603,7 @@ impl super::Nexus { instance_lookup: &lookup::Instance<'_>, params: ¶ms::InstanceSerialConsoleStreamRequest, ) -> Result<(), Error> { - let client_addr = match self + let (_, client_addr) = match self .propolis_addr_for_instance( opctx, instance_lookup, @@ -1657,12 +1658,14 @@ impl super::Nexus { } } + /// Return a propolis address for the instance, along with the VMM identity + /// that it's for. async fn propolis_addr_for_instance( &self, opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, action: authz::Action, - ) -> Result { + ) -> Result<(Vmm, SocketAddr), Error> { let (.., authz_instance) = instance_lookup.lookup_for(action).await?; let state = self @@ -1676,8 +1679,9 @@ impl super::Nexus { DbVmmState::Running | DbVmmState::Rebooting | DbVmmState::Migrating => { - Ok(SocketAddr::new(vmm.propolis_ip.ip(), vmm.propolis_port.into())) + Ok((vmm.clone(), SocketAddr::new(vmm.propolis_ip.ip(), vmm.propolis_port.into()))) } + DbVmmState::Starting | DbVmmState::Stopping | DbVmmState::Stopped @@ -1687,6 +1691,7 @@ impl super::Nexus { vmm.runtime.state, ))) } + DbVmmState::Destroyed | DbVmmState::SagaUnwound => Err(Error::invalid_request( "cannot connect to serial console of instance in state \"Stopped\"", )), @@ -1700,16 +1705,21 @@ impl super::Nexus { } } - async fn propolis_client_for_instance( + /// Return a propolis client for the instance, along with the VMM identity + /// that it's for. + pub(crate) async fn propolis_client_for_instance( &self, opctx: &OpContext, instance_lookup: &lookup::Instance<'_>, action: authz::Action, - ) -> Result { - let client_addr = self + ) -> Result<(Vmm, propolis_client::Client), Error> { + let (vmm, client_addr) = self .propolis_addr_for_instance(opctx, instance_lookup, action) .await?; - Ok(propolis_client::Client::new(&format!("http://{}", client_addr))) + Ok(( + vmm, + propolis_client::Client::new(&format!("http://{}", client_addr)), + )) } async fn proxy_instance_serial_ws( diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 8e4a795a95..da06ccaa32 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -943,7 +943,7 @@ impl Nexus { Ok(_) => { info!( nexus.log, - "region replacement drive saga completed ok" + "region replacement start saga completed ok" ); } @@ -953,6 +953,54 @@ impl Nexus { } }); } + + SagaRequest::RegionReplacementDrive { params } => { + let nexus = self.clone(); + tokio::spawn(async move { + let saga_result = nexus + .execute_saga::( + params, + ) + .await; + + match saga_result { + Ok(_) => { + info!( + nexus.log, + "region replacement drive saga completed ok" + ); + } + + Err(e) => { + warn!(nexus.log, "region replacement drive saga returned an error: {e}"); + } + } + }); + } + + SagaRequest::RegionReplacementFinish { params } => { + let nexus = self.clone(); + tokio::spawn(async move { + let saga_result = nexus + .execute_saga::( + params, + ) + .await; + + match saga_result { + Ok(_) => { + info!( + nexus.log, + "region replacement finish saga completed ok" + ); + } + + Err(e) => { + warn!(nexus.log, "region replacement finish saga returned an error: {e}"); + } + } + }); + } } } diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index ac9a30dd98..1604d6013d 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -32,6 +32,8 @@ pub mod instance_ip_detach; pub mod instance_migrate; pub mod instance_start; pub mod project_create; +pub mod region_replacement_drive; +pub mod region_replacement_finish; pub mod region_replacement_start; pub mod snapshot_create; pub mod snapshot_delete; @@ -163,6 +165,12 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); + ::register_actions( + &mut registry, + ); #[cfg(test)] ::register_actions(&mut registry); @@ -320,6 +328,14 @@ pub enum SagaRequest { RegionReplacementStart { params: region_replacement_start::Params, }, + + RegionReplacementDrive { + params: region_replacement_drive::Params, + }, + + RegionReplacementFinish { + params: region_replacement_finish::Params, + }, } impl SagaRequest { diff --git a/nexus/src/app/sagas/region_replacement_drive.rs b/nexus/src/app/sagas/region_replacement_drive.rs new file mode 100644 index 0000000000..e2f7620178 --- /dev/null +++ b/nexus/src/app/sagas/region_replacement_drive.rs @@ -0,0 +1,1754 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! # first, some Crucible background # +//! +//! Crucible's Upstairs has two methods of swapping in a new downstairs to a +//! region set: +//! +//! - A running Upstairs that is currently activated can be sent a request to +//! replace a downstairs with a new one - this can be done while accepting all +//! the usual IO requests. This is called _Live Repair_. +//! +//! - Prior to activation, an Upstairs will perform _Reconciliation_ to ensure +//! that all the downstairs are consistent. Activation is held back until this +//! is true. +//! +//! Each of these operations will ensure that each member of the three-way +//! mirror that is a region set is the same. +//! +//! Usually, each running Volume will have been constructed from a Volume +//! Construction Request (VCR) that Nexus created as part of a +//! `volume_checkout`. This VCR is sent to a service (for example, a Propolis +//! or Pantry) and ultimately passed to `Volume::construct` to create a running +//! Volume. This is then activated, and then IO can proceed. +//! +//! # how did we get here? # +//! +//! The process of region replacement begins with a region replacement request. +//! Today this is created either manually with omdb, or as a result of a +//! physical disk being expunged. Affected VCRs are modified first by the region +//! replacement start saga, which includes allocating a new replacement region. +//! This then places the region replacement request into the state "Running". +//! See that saga's documentation for more information. +//! +//! # why does the drive saga exist? # +//! +//! Region replacement is similar to instance migration in that it is initiated +//! by Nexus but not directly controlled by it. Instance migration requires a +//! source and destination Propolis to exist, and then Nexus waits for a +//! callback to occur. For region replacement, it's Nexus' job to trigger +//! either the Live Repair or Reconciliation operations via some Upstairs. Nexus +//! then either receives a notification of success, or sees that the Volume is +//! no longer in a degraded state as the result of some polling operation. +//! +//! Note: _it's very important that only_ the Upstairs can make the +//! determination that a Volume is no longer degraded. Nexus should not be +//! assuming anything. This is the _golden rule_ that this saga must follow. +//! +//! Volumes are in this degraded state the moment one or more targets in a +//! region set is no longer functional. An Upstairs can still accept reads, +//! writes, and flushes with only two out of three present in the set, but it's +//! operating with a reduced redundancy. +//! +//! Through disk expungement, an operator has told Nexus that failure is not +//! transient. The region replacement start saga then modifies them: a blank +//! region is swapped in to replace one of the regions that are gone. Then this +//! saga triggers either Live Repair or Reconciliation, and that's it right? +//! +//! Volumes back higher level objects that users interact with: disks, +//! snapshots, images, etc. Users can start and stop Upstairs by starting and +//! stopping Instances. This interrupts any current operation on the Volume! +//! This is ok: both operations were designed so that interruptions are not a +//! problem, but it does stop progress. +//! +//! Say an Instance is running, and that Instance's propolis is performing a +//! Live Repair. If a user stops that Instance, the propolis is torn down, and +//! the Volume remains degraded. The next time that Volume is constructed and +//! activated, the Upstairs will check each downstairs in the region set, see +//! that there's a difference, and perform Reconciliation. If the user stops an +//! Instance and does not start it again, that difference will remain. +//! +//! Nexus can at that point send the Volume to a Pantry and activate it, causing +//! Reconciliation. At any time, the user can come along and start the Instance +//! in question, which would take over the activation from the Pantry - this +//! would cause that Reconciliation to fail, and the new propolis server would +//! start its own Reconciliation. Again, the user may then stop the Instance, +//! halting progress. +//! +//! This saga is responsible for driving forward the Volume repair process, by +//! initiating repair operations. One invocation of this saga is most likely not +//! enough to repair a Volume: Nexus must continuously monitor the degraded +//! Volumes and initiate the necessary operation (LR or Reconciliation) until +//! those Volumes are no longer degraded. Those operations can fail or be +//! interrupted at any time due to user actions. +//! +//! # what does the saga do? # +//! +//! A background task will look at all region replacement requests in the +//! "Running" state, and call this saga for each one. This saga then does what's +//! required to fix these degraded Volumes. +//! +//! This saga handles the following region replacement request state +//! transitions: +//! +//! ```text +//! Running <-- +//! | +//! | | +//! v | +//! | +//! Driving -- +//! +//! | +//! v +//! +//! ReplacementDone +//! ``` +//! +//! The first thing this saga does is set itself as the "operating saga" for the +//! request, and change the state to "Driving". Then, it performs the following +//! (generic) steps: +//! +//! 1. If there was a previous repair step, check what the status of the +//! Volume's repair is. Determine if there is action required by Nexus, if +//! Nexus should wait, or if Nexus saw that some response that indicated the +//! repair was done (don't forget the golden rule!). +//! +//! If there was no previous repair step, then some action is required. +//! +//! 2. If there is action required, prepare an action that will initiate either +//! Live Repair or Reconciliation, based on the current state of the world +//! (noting that it's entirely possible that state will change before +//! executing that action, and invalidate the action!). +//! +//! 3. If there is one, execute the action. +//! +//! 4. If an action was executed without error, then commit it to CRDB as a +//! repair step. +//! +//! Recording the steps that were taken as part of repairing this Volume helps +//! this saga determine what to do, and can be helpful for Oxide support staff +//! if there's a problem. +//! +//! TODO: Cases not handled yet: +//! - a disk attached to a pantry for bulk imports +//! + +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::db::datastore::InstanceAndActiveVmm; +use crate::app::db::lookup::LookupPath; +use crate::app::sagas::common_storage::get_pantry_address; +use crate::app::sagas::declare_saga_actions; +use crate::app::{authn, authz, db}; +use chrono::DateTime; +use chrono::Utc; +use nexus_db_model::VmmState; +use nexus_types::identity::Resource; +use omicron_common::api::external::Error; +use propolis_client::types::ReplaceResult; +use serde::Deserialize; +use serde::Serialize; +use slog::Logger; +use std::net::SocketAddrV6; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region replacement drive saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + pub request: db::model::RegionReplacement, +} + +// region replacement drive saga: actions + +declare_saga_actions! { + region_replacement_drive; + SET_SAGA_ID -> "unused_1" { + + srrd_set_saga_id + - srrd_set_saga_id_undo + } + DRIVE_REGION_REPLACEMENT_CHECK -> "check" { + + srrd_drive_region_replacement_check + } + DRIVE_REGION_REPLACEMENT_PREPARE -> "prepare" { + + srrd_drive_region_replacement_prepare + } + DRIVE_REGION_REPLACEMENT_EXECUTE -> "execute" { + + srrd_drive_region_replacement_execute + } + DRIVE_REGION_REPLACEMENT_COMMIT -> "commit" { + + srrd_drive_region_replacement_commit + - srrd_drive_region_replacement_commit_undo + } + FINISH_SAGA -> "unused_2" { + + srrd_finish_saga + } +} + +// region replacement drive saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionReplacementDrive; +impl NexusSaga for SagaRegionReplacementDrive { + const NAME: &'static str = "region-replacement-drive"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_replacement_drive_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(Node::action( + "job_id", + "GenerateJobId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + + builder.append(drive_region_replacement_check_action()); + builder.append(drive_region_replacement_prepare_action()); + builder.append(drive_region_replacement_execute_action()); + builder.append(drive_region_replacement_commit_action()); + + builder.append(finish_saga_action()); + + Ok(builder.build()?) + } +} + +// region replacement drive saga: action implementations + +async fn srrd_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "driving" state to + // block out other sagas that will be triggered for the same request. + osagactx + .datastore() + .set_region_replacement_driving(&opctx, params.request.id, saga_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn srrd_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_replacement_driving(&opctx, params.request.id, saga_id) + .await?; + + Ok(()) +} + +/// What is the status of the repair? +#[derive(Debug, Serialize, Deserialize)] +enum DriveCheck { + /// The last step is still running, so don't do anything + LastStepStillRunning, + + /// The last step is not still running, but all we can do is wait. + Wait, + + /// We got some status that indicates that the region has been replaced! + Done, + + /// Some action is required. Either the last step is no longer running, or + /// the repair needs to be unstuck. + ActionRequired, +} + +async fn srrd_drive_region_replacement_check( + sagactx: NexusActionContext, +) -> Result { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let last_request_step = osagactx + .datastore() + .current_region_replacement_request_step(&opctx, params.request.id) + .await + .map_err(ActionError::action_failed)?; + + let Some(last_request_step) = last_request_step else { + // This is the first time this saga was invoked for this particular + // replacement request, so some action is required + info!( + log, + "no steps taken yet"; + "region replacement id" => %params.request.id, + ); + + return Ok(DriveCheck::ActionRequired); + }; + + // If the last request step is still "running", then check on it, and + // determine if any action is required. + + match last_request_step.step_type { + db::model::RegionReplacementStepType::Propolis => { + let Some((step_instance_id, step_vmm_id)) = + last_request_step.instance_and_vmm_ids() + else { + // This record is invalid, but we can still attempt to drive the + // repair forward. + error!( + log, + "step at {} has no associated ids", last_request_step.step_time; + "region replacement id" => ?params.request.id, + "last replacement drive time" => ?last_request_step.step_time, + "last replacement drive step" => "propolis", + ); + + return Ok(DriveCheck::ActionRequired); + }; + + let (.., authz_instance) = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(step_instance_id) + .lookup_for(authz::Action::Read) + .await + .map_err(ActionError::action_failed)?; + + let instance_and_vmm = osagactx + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .map_err(ActionError::action_failed)?; + + check_from_previous_propolis_step( + log, + params.request.id, + last_request_step.step_time, + step_instance_id, + step_vmm_id, + instance_and_vmm, + ) + .await + } + + db::model::RegionReplacementStepType::Pantry => { + // Check if the Pantry is still trying to activate the Volume + + let Some(pantry_address) = last_request_step.pantry_address() + else { + // This record is invalid, but we can still attempt to drive the + // repair forward. + + error!( + log, + "step has no associated pantry address"; + "region replacement id" => %params.request.id, + "last replacement drive time" => ?last_request_step.step_time, + "last replacement drive step" => "pantry", + ); + + return Ok(DriveCheck::ActionRequired); + }; + + let Some(job_id) = last_request_step.step_associated_pantry_job_id + else { + // This record is invalid, but we can still attempt to drive the + // repair forward. + + error!( + log, + "step has no associated pantry job id"; + "region replacement id" => %params.request.id, + "last replacement drive time" => ?last_request_step.step_time, + "last replacement drive step" => "pantry", + "pantry address" => ?pantry_address, + ); + + return Ok(DriveCheck::ActionRequired); + }; + + let Some(new_region_id) = params.request.new_region_id else { + return Err(ActionError::action_failed(format!( + "region replacement request {} has new_region_id = None", + params.request.id, + ))); + }; + + let new_region: db::model::Region = osagactx + .datastore() + .get_region(new_region_id) + .await + .map_err(ActionError::action_failed)?; + + let volume_id = new_region.volume_id().to_string(); + + check_from_previous_pantry_step( + log, + params.request.id, + last_request_step.step_time, + pantry_address, + job_id, + &volume_id.to_string(), + ) + .await + } + } +} + +/// Generate a DriveCheck if the previous step was a Propolis step +async fn check_from_previous_propolis_step( + log: &Logger, + request_id: Uuid, + step_time: DateTime, + step_instance_id: Uuid, + step_vmm_id: Uuid, + instance_and_vmm: InstanceAndActiveVmm, +) -> Result { + // When this saga recorded a Propolis replacement step, an instance existed + // and had a running vmm. Is this true now? + + let Some(current_vmm) = instance_and_vmm.vmm() else { + // There is no current VMM, but if the current repair step was + // `Propolis` then there was previously one. Some action is required: + // namely, attach disk to the pantry and let it perform reconcilation. + + info!( + log, + "instance from last step no longer has vmm"; + "region replacement id" => ?request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "propolis", + "instance id" => ?step_instance_id, + ); + + return Ok(DriveCheck::ActionRequired); + }; + + // `migration_id` is set at the beginning of an instance migration (before + // anything has happened), and is cleared at the end (after the migration is + // finished but before the migration target activates disk Volumes). For + // now, return `DriveCheck::Wait`, and pick up driving the region + // replacement forward after the migration has completed. + // + // If this saga does not wait, it will interleave with the instance + // migration saga. Depending on Nexus' view of what stage the migration is + // in, volume replacement requests could be sent to the source propolis or + // destination propolis. This is because any call to + // `instance_fetch_with_vmm` will always return a VMM that is either a + // migration source or not migrating. If this saga calls + // `instance_fetch_with_vmm` multiple times during a migration, it will + // return the source propolis until the migration is done, where then it + // will return the destination propolis. + // + // Processing a replacement request does _not_ cause an activation, so + // sending a replacement request to the source propolis will not cause the + // destination to be unable to activate (even though the destination _could_ + // be using a VCR with a lower generation number than what the replacement + // request has!). It will probably cause live repair to start on the source, + // which is alright because it can be cancelled at any time (and will be + // when the destination propolis activates the Volume). + // + // Until crucible#871 is addressed, sending the replacement request to the + // destination propolis could cause a panic if activation hasn't occurred + // yet. Even if this saga does wait, this same potential exists because the + // migration is considered complete before propolis activates disk Volumes. + // + // If the destination propolis' Volume activated, the Upstairs will return a + // `ReplacementResult`: either `VcrMatches` (if the destination is using the + // updated VCR) or `Started` (if the destination is using the pre-update VCR + // and the replacement result triggers live repair). + // + // Also note: if the migration target was sent a Volume that refers to a + // region that is no longer responding, it will hang trying to activate, but + // the migration itself will succeed (clearing the migration ID!). This is + // especially bad because it's easy to hit: if a region goes away and a + // migration is triggered before the region replacement start saga can swap + // out the region that's gone, the migration saga will checkout the + // pre-update Volume and the destination propolis will hit this scenario. + + if instance_and_vmm.instance().runtime().migration_id.is_some() { + info!( + log, + "instance is undergoing migration, wait for it to finish"; + "region replacement id" => ?request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "propolis", + "instance id" => ?step_instance_id, + ); + + return Ok(DriveCheck::Wait); + } + + // Check if the VMM has changed. + + if current_vmm.id != step_vmm_id { + // The VMM has changed! This can be due to a stop and start of the + // instance, or a migration. If this is the case, then the new VMM + // (propolis server) could be performing reconcilation as part of the + // Volume activation. Nexus should be receiving notifications from the + // Upstairs there. + // + // If this is the result of a stop/start, then the new vmm will be using + // the updated VCR. If the new vmm is in the right state, this drive + // saga can re-send the target replacement request to poll if the + // replacement is done yet. + + info!( + log, + "vmm has changed from last step"; + "region replacement id" => ?request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "propolis", + "instance id" => ?step_instance_id, + "old vmm id" => ?step_vmm_id, + "new vmm id" => ?current_vmm.id, + ); + + Ok(DriveCheck::ActionRequired) + } else { + // The VMM has not changed: check if the VMM is still active. + + let state = current_vmm.runtime.state; + + info!( + log, + "vmm from last step in state {}", state; + "region replacement id" => ?request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "propolis", + "instance id" => ?step_instance_id, + "vmm id" => ?step_vmm_id, + ); + + match &state { + // If propolis is running, or rebooting, then it is likely that the + // Upstairs that was previously sent the volume replacement request + // is still running the live repair (note: rebooting does not affect + // the running volume). + VmmState::Running | VmmState::Rebooting => { + // Until crucible#1277 is merged, choose to _not_ poll Propolis + // (which would happen if ActionRequired was returned here). + // + // TODO Nexus needs to poll, as it could miss receiving the + // "Finished" notification that would complete this region + // replacement. Most of the time it will receive that ok though. + + Ok(DriveCheck::LastStepStillRunning) + } + + VmmState::Starting => { + // This state is unexpected, considering Nexus previously sent a + // target replacement request to this propolis! + + return Err(ActionError::action_failed(format!( + "vmm {} propolis is Starting", + step_vmm_id, + ))); + } + + VmmState::Migrating => { + // This state is unexpected because we should have already + // returned `DriveCheck::Wait` above. + + return Err(ActionError::action_failed(format!( + "vmm {} propolis is Migrating!", + step_vmm_id, + ))); + } + + VmmState::Stopping + | VmmState::Stopped + | VmmState::Failed + | VmmState::Destroyed + | VmmState::SagaUnwound => { + // The VMM we sent the replacement request to is probably not + // operating on the request anymore. Wait to see where to send + // the next action: if the instance is migrating, eventually + // that will be a new propolis. If the instance is stopping, + // then that will be a Pantry. Otherwise, the saga will wait: + // propolis should only receive target replacement requests when + // in a good state. + + Ok(DriveCheck::Wait) + } + } + } +} + +/// Generate a DriveCheck if the previous step was a Pantry step +async fn check_from_previous_pantry_step( + log: &Logger, + request_id: Uuid, + step_time: DateTime, + pantry_address: SocketAddrV6, + job_id: Uuid, + volume_id: &str, +) -> Result { + // If there is a committed step, Nexus attached this Volume to a Pantry, and + // requested activation in a background job. Is it finished? + + let endpoint = format!("http://{}", pantry_address); + let client = crucible_pantry_client::Client::new(&endpoint); + + match client.is_job_finished(&job_id.to_string()).await { + Ok(status) => { + if status.job_is_finished { + // The job could be done because it failed: check the volume + // status to query if it is active or gone. + + match client.volume_status(volume_id).await { + Ok(volume_status) => { + info!( + log, + "pantry job finished, saw status {volume_status:?}"; + "region replacement id" => %request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "pantry", + "pantry address" => ?pantry_address, + ); + + if volume_status.seen_active { + // It may not be active now if a Propolis activated + // the volume, but if the Pantry's ever seen this + // Volume active before, then the reconciliation + // completed ok. + + Ok(DriveCheck::Done) + } else { + // The Pantry has never seen this active before, and + // the job finished - some action is required, the + // job failed. + + Ok(DriveCheck::ActionRequired) + } + } + + Err(e) => { + // Seeing 410 Gone here may mean that the pantry + // performed reconciliation successfully, but had a + // propolis activation take over from the pantry's. If + // this occurred before a "reconciliation successful" + // notification occurred, and the propolis activation + // does not require a reconcilation (because the pantry + // did it already), then another notification will not + // be resent by propolis. + // + // Return ActionRequired here so that this saga will + // re-send the target replacement request to the + // propolis the did the take over: if the above race + // occurred, that request will return + // ReplaceResult::VcrMatches. + + error!( + log, + "pantry job finished, saw error {e}"; + "region replacement id" => %request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "pantry", + "pantry address" => ?pantry_address, + ); + + Ok(DriveCheck::ActionRequired) + } + } + } else { + info!( + log, + "pantry is still performing reconcilation"; + "region replacement id" => %request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "pantry", + "pantry address" => ?pantry_address, + ); + + Ok(DriveCheck::LastStepStillRunning) + } + } + + Err(e) => { + // If there was some problem accessing the Pantry. It may be because + // that Pantry is now gone, so check on it. + + error!( + log, + "pantry returned an error checking job {job_id}: {e}"; + "region replacement id" => %request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "pantry", + "pantry address" => ?pantry_address, + ); + + match client.pantry_status().await { + Ok(_) => { + // The pantry responded, so it's still there. It may be that + // the volume is no longer attached because a Propolis + // activation took over from the Pantry. + + match client.volume_status(&volume_id).await { + Ok(_) => { + // The volume is still there as an entry, but the + // job isn't? Action is required: this saga should + // delete the attached volume, then re-attach it. + + info!( + log, + "pantry still has active volume"; + "region replacement id" => %request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "pantry", + "pantry address" => ?pantry_address, + "volume id" => volume_id, + ); + + Ok(DriveCheck::ActionRequired) + } + + Err(e) => { + // The volume is gone: it's likely been activated by + // a Propolis, but this could also be because the + // Pantry bounced. Some further action is required: + // either poll the propolis that stole the + // activation or send the volume to a new Pantry. + + error!( + log, + "pantry returned an error checking on volume: {e}"; + "region replacement id" => %request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "pantry", + "pantry address" => ?pantry_address, + "volume id" => volume_id, + ); + + Ok(DriveCheck::ActionRequired) + } + } + } + + Err(e) => { + // The pantry is not responding on its status endpoint. + // Further action is required to drive the repair, which may + // be attaching to another Pantry. + + error!( + log, + "pantry returned an error checking on status: {e}"; + "region replacement id" => %request_id, + "last replacement drive time" => ?step_time, + "last replacement drive step" => "pantry", + "pantry address" => ?pantry_address, + ); + + Ok(DriveCheck::ActionRequired) + } + } + } + } +} + +/// What action does this saga invocation need to take? +#[allow(clippy::large_enum_variant)] +#[derive(Debug, Serialize, Deserialize)] +enum DriveAction { + /// Do nothing - the repair is proceeding from the last drive step, or is + /// done. + Noop { replacement_done: bool }, + + /// If there is no active Propolis that is running the Volume, attach the + /// associated Volume to a Pantry. + Pantry { step: db::model::RegionReplacementStep, volume_id: Uuid }, + + /// If the Volume is currently running in a Propolis server, then send the + /// volume replacement request there. + Propolis { step: db::model::RegionReplacementStep, disk: db::model::Disk }, +} + +async fn srrd_drive_region_replacement_prepare( + sagactx: NexusActionContext, +) -> Result { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + // If the previous saga step did _not_ require an action, then return Noop + // here. + + let check_result = sagactx.lookup::("check")?; + + if !matches!(check_result, DriveCheck::ActionRequired) { + return Ok(DriveAction::Noop { + replacement_done: matches!(check_result, DriveCheck::Done), + }); + } + + // Otherwise, take a look at the state of the world, and prepare an action + // to execute. + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let nexus = osagactx.nexus(); + + let Some(new_region_id) = params.request.new_region_id else { + return Err(ActionError::action_failed(format!( + "region replacement request {} has new_region_id = None", + params.request.id, + ))); + }; + + let new_region: db::model::Region = osagactx + .datastore() + .get_region(new_region_id) + .await + .map_err(ActionError::action_failed)?; + + let maybe_disk = osagactx + .datastore() + .disk_for_volume_id(new_region.volume_id()) + .await + .map_err(ActionError::action_failed)?; + + // Does this volume back a disk? + let drive_action = if let Some(disk) = maybe_disk { + match &disk.runtime().attach_instance_id { + Some(instance_id) => { + // The region's volume is attached to an instance + let (.., authz_instance) = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(*instance_id) + .lookup_for(authz::Action::Read) + .await + .map_err(ActionError::action_failed)?; + + let instance_and_vmm = osagactx + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .map_err(ActionError::action_failed)?; + + if let Some(migration_id) = + instance_and_vmm.instance().runtime().migration_id + { + // If the check node did not observe migration_id as Some, + // it will not have returned `Wait`, but here in the prepare + // node we are observing that migration_id is Some: this + // means an instance migration was triggered in the middle + // of the region replacement. + // + // Log a message and bail out. + + info!( + log, + "instance migration_id is {migration_id}"; + "region replacement id" => %params.request.id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + ); + + return Err(ActionError::action_failed( + "instance is undergoing migration".to_string(), + )); + } + + match instance_and_vmm.vmm() { + Some(vmm) => { + // The disk is attached to an instance and there's an + // active propolis server. Send the volume replacement + // request to the running Volume there if the runtime + // state is either running or rebooting. + + let state = vmm.runtime.state; + + info!( + log, + "disk attached to instance with vmm in state {state}"; + "region replacement id" => %params.request.id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + "vmm id" => ?vmm.id, + ); + + match &state { + VmmState::Running | VmmState::Rebooting => { + // Propolis server is ok to receive the volume + // replacement request. + } + + VmmState::Starting + | VmmState::Stopping + | VmmState::Stopped + | VmmState::Migrating + | VmmState::Failed + | VmmState::Destroyed + | VmmState::SagaUnwound => { + // Propolis server is not ok to receive volume + // replacement requests, bail out + return Err(ActionError::action_failed(format!( + "vmm {} propolis not in a state to receive request", + vmm.id, + ))); + } + } + + DriveAction::Propolis { + step: db::model::RegionReplacementStep { + replacement_id: params.request.id, + step_time: Utc::now(), + step_type: db::model::RegionReplacementStepType::Propolis, + + step_associated_instance_id: Some(*instance_id), + step_associated_vmm_id: Some(vmm.id), + + step_associated_pantry_ip: None, + step_associated_pantry_port: None, + step_associated_pantry_job_id: None, + }, + + disk, + } + } + + None => { + // The disk is attached to an instance but there's no + // active propolis server. Attach to a pantry. + + let state = + &instance_and_vmm.instance().runtime().nexus_state; + + info!( + log, + "disk attached to instance in state {state} with no vmm"; + "region replacement id" => %params.request.id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + ); + + let pantry_address = + get_pantry_address(osagactx.nexus()).await?; + + DriveAction::Pantry { + step: db::model::RegionReplacementStep { + replacement_id: params.request.id, + step_time: Utc::now(), + step_type: + db::model::RegionReplacementStepType::Pantry, + + step_associated_instance_id: None, + step_associated_vmm_id: None, + + step_associated_pantry_ip: Some( + pantry_address.ip().into(), + ), + step_associated_pantry_port: Some( + pantry_address.port().into(), + ), + step_associated_pantry_job_id: Some( + sagactx.lookup::("job_id")?, + ), + }, + + volume_id: new_region.volume_id(), + } + } + } + } + + None => { + // The disk is not attached to an instance. Is it attached to a + // Pantry right now (aka performing bulk import)? + + if let Some(address) = &disk.pantry_address { + // TODO currently unsupported + return Err(ActionError::action_failed(format!( + "disk {} attached to {address}, not supported", + disk.id(), + ))); + } + + // Attach to a pantry. + + info!( + log, + "disk not attached to instance"; + "region replacement id" => %params.request.id, + "disk id" => ?disk.id(), + ); + + // XXX: internal-dns does not randomize the order of addresses + // in its responses: if the first Pantry in the list of + // addresses returned by DNS isn't responding, the drive saga + // will still continually try to use it. + + let pantry_address = get_pantry_address(nexus).await?; + + DriveAction::Pantry { + step: db::model::RegionReplacementStep { + replacement_id: params.request.id, + step_time: Utc::now(), + step_type: db::model::RegionReplacementStepType::Pantry, + + step_associated_instance_id: None, + step_associated_vmm_id: None, + + step_associated_pantry_ip: Some( + pantry_address.ip().into(), + ), + step_associated_pantry_port: Some( + pantry_address.port().into(), + ), + step_associated_pantry_job_id: Some( + sagactx.lookup::("job_id")?, + ), + }, + + volume_id: new_region.volume_id(), + } + } + } + } else { + // Is this volume the destination volume for a snapshot? + + let maybe_snapshot = osagactx + .datastore() + .find_snapshot_by_destination_volume_id( + &opctx, + new_region.volume_id(), + ) + .await + .map_err(ActionError::action_failed)?; + + if maybe_snapshot.is_some() { + // Volume is the destination that snapshot blocks should be scrubbed + // into. The scrubber is not written yet, so nothing should be using + // this volume yet. We can attach it to the Pantry. + + info!( + log, + "volume is for a snapshot destination"; + "region replacement id" => %params.request.id, + ); + + let pantry_address = get_pantry_address(nexus).await?; + + DriveAction::Pantry { + step: db::model::RegionReplacementStep { + replacement_id: params.request.id, + step_time: Utc::now(), + step_type: db::model::RegionReplacementStepType::Pantry, + + step_associated_instance_id: None, + step_associated_vmm_id: None, + + step_associated_pantry_ip: Some(pantry_address.ip().into()), + step_associated_pantry_port: Some( + pantry_address.port().into(), + ), + step_associated_pantry_job_id: Some( + sagactx.lookup::("job_id")?, + ), + }, + + volume_id: new_region.volume_id(), + } + } else { + // XXX what other volumes are created? + return Err(ActionError::action_failed(format!( + "don't know what to do with volume {}", + new_region.volume_id(), + ))); + } + }; + + Ok(drive_action) +} + +#[derive(Debug, Serialize, Deserialize)] +struct ExecuteResult { + step_to_commit: Option, + replacement_done: bool, +} + +/// Attempt to execute the prepared step. If it was successful, return the step +/// to commit to the database. +async fn srrd_drive_region_replacement_execute( + sagactx: NexusActionContext, +) -> Result { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // Look up the prepared action, and execute it. If something has changed + // between when the action was determined and now, then bail out - the next + // drive saga invocation will pick up the new state of the world and act + // accordingly. + + let action = sagactx.lookup::("prepare")?; + + let result = match action { + DriveAction::Noop { replacement_done } => { + // *slaps knees and stands up* welp + ExecuteResult { step_to_commit: None, replacement_done } + } + + DriveAction::Pantry { step, volume_id } => { + let Some(pantry_address) = step.pantry_address() else { + return Err(ActionError::action_failed(String::from( + "pantry step does not have an address", + ))); + }; + + let job_id = sagactx.lookup::("job_id")?; + + execute_pantry_drive_action( + log, + osagactx.datastore(), + params.request.id, + pantry_address, + volume_id, + job_id, + ) + .await?; + + ExecuteResult { + step_to_commit: Some(step), + replacement_done: false, + } + } + + DriveAction::Propolis { step, disk } => { + let Some((instance_id, vmm_id)) = step.instance_and_vmm_ids() + else { + return Err(ActionError::action_failed(Error::internal_error( + "propolis step does not have instance and vmm ids", + ))); + }; + + let (.., authz_instance) = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id) + .lookup_for(authz::Action::Read) + .await + .map_err(ActionError::action_failed)?; + + let instance_and_vmm = osagactx + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .map_err(ActionError::action_failed)?; + + if let Some(migration_id) = + instance_and_vmm.instance().runtime().migration_id + { + // An indefinite amount of time can occur between saga nodes: if + // both the check node and prepare node both observed + // `migration_id` as None, but this node observes Some, this + // still means an instance migration was triggered in the middle + // of the region replacement. + // + // Log a message and bail out. This is still best effort: a + // migration could be triggered after this check! + + info!( + log, + "instance migration_id is {migration_id}"; + "region replacement id" => %params.request.id, + "disk id" => ?disk.id(), + "instance id" => ?instance_id, + ); + + return Err(ActionError::action_failed( + "instance is undergoing migration".to_string(), + )); + } + + // The disk is attached to an instance and there's an active + // propolis server. Send a volume replacement request to the running + // Volume there - either it will start a live repair, or be ignored + // because there is no difference in the volume construction + // request. + + let disk_new_volume_vcr = match osagactx + .datastore() + .volume_get(disk.volume_id) + .await + .map_err(ActionError::action_failed)? + { + Some(volume) => volume.data().to_string(), + + None => { + return Err(ActionError::action_failed( + Error::internal_error("new volume is gone!"), + )); + } + }; + + let instance_lookup = + LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(instance_id); + + let (vmm, client) = osagactx + .nexus() + .propolis_client_for_instance( + &opctx, + &instance_lookup, + authz::Action::Modify, + ) + .await + .map_err(ActionError::action_failed)?; + + let replacement_done = execute_propolis_drive_action( + log, + params.request.id, + vmm_id, + vmm, + client, + disk, + disk_new_volume_vcr, + ) + .await?; + + ExecuteResult { step_to_commit: Some(step), replacement_done } + } + }; + + Ok(result) +} + +/// Execute a prepared Pantry step +async fn execute_pantry_drive_action( + log: &Logger, + datastore: &db::DataStore, + request_id: Uuid, + pantry_address: SocketAddrV6, + volume_id: Uuid, + job_id: Uuid, +) -> Result<(), ActionError> { + // Importantly, _do not use `call_pantry_attach_for_disk`_! That uses + // `retry_until_known_result`, which we _do not want here_. The Pantry + // attach can fail if there's a racing Volume checkout to be sent to + // Propolis. Additionally, that call uses `attach` instead of + // `attach_activate_background`, which means it will hang on the activation. + + let endpoint = format!("http://{}", pantry_address); + let client = crucible_pantry_client::Client::new(&endpoint); + + // Check pantry first, to see if this volume is attached already. This can + // occur if: + // + // - the volume is attached to the target pantry, but it can't be reliably + // determined if reconcilation finished. + // + // - a previous repair operated on another region in the same Volume, and + // that attachment was not garbage collected. + // + // Try to get the volume's status in order to check. + + let detach_required = + match client.volume_status(&volume_id.to_string()).await { + Ok(volume_status) => { + info!( + log, + "volume is already attached with status {volume_status:?}"; + "region replacement id" => %request_id, + "volume id" => ?volume_id, + "endpoint" => endpoint.clone(), + ); + + // In the case where this forward action is being rerun, + // detaching the volume would mean that the reconciliation would + // be interrupted. This is ok, as that operation can be + // interrupted at any time. + + // Detach this volume so we can reattach with this saga's job id. + true + } + + Err(e) => { + match e { + crucible_pantry_client::Error::ErrorResponse(ref rv) => { + match rv.status() { + http::StatusCode::NOT_FOUND => { + // No detach required, this Volume isn't attached to + // this Pantry. + false + } + + http::StatusCode::GONE => { + // 410 Gone means detach is required - it was + // previously attached and may have been activated + true + } + + _ => { + error!( + log, + "error checking volume status: {e}"; + "region replacement id" => %request_id, + "volume id" => ?volume_id, + "endpoint" => endpoint.clone(), + ); + + return Err(ActionError::action_failed( + Error::internal_error(&format!( + "unexpected error from volume_status: {e}" + )), + )); + } + } + } + + _ => { + error!( + log, + "error checking volume status: {e}"; + "region replacement id" => %request_id, + "volume id" => ?volume_id, + "endpoint" => endpoint.clone(), + ); + + return Err(ActionError::action_failed( + Error::internal_error(&format!( + "unexpected error from volume_status: {e}" + )), + )); + } + } + } + }; + + if detach_required { + info!( + log, + "detach required"; + "region replacement id" => %request_id, + "volume id" => ?volume_id, + "endpoint" => endpoint.clone(), + ); + + match client.detach(&volume_id.to_string()).await { + Ok(_) => { + info!( + log, + "detached volume"; + "region replacement id" => %request_id, + "volume id" => ?volume_id, + "endpoint" => endpoint.clone(), + ); + } + + Err(e) => { + error!( + log, + "error detaching volume: {e}"; + "region replacement id" => %request_id, + "volume id" => ?volume_id, + "endpoint" => endpoint.clone(), + ); + + // Cannot continue: the Pantry will return an error unless the + // volume construction request matches what was originally + // attached, and the job id matches what was originally sent. + // Even if the VCR is the same, this saga does not have the same + // job id. Bail out here: hopefully the next time this saga + // runs, it will select a different Pantry. + + return Err(ActionError::action_failed( + Error::invalid_request(String::from( + "cannot proceed, pantry will reject our request", + )), + )); + } + } + } else { + info!( + log, + "no detach required"; + "region replacement id" => %request_id, + "volume id" => ?volume_id, + "endpoint" => endpoint.clone(), + ); + } + + // Attach the volume to the pantry, and let reconciliation occur. + + info!( + log, + "sending attach for volume"; + "region replacement id" => %request_id, + "volume id" => ?volume_id, + "endpoint" => endpoint.clone(), + ); + + let disk_volume = datastore + .volume_checkout(volume_id, db::datastore::VolumeCheckoutReason::Pantry) + .await + .map_err(ActionError::action_failed)?; + + let volume_construction_request: + crucible_pantry_client::types::VolumeConstructionRequest = + serde_json::from_str(&disk_volume.data()).map_err(|e| { + ActionError::action_failed(Error::internal_error(&format!( + "failed to deserialize volume {volume_id} data: {e}", + ))) + })?; + + let attach_request = + crucible_pantry_client::types::AttachBackgroundRequest { + volume_construction_request, + job_id: job_id.to_string(), + }; + + client + .attach_activate_background(&volume_id.to_string(), &attach_request) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "pantry attach failed with {:?}", + e, + )) + })?; + + Ok(()) +} + +/// Execute a prepared Propolis step +async fn execute_propolis_drive_action( + log: &Logger, + request_id: Uuid, + step_vmm_id: Uuid, + vmm: db::model::Vmm, + client: propolis_client::Client, + disk: db::model::Disk, + disk_new_volume_vcr: String, +) -> Result { + // This client could be for a different VMM than the step was + // prepared for. Bail out if this is true + if vmm.id != step_vmm_id { + return Err(ActionError::action_failed(format!( + "propolis client vmm {} does not match step vmm {}", + vmm.id, step_vmm_id, + ))); + } + + info!( + log, + "sending replacement request for disk volume to propolis {step_vmm_id}"; + "region replacement id" => %request_id, + "disk id" => ?disk.id(), + "volume id" => ?disk.volume_id, + ); + + // Start (or poll) the replacement + let result = client + .instance_issue_crucible_vcr_request() + .id(disk.id()) + .body(propolis_client::types::InstanceVcrReplace { + name: disk.name().to_string(), + vcr_json: disk_new_volume_vcr, + }) + .send() + .await + .map_err(|e| match e { + propolis_client::Error::ErrorResponse(rv) => { + ActionError::action_failed(rv.message.clone()) + } + + _ => ActionError::action_failed(format!( + "unexpected failure during \ + `instance_issue_crucible_vcr_request`: {e}", + )), + })?; + + let replace_result = result.into_inner(); + + info!( + log, + "saw replace result {replace_result:?}"; + "region replacement id" => %request_id, + "disk id" => ?disk.id(), + "volume id" => ?disk.volume_id, + ); + + let replacement_done = match &replace_result { + ReplaceResult::Started => { + // This drive saga's call just started the replacement + false + } + + ReplaceResult::StartedAlready => { + // A previous drive saga's call started the replacement, but it's + // not done yet. + false + } + + ReplaceResult::CompletedAlready => { + // It's done! We see this if the same propolis that received the + // original replace request started and finished the live repair. + true + } + + ReplaceResult::VcrMatches => { + // If this propolis booted after the volume construction request was + // modified but before all the regions were reconciled, then + // `VcrMatches` will be seen as a result of `target_replace`: the + // new propolis will have received the updated VCR when it was + // created. + // + // The upstairs will be performing reconciliation (or have + // previously performed it), not live repair, and will have no + // record of a previous replace request (sent to a different + // propolis!) starting a live repair. + // + // If the Volume is active, that means reconcilation completed ok, + // and therefore Nexus can consider this repair complete. This is + // only true if one repair occurs at a time per volume (which is + // true due to the presence of volume_repair records), and if this + // saga locks the region replacement request record as part of it + // executing (which it does through the SET_SAGA_ID forward action). + // If either of those conditions are not held, then multiple + // replacement calls and activation checks can interleave and + // confuse this saga. + // + // Check if the Volume activated. + + let result = client + .disk_volume_status() + .id(disk.id()) + .send() + .await + .map_err(|e| match e { + propolis_client::Error::ErrorResponse(rv) => { + ActionError::action_failed(rv.message.clone()) + } + + _ => ActionError::action_failed(format!( + "unexpected failure during \ + `disk_volume_status`: {e}", + )), + })?; + + // If the Volume is active, then reconciliation finished + // successfully. + // + // There's a few reasons it may not be active yet: + // + // - Propolis could be shutting down, and tearing down the Upstairs + // in the process (which deactivates the Volume) + // + // - reconciliation could still be going on + // + // - reconciliation could have failed + // + // If it's not active, wait until the next invocation of this saga + // to decide what to do next. + + result.into_inner().active + } + + ReplaceResult::Missing => { + // The disk's volume does not contain the region to be replaced. + // This is an error! + + return Err(ActionError::action_failed(String::from( + "saw ReplaceResult::Missing", + ))); + } + }; + + Ok(replacement_done) +} + +async fn srrd_drive_region_replacement_commit( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + // If there was an executed step, record it! + + let execute_result = sagactx.lookup::("execute")?; + + if let Some(step) = execute_result.step_to_commit { + info!( + log, + "committing step {}", step.step_time; + "region replacement id" => %params.request.id, + ); + + osagactx + .datastore() + .add_region_replacement_request_step(&opctx, step) + .await + .map_err(ActionError::action_failed)?; + } else { + info!( + log, + "no step to commit"; + "region replacement id" => %params.request.id, + ); + } + + Ok(()) +} + +async fn srrd_drive_region_replacement_commit_undo( + _sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + // If this saga unwinds at the last step, do we have to remove the committed + // step from db? The problem is that we did execute the step, and it's not + // something we can roll back. Leave the last step in the DB so it can be + // referenced during the check step the next time this saga is invoked. + // + // If the saga unwinds at the last step because it didn't commit the + // executed step to the database, this is ok! This would mean that the next + // invocation of the drive saga would be executing without the knowledge of + // what the previous one did - however, this author believes that this is ok + // due to the fact that this saga's forward actions are idempotent. + // + // If the final forward action fails to commit a step to the database, here + // are the cases where this saga could potentially repeat its action: + // + // 1. a propolis action was executed (read: a running propolis was sent a + // replace request) + // 2. a pantry action was executed (read: the volume was attached + // (activating in the background) to a pantry) + // + // # case 1 # + // + // In the case of the next invocation of the drive saga choosing a propolis + // action: + // + // - if the replace request is sent to the same propolis that originally + // received it, the upstairs would respond with `StartedAlready`. The + // drive saga would then consider the replacement not done and wait. + // + // - if the replace request is sent to a different propolis, that propolis + // would have constructed the disk's volume with the replacement VCR, so + // the upstairs would respond with `ReplaceResult::VcrMatches`. The drive + // saga would then consider the replacement done only if propolis observed + // that the volume activated ok. + // + // # case 2 # + // + // In the case of the next invocation of the drive saga choosing a pantry + // action, Nexus first checks if the volume was already attached to the + // selected Pantry, and if so, will detach it before sending a "attach in + // the background with this job id" request. + // + // - if Nexus chose same Pantry as the original drive saga, this would + // cancel any existing reconciliation and start it up again from the + // beginning. This is ok - reconciliation can be interrupted at any time. + // If this repeatedly happened it would cause progress to be very slow, + // but progress would be made. + // + // - if Nexus chose a different Pantry, the newly checked-out Volume would + // steal the activation from the original Pantry, cancelling the + // reconcilation only to start it up again on the different Pantry. + // + // # also! + // + // As well, both of these cases are equivalent to if Nexus chose to always + // attempt some sort of action, instead of choosing no-ops or waiting for + // operations driven by any previous steps to complete, aka if Nexus + // _always_ polled, instead of the behaviour it has now (wait or poll or + // receive push notifications). Polling all the time would be functionally + // correct but unnecessary (and in the case of crucible#1277, a problem!). + + Ok(()) +} + +async fn srrd_finish_saga( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + let execute_result = sagactx.lookup::("execute")?; + + // Use the same undo function to exit the saga. If it was determined that + // the region replacement is done, transition to ReplacementDone, else + // transition back to Running. + if execute_result.replacement_done { + osagactx + .datastore() + .set_region_replacement_from_driving_to_done( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + } else { + osagactx + .datastore() + .undo_set_region_replacement_driving( + &opctx, + params.request.id, + saga_id, + ) + .await + .map_err(ActionError::action_failed)?; + } + + Ok(()) +} diff --git a/nexus/src/app/sagas/region_replacement_finish.rs b/nexus/src/app/sagas/region_replacement_finish.rs new file mode 100644 index 0000000000..c917ac2edd --- /dev/null +++ b/nexus/src/app/sagas/region_replacement_finish.rs @@ -0,0 +1,345 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Compared to the rest of the region replacement process, finishing the +//! process is straight forward. This saga is responsible for the following +//! region replacement request state transitions: +//! +//! ```text +//! ReplacementDone <-- +//! | +//! | | +//! v | +//! | +//! Completing -- +//! +//! | +//! v +//! +//! Completed +//! ``` +//! +//! It will set itself as the "operating saga" for a region replacement request, +//! change the state to "Completing", and: +//! +//! 1. Call the Volume delete saga for the fake Volume that points to the old +//! region. +//! +//! 2. Clear the operating saga id from the request record, and change the state +//! to Completed. +//! + +use super::{ + ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, + ACTION_GENERATE_ID, +}; +use crate::app::sagas::declare_saga_actions; +use crate::app::sagas::volume_delete; +use crate::app::{authn, db}; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use steno::Node; +use uuid::Uuid; + +// region replacement finish saga: input parameters + +#[derive(Debug, Deserialize, Serialize)] +pub(crate) struct Params { + pub serialized_authn: authn::saga::Serialized, + /// The fake volume created for the region that was replaced + // Note: this is only required in the params to build the volume-delete sub + // saga + pub region_volume_id: Uuid, + pub request: db::model::RegionReplacement, +} + +// region replacement finish saga: actions + +declare_saga_actions! { + region_replacement_finish; + SET_SAGA_ID -> "unused_1" { + + srrf_set_saga_id + - srrf_set_saga_id_undo + } + UPDATE_REQUEST_RECORD -> "unused_2" { + + srrf_update_request_record + } +} + +// region replacement finish saga: definition + +#[derive(Debug)] +pub(crate) struct SagaRegionReplacementFinish; +impl NexusSaga for SagaRegionReplacementFinish { + const NAME: &'static str = "region-replacement-finish"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + region_replacement_finish_register_actions(registry); + } + + fn make_saga_dag( + params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(Node::action( + "saga_id", + "GenerateSagaId", + ACTION_GENERATE_ID.as_ref(), + )); + + builder.append(set_saga_id_action()); + + let subsaga_params = volume_delete::Params { + serialized_authn: params.serialized_authn.clone(), + volume_id: params.region_volume_id, + }; + + let subsaga_dag = { + let subsaga_builder = steno::DagBuilder::new(steno::SagaName::new( + volume_delete::SagaVolumeDelete::NAME, + )); + volume_delete::SagaVolumeDelete::make_saga_dag( + &subsaga_params, + subsaga_builder, + )? + }; + + builder.append(Node::constant( + "params_for_volume_delete_subsaga", + serde_json::to_value(&subsaga_params).map_err(|e| { + SagaInitError::SerializeError( + "params_for_volume_delete_subsaga".to_string(), + e, + ) + })?, + )); + + builder.append(Node::subsaga( + "volume_delete_subsaga_no_result", + subsaga_dag, + "params_for_volume_delete_subsaga", + )); + + builder.append(update_request_record_action()); + + Ok(builder.build()?) + } +} + +// region replacement finish saga: action implementations + +async fn srrf_set_saga_id( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Change the request record here to an intermediate "completing" state to + // block out other sagas that will be triggered for the same request. + osagactx + .datastore() + .set_region_replacement_completing(&opctx, params.request.id, saga_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +async fn srrf_set_saga_id_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + osagactx + .datastore() + .undo_set_region_replacement_completing( + &opctx, + params.request.id, + saga_id, + ) + .await?; + + Ok(()) +} + +async fn srrf_update_request_record( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let saga_id = sagactx.lookup::("saga_id")?; + + // Now that the region has been deleted, update the replacement request + // record to 'Complete' and clear the operating saga id. There is no undo + // step for this, it should succeed idempotently. + datastore + .set_region_replacement_complete(&opctx, params.request.id, saga_id) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +#[cfg(test)] +pub(crate) mod test { + use crate::{ + app::saga::create_saga_dag, + app::sagas::region_replacement_finish::Params, + app::sagas::region_replacement_finish::SagaRegionReplacementFinish, + }; + use async_bb8_diesel::AsyncRunQueryDsl; + use chrono::Utc; + use nexus_db_model::Region; + use nexus_db_model::RegionReplacement; + use nexus_db_model::RegionReplacementState; + use nexus_db_model::Volume; + use nexus_db_queries::authn::saga::Serialized; + use nexus_db_queries::context::OpContext; + use nexus_test_utils_macros::nexus_test; + use sled_agent_client::types::CrucibleOpts; + use sled_agent_client::types::VolumeConstructionRequest; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + #[nexus_test(server = crate::Server)] + async fn test_region_replacement_finish_saga( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Manually insert required records + let old_region_volume_id = Uuid::new_v4(); + let new_volume_id = Uuid::new_v4(); + + let replaced_region = { + let dataset_id = Uuid::new_v4(); + Region::new( + dataset_id, + old_region_volume_id, + 512_i64.try_into().unwrap(), + 10, + 10, + ) + }; + + { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + use nexus_db_model::schema::region::dsl; + diesel::insert_into(dsl::region) + .values(replaced_region.clone()) + .execute_async(&*conn) + .await + .unwrap(); + } + + let volume_construction_request = VolumeConstructionRequest::Volume { + id: old_region_volume_id, + block_size: 0, + sub_volumes: vec![VolumeConstructionRequest::Region { + block_size: 0, + blocks_per_extent: 0, + extent_count: 0, + gen: 0, + opts: CrucibleOpts { + id: old_region_volume_id, + target: vec![ + // XXX if you put something here, you'll need a + // synthetic dataset record + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: false, + }, + }], + read_only_parent: None, + }; + + let volume_data = + serde_json::to_string(&volume_construction_request).unwrap(); + + datastore + .volume_create(Volume::new(old_region_volume_id, volume_data)) + .await + .unwrap(); + + let request = RegionReplacement { + id: Uuid::new_v4(), + request_time: Utc::now(), + old_region_id: replaced_region.id(), + volume_id: new_volume_id, + old_region_volume_id: Some(old_region_volume_id), + new_region_id: None, // no value needed here + replacement_state: RegionReplacementState::ReplacementDone, + operating_saga_id: None, + }; + + datastore + .insert_region_replacement_request(&opctx, request.clone()) + .await + .unwrap(); + + // Run the region replacement finish saga + let dag = create_saga_dag::(Params { + serialized_authn: Serialized::for_opctx(&opctx), + region_volume_id: old_region_volume_id, + request: request.clone(), + }) + .unwrap(); + + let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); + + // Actually run the saga + let _output = nexus.run_saga(runnable_saga).await.unwrap(); + + // Validate the state transition + let result = datastore + .get_region_replacement_request_by_id(&opctx, request.id) + .await + .unwrap(); + assert_eq!(result.replacement_state, RegionReplacementState::Complete); + assert!(result.operating_saga_id.is_none()); + + // Validate the Volume was deleted + assert!(datastore + .volume_get(old_region_volume_id) + .await + .unwrap() + .is_none()); + } +} diff --git a/nexus/src/app/volume.rs b/nexus/src/app/volume.rs index 8cfffdb686..d4353e52d9 100644 --- a/nexus/src/app/volume.rs +++ b/nexus/src/app/volume.rs @@ -90,6 +90,8 @@ impl super::Nexus { ); for repaired_downstairs in repair_finish_info.repairs { + // First, record this notification + self.db_datastore .upstairs_repair_notification( opctx, @@ -110,10 +112,27 @@ impl super::Nexus { ) .await?; + // If the live repair or reconciliation was successfully completed, + // check if the repaired downstairs is part of a region + // replacement request. + if !repair_finish_info.aborted { - // TODO-followup if there's an active region replacement - // occurring, a successfully completed live repair can trigger a - // saga to destroy the original region. + let maybe_region_replacement = self + .datastore() + .lookup_in_progress_region_replacement_request_by_new_region_id( + opctx, + repaired_downstairs.region_uuid, + ) + .await?; + + if maybe_region_replacement.is_none() { + // A live repair or reconciliation completed successfully, + // but there is no in-progress region replacement request + // for that region, so it wasn't initated by Nexus. + // + // TODO-followup if there are too many repairs to the same + // downstairs, do something with that information. + } } } diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 952b324ac6..f90a035de6 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -110,6 +110,11 @@ blueprints.period_secs_collect_crdb_node_ids = 600 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 +# The driver task should wake up frequently, something like every 10 seconds. +# however, if it's this low it affects the test_omdb_success_cases test output. +# keep this 30 seconds, so that the test shows "triggered by an explicit +# signal" instead of "triggered by a periodic timer firing" +region_replacement_driver.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index ae348e775d..73322e518f 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -356,7 +356,7 @@ async fn test_snapshot_prevents_other_disk( // The Crucible snapshots still remain assert!(!disk_test.crucible_resources_deleted().await); - // Attempt disk allocation, which will fail - the presense of the snapshot + // Attempt disk allocation, which will fail - the presence of the snapshot // means the region wasn't deleted. let disk_size = ByteCount::from_gibibytes_u32(10); let next_disk_name: Name = "next-disk".parse().unwrap(); diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs new file mode 100644 index 0000000000..be4c2ec9c0 --- /dev/null +++ b/nexus/types/src/internal_api/background.rs @@ -0,0 +1,14 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use serde::Deserialize; +use serde::Serialize; + +/// The status of a `region_replacement_drive` background task activation +#[derive(Serialize, Deserialize, Default)] +pub struct RegionReplacementDriverStatus { + pub drive_invoked_ok: Vec, + pub finish_invoked_ok: Vec, + pub errors: Vec, +} diff --git a/nexus/types/src/internal_api/mod.rs b/nexus/types/src/internal_api/mod.rs index 9c029d5072..66a5d21a23 100644 --- a/nexus/types/src/internal_api/mod.rs +++ b/nexus/types/src/internal_api/mod.rs @@ -2,5 +2,6 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +pub mod background; pub mod params; pub mod views; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 8d5adba401..e63eb411c3 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -56,6 +56,7 @@ blueprints.period_secs_collect_crdb_node_ids = 180 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 +region_replacement_driver.period_secs = 10 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 58a4ad5b45..ced1da17b3 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -56,6 +56,7 @@ blueprints.period_secs_collect_crdb_node_ids = 180 sync_service_zone_nat.period_secs = 30 switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 +region_replacement_driver.period_secs = 10 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 From cb3a9bf2d49df9b048f590d2340b0a8a07819f01 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Jun 2024 14:34:42 -0700 Subject: [PATCH 02/13] Bump curve25519-dalek from 4.1.2 to 4.1.3 (#5915) --- Cargo.lock | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f9ae4c29c1..3f4300ebd0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1506,16 +1506,15 @@ dependencies = [ [[package]] name = "curve25519-dalek" -version = "4.1.2" +version = "4.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a677b8922c94e01bdbb12126b0bc852f00447528dee1782229af9c720c3f348" +checksum = "97fb8b7c4503de7d6ae7b42ab72a5a59857b4c937ec27a3d4539dba95b5ab2be" dependencies = [ "cfg-if", "cpufeatures", "curve25519-dalek-derive", "digest", "fiat-crypto", - "platforms", "rand_core 0.6.4", "rustc_version 0.4.0", "subtle", @@ -6879,12 +6878,6 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" -[[package]] -name = "platforms" -version = "3.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "db23d408679286588f4d4644f965003d056e3dd5abcaaa938116871d7ce2fee7" - [[package]] name = "plotters" version = "0.3.5" From 990758564aab5eb25aafa3fb918092c4c51de99a Mon Sep 17 00:00:00 2001 From: Adam Leventhal Date: Tue, 25 Jun 2024 18:11:47 -0700 Subject: [PATCH 03/13] clean up dpd-client (#5951) --- Cargo.lock | 1 - clients/dpd-client/Cargo.toml | 1 - clients/dpd-client/build.rs | 24 +- clients/dpd-client/src/lib.rs | 655 +--------------------------------- sled-agent/src/services.rs | 2 +- 5 files changed, 37 insertions(+), 646 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3f4300ebd0..3a533e010a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1997,7 +1997,6 @@ dependencies = [ "chrono", "futures", "http 0.2.12", - "ipnetwork", "omicron-workspace-hack", "omicron-zone-package", "progenitor", diff --git a/clients/dpd-client/Cargo.toml b/clients/dpd-client/Cargo.toml index 477435d8bb..04240ea5bb 100644 --- a/clients/dpd-client/Cargo.toml +++ b/clients/dpd-client/Cargo.toml @@ -16,7 +16,6 @@ slog.workspace = true regress.workspace = true uuid.workspace = true chrono.workspace = true -ipnetwork.workspace = true http.workspace = true schemars.workspace = true rand.workspace = true diff --git a/clients/dpd-client/build.rs b/clients/dpd-client/build.rs index 952a7ddee6..02a685632c 100644 --- a/clients/dpd-client/build.rs +++ b/clients/dpd-client/build.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2022 Oxide Computer Company +// Copyright 2024 Oxide Computer Company // // TODO: remove // This code is only required at the moment because the source repo @@ -15,6 +15,7 @@ use anyhow::Context; use anyhow::Result; use omicron_zone_package::config::Config; use omicron_zone_package::package::PackageSource; +use progenitor::TypePatch; use quote::quote; use std::env; use std::fs; @@ -73,7 +74,7 @@ fn main() -> Result<()> { let code = progenitor::Generator::new( progenitor::GenerationSettings::new() - .with_inner_type(quote!(ClientState)) + .with_inner_type(quote!{ ClientState }) .with_pre_hook(quote! { |state: &crate::ClientState, request: &reqwest::Request| { slog::debug!(state.log, "client request"; @@ -88,9 +89,22 @@ fn main() -> Result<()> { slog::debug!(state.log, "client response"; "result" => ?result); } }) - .with_replacement("Ipv4Cidr", "crate::Ipv4Cidr", std::iter::empty()) - .with_replacement("Ipv6Cidr", "crate::Ipv6Cidr", std::iter::empty()) - .with_replacement("Cidr", "crate::Cidr", std::iter::empty()), + .with_patch("LinkId", &TypePatch::default() + .with_derive("Eq") + .with_derive("PartialEq") + ) + .with_patch("LinkCreate", &TypePatch::default() + .with_derive("Eq") + .with_derive("PartialEq") + ) + .with_patch("LinkSettings", &TypePatch::default() + .with_derive("Eq") + .with_derive("PartialEq") + ) + .with_patch("PortSettings", &TypePatch::default() + .with_derive("Eq") + .with_derive("PartialEq") + ) ) .generate_tokens(&spec) .with_context(|| { diff --git a/clients/dpd-client/src/lib.rs b/clients/dpd-client/src/lib.rs index 556a8493d7..a0608a5c7d 100644 --- a/clients/dpd-client/src/lib.rs +++ b/clients/dpd-client/src/lib.rs @@ -2,25 +2,18 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company -#![allow(clippy::redundant_closure_call)] -#![allow(clippy::needless_lifetimes)] -#![allow(clippy::match_single_binding)] -#![allow(clippy::clone_on_copy)] -#![allow(clippy::unnecessary_to_owned)] // The progenitor-generated API for dpd currently incorporates a type from // oximeter, which includes a docstring that has a doc-test in it. // That test passes for code that lives in omicron, but fails for code imported // by omicron. #![allow(rustdoc::broken_intra_doc_links)] +use std::net::IpAddr; + use slog::info; use slog::Logger; -use types::LinkCreate; -use types::LinkId; -use types::LinkSettings; -use types::PortSettings; include!(concat!(env!("OUT_DIR"), "/dpd-client.rs")); @@ -44,20 +37,16 @@ impl Client { pub async fn ensure_nat_entry( &self, log: &Logger, - target_ip: ipnetwork::IpNetwork, + target_ip: IpAddr, target_mac: types::MacAddr, target_first_port: u16, target_last_port: u16, target_vni: u32, sled_ip_address: &std::net::Ipv6Addr, ) -> Result<(), progenitor_client::Error> { - let existing_nat = match target_ip { - ipnetwork::IpNetwork::V4(network) => { - self.nat_ipv4_get(&network.ip(), target_first_port).await - } - ipnetwork::IpNetwork::V6(network) => { - self.nat_ipv6_get(&network.ip(), target_first_port).await - } + let existing_nat = match &target_ip { + IpAddr::V4(ip) => self.nat_ipv4_get(ip, target_first_port).await, + IpAddr::V6(ip) => self.nat_ipv6_get(ip, target_first_port).await, }; // If a NAT entry already exists, but has the wrong internal @@ -72,20 +61,12 @@ impl Client { info!(log, "deleting old nat entry"; "target_ip" => ?target_ip); - match target_ip { - ipnetwork::IpNetwork::V4(network) => { - self.nat_ipv4_delete( - &network.ip(), - target_first_port, - ) - .await + match &target_ip { + IpAddr::V4(ip) => { + self.nat_ipv4_delete(ip, target_first_port).await } - ipnetwork::IpNetwork::V6(network) => { - self.nat_ipv6_delete( - &network.ip(), - target_first_port, - ) - .await + IpAddr::V6(ip) => { + self.nat_ipv6_delete(ip, target_first_port).await } }?; } else { @@ -113,19 +94,19 @@ impl Client { vni: target_vni.into(), }; - match target_ip { - ipnetwork::IpNetwork::V4(network) => { + match &target_ip { + IpAddr::V4(ip) => { self.nat_ipv4_create( - &network.ip(), + ip, target_first_port, target_last_port, &nat_target, ) .await } - ipnetwork::IpNetwork::V6(network) => { + IpAddr::V6(ip) => { self.nat_ipv6_create( - &network.ip(), + ip, target_first_port, target_last_port, &nat_target, @@ -139,42 +120,6 @@ impl Client { Ok(()) } - /// Ensure that a NAT entry is deleted. - /// - /// nat_ipv[46]_delete are not idempotent (see oxidecomputer/dendrite#343), - /// but this wrapper function is. Call this from sagas instead. - pub async fn ensure_nat_entry_deleted( - &self, - log: &Logger, - target_ip: ipnetwork::IpNetwork, - target_first_port: u16, - ) -> Result<(), progenitor_client::Error> { - let result = match target_ip { - ipnetwork::IpNetwork::V4(network) => { - self.nat_ipv4_delete(&network.ip(), target_first_port).await - } - ipnetwork::IpNetwork::V6(network) => { - self.nat_ipv6_delete(&network.ip(), target_first_port).await - } - }; - - match result { - Ok(_) => { - info!(log, "deleted old nat entry"; "target_ip" => ?target_ip); - } - - Err(e) => { - if e.status() == Some(http::StatusCode::NOT_FOUND) { - info!(log, "no nat entry found for: {target_ip:#?}"); - } else { - return Err(e); - } - } - } - - Ok(()) - } - /// Ensure that a loopback address is created. /// /// loopback_ipv[46]_create are not idempotent (see @@ -261,569 +206,3 @@ impl Client { } } } - -// XXX delete everything below once we use the real dpd-client crate. -// https://github.com/oxidecomputer/omicron/issues/2775 - -use std::convert::TryFrom; -use std::fmt; -use std::net::{IpAddr, Ipv4Addr, Ipv6Addr}; -use std::str::FromStr; - -use schemars::JsonSchema; -use serde::Deserialize; -use serde::Serialize; - -use rand::prelude::*; - -// Given an IPv6 multicast address, generate the associated synthetic mac -// address -pub fn multicast_mac_addr(ip: Ipv6Addr) -> MacAddr { - let o = ip.octets(); - MacAddr::new(0x33, 0x33, o[12], o[13], o[14], o[15]) -} - -/// Generate an IPv6 adddress within the provided `cidr`, using the EUI-64 -/// transfrom of `mac`. -pub fn generate_ipv6_addr(cidr: Ipv6Cidr, mac: MacAddr) -> Ipv6Addr { - let prefix: u128 = cidr.prefix.into(); - let mac = u128::from(u64::from_be_bytes(mac.to_eui64())); - let mask = ((1u128 << cidr.prefix_len) - 1) << (128 - cidr.prefix_len); - let ipv6 = (prefix & mask) | (mac & !mask); - ipv6.into() -} - -/// Generate a link-local IPv6 address using the EUI-64 transform of `mac`. -pub fn generate_ipv6_link_local(mac: MacAddr) -> Ipv6Addr { - const LINK_LOCAL_PREFIX: Ipv6Cidr = Ipv6Cidr { - prefix: Ipv6Addr::new(0xfe80, 0, 0, 0, 0, 0, 0, 0), - prefix_len: 64, - }; - - generate_ipv6_addr(LINK_LOCAL_PREFIX, mac) -} - -/// An IP subnet with a network prefix and prefix length. -#[derive(Debug, Eq, PartialEq, Copy, Deserialize, Serialize, Clone)] -#[serde(untagged, rename_all = "snake_case")] -pub enum Cidr { - V4(Ipv4Cidr), - V6(Ipv6Cidr), -} - -// NOTE: We don't derive JsonSchema. That's intended so that we can use an -// untagged enum for `Cidr`, and use this method to annotate schemars output -// for client-generators (e.g., progenitor) to use in generating a better -// client. -impl JsonSchema for Cidr { - fn schema_name() -> String { - "Cidr".to_string() - } - - fn json_schema( - gen: &mut schemars::gen::SchemaGenerator, - ) -> schemars::schema::Schema { - schemars::schema::SchemaObject { - subschemas: Some(Box::new(schemars::schema::SubschemaValidation { - one_of: Some(vec![ - label_schema("v4", gen.subschema_for::()), - label_schema("v6", gen.subschema_for::()), - ]), - ..Default::default() - })), - ..Default::default() - } - .into() - } -} - -// Insert another level of schema indirection in order to provide an -// additional title for a subschema. This allows generators to infer a better -// variant name for an "untagged" enum. -fn label_schema( - label: &str, - schema: schemars::schema::Schema, -) -> schemars::schema::Schema { - schemars::schema::SchemaObject { - metadata: Some( - schemars::schema::Metadata { - title: Some(label.to_string()), - ..Default::default() - } - .into(), - ), - subschemas: Some( - schemars::schema::SubschemaValidation { - all_of: Some(vec![schema]), - ..Default::default() - } - .into(), - ), - ..Default::default() - } - .into() -} - -impl fmt::Display for Cidr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self { - Cidr::V4(c) => write!(f, "{c}"), - Cidr::V6(c) => write!(f, "{c}"), - } - } -} - -impl FromStr for Cidr { - type Err = String; - - fn from_str(s: &str) -> Result { - if let Ok(cidr) = s.parse() { - Ok(Cidr::V4(cidr)) - } else if let Ok(cidr) = s.parse() { - Ok(Cidr::V6(cidr)) - } else { - Err(format!("Invalid CIDR: '{s}'")) - } - } -} - -/// An IPv4 subnet with prefix and prefix length. -#[derive(Debug, Eq, PartialEq, Clone, Copy)] -pub struct Ipv4Cidr { - pub prefix: Ipv4Addr, - pub prefix_len: u8, -} - -// NOTE -// -// We implement the serde and JsonSchema traits manually. This emitted schema is -// never actually used to generate the client, because we instead ask -// `progenitor` to use the "real" `common::network::Ipv4Cidr` in its place. We -// do however include _some_ schema for this type so that it shows up in the -// document. Rather than provide a regular expression for the format of an IPv4 -// or v6 CIDR block, which is complicated, we just provide a human-friendly -// format name of "ipv4cidr" or "ipv6cidr". -impl<'de> serde::Deserialize<'de> for Ipv4Cidr { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - String::deserialize(deserializer)?.as_str().parse().map_err( - |e: ::Err| { - ::custom(e) - }, - ) - } -} - -impl Serialize for Ipv4Cidr { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serializer.serialize_str(&format!("{self}")) - } -} - -impl JsonSchema for Ipv4Cidr { - fn schema_name() -> String { - String::from("Ipv4Cidr") - } - - fn json_schema( - _: &mut schemars::gen::SchemaGenerator, - ) -> schemars::schema::Schema { - schemars::schema::SchemaObject { - metadata: Some(Box::new(schemars::schema::Metadata { - title: Some("An IPv4 subnet".to_string()), - description: Some( - "An IPv4 subnet, including prefix and subnet mask" - .to_string(), - ), - examples: vec!["192.168.1.0/24".into()], - ..Default::default() - })), - format: Some(String::from("ipv4cidr")), - instance_type: Some(schemars::schema::InstanceType::String.into()), - ..Default::default() - } - .into() - } -} - -impl Ipv4Cidr { - /// Return `true` if the IP address is within the network. - pub fn contains(&self, ipv4: Ipv4Addr) -> bool { - let prefix: u32 = self.prefix.into(); - let mask = ((1u32 << self.prefix_len) - 1) << (32 - self.prefix_len); - let addr: u32 = ipv4.into(); - - (addr & mask) == prefix - } -} - -impl fmt::Display for Ipv4Cidr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}/{}", self.prefix, self.prefix_len) - } -} - -impl From for Ipv4Cidr { - fn from(x: u64) -> Self { - let prefix: u32 = (x >> 32) as u32; - let prefix_len: u8 = (x & 0xff) as u8; - Ipv4Cidr { prefix: prefix.into(), prefix_len } - } -} - -impl From for u64 { - fn from(x: Ipv4Cidr) -> Self { - let prefix: u32 = x.prefix.into(); - (u64::from(prefix) << 32) | u64::from(x.prefix_len) - } -} - -impl From<&Ipv4Cidr> for u64 { - fn from(x: &Ipv4Cidr) -> Self { - (*x).into() - } -} - -impl FromStr for Ipv4Cidr { - type Err = String; - - fn from_str(s: &str) -> Result { - let err = || Err(format!("Invalid IPv4 CIDR: '{s}'")); - let Some((maybe_prefix, maybe_prefix_len)) = s.split_once('/') else { - return err(); - }; - let Ok(prefix) = maybe_prefix.parse() else { - return err(); - }; - let Ok(prefix_len) = maybe_prefix_len.parse() else { - return err(); - }; - if prefix_len <= 32 { - Ok(Ipv4Cidr { prefix, prefix_len }) - } else { - err() - } - } -} - -impl From for Cidr { - fn from(cidr: Ipv4Cidr) -> Self { - Cidr::V4(cidr) - } -} - -impl TryFrom for Ipv4Cidr { - type Error = &'static str; - - fn try_from(cidr: Cidr) -> Result { - match cidr { - Cidr::V4(c) => Ok(c), - _ => Err("not a v4 CIDR"), - } - } -} - -/// An IPv6 subnet with prefix and prefix length. -#[derive(Debug, Clone, Copy, Eq, PartialEq, Hash)] -pub struct Ipv6Cidr { - pub prefix: Ipv6Addr, - pub prefix_len: u8, -} - -// NOTE: See above about why we manually implement serialization and JsonSchema. -impl<'de> serde::Deserialize<'de> for Ipv6Cidr { - fn deserialize(deserializer: D) -> Result - where - D: serde::Deserializer<'de>, - { - String::deserialize(deserializer)?.parse().map_err( - |e: ::Err| { - ::custom(e) - }, - ) - } -} - -impl Serialize for Ipv6Cidr { - fn serialize(&self, serializer: S) -> Result - where - S: serde::Serializer, - { - serializer.serialize_str(&format!("{self}")) - } -} -impl JsonSchema for Ipv6Cidr { - fn schema_name() -> String { - String::from("Ipv6Cidr") - } - - fn json_schema( - _: &mut schemars::gen::SchemaGenerator, - ) -> schemars::schema::Schema { - schemars::schema::SchemaObject { - metadata: Some(Box::new(schemars::schema::Metadata { - title: Some("An IPv6 subnet".to_string()), - description: Some( - "An IPv6 subnet, including prefix and subnet mask" - .to_string(), - ), - examples: vec!["fe80::/10".into()], - ..Default::default() - })), - format: Some(String::from("ipv6cidr")), - instance_type: Some(schemars::schema::InstanceType::String.into()), - ..Default::default() - } - .into() - } -} - -impl Ipv6Cidr { - /// Return `true` if the address is within the subnet. - pub fn contains(&self, ipv6: Ipv6Addr) -> bool { - let prefix: u128 = self.prefix.into(); - let mask = ((1u128 << self.prefix_len) - 1) << (128 - self.prefix_len); - let addr: u128 = ipv6.into(); - - (addr & mask) == prefix - } -} - -impl Ord for Ipv6Cidr { - fn cmp(&self, other: &Self) -> std::cmp::Ordering { - match self.prefix.cmp(&other.prefix) { - std::cmp::Ordering::Equal => self.prefix_len.cmp(&other.prefix_len), - o => o, - } - } -} - -impl PartialOrd for Ipv6Cidr { - fn partial_cmp(&self, other: &Self) -> Option { - Some(self.cmp(other)) - } -} - -impl fmt::Display for Ipv6Cidr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}/{}", self.prefix, self.prefix_len) - } -} - -impl FromStr for Ipv6Cidr { - type Err = String; - - fn from_str(s: &str) -> Result { - let err = || Err(format!("Invalid IPv6 CIDR: '{s}'")); - let Some((maybe_prefix, maybe_prefix_len)) = s.split_once('/') else { - return err(); - }; - let Ok(prefix) = maybe_prefix.parse() else { - return err(); - }; - let Ok(prefix_len) = maybe_prefix_len.parse() else { - return err(); - }; - if prefix_len <= 128 { - Ok(Ipv6Cidr { prefix, prefix_len }) - } else { - err() - } - } -} - -impl TryFrom for Ipv6Cidr { - type Error = &'static str; - - fn try_from(cidr: Cidr) -> Result { - match cidr { - Cidr::V6(c) => Ok(c), - _ => Err("not a v6 CIDR"), - } - } -} - -impl From for Cidr { - fn from(cidr: Ipv6Cidr) -> Self { - Cidr::V6(cidr) - } -} - -/// An EUI-48 MAC address, used for layer-2 addressing. -#[derive(Copy, Deserialize, Serialize, JsonSchema, Clone, Eq, PartialEq)] -pub struct MacAddr { - a: [u8; 6], -} - -impl MacAddr { - /// Create a new MAC address from octets in network byte order. - pub fn new(o0: u8, o1: u8, o2: u8, o3: u8, o4: u8, o5: u8) -> MacAddr { - MacAddr { a: [o0, o1, o2, o3, o4, o5] } - } - - /// Create a new MAC address from a slice of bytes in network byte order. - /// - /// # Panics - /// - /// Panics if the slice is fewer than 6 octets. - /// - /// Note that any further octets are ignored. - pub fn from_slice(s: &[u8]) -> MacAddr { - MacAddr::new(s[0], s[1], s[2], s[3], s[4], s[5]) - } - - /// Convert `self` to an array of bytes in network byte order. - pub fn to_vec(self) -> Vec { - vec![self.a[0], self.a[1], self.a[2], self.a[3], self.a[4], self.a[5]] - } - - /// Return `true` if `self` is the null MAC address, all zeros. - pub fn is_null(self) -> bool { - const EMPTY: MacAddr = MacAddr { a: [0, 0, 0, 0, 0, 0] }; - - self == EMPTY - } - - /// Generate a random MAC address. - pub fn random() -> MacAddr { - let mut rng = rand::thread_rng(); - let mut m = MacAddr { a: [0; 6] }; - for octet in m.a.iter_mut() { - *octet = rng.gen(); - } - m - } - - /// Generate an EUI-64 ID from the mac address, following the process - /// desribed in RFC 2464, section 4. - pub fn to_eui64(self) -> [u8; 8] { - [ - self.a[0] ^ 0x2, - self.a[1], - self.a[2], - 0xff, - 0xfe, - self.a[3], - self.a[4], - self.a[5], - ] - } -} - -impl FromStr for MacAddr { - type Err = String; - - fn from_str(s: &str) -> Result { - let v: Vec<&str> = s.split(':').collect(); - - if v.len() != 6 { - return Err(format!("invalid mac address: {} octets", v.len())); - } - - let mut m = MacAddr { a: [0u8; 6] }; - for (i, octet) in v.iter().enumerate() { - match u8::from_str_radix(octet, 16) { - Ok(b) => m.a[i] = b, - Err(_) => { - return Err(format!( - "invalid mac address: bad octet '{octet}'", - )) - } - } - } - Ok(m) - } -} - -impl fmt::Display for MacAddr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", - self.a[0], self.a[1], self.a[2], self.a[3], self.a[4], self.a[5] - ) - } -} - -impl fmt::Debug for MacAddr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!( - f, - "{:02x}:{:02x}:{:02x}:{:02x}:{:02x}:{:02x}", - self.a[0], self.a[1], self.a[2], self.a[3], self.a[4], self.a[5] - ) - } -} - -impl From for u64 { - fn from(mac: MacAddr) -> u64 { - (u64::from(mac.a[0]) << 40) - | (u64::from(mac.a[1]) << 32) - | (u64::from(mac.a[2]) << 24) - | (u64::from(mac.a[3]) << 16) - | (u64::from(mac.a[4]) << 8) - | u64::from(mac.a[5]) - } -} - -impl From<&MacAddr> for u64 { - fn from(mac: &MacAddr) -> u64 { - From::from(*mac) - } -} - -impl From for MacAddr { - fn from(x: u64) -> Self { - MacAddr { - a: [ - ((x >> 40) & 0xff) as u8, - ((x >> 32) & 0xff) as u8, - ((x >> 24) & 0xff) as u8, - ((x >> 16) & 0xff) as u8, - ((x >> 8) & 0xff) as u8, - (x & 0xff) as u8, - ], - } - } -} - -impl Eq for PortSettings {} - -impl PartialEq for PortSettings { - fn eq(&self, other: &Self) -> bool { - self.links == other.links - } -} - -impl Eq for LinkSettings {} - -impl PartialEq for LinkSettings { - fn eq(&self, other: &Self) -> bool { - self.addrs == other.addrs && self.params == other.params - } -} - -impl Eq for LinkCreate {} - -impl PartialEq for LinkCreate { - fn eq(&self, other: &Self) -> bool { - self.autoneg == other.autoneg - && self.fec == other.fec - && self.kr == other.kr - && self.lane == other.lane - && self.speed == other.speed - } -} - -impl Eq for LinkId {} - -impl PartialEq for LinkId { - fn eq(&self, other: &Self) -> bool { - self.0 == other.0 - } -} diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index f335c2c257..5f53601681 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1195,7 +1195,7 @@ impl ServiceManager { dpd_client .ensure_nat_entry( &self.inner.log, - target_ip.into(), + target_ip, dpd_client::types::MacAddr { a: port.0.mac().into_array(), }, From 6e29409c04fb701dd6a9abfafe76c80a4b07f7d2 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Tue, 25 Jun 2024 19:52:29 -0700 Subject: [PATCH 04/13] Update crucible, add DTrace crucible to host image (#5954) Update Crucible and Propolis to the latest Added a new package, crucible-dtrace that pulls from buildomat a package that contains a set of DTrace scripts. These scripts are extracted into the global zone at /opt/oxide/crucible_dtrace/ Crucible latest includes these updates: Clean up dependency checking, fixing space leak (#1372) Make a DTrace package (#1367) Use a single context in all messages (#1363) Remove `DownstairsWork`, because it's redundant (#1371) Remove `WorkState`, because it's implicit (#1370) Do work immediately upon receipt of a job, if possible (#1366) Move 'do work for one job' into a helper function (#1365) Remove `DownstairsWork` from map when handling it (#1361) Using `block_in_place` for IO operations (#1357) update omicron deps; use re-exported dropshot types in oximeter-producer configuration (#1369) Parameterize more tests (#1364) Misc cleanup, remove sqlite references. (#1360) Fix `Extent::close` docstring (#1359) Make many `Region` functions synchronous (#1356) Remove `Workstate::Done` (unused) (#1355) Return a sorted `VecDeque` directly (#1354) Combine `proc_frame` and `do_work_for` (#1351) Move `do_work_for` and `do_work` into `ActiveConnection` (#1350) Support arbitrary Volumes during replace compare (#1349) Remove the SQLite backend (#1352) Add a custom timeout for buildomat tests (#1344) Move `proc_frame` into `ActiveConnection` (#1348) Remove `UpstairsConnection` from `DownstairsWork` (#1341) Move Work into ConnectionState (#1340) Make `ConnectionState` an enum type (#1339) Parameterize `test_repair.sh` directories (#1345) Remove `Arc>` (#1338) Send message to Downstairs directly (#1336) Consolidate `on_disconnected` and `remove_connection` (#1333) Move disconnect logic to the Downstairs (#1332) Remove invalid DTrace probes. (#1335) Fix outdated comments (#1331) Use message passing when a new connection starts (#1330) Move cancellation into Downstairs, using a token to kill IO tasks (#1329) Make the Downstairs own per-connection state (#1328) Move remaining local state into a `struct ConnectionState` (#1327) Consolidate negotiation + IO operations into one loop (#1322) Allow replacement of a target in a read_only_parent (#1281) Do all IO through IO tasks (#1321) Make `reqwest_client` only present if it's used (#1326) Move negotiation into Downstairs as well (#1320) Update Rust crate clap to v4.5.4 (#1301) Reuse a reqwest client when creating Nexus clients (#1317) Reuse a reqwest client when creating repair client (#1324) Add % to keep buildomat happy (#1323) Downstairs task cleanup (#1313) Update crutest replace test, and mismatch printing. (#1314) Added more DTrace scripts. (#1309) Update Rust crate async-trait to 0.1.80 (#1298) Propolis just has this one update: Allow boot order config in propolis-standalone --------- Co-authored-by: Alan Hanson --- Cargo.lock | 26 +++++++++++++------------- Cargo.toml | 12 ++++++------ dev-tools/releng/src/main.rs | 3 ++- package-manifest.toml | 29 ++++++++++++++++++++++------- 4 files changed, 43 insertions(+), 27 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3a533e010a..734a32e8bd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -464,9 +464,9 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753#50cb28f586083fdb990e401bc6146e7dac9b2753" +source = "git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459#59868677c70f3cd03f03e12584ad1056da8b5459" dependencies = [ - "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753)", + "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459)", "libc", "strum", ] @@ -484,7 +484,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753#50cb28f586083fdb990e401bc6146e7dac9b2753" +source = "git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459#59868677c70f3cd03f03e12584ad1056da8b5459" dependencies = [ "libc", "strum", @@ -1387,7 +1387,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=8c6d485110ecfae5409575246b986a145c386dc4#8c6d485110ecfae5409575246b986a145c386dc4" +source = "git+https://github.com/oxidecomputer/crucible?rev=64e28cea69b427b05064defaf8800a4d678b4612#64e28cea69b427b05064defaf8800a4d678b4612" dependencies = [ "anyhow", "chrono", @@ -1403,7 +1403,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=8c6d485110ecfae5409575246b986a145c386dc4#8c6d485110ecfae5409575246b986a145c386dc4" +source = "git+https://github.com/oxidecomputer/crucible?rev=64e28cea69b427b05064defaf8800a4d678b4612#64e28cea69b427b05064defaf8800a4d678b4612" dependencies = [ "anyhow", "chrono", @@ -1420,7 +1420,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=8c6d485110ecfae5409575246b986a145c386dc4#8c6d485110ecfae5409575246b986a145c386dc4" +source = "git+https://github.com/oxidecomputer/crucible?rev=64e28cea69b427b05064defaf8800a4d678b4612#64e28cea69b427b05064defaf8800a4d678b4612" dependencies = [ "crucible-workspace-hack", "libc", @@ -3476,7 +3476,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753)", + "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459)", "byteorder", "camino", "camino-tempfile", @@ -5531,7 +5531,7 @@ dependencies = [ "pq-sys", "pretty_assertions", "progenitor-client", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459)", "rand 0.8.5", "rcgen", "ref-cast", @@ -5781,7 +5781,7 @@ dependencies = [ "oximeter-producer", "oxnet", "pretty_assertions", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459)", "propolis-mock-server", "rand 0.8.5", "rcgen", @@ -7245,7 +7245,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753#50cb28f586083fdb990e401bc6146e7dac9b2753" +source = "git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459#59868677c70f3cd03f03e12584ad1056da8b5459" dependencies = [ "async-trait", "base64 0.21.7", @@ -7287,7 +7287,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753#50cb28f586083fdb990e401bc6146e7dac9b2753" +source = "git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459#59868677c70f3cd03f03e12584ad1056da8b5459" dependencies = [ "anyhow", "atty", @@ -7297,7 +7297,7 @@ dependencies = [ "futures", "hyper 0.14.28", "progenitor", - "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753)", + "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459)", "rand 0.8.5", "reqwest", "schemars", @@ -7329,7 +7329,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=50cb28f586083fdb990e401bc6146e7dac9b2753#50cb28f586083fdb990e401bc6146e7dac9b2753" +source = "git+https://github.com/oxidecomputer/propolis?rev=59868677c70f3cd03f03e12584ad1056da8b5459#59868677c70f3cd03f03e12584ad1056da8b5459" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index 3ba353c220..60425a4239 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -260,9 +260,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "8c6d485110ecfae5409575246b986a145c386dc4" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "8c6d485110ecfae5409575246b986a145c386dc4" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "8c6d485110ecfae5409575246b986a145c386dc4" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "64e28cea69b427b05064defaf8800a4d678b4612" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "64e28cea69b427b05064defaf8800a4d678b4612" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "64e28cea69b427b05064defaf8800a4d678b4612" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.9" @@ -408,9 +408,9 @@ prettyplease = { version = "0.2.20", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "50cb28f586083fdb990e401bc6146e7dac9b2753" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "50cb28f586083fdb990e401bc6146e7dac9b2753" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "50cb28f586083fdb990e401bc6146e7dac9b2753" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "59868677c70f3cd03f03e12584ad1056da8b5459" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "59868677c70f3cd03f03e12584ad1056da8b5459" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "59868677c70f3cd03f03e12584ad1056da8b5459" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/dev-tools/releng/src/main.rs b/dev-tools/releng/src/main.rs index 9340de4961..1bd3b69ac9 100644 --- a/dev-tools/releng/src/main.rs +++ b/dev-tools/releng/src/main.rs @@ -56,13 +56,14 @@ enum InstallMethod { } /// Packages to install or bundle in the host OS image. -const HOST_IMAGE_PACKAGES: [(&str, InstallMethod); 7] = [ +const HOST_IMAGE_PACKAGES: [(&str, InstallMethod); 8] = [ ("mg-ddm-gz", InstallMethod::Install), ("omicron-sled-agent", InstallMethod::Install), ("overlay", InstallMethod::Bundle), ("oxlog", InstallMethod::Install), ("propolis-server", InstallMethod::Bundle), ("pumpkind-gz", InstallMethod::Install), + ("crucible-dtrace", InstallMethod::Install), ("switch-asic", InstallMethod::Bundle), ]; /// Packages to install or bundle in the recovery (trampoline) OS image. diff --git a/package-manifest.toml b/package-manifest.toml index 476926aaf0..d653f9d999 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -481,7 +481,6 @@ source.type = "composite" source.packages = [ "crucible.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] output.type = "zone" - [package.crucible-pantry-zone] service_name = "crucible_pantry" only_for_targets.image = "standard" @@ -505,10 +504,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "8c6d485110ecfae5409575246b986a145c386dc4" +source.commit = "64e28cea69b427b05064defaf8800a4d678b4612" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "a974c976babbbbe4d126fe324e28093b4f69b689e1cf607ce38323befcfa494e" +source.sha256 = "e9051934c7d6e274158d4afdb4523797c913acd1a1262f973bc0ab7a2a253b5f" output.type = "zone" output.intermediate_only = true @@ -517,13 +516,29 @@ service_name = "crucible_pantry_prebuilt" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "8c6d485110ecfae5409575246b986a145c386dc4" +source.commit = "64e28cea69b427b05064defaf8800a4d678b4612" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "34418c60ecccade796e604997a11b1fa7f01c364996fa4b57131466e910700a8" +source.sha256 = "a8850bfaf08c11a7baa2e4b14b859613b77d9952dc8d20433ebea8136f8a00d3" output.type = "zone" output.intermediate_only = true +[package.crucible-dtrace] +# This package contains a select set of DTrace script that operate on DTrace +# probes that exist for consumers of the crucible upstairs library. These +# scripts are extracted onto the global zone. The source commit here should +# match a version of Crucible that contain probes used by the upstairs. In most +# cases this means the version of Crucible that Propolis is using. +service_name = "crucible_dtrace" +only_for_targets.image = "standard" +source.type = "prebuilt" +source.repo = "crucible" +source.commit = "64e28cea69b427b05064defaf8800a4d678b4612" +# The SHA256 digest is automatically posted to: +# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-dtrace.sha256.txt +source.sha256 = "fe51b1c771f990761c4f8bf95aa26febbfa452df97f8da7d2f329dad88f63e1d" +output.type = "tarball" + # Refer to # https://github.com/oxidecomputer/propolis/blob/master/package/README.md # for instructions on building this manually. @@ -532,10 +547,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "50cb28f586083fdb990e401bc6146e7dac9b2753" +source.commit = "59868677c70f3cd03f03e12584ad1056da8b5459" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "864e74222d3e617f1bd7b7ba8d0e5cc18134dca121fc4339369620d1419c5bb0" +source.sha256 = "4ab62342141c655a2bf088ff608fa353063bc3ac44db459e9d56768aa5f4e3d2" output.type = "zone" [package.mg-ddm-gz] From 49f6e01001462fe31c09c83ce423dc73cc3cc1ce Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 25 Jun 2024 21:11:29 -0700 Subject: [PATCH 05/13] move specific background tasks into submodule (#5930) --- .../app/background/{common.rs => driver.rs} | 155 +-------------- nexus/src/app/background/init.rs | 103 +++++----- nexus/src/app/background/mod.rs | 180 +++++++++++++++--- .../{ => tasks}/abandoned_vmm_reaper.rs | 2 +- nexus/src/app/background/{ => tasks}/bfd.rs | 4 +- .../{ => tasks}/blueprint_execution.rs | 4 +- .../background/{ => tasks}/blueprint_load.rs | 4 +- .../{ => tasks}/crdb_node_id_collector.rs | 2 +- .../app/background/{ => tasks}/dns_config.rs | 6 +- .../background/{ => tasks}/dns_propagation.rs | 8 +- .../app/background/{ => tasks}/dns_servers.rs | 2 +- .../{ => tasks}/external_endpoints.rs | 6 +- .../{ => tasks}/instance_watcher.rs | 2 +- .../{ => tasks}/inventory_collection.rs | 8 +- .../{ => tasks}/metrics_producer_gc.rs | 2 +- nexus/src/app/background/tasks/mod.rs | 28 +++ .../app/background/{ => tasks}/nat_cleanup.rs | 2 +- .../app/background/{ => tasks}/networking.rs | 0 .../background/{ => tasks}/phantom_disks.rs | 2 +- .../{ => tasks}/physical_disk_adoption.rs | 2 +- .../{ => tasks}/region_replacement.rs | 2 +- .../{ => tasks}/region_replacement_driver.rs | 2 +- .../{ => tasks}/service_firewall_rules.rs | 2 +- .../{ => tasks}/sync_service_zone_nat.rs | 2 +- .../{ => tasks}/sync_switch_configuration.rs | 4 +- .../background/{ => tasks}/v2p_mappings.rs | 2 +- 26 files changed, 275 insertions(+), 261 deletions(-) rename nexus/src/app/background/{common.rs => driver.rs} (76%) rename nexus/src/app/background/{ => tasks}/abandoned_vmm_reaper.rs (99%) rename nexus/src/app/background/{ => tasks}/bfd.rs (98%) rename nexus/src/app/background/{ => tasks}/blueprint_execution.rs (99%) rename nexus/src/app/background/{ => tasks}/blueprint_load.rs (99%) rename nexus/src/app/background/{ => tasks}/crdb_node_id_collector.rs (99%) rename nexus/src/app/background/{ => tasks}/dns_config.rs (98%) rename nexus/src/app/background/{ => tasks}/dns_propagation.rs (98%) rename nexus/src/app/background/{ => tasks}/dns_servers.rs (99%) rename nexus/src/app/background/{ => tasks}/external_endpoints.rs (97%) rename nexus/src/app/background/{ => tasks}/instance_watcher.rs (99%) rename nexus/src/app/background/{ => tasks}/inventory_collection.rs (98%) rename nexus/src/app/background/{ => tasks}/metrics_producer_gc.rs (99%) create mode 100644 nexus/src/app/background/tasks/mod.rs rename nexus/src/app/background/{ => tasks}/nat_cleanup.rs (99%) rename nexus/src/app/background/{ => tasks}/networking.rs (100%) rename nexus/src/app/background/{ => tasks}/phantom_disks.rs (98%) rename nexus/src/app/background/{ => tasks}/physical_disk_adoption.rs (99%) rename nexus/src/app/background/{ => tasks}/region_replacement.rs (99%) rename nexus/src/app/background/{ => tasks}/region_replacement_driver.rs (99%) rename nexus/src/app/background/{ => tasks}/service_firewall_rules.rs (98%) rename nexus/src/app/background/{ => tasks}/sync_service_zone_nat.rs (99%) rename nexus/src/app/background/{ => tasks}/sync_switch_configuration.rs (99%) rename nexus/src/app/background/{ => tasks}/v2p_mappings.rs (99%) diff --git a/nexus/src/app/background/common.rs b/nexus/src/app/background/driver.rs similarity index 76% rename from nexus/src/app/background/common.rs rename to nexus/src/app/background/driver.rs index da595dc4e1..f1982b1ad8 100644 --- a/nexus/src/app/background/common.rs +++ b/nexus/src/app/background/driver.rs @@ -2,132 +2,10 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! # Nexus Background Tasks -//! -//! A **background task** in Nexus is any operation that can be activated both -//! periodically and by an explicit signal. This is aimed at RFD 373-style -//! "reliable persistent workflows", also called "reconcilers" or "controllers". -//! These are a kind of automation that examines some _current_ state, compares -//! it to some _intended_ state, and potentially takes action to try to bring -//! the current state in sync with the intended state. Our canonical example is -//! that we want to have Nexus monitor the intended DNS configuration. When it -//! changes, we want to propagate the new configuration to all DNS servers. We -//! implement this with three different background tasks: -//! -//! 1. `DnsConfigWatcher` reads the DNS configuration from the database, stores -//! it in memory, and makes it available via a `tokio::sync::watch` channel. -//! 2. `DnsServersWatcher` reads the list of DNS servers from the database, -//! stores it in memory, and makes it available via a `tokio::sync::watch` -//! channel. -//! 3. `DnsPropagator` uses the the watch channels provided by the other two -//! background tasks to notice when either the DNS configuration or the list -//! of DNS servers has changed. It uses the latest values to make a request -//! to each server to update its configuration. -//! -//! When Nexus changes the DNS configuration, it will update the database with -//! the new configuration and then explicitly activate the `DnsConfigWatcher`. -//! When it reads the new config, it will send it to its watch channel, and that -//! will activate the `DnsPropagator`. If any of this fails, or if Nexus -//! crashes at any point, then the periodic activation of every background task -//! will eventually cause the latest config to be propagated to all of the -//! current servers. -//! -//! The background task framework here is pretty minimal: essentially what it -//! gives you is that you just write an idempotent function that you want to -//! happen periodically or on-demand, wrap it in an impl of `BackgroundTask`, -//! register that with the `Driver`, and you're done. The framework will take -//! care of: -//! -//! * providing a way for Nexus at-large to activate your task -//! * activating your task periodically -//! * ensuring that the task is activated only once at a time in this Nexus -//! (but note that it may always be running concurrently in other Nexus -//! instances) -//! * providing basic visibility into whether the task is running, when the task -//! last ran, etc. -//! -//! We may well want to extend the framework as we build more tasks in general -//! and reconcilers specifically. But we should be mindful not to create -//! footguns for ourselves! See "Design notes" below. -//! -//! ## Notes for background task implementors -//! -//! Background tasks are not necessarily just for reconcilers. That's just the -//! design center. The first two DNS background tasks above aren't reconcilers -//! in any non-trivial sense. -//! -//! Background task activations do not accept input, by design. See "Design -//! notes" below. -//! -//! Generally, you probably don't want to have your background task do retries. -//! If things fail, you rely on the periodic reactivation to try again. -//! -//! ## Design notes -//! -//! The underlying design for RFD 373-style reconcilers is inspired by a few -//! related principles: -//! -//! * the principle in distributed systems of having exactly one code path to -//! achieve a thing, and then always using that path to do that thing (as -//! opposed to having separate paths for, say, the happy path vs. failover, -//! and having one of those paths rarely used) -//! * the [constant-work pattern][1], which basically suggests that a system can -//! be more robust and scalable if it's constructed in a way that always does -//! the same amount of work. Imagine if we made requests to the DNS servers -//! to incrementally update their config every time the DNS data changed. -//! This system does more work as users make more requests. During overloads, -//! things can fall over. Compare with a system whose frontend merely updates -//! the DNS configuration that _should_ exist and whose backend periodically -//! scans the complete intended state and then sets its own state accordingly. -//! The backend does the same amount of work no matter how many requests were -//! made, making it more resistant to overload. A big downside of this -//! approach is increased latency from the user making a request to seeing it -//! applied. This can be mitigated (sacrificing some, but not all, of the -//! "constant work" property) by triggering a backend scan operation when user -//! requests complete. -//! * the design pattern in distributed systems of keeping two copies of data in -//! sync using both event notifications (like a changelog) _and_ periodic full -//! scans. The hope is that a full scan never finds a change that wasn't -//! correctly sync'd, but incorporating an occasional full scan into the -//! design ensures that such bugs are found and their impact repaired -//! automatically. -//! -//! [1]: https://aws.amazon.com/builders-library/reliability-and-constant-work/ -//! -//! Combining these, we get a design pattern for a "reconciler" where: -//! -//! * The reconciler is activated by explicit request (when we know it has work -//! to do) _and_ periodically (to deal with all manner of transient failures) -//! * The reconciler's activity is idempotent: given the same underlying state -//! (e.g., database state), it always attempts to do the same thing. -//! * Each activation of the reconciler accepts no input. That is, even when we -//! think we know what changed, we do not use that information. This ensures -//! that the reconciler really is idempotent and its actions are based solely -//! on the state that it's watching. Put differently: having reconcilers -//! accept an explicit hint about what changed (and then doing something -//! differently based on that) bifurcates the code: there's the common case -//! where that hint is available and the rarely-exercised case when it's not -//! (e.g., because Nexus crashed and it's the subsequent periodic activation -//! that's propagating this change). This is what we're trying to avoid. -//! * We do allow reconcilers to be triggered by a `tokio::sync::watch` channel -//! -- but again, not using the _data_ from that channel. There are two big -//! advantages here: (1) reduced latency from when a change is made to when -//! the reconciler applies it, and (2) (arguably another way to say the same -//! thing) we can space out the periodic activations much further, knowing -//! that most of the time we're not increasing latency by doing this. This -//! compromises the "constant-work" pattern a bit: we might wind up running -//! the reconciler more often during busy times than during idle times, and we -//! could find that overloads something. However, the _operation_ of the -//! reconciler can still be constant work, and there's no more than that -//! amount of work going on at any given time. -//! -//! `watch` channels are a convenient primitive here because they only store -//! one value. With a little care, we can ensure that the writer never blocks -//! and the readers can all see the latest value. (By design, reconcilers -//! generally only care about the latest state of something, not any -//! intermediate states.) We don't have to worry about an unbounded queue, or -//! handling a full queue, or other forms of backpressure. +//! Manages execution of background tasks +use super::BackgroundTask; +use super::TaskHandle; use assert_matches::assert_matches; use chrono::Utc; use futures::future::BoxFuture; @@ -149,16 +27,6 @@ use tokio::sync::watch; use tokio::sync::Notify; use tokio::time::MissedTickBehavior; -/// An operation activated both periodically and by an explicit signal -/// -/// See module-level documentation for details. -pub trait BackgroundTask: Send + Sync { - fn activate<'a>( - &'a mut self, - opctx: &'a OpContext, - ) -> BoxFuture<'a, serde_json::Value>; -} - /// Drives the execution of background tasks /// /// Nexus has only one Driver. All background tasks are registered with the @@ -170,21 +38,6 @@ pub struct Driver { tasks: BTreeMap, } -/// Identifies a background task -/// -/// This is returned by [`Driver::register()`] to identify the corresponding -/// background task. It's then accepted by functions like -/// [`Driver::activate()`] and [`Driver::task_status()`] to identify the task. -#[derive(Clone, Debug, Ord, PartialOrd, PartialEq, Eq)] -pub struct TaskHandle(String); - -impl TaskHandle { - /// Returns the unique name of this background task - pub fn name(&self) -> &str { - &self.0 - } -} - /// Driver-side state of a background task struct Task { /// what this task does (for developers) @@ -466,7 +319,6 @@ impl GenericWatcher for watch::Receiver { mod test { use super::BackgroundTask; use super::Driver; - use crate::app::background::common::ActivationReason; use crate::app::sagas::SagaRequest; use assert_matches::assert_matches; use chrono::Utc; @@ -474,6 +326,7 @@ mod test { use futures::FutureExt; use nexus_db_queries::context::OpContext; use nexus_test_utils_macros::nexus_test; + use nexus_types::internal_api::views::ActivationReason; use std::time::Duration; use std::time::Instant; use tokio::sync::mpsc; diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 1fc1e858b7..a32f60c2b6 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -2,30 +2,31 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Background task initialization - -use super::abandoned_vmm_reaper; -use super::bfd; -use super::blueprint_execution; -use super::blueprint_load; -use super::common; -use super::crdb_node_id_collector; -use super::dns_config; -use super::dns_propagation; -use super::dns_servers; -use super::external_endpoints; -use super::instance_watcher; -use super::inventory_collection; -use super::metrics_producer_gc; -use super::nat_cleanup; -use super::phantom_disks; -use super::physical_disk_adoption; -use super::region_replacement; -use super::region_replacement_driver; -use super::service_firewall_rules; -use super::sync_service_zone_nat::ServiceZoneNatTracker; -use super::sync_switch_configuration::SwitchPortSettingsManager; -use super::v2p_mappings::V2PManager; +//! Specific background task initialization + +use super::tasks::abandoned_vmm_reaper; +use super::tasks::bfd; +use super::tasks::blueprint_execution; +use super::tasks::blueprint_load; +use super::tasks::crdb_node_id_collector; +use super::tasks::dns_config; +use super::tasks::dns_propagation; +use super::tasks::dns_servers; +use super::tasks::external_endpoints; +use super::tasks::instance_watcher; +use super::tasks::inventory_collection; +use super::tasks::metrics_producer_gc; +use super::tasks::nat_cleanup; +use super::tasks::phantom_disks; +use super::tasks::physical_disk_adoption; +use super::tasks::region_replacement; +use super::tasks::region_replacement_driver; +use super::tasks::service_firewall_rules; +use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker; +use super::tasks::sync_switch_configuration::SwitchPortSettingsManager; +use super::tasks::v2p_mappings::V2PManager; +use super::Driver; +use super::TaskHandle; use crate::app::oximeter::PRODUCER_LEASE_DURATION; use crate::app::sagas::SagaRequest; use nexus_config::BackgroundTaskConfig; @@ -47,76 +48,76 @@ use uuid::Uuid; pub struct BackgroundTasks { /// interface for working with background tasks (activation, checking /// status, etc.) - pub driver: common::Driver, + pub driver: Driver, /// task handle for the internal DNS config background task - pub task_internal_dns_config: common::TaskHandle, + pub task_internal_dns_config: TaskHandle, /// task handle for the internal DNS servers background task - pub task_internal_dns_servers: common::TaskHandle, + pub task_internal_dns_servers: TaskHandle, /// task handle for the external DNS config background task - pub task_external_dns_config: common::TaskHandle, + pub task_external_dns_config: TaskHandle, /// task handle for the external DNS servers background task - pub task_external_dns_servers: common::TaskHandle, + pub task_external_dns_servers: TaskHandle, /// task handle for pruning metrics producers with expired leases - pub task_metrics_producer_gc: common::TaskHandle, + pub task_metrics_producer_gc: TaskHandle, /// task handle for the task that keeps track of external endpoints - pub task_external_endpoints: common::TaskHandle, + pub task_external_endpoints: TaskHandle, /// external endpoints read by the background task pub external_endpoints: tokio::sync::watch::Receiver< Option, >, /// task handle for the ipv4 nat entry garbage collector - pub nat_cleanup: common::TaskHandle, + pub nat_cleanup: TaskHandle, /// task handle for the switch bfd manager - pub bfd_manager: common::TaskHandle, + pub bfd_manager: TaskHandle, /// task handle for the task that collects inventory - pub task_inventory_collection: common::TaskHandle, + pub task_inventory_collection: TaskHandle, /// task handle for the task that collects inventory - pub task_physical_disk_adoption: common::TaskHandle, + pub task_physical_disk_adoption: TaskHandle, /// task handle for the task that detects phantom disks - pub task_phantom_disks: common::TaskHandle, + pub task_phantom_disks: TaskHandle, /// task handle for blueprint target loader - pub task_blueprint_loader: common::TaskHandle, + pub task_blueprint_loader: TaskHandle, /// task handle for blueprint execution background task - pub task_blueprint_executor: common::TaskHandle, + pub task_blueprint_executor: TaskHandle, /// task handle for collecting CockroachDB node IDs - pub task_crdb_node_id_collector: common::TaskHandle, + pub task_crdb_node_id_collector: TaskHandle, /// task handle for the service zone nat tracker - pub task_service_zone_nat_tracker: common::TaskHandle, + pub task_service_zone_nat_tracker: TaskHandle, /// task handle for the switch port settings manager - pub task_switch_port_settings_manager: common::TaskHandle, + pub task_switch_port_settings_manager: TaskHandle, /// task handle for the opte v2p manager - pub task_v2p_manager: common::TaskHandle, + pub task_v2p_manager: TaskHandle, /// task handle for the task that detects if regions need replacement and /// begins the process - pub task_region_replacement: common::TaskHandle, + pub task_region_replacement: TaskHandle, /// task handle for the task that drives region replacements forward - pub task_region_replacement_driver: common::TaskHandle, + pub task_region_replacement_driver: TaskHandle, /// task handle for the task that polls sled agents for instance states. - pub task_instance_watcher: common::TaskHandle, + pub task_instance_watcher: TaskHandle, /// task handle for propagation of VPC firewall rules for Omicron services /// with external network connectivity, - pub task_service_firewall_propagation: common::TaskHandle, + pub task_service_firewall_propagation: TaskHandle, /// task handle for deletion of database records for VMMs abandoned by their /// instances. - pub task_abandoned_vmm_reaper: common::TaskHandle, + pub task_abandoned_vmm_reaper: TaskHandle, } impl BackgroundTasks { @@ -136,7 +137,7 @@ impl BackgroundTasks { ), producer_registry: &ProducerRegistry, ) -> BackgroundTasks { - let mut driver = common::Driver::new(); + let mut driver = Driver::new(); let (task_internal_dns_config, task_internal_dns_servers) = init_dns( &mut driver, @@ -494,19 +495,19 @@ impl BackgroundTasks { } } - pub fn activate(&self, task: &common::TaskHandle) { + pub fn activate(&self, task: &TaskHandle) { self.driver.activate(task); } } fn init_dns( - driver: &mut common::Driver, + driver: &mut Driver, opctx: &OpContext, datastore: Arc, dns_group: DnsGroup, resolver: internal_dns::resolver::Resolver, config: &DnsTasksConfig, -) -> (common::TaskHandle, common::TaskHandle) { +) -> (TaskHandle, TaskHandle) { let dns_group_name = dns_group.to_string(); let metadata = BTreeMap::from([("dns_group".to_string(), dns_group_name)]); diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 4d5a63e69f..40716aa036 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -2,32 +2,164 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Background tasks +//! # Nexus Background Tasks +//! +//! A **background task** in Nexus is any operation that can be activated both +//! periodically and by an explicit signal. This is aimed at RFD 373-style +//! "reliable persistent workflows", also called "reconcilers" or "controllers". +//! These are a kind of automation that examines some _current_ state, compares +//! it to some _intended_ state, and potentially takes action to try to bring +//! the current state in sync with the intended state. Our canonical example is +//! that we want to have Nexus monitor the intended DNS configuration. When it +//! changes, we want to propagate the new configuration to all DNS servers. We +//! implement this with three different background tasks: +//! +//! 1. `DnsConfigWatcher` reads the DNS configuration from the database, stores +//! it in memory, and makes it available via a `tokio::sync::watch` channel. +//! 2. `DnsServersWatcher` reads the list of DNS servers from the database, +//! stores it in memory, and makes it available via a `tokio::sync::watch` +//! channel. +//! 3. `DnsPropagator` uses the the watch channels provided by the other two +//! background tasks to notice when either the DNS configuration or the list +//! of DNS servers has changed. It uses the latest values to make a request +//! to each server to update its configuration. +//! +//! When Nexus changes the DNS configuration, it will update the database with +//! the new configuration and then explicitly activate the `DnsConfigWatcher`. +//! When it reads the new config, it will send it to its watch channel, and that +//! will activate the `DnsPropagator`. If any of this fails, or if Nexus +//! crashes at any point, then the periodic activation of every background task +//! will eventually cause the latest config to be propagated to all of the +//! current servers. +//! +//! The background task framework here is pretty minimal: essentially what it +//! gives you is that you just write an idempotent function that you want to +//! happen periodically or on-demand, wrap it in an impl of `BackgroundTask`, +//! register that with the `Driver`, and you're done. The framework will take +//! care of: +//! +//! * providing a way for Nexus at-large to activate your task +//! * activating your task periodically +//! * ensuring that the task is activated only once at a time in this Nexus +//! (but note that it may always be running concurrently in other Nexus +//! instances) +//! * providing basic visibility into whether the task is running, when the task +//! last ran, etc. +//! +//! We may well want to extend the framework as we build more tasks in general +//! and reconcilers specifically. But we should be mindful not to create +//! footguns for ourselves! See "Design notes" below. +//! +//! ## Notes for background task implementors +//! +//! Background tasks are not necessarily just for reconcilers. That's just the +//! design center. The first two DNS background tasks above aren't reconcilers +//! in any non-trivial sense. +//! +//! Background task activations do not accept input, by design. See "Design +//! notes" below. +//! +//! Generally, you probably don't want to have your background task do retries. +//! If things fail, you rely on the periodic reactivation to try again. +//! +//! ## Design notes +//! +//! The underlying design for RFD 373-style reconcilers is inspired by a few +//! related principles: +//! +//! * the principle in distributed systems of having exactly one code path to +//! achieve a thing, and then always using that path to do that thing (as +//! opposed to having separate paths for, say, the happy path vs. failover, +//! and having one of those paths rarely used) +//! * the [constant-work pattern][1], which basically suggests that a system can +//! be more robust and scalable if it's constructed in a way that always does +//! the same amount of work. Imagine if we made requests to the DNS servers +//! to incrementally update their config every time the DNS data changed. +//! This system does more work as users make more requests. During overloads, +//! things can fall over. Compare with a system whose frontend merely updates +//! the DNS configuration that _should_ exist and whose backend periodically +//! scans the complete intended state and then sets its own state accordingly. +//! The backend does the same amount of work no matter how many requests were +//! made, making it more resistant to overload. A big downside of this +//! approach is increased latency from the user making a request to seeing it +//! applied. This can be mitigated (sacrificing some, but not all, of the +//! "constant work" property) by triggering a backend scan operation when user +//! requests complete. +//! * the design pattern in distributed systems of keeping two copies of data in +//! sync using both event notifications (like a changelog) _and_ periodic full +//! scans. The hope is that a full scan never finds a change that wasn't +//! correctly sync'd, but incorporating an occasional full scan into the +//! design ensures that such bugs are found and their impact repaired +//! automatically. +//! +//! [1]: https://aws.amazon.com/builders-library/reliability-and-constant-work/ +//! +//! Combining these, we get a design pattern for a "reconciler" where: +//! +//! * The reconciler is activated by explicit request (when we know it has work +//! to do) _and_ periodically (to deal with all manner of transient failures) +//! * The reconciler's activity is idempotent: given the same underlying state +//! (e.g., database state), it always attempts to do the same thing. +//! * Each activation of the reconciler accepts no input. That is, even when we +//! think we know what changed, we do not use that information. This ensures +//! that the reconciler really is idempotent and its actions are based solely +//! on the state that it's watching. Put differently: having reconcilers +//! accept an explicit hint about what changed (and then doing something +//! differently based on that) bifurcates the code: there's the common case +//! where that hint is available and the rarely-exercised case when it's not +//! (e.g., because Nexus crashed and it's the subsequent periodic activation +//! that's propagating this change). This is what we're trying to avoid. +//! * We do allow reconcilers to be triggered by a `tokio::sync::watch` channel +//! -- but again, not using the _data_ from that channel. There are two big +//! advantages here: (1) reduced latency from when a change is made to when +//! the reconciler applies it, and (2) (arguably another way to say the same +//! thing) we can space out the periodic activations much further, knowing +//! that most of the time we're not increasing latency by doing this. This +//! compromises the "constant-work" pattern a bit: we might wind up running +//! the reconciler more often during busy times than during idle times, and we +//! could find that overloads something. However, the _operation_ of the +//! reconciler can still be constant work, and there's no more than that +//! amount of work going on at any given time. +//! +//! `watch` channels are a convenient primitive here because they only store +//! one value. With a little care, we can ensure that the writer never blocks +//! and the readers can all see the latest value. (By design, reconcilers +//! generally only care about the latest state of something, not any +//! intermediate states.) We don't have to worry about an unbounded queue, or +//! handling a full queue, or other forms of backpressure. -mod abandoned_vmm_reaper; -mod bfd; -mod blueprint_execution; -mod blueprint_load; -mod common; -mod crdb_node_id_collector; -mod dns_config; -mod dns_propagation; -mod dns_servers; -mod external_endpoints; +mod driver; mod init; -mod instance_watcher; -mod inventory_collection; -mod metrics_producer_gc; -mod nat_cleanup; -mod networking; -mod phantom_disks; -mod physical_disk_adoption; -mod region_replacement; -mod region_replacement_driver; -mod service_firewall_rules; mod status; -mod sync_service_zone_nat; -mod sync_switch_configuration; -mod v2p_mappings; +mod tasks; +pub use driver::Driver; pub use init::BackgroundTasks; + +use futures::future::BoxFuture; +use nexus_auth::context::OpContext; + +/// An operation activated both periodically and by an explicit signal +/// +/// See module-level documentation for details. +pub trait BackgroundTask: Send + Sync { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value>; +} + +/// Identifies a background task +/// +/// This is returned by [`Driver::register()`] to identify the corresponding +/// background task. It's then accepted by functions like +/// [`Driver::activate()`] and [`Driver::task_status()`] to identify the task. +#[derive(Clone, Debug, Ord, PartialOrd, PartialEq, Eq)] +pub struct TaskHandle(String); + +impl TaskHandle { + /// Returns the unique name of this background task + pub fn name(&self) -> &str { + &self.0 + } +} diff --git a/nexus/src/app/background/abandoned_vmm_reaper.rs b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs similarity index 99% rename from nexus/src/app/background/abandoned_vmm_reaper.rs rename to nexus/src/app/background/tasks/abandoned_vmm_reaper.rs index 3883185d9f..bd9a7b1602 100644 --- a/nexus/src/app/background/abandoned_vmm_reaper.rs +++ b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs @@ -31,7 +31,7 @@ //! is handled elsewhere, by `notify_instance_updated` and (eventually) the //! `instance-update` saga. -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use anyhow::Context; use futures::future::BoxFuture; use futures::FutureExt; diff --git a/nexus/src/app/background/bfd.rs b/nexus/src/app/background/tasks/bfd.rs similarity index 98% rename from nexus/src/app/background/bfd.rs rename to nexus/src/app/background/tasks/bfd.rs index 39b3c8f661..67b15ee3d3 100644 --- a/nexus/src/app/background/bfd.rs +++ b/nexus/src/app/background/tasks/bfd.rs @@ -6,10 +6,10 @@ //! (BFD) sessions. use crate::app::{ - background::networking::build_mgd_clients, map_switch_zone_addrs, + background::tasks::networking::build_mgd_clients, map_switch_zone_addrs, }; -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; use internal_dns::{resolver::Resolver, ServiceName}; diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/tasks/blueprint_execution.rs similarity index 99% rename from nexus/src/app/background/blueprint_execution.rs rename to nexus/src/app/background/tasks/blueprint_execution.rs index b01d1213de..253a89a18d 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/tasks/blueprint_execution.rs @@ -4,7 +4,7 @@ //! Background task for realizing a plan blueprint -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; use nexus_db_queries::context::OpContext; @@ -111,7 +111,7 @@ impl BackgroundTask for BlueprintExecutor { #[cfg(test)] mod test { use super::BlueprintExecutor; - use crate::app::background::common::BackgroundTask; + use crate::app::background::BackgroundTask; use httptest::matchers::{all_of, request}; use httptest::responders::status_code; use httptest::Expectation; diff --git a/nexus/src/app/background/blueprint_load.rs b/nexus/src/app/background/tasks/blueprint_load.rs similarity index 99% rename from nexus/src/app/background/blueprint_load.rs rename to nexus/src/app/background/tasks/blueprint_load.rs index baf86d655f..38cfe85558 100644 --- a/nexus/src/app/background/blueprint_load.rs +++ b/nexus/src/app/background/tasks/blueprint_load.rs @@ -7,7 +7,7 @@ //! This task triggers the `blueprint_execution` background task when the //! blueprint changes. -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; use nexus_db_queries::context::OpContext; @@ -185,7 +185,7 @@ impl BackgroundTask for TargetBlueprintLoader { #[cfg(test)] mod test { use super::*; - use crate::app::background::common::BackgroundTask; + use crate::app::background::BackgroundTask; use nexus_inventory::now_db_precision; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{ diff --git a/nexus/src/app/background/crdb_node_id_collector.rs b/nexus/src/app/background/tasks/crdb_node_id_collector.rs similarity index 99% rename from nexus/src/app/background/crdb_node_id_collector.rs rename to nexus/src/app/background/tasks/crdb_node_id_collector.rs index 2736514021..2a0e1c6d3d 100644 --- a/nexus/src/app/background/crdb_node_id_collector.rs +++ b/nexus/src/app/background/tasks/crdb_node_id_collector.rs @@ -23,7 +23,7 @@ //! the status of all nodes and looking for orphans, perhaps) to determine //! whether a zone without a known node ID ever existed. -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use anyhow::ensure; use anyhow::Context; use futures::future::BoxFuture; diff --git a/nexus/src/app/background/dns_config.rs b/nexus/src/app/background/tasks/dns_config.rs similarity index 98% rename from nexus/src/app/background/dns_config.rs rename to nexus/src/app/background/tasks/dns_config.rs index 71e0a812a7..4cf8c4d86d 100644 --- a/nexus/src/app/background/dns_config.rs +++ b/nexus/src/app/background/tasks/dns_config.rs @@ -4,7 +4,7 @@ //! Background task for keeping track of DNS configuration -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use dns_service_client::types::DnsConfigParams; use futures::future::BoxFuture; use futures::FutureExt; @@ -157,9 +157,9 @@ impl BackgroundTask for DnsConfigWatcher { #[cfg(test)] mod test { - use crate::app::background::common::BackgroundTask; - use crate::app::background::dns_config::DnsConfigWatcher; + use super::DnsConfigWatcher; use crate::app::background::init::test::write_test_dns_generation; + use crate::app::background::BackgroundTask; use assert_matches::assert_matches; use async_bb8_diesel::AsyncRunQueryDsl; use async_bb8_diesel::AsyncSimpleConnection; diff --git a/nexus/src/app/background/dns_propagation.rs b/nexus/src/app/background/tasks/dns_propagation.rs similarity index 98% rename from nexus/src/app/background/dns_propagation.rs rename to nexus/src/app/background/tasks/dns_propagation.rs index 7d650f6f27..c680a6f010 100644 --- a/nexus/src/app/background/dns_propagation.rs +++ b/nexus/src/app/background/tasks/dns_propagation.rs @@ -4,8 +4,8 @@ //! Background task for propagating DNS configuration to all DNS servers -use super::common::BackgroundTask; use super::dns_servers::DnsServersList; +use crate::app::background::BackgroundTask; use anyhow::Context; use dns_service_client::types::DnsConfigParams; use futures::future::BoxFuture; @@ -177,9 +177,9 @@ async fn dns_propagate_one( #[cfg(test)] mod test { - use crate::app::background::common::BackgroundTask; - use crate::app::background::dns_propagation::DnsPropagator; - use crate::app::background::dns_servers::DnsServersList; + use super::DnsPropagator; + use crate::app::background::tasks::dns_servers::DnsServersList; + use crate::app::background::BackgroundTask; use dns_service_client::types::DnsConfigParams; use httptest::matchers::request; use httptest::responders::status_code; diff --git a/nexus/src/app/background/dns_servers.rs b/nexus/src/app/background/tasks/dns_servers.rs similarity index 99% rename from nexus/src/app/background/dns_servers.rs rename to nexus/src/app/background/tasks/dns_servers.rs index 8f4cce4ee0..9d99460917 100644 --- a/nexus/src/app/background/dns_servers.rs +++ b/nexus/src/app/background/tasks/dns_servers.rs @@ -4,7 +4,7 @@ //! Background task for keeping track of DNS servers -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; use internal_dns::names::ServiceName; diff --git a/nexus/src/app/background/external_endpoints.rs b/nexus/src/app/background/tasks/external_endpoints.rs similarity index 97% rename from nexus/src/app/background/external_endpoints.rs rename to nexus/src/app/background/tasks/external_endpoints.rs index 1a587298d5..0ff1e06a46 100644 --- a/nexus/src/app/background/external_endpoints.rs +++ b/nexus/src/app/background/tasks/external_endpoints.rs @@ -6,7 +6,7 @@ //! all Silos, their externally-visible DNS names, and the TLS certificates //! associated with those names -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use crate::app::external_endpoints::read_all_endpoints; pub use crate::app::external_endpoints::ExternalEndpoints; use futures::future::BoxFuture; @@ -117,8 +117,8 @@ impl BackgroundTask for ExternalEndpointsWatcher { #[cfg(test)] mod test { - use crate::app::background::common::BackgroundTask; - use crate::app::background::external_endpoints::ExternalEndpointsWatcher; + use super::ExternalEndpointsWatcher; + use crate::app::background::BackgroundTask; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_test_utils::resource_helpers::create_silo; diff --git a/nexus/src/app/background/instance_watcher.rs b/nexus/src/app/background/tasks/instance_watcher.rs similarity index 99% rename from nexus/src/app/background/instance_watcher.rs rename to nexus/src/app/background/tasks/instance_watcher.rs index 1b10605c5e..a6e579eb8a 100644 --- a/nexus/src/app/background/instance_watcher.rs +++ b/nexus/src/app/background/tasks/instance_watcher.rs @@ -4,7 +4,7 @@ //! Background task for pulling instance state from sled-agents. -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use futures::{future::BoxFuture, FutureExt}; use http::StatusCode; use nexus_db_model::Instance; diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/tasks/inventory_collection.rs similarity index 98% rename from nexus/src/app/background/inventory_collection.rs rename to nexus/src/app/background/tasks/inventory_collection.rs index 52ee8f6e13..95268334d9 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/tasks/inventory_collection.rs @@ -4,7 +4,7 @@ //! Background task for reading inventory for the rack -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use anyhow::ensure; use anyhow::Context; use futures::future::BoxFuture; @@ -186,10 +186,10 @@ impl<'a> nexus_inventory::SledAgentEnumerator for DbSledAgentEnumerator<'a> { #[cfg(test)] mod test { + use super::DbSledAgentEnumerator; + use super::InventoryCollector; use crate::app::authz; - use crate::app::background::common::BackgroundTask; - use crate::app::background::inventory_collection::DbSledAgentEnumerator; - use crate::app::background::inventory_collection::InventoryCollector; + use crate::app::background::BackgroundTask; use nexus_db_model::Generation; use nexus_db_model::SledBaseboard; use nexus_db_model::SledSystemHardware; diff --git a/nexus/src/app/background/metrics_producer_gc.rs b/nexus/src/app/background/tasks/metrics_producer_gc.rs similarity index 99% rename from nexus/src/app/background/metrics_producer_gc.rs rename to nexus/src/app/background/tasks/metrics_producer_gc.rs index 2a8464b80f..8c2c271cfb 100644 --- a/nexus/src/app/background/metrics_producer_gc.rs +++ b/nexus/src/app/background/tasks/metrics_producer_gc.rs @@ -5,7 +5,7 @@ //! Background task for garbage collecting metrics producers that have not //! renewed their lease -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use chrono::TimeDelta; use chrono::Utc; use futures::future::BoxFuture; diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs new file mode 100644 index 0000000000..3886b43a30 --- /dev/null +++ b/nexus/src/app/background/tasks/mod.rs @@ -0,0 +1,28 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implementations of specific background tasks + +pub mod abandoned_vmm_reaper; +pub mod bfd; +pub mod blueprint_execution; +pub mod blueprint_load; +pub mod crdb_node_id_collector; +pub mod dns_config; +pub mod dns_propagation; +pub mod dns_servers; +pub mod external_endpoints; +pub mod instance_watcher; +pub mod inventory_collection; +pub mod metrics_producer_gc; +pub mod nat_cleanup; +pub mod networking; +pub mod phantom_disks; +pub mod physical_disk_adoption; +pub mod region_replacement; +pub mod region_replacement_driver; +pub mod service_firewall_rules; +pub mod sync_service_zone_nat; +pub mod sync_switch_configuration; +pub mod v2p_mappings; diff --git a/nexus/src/app/background/nat_cleanup.rs b/nexus/src/app/background/tasks/nat_cleanup.rs similarity index 99% rename from nexus/src/app/background/nat_cleanup.rs rename to nexus/src/app/background/tasks/nat_cleanup.rs index 844dbffefe..675f4fc809 100644 --- a/nexus/src/app/background/nat_cleanup.rs +++ b/nexus/src/app/background/tasks/nat_cleanup.rs @@ -8,8 +8,8 @@ use crate::app::map_switch_zone_addrs; -use super::common::BackgroundTask; use super::networking::build_dpd_clients; +use crate::app::background::BackgroundTask; use chrono::{Duration, Utc}; use futures::future::BoxFuture; use futures::FutureExt; diff --git a/nexus/src/app/background/networking.rs b/nexus/src/app/background/tasks/networking.rs similarity index 100% rename from nexus/src/app/background/networking.rs rename to nexus/src/app/background/tasks/networking.rs diff --git a/nexus/src/app/background/phantom_disks.rs b/nexus/src/app/background/tasks/phantom_disks.rs similarity index 98% rename from nexus/src/app/background/phantom_disks.rs rename to nexus/src/app/background/tasks/phantom_disks.rs index 48688838e5..eac37eee19 100644 --- a/nexus/src/app/background/phantom_disks.rs +++ b/nexus/src/app/background/tasks/phantom_disks.rs @@ -18,7 +18,7 @@ //! this background task is required to apply the same fix for disks that are //! already in this phantom state. -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; use nexus_db_queries::context::OpContext; diff --git a/nexus/src/app/background/physical_disk_adoption.rs b/nexus/src/app/background/tasks/physical_disk_adoption.rs similarity index 99% rename from nexus/src/app/background/physical_disk_adoption.rs rename to nexus/src/app/background/tasks/physical_disk_adoption.rs index 05c53963de..f3b9e8ac62 100644 --- a/nexus/src/app/background/physical_disk_adoption.rs +++ b/nexus/src/app/background/tasks/physical_disk_adoption.rs @@ -11,7 +11,7 @@ //! //! In the future, this may become more explicitly operator-controlled. -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; use nexus_db_model::PhysicalDisk; diff --git a/nexus/src/app/background/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs similarity index 99% rename from nexus/src/app/background/region_replacement.rs rename to nexus/src/app/background/tasks/region_replacement.rs index 02ae548d75..635b956193 100644 --- a/nexus/src/app/background/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ -10,8 +10,8 @@ //! for any requests that are in state "Requested". See the documentation there //! for more information. -use super::common::BackgroundTask; use crate::app::authn; +use crate::app::background::BackgroundTask; use crate::app::sagas; use crate::app::RegionAllocationStrategy; use futures::future::BoxFuture; diff --git a/nexus/src/app/background/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs similarity index 99% rename from nexus/src/app/background/region_replacement_driver.rs rename to nexus/src/app/background/tasks/region_replacement_driver.rs index a02fa4e2e4..06155ffa24 100644 --- a/nexus/src/app/background/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -18,8 +18,8 @@ //! Basically, keep starting either repair or reconcilation until they complete //! successfully, then "finish" the region replacement. -use super::common::BackgroundTask; use crate::app::authn; +use crate::app::background::BackgroundTask; use crate::app::sagas; use futures::future::BoxFuture; use futures::FutureExt; diff --git a/nexus/src/app/background/service_firewall_rules.rs b/nexus/src/app/background/tasks/service_firewall_rules.rs similarity index 98% rename from nexus/src/app/background/service_firewall_rules.rs rename to nexus/src/app/background/tasks/service_firewall_rules.rs index 1a705d1fae..230172dab8 100644 --- a/nexus/src/app/background/service_firewall_rules.rs +++ b/nexus/src/app/background/tasks/service_firewall_rules.rs @@ -10,7 +10,7 @@ //! handle general changes to customer-visible VPC firewalls, and is mostly in //! place to propagate changes in the IP allowlist for user-facing services. -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; use nexus_db_queries::context::OpContext; diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/tasks/sync_service_zone_nat.rs similarity index 99% rename from nexus/src/app/background/sync_service_zone_nat.rs rename to nexus/src/app/background/tasks/sync_service_zone_nat.rs index b0a4c8cef2..59cd6a6a79 100644 --- a/nexus/src/app/background/sync_service_zone_nat.rs +++ b/nexus/src/app/background/tasks/sync_service_zone_nat.rs @@ -7,8 +7,8 @@ use crate::app::map_switch_zone_addrs; -use super::common::BackgroundTask; use super::networking::build_dpd_clients; +use crate::app::background::BackgroundTask; use anyhow::Context; use futures::future::BoxFuture; use futures::FutureExt; diff --git a/nexus/src/app/background/sync_switch_configuration.rs b/nexus/src/app/background/tasks/sync_switch_configuration.rs similarity index 99% rename from nexus/src/app/background/sync_switch_configuration.rs rename to nexus/src/app/background/tasks/sync_switch_configuration.rs index 8552d62988..ed13545d3b 100644 --- a/nexus/src/app/background/sync_switch_configuration.rs +++ b/nexus/src/app/background/tasks/sync_switch_configuration.rs @@ -6,7 +6,7 @@ //! to relevant management daemons (dendrite, mgd, sled-agent, etc.) use crate::app::{ - background::networking::{ + background::tasks::networking::{ api_to_dpd_port_settings, build_dpd_clients, build_mgd_clients, }, map_switch_zone_addrs, @@ -23,7 +23,7 @@ use nexus_db_model::{ }; use uuid::Uuid; -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; use display_error_chain::DisplayErrorChain; use dpd_client::types::PortId; use futures::future::BoxFuture; diff --git a/nexus/src/app/background/v2p_mappings.rs b/nexus/src/app/background/tasks/v2p_mappings.rs similarity index 99% rename from nexus/src/app/background/v2p_mappings.rs rename to nexus/src/app/background/tasks/v2p_mappings.rs index e2318f94d6..26ce131e9a 100644 --- a/nexus/src/app/background/v2p_mappings.rs +++ b/nexus/src/app/background/tasks/v2p_mappings.rs @@ -12,7 +12,7 @@ use omicron_common::api::external::Vni; use serde_json::json; use sled_agent_client::types::VirtualNetworkInterfaceHost; -use super::common::BackgroundTask; +use crate::app::background::BackgroundTask; pub struct V2PManager { datastore: Arc, From d9e638a791edfd199fd896df45f73d5edf2e6d87 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 26 Jun 2024 04:18:20 +0000 Subject: [PATCH 06/13] Update taiki-e/install-action digest to 3b0d937 (#5956) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`c2c0b6e` -> `3b0d937`](https://togithub.com/taiki-e/install-action/compare/c2c0b6e...3b0d937) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 8fde9c3e84..0e6e743db5 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@c2c0b6efca88e97964ac243a1bce7fa796e0f56f # v2 + uses: taiki-e/install-action@3b0d937160738a7a458c7b13f8b05f08f3f72d80 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From ff0c914753022a057d6443540ec3641b35654461 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Tue, 25 Jun 2024 22:30:58 -0700 Subject: [PATCH 07/13] wrap too-long strings in background task subsystem (#5934) --- nexus/src/app/background/init.rs | 54 +++++---- .../background/tasks/abandoned_vmm_reaper.rs | 3 +- .../app/background/tasks/blueprint_load.rs | 4 +- nexus/src/app/background/tasks/dns_config.rs | 8 +- .../background/tasks/inventory_collection.rs | 12 +- .../background/tasks/metrics_producer_gc.rs | 2 +- .../src/app/background/tasks/phantom_disks.rs | 3 +- .../background/tasks/region_replacement.rs | 13 +-- .../tasks/service_firewall_rules.rs | 4 +- .../tasks/sync_switch_configuration.rs | 108 +++++++++--------- 10 files changed, 112 insertions(+), 99 deletions(-) diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index a32f60c2b6..c8e40c80a1 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -165,7 +165,7 @@ impl BackgroundTasks { String::from("metrics_producer_gc"), String::from( "unregisters Oximeter metrics producers that have not \ - renewed their lease", + renewed their lease", ), config.metrics_producer_gc.period_secs, Box::new(gc), @@ -184,8 +184,8 @@ impl BackgroundTasks { String::from("external_endpoints"), String::from( "reads config for silos and TLS certificates to determine \ - the right set of HTTP endpoints, their HTTP server names, \ - and which TLS certificates to use on each one", + the right set of HTTP endpoints, their HTTP server \ + names, and which TLS certificates to use on each one", ), config.external_endpoints.period_secs, Box::new(watcher), @@ -199,13 +199,13 @@ impl BackgroundTasks { driver.register( "nat_v4_garbage_collector".to_string(), String::from( - "prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table \ - based on a predetermined retention policy", + "prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry \ + table based on a predetermined retention policy", ), config.nat_cleanup.period_secs, Box::new(nat_cleanup::Ipv4NatGarbageCollector::new( datastore.clone(), - resolver.clone() + resolver.clone(), )), opctx.child(BTreeMap::new()), vec![], @@ -217,7 +217,7 @@ impl BackgroundTasks { "bfd_manager".to_string(), String::from( "Manages bidirectional fowarding detection (BFD) \ - configuration on rack switches", + configuration on rack switches", ), config.bfd_manager.period_secs, Box::new(bfd::BfdManager::new( @@ -311,7 +311,7 @@ impl BackgroundTasks { String::from("inventory_collection"), String::from( "collects hardware and software inventory data from the \ - whole system", + whole system", ), config.inventory.period_secs, Box::new(collector), @@ -343,7 +343,8 @@ impl BackgroundTasks { driver.register( "service_zone_nat_tracker".to_string(), String::from( - "ensures service zone nat records are recorded in NAT RPW table", + "ensures service zone nat records are recorded in NAT RPW \ + table", ), config.sync_service_zone_nat.period_secs, Box::new(ServiceZoneNatTracker::new( @@ -390,7 +391,10 @@ impl BackgroundTasks { let task = driver.register( String::from("region_replacement"), - String::from("detects if a region requires replacing and begins the process"), + String::from( + "detects if a region requires replacing and begins the \ + process", + ), config.region_replacement.period_secs, Box::new(detector), opctx.child(BTreeMap::new()), @@ -442,8 +446,8 @@ impl BackgroundTasks { let task_service_firewall_propagation = driver.register( String::from("service_firewall_rule_propagation"), String::from( - "propagates VPC firewall rules for Omicron \ - services with external network connectivity", + "propagates VPC firewall rules for Omicron services with \ + external network connectivity", ), config.service_firewall_propagation.period_secs, Box::new(service_firewall_rules::ServiceRulePropagator::new( @@ -457,12 +461,11 @@ impl BackgroundTasks { let task_abandoned_vmm_reaper = driver.register( String::from("abandoned_vmm_reaper"), String::from( - "deletes sled reservations for VMMs that have been abandoned by their instances", + "deletes sled reservations for VMMs that have been abandoned \ + by their instances", ), config.abandoned_vmm_reaper.period_secs, - Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new( - datastore, - )), + Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new(datastore)), opctx.child(BTreeMap::new()), vec![], ); @@ -551,8 +554,8 @@ fn init_dns( format!("dns_propagation_{}", dns_group), format!( "propagates latest {} DNS configuration (from {:?} background \ - task) to the latest list of DNS servers (from {:?} background \ - task)", + task) to the latest list of DNS servers (from {:?} background \ + task)", dns_group, task_name_config, task_name_servers, ), config.period_secs_propagation, @@ -634,7 +637,10 @@ pub mod test { }; match record.get(0) { Some(dns_service_client::types::DnsRecord::Srv(srv)) => srv, - record => panic!("expected a SRV record for {internal_dns_srv_name}, found {record:?}"), + record => panic!( + "expected a SRV record for {internal_dns_srv_name}, found \ + {record:?}" + ), } }; @@ -781,7 +787,7 @@ pub mod test { ) { println!( "waiting for propagation of generation {generation} to {label} \ - DNS server ({addr})", + DNS server ({addr})", ); let client = dns_service_client::Client::new( @@ -812,13 +818,13 @@ pub mod test { .await; if let Err(err) = result { panic!( - "DNS generation {generation} not propagated to \ - {label} DNS server ({addr}) within {poll_max:?}: {err}" + "DNS generation {generation} not propagated to {label} DNS \ + server ({addr}) within {poll_max:?}: {err}" ); } else { println!( - "DNS generation {generation} propagated to {label} \ - DNS server ({addr}) successfully." + "DNS generation {generation} propagated to {label} DNS server \ + ({addr}) successfully." ); } } diff --git a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs index bd9a7b1602..a81080ec75 100644 --- a/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs +++ b/nexus/src/app/background/tasks/abandoned_vmm_reaper.rs @@ -135,7 +135,8 @@ impl AbandonedVmmReaper { results.error_count += 1; *last_err = Err(e).with_context(|| { format!( - "failed to delete sled reservation for VMM {vmm_id}" + "failed to delete sled reservation for VMM \ + {vmm_id}" ) }); } diff --git a/nexus/src/app/background/tasks/blueprint_load.rs b/nexus/src/app/background/tasks/blueprint_load.rs index 38cfe85558..31bc00441d 100644 --- a/nexus/src/app/background/tasks/blueprint_load.rs +++ b/nexus/src/app/background/tasks/blueprint_load.rs @@ -127,8 +127,8 @@ impl BackgroundTask for TargetBlueprintLoader { // bugs further up the stack. if *old_blueprint != new_blueprint { let message = format!( - "blueprint for id {} changed. \ - Blueprints are supposed to be immutable.", + "blueprint for id {} changed. Blueprints are supposed \ + to be immutable.", target_id ); error!(&log, "{}", message); diff --git a/nexus/src/app/background/tasks/dns_config.rs b/nexus/src/app/background/tasks/dns_config.rs index 4cf8c4d86d..1b0f627870 100644 --- a/nexus/src/app/background/tasks/dns_config.rs +++ b/nexus/src/app/background/tasks/dns_config.rs @@ -100,8 +100,8 @@ impl BackgroundTask for DnsConfigWatcher { // we just read. This should never happen because we // never remove the latest generation. let message = format!( - "found latest DNS generation ({}) is older \ - than the one we already know about ({})", + "found latest DNS generation ({}) is older than \ + the one we already know about ({})", new.generation, old.generation ); @@ -115,8 +115,8 @@ impl BackgroundTask for DnsConfigWatcher { // immutable once created. let message = format!( "found DNS config at generation {} that does \ - not match the config that we already have for \ - the same generation", + not match the config that we already have \ + for the same generation", new.generation ); error!(&log, "{}", message); diff --git a/nexus/src/app/background/tasks/inventory_collection.rs b/nexus/src/app/background/tasks/inventory_collection.rs index 95268334d9..1e2d3bda1f 100644 --- a/nexus/src/app/background/tasks/inventory_collection.rs +++ b/nexus/src/app/background/tasks/inventory_collection.rs @@ -270,8 +270,8 @@ mod test { // has pushed us out. if our_collections.is_empty() { println!( - "iter {i}: no test collections \ - ({num_collections} Nexus collections)", + "iter {i}: no test collections ({num_collections} Nexus \ + collections)", ); continue; } @@ -285,8 +285,8 @@ mod test { // tail of all IDs we've seen matches the ones we saw in this // iteration (i.e., we're pushing out old collections in order). println!( - "iter {i}: saw {our_collections:?}; \ - should match tail of {all_our_collection_ids:?}" + "iter {i}: saw {our_collections:?}; should match tail of \ + {all_our_collection_ids:?}" ); assert_eq!( all_our_collection_ids @@ -398,8 +398,8 @@ mod test { assert_eq!( removed_urls.len(), 1, - "expected to find exactly one sled URL matching our \ - expunged sled's URL" + "expected to find exactly one sled URL matching our expunged \ + sled's URL" ); let mut found_urls = db_enum.list_sled_agents().await.unwrap(); found_urls.sort(); diff --git a/nexus/src/app/background/tasks/metrics_producer_gc.rs b/nexus/src/app/background/tasks/metrics_producer_gc.rs index 8c2c271cfb..1df0afb7ed 100644 --- a/nexus/src/app/background/tasks/metrics_producer_gc.rs +++ b/nexus/src/app/background/tasks/metrics_producer_gc.rs @@ -144,7 +144,7 @@ mod tests { { panic!( "failed to update time_modified for producer {producer_id}: \ - {err}" + {err}" ); } } diff --git a/nexus/src/app/background/tasks/phantom_disks.rs b/nexus/src/app/background/tasks/phantom_disks.rs index eac37eee19..4b0d8bec38 100644 --- a/nexus/src/app/background/tasks/phantom_disks.rs +++ b/nexus/src/app/background/tasks/phantom_disks.rs @@ -74,7 +74,8 @@ impl BackgroundTask for PhantomDiskDetector { if let Err(e) = result { error!( &log, - "error un-deleting disk {} and setting to faulted: {:#}", + "error un-deleting disk {} and setting to faulted: \ + {:#}", disk.id(), e, ); diff --git a/nexus/src/app/background/tasks/region_replacement.rs b/nexus/src/app/background/tasks/region_replacement.rs index 635b956193..9e14c294ba 100644 --- a/nexus/src/app/background/tasks/region_replacement.rs +++ b/nexus/src/app/background/tasks/region_replacement.rs @@ -82,8 +82,7 @@ impl BackgroundTask for RegionReplacementDetector { Err(e) => { error!( &log, - "find_regions_on_expunged_physical_disks failed: \ - {e}" + "find_regions_on_expunged_physical_disks failed: {e}" ); err += 1; @@ -110,8 +109,8 @@ impl BackgroundTask for RegionReplacementDetector { Err(e) => { error!( &log, - "error looking for existing region \ - replacement requests for {}: {e}", + "error looking for existing region replacement \ + requests for {}: {e}", region.id(), ); continue; @@ -130,7 +129,7 @@ impl BackgroundTask for RegionReplacementDetector { info!( &log, "added region replacement request \ - {request_id} for {} volume {}", + {request_id} for {} volume {}", region.id(), region.volume_id(), ); @@ -140,7 +139,7 @@ impl BackgroundTask for RegionReplacementDetector { error!( &log, "error adding region replacement request for \ - region {} volume id {}: {e}", + region {} volume id {}: {e}", region.id(), region.volume_id(), ); @@ -172,7 +171,7 @@ impl BackgroundTask for RegionReplacementDetector { error!( &log, "sending region replacement start request \ - failed: {e}", + failed: {e}", ); err += 1; } diff --git a/nexus/src/app/background/tasks/service_firewall_rules.rs b/nexus/src/app/background/tasks/service_firewall_rules.rs index 230172dab8..4004de42c8 100644 --- a/nexus/src/app/background/tasks/service_firewall_rules.rs +++ b/nexus/src/app/background/tasks/service_firewall_rules.rs @@ -38,8 +38,8 @@ impl BackgroundTask for ServiceRulePropagator { .new(slog::o!("component" => "service-firewall-rule-progator")); debug!( log, - "starting background task for service \ - firewall rule propagation" + "starting background task for service firewall rule \ + propagation" ); let start = std::time::Instant::now(); let res = nexus_networking::plumb_service_firewall_rules( diff --git a/nexus/src/app/background/tasks/sync_switch_configuration.rs b/nexus/src/app/background/tasks/sync_switch_configuration.rs index ed13545d3b..0351c9542a 100644 --- a/nexus/src/app/background/tasks/sync_switch_configuration.rs +++ b/nexus/src/app/background/tasks/sync_switch_configuration.rs @@ -235,14 +235,19 @@ impl SwitchPortSettingsManager { let config = sled_agent_client::types::BfdPeerConfig { local: spec.local.map(|x| x.ip()), remote: spec.remote.ip(), - detection_threshold: spec.detection_threshold.0.try_into().map_err(|_| { - omicron_common::api::external::Error::InternalError { - internal_message: format!( - "db_bfd_peer_configs: detection threshold overflow: {}", - spec.detection_threshold.0, - ), - } - })?, + detection_threshold: spec + .detection_threshold + .0 + .try_into() + .map_err(|_| { + omicron_common::api::external::Error::InternalError { + internal_message: format!( + "db_bfd_peer_configs: detection threshold \ + overflow: {}", + spec.detection_threshold.0, + ), + } + })?, required_rx: spec.required_rx.0.into(), mode: match spec.mode { nexus_db_model::BfdMode::SingleHop => { @@ -252,15 +257,17 @@ impl SwitchPortSettingsManager { sled_agent_client::types::BfdMode::MultiHop } }, - switch: spec.switch.parse().map_err(|e: ParseSwitchLocationError| { - omicron_common::api::external::Error::InternalError { - internal_message: format!( - "db_bfd_peer_configs: failed to parse switch name: {}: {:?}", - spec.switch, - e, - ), - } - })?, + switch: spec.switch.parse().map_err( + |e: ParseSwitchLocationError| { + omicron_common::api::external::Error::InternalError { + internal_message: format!( + "db_bfd_peer_configs: failed to parse switch \ + name: {}: {:?}", + spec.switch, e, + ), + } + }, + )?, }; result.push(config); } @@ -1760,45 +1767,44 @@ async fn static_routes_on_switch<'a>( let mut routes_on_switch = HashMap::new(); for (location, client) in mgd_clients { - let static_routes: SwitchStaticRoutes = match client - .static_list_v4_routes() - .await - { - Ok(routes) => { - let mut flattened = HashSet::new(); - for (destination, paths) in routes.iter() { - let Ok(dst) = destination.parse() else { - error!( + let static_routes: SwitchStaticRoutes = + match client.static_list_v4_routes().await { + Ok(routes) => { + let mut flattened = HashSet::new(); + for (destination, paths) in routes.iter() { + let Ok(dst) = destination.parse() else { + error!( log, - "failed to parse static route destination: {destination}" + "failed to parse static route destination: \ + {destination}" ); - continue; - }; - for p in paths.iter() { - let nh = match p.nexthop { - IpAddr::V4(addr) => addr, - IpAddr::V6(addr) => { - error!( - log, - "ipv6 nexthops not supported: {addr}" - ); - continue; - } + continue; }; - flattened.insert((nh, dst, p.vlan_id)); + for p in paths.iter() { + let nh = match p.nexthop { + IpAddr::V4(addr) => addr, + IpAddr::V6(addr) => { + error!( + log, + "ipv6 nexthops not supported: {addr}" + ); + continue; + } + }; + flattened.insert((nh, dst, p.vlan_id)); + } } + flattened } - flattened - } - Err(_) => { - error!( - &log, - "unable to retrieve routes from switch"; - "switch_location" => ?location, - ); - continue; - } - }; + Err(_) => { + error!( + &log, + "unable to retrieve routes from switch"; + "switch_location" => ?location, + ); + continue; + } + }; routes_on_switch.insert(*location, static_routes); } routes_on_switch From d52aad08c6b77c7dc9dd2e0f3050e4b73faf20fb Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Wed, 26 Jun 2024 06:52:57 -0700 Subject: [PATCH 08/13] background task consumers could be more consistent (#5935) --- nexus/src/app/background/driver.rs | 2 +- nexus/src/app/background/init.rs | 16 ++++++++++------ nexus/src/app/bfd.rs | 10 ++-------- nexus/src/app/mod.rs | 2 +- nexus/src/app/switch_interface.rs | 2 -- nexus/src/app/switch_port.rs | 3 --- 6 files changed, 14 insertions(+), 21 deletions(-) diff --git a/nexus/src/app/background/driver.rs b/nexus/src/app/background/driver.rs index f1982b1ad8..e620f5d7bc 100644 --- a/nexus/src/app/background/driver.rs +++ b/nexus/src/app/background/driver.rs @@ -156,7 +156,7 @@ impl Driver { /// /// If the task is currently running, it will be activated again when it /// finishes. - pub fn activate(&self, task: &TaskHandle) { + pub(super) fn activate(&self, task: &TaskHandle) { self.task_required(task).notify.notify_one(); } diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index c8e40c80a1..3d98f80f4a 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -69,10 +69,10 @@ pub struct BackgroundTasks { Option, >, /// task handle for the ipv4 nat entry garbage collector - pub nat_cleanup: TaskHandle, + pub task_nat_cleanup: TaskHandle, /// task handle for the switch bfd manager - pub bfd_manager: TaskHandle, + pub task_bfd_manager: TaskHandle, /// task handle for the task that collects inventory pub task_inventory_collection: TaskHandle, @@ -195,7 +195,7 @@ impl BackgroundTasks { (task, watcher_channel) }; - let nat_cleanup = { + let task_nat_cleanup = { driver.register( "nat_v4_garbage_collector".to_string(), String::from( @@ -212,7 +212,7 @@ impl BackgroundTasks { ) }; - let bfd_manager = { + let task_bfd_manager = { driver.register( "bfd_manager".to_string(), String::from( @@ -479,8 +479,8 @@ impl BackgroundTasks { task_metrics_producer_gc, task_external_endpoints, external_endpoints, - nat_cleanup, - bfd_manager, + task_nat_cleanup, + task_bfd_manager, task_inventory_collection, task_physical_disk_adoption, task_phantom_disks, @@ -498,6 +498,10 @@ impl BackgroundTasks { } } + /// Activate the specified background task + /// + /// If the task is currently running, it will be activated again when it + /// finishes. pub fn activate(&self, task: &TaskHandle) { self.driver.activate(task); } diff --git a/nexus/src/app/bfd.rs b/nexus/src/app/bfd.rs index 0afa238ee3..1ae958c20d 100644 --- a/nexus/src/app/bfd.rs +++ b/nexus/src/app/bfd.rs @@ -39,12 +39,9 @@ impl super::Nexus { // add the bfd session to the db and trigger the bfd manager to handle // the reset self.datastore().bfd_session_create(opctx, &session).await?; - self.background_tasks - .driver - .activate(&self.background_tasks.bfd_manager); + self.background_tasks.activate(&self.background_tasks.task_bfd_manager); // for timely propagation to bootstore self.background_tasks - .driver .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(()) } @@ -57,12 +54,9 @@ impl super::Nexus { // remove the bfd session from the db and trigger the bfd manager to // handle the reset self.datastore().bfd_session_delete(opctx, &session).await?; - self.background_tasks - .driver - .activate(&self.background_tasks.bfd_manager); + self.background_tasks.activate(&self.background_tasks.task_bfd_manager); // for timely propagation to bootstore self.background_tasks - .driver .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index da06ccaa32..510b6b5b16 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -504,7 +504,7 @@ impl Nexus { "populate complete; activating background tasks" ); for task in task_nexus.background_tasks.driver.tasks() { - task_nexus.background_tasks.driver.activate(task); + task_nexus.background_tasks.activate(task); } } Err(_) => { diff --git a/nexus/src/app/switch_interface.rs b/nexus/src/app/switch_interface.rs index bb4cba4c7b..c4e69d1e3e 100644 --- a/nexus/src/app/switch_interface.rs +++ b/nexus/src/app/switch_interface.rs @@ -57,7 +57,6 @@ impl super::Nexus { // eagerly propagate changes via rpw self.background_tasks - .driver .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(value) @@ -86,7 +85,6 @@ impl super::Nexus { // eagerly propagate changes via rpw self.background_tasks - .driver .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(()) diff --git a/nexus/src/app/switch_port.rs b/nexus/src/app/switch_port.rs index 7a6d56252a..bb35b6939e 100644 --- a/nexus/src/app/switch_port.rs +++ b/nexus/src/app/switch_port.rs @@ -100,7 +100,6 @@ impl super::Nexus { // eagerly propagate changes via rpw self.background_tasks - .driver .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(result) @@ -214,7 +213,6 @@ impl super::Nexus { // eagerly propagate changes via rpw self.background_tasks - .driver .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(()) @@ -248,7 +246,6 @@ impl super::Nexus { // eagerly propagate changes via rpw self.background_tasks - .driver .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(()) From f22e8fec1d5e9d47dcd1da55a0828a535b0f2def Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Wed, 26 Jun 2024 11:07:52 -0700 Subject: [PATCH 09/13] switch clickhouse downloads to sha256 checksums (#5959) Also hooked up flake.nix to the checksums file now that it's sha256. Checked the Nix flake with `nix flake check` and found that rust-overlay was too out of date to have 1.78.0, so bumped that as well (`nix flake lock --update-input rust-overlay`). --- Cargo.lock | 1 - dev-tools/xtask/Cargo.toml | 1 - dev-tools/xtask/src/download.rs | 23 ++----------------- flake.lock | 40 +++------------------------------ flake.nix | 12 +++++----- tools/clickhouse_checksums | 6 ++--- 6 files changed, 14 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 734a32e8bd..53c7967778 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11422,7 +11422,6 @@ dependencies = [ "fs-err", "futures", "macaddr", - "md5", "reqwest", "serde", "sha2", diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index 9268eafc3e..acf225cd9d 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -17,7 +17,6 @@ flate2.workspace = true futures.workspace = true fs-err.workspace = true macaddr.workspace = true -md5 = "0.7.0" reqwest = { workspace = true, features = [ "default-tls" ] } serde.workspace = true sha2.workspace = true diff --git a/dev-tools/xtask/src/download.rs b/dev-tools/xtask/src/download.rs index 3002837507..2790a638a7 100644 --- a/dev-tools/xtask/src/download.rs +++ b/dev-tools/xtask/src/download.rs @@ -257,23 +257,6 @@ async fn streaming_download(url: &str, path: &Utf8Path) -> Result<()> { Ok(()) } -/// Returns the hex, lowercase md5 checksum of a file at `path`. -async fn md5_checksum(path: &Utf8Path) -> Result { - let mut buf = vec![0u8; 65536]; - let mut file = tokio::fs::File::open(path).await?; - let mut ctx = md5::Context::new(); - loop { - let n = file.read(&mut buf).await?; - if n == 0 { - break; - } - ctx.write_all(&buf[0..n])?; - } - - let digest = ctx.compute(); - Ok(format!("{digest:x}")) -} - /// Returns the hex, lowercase sha2 checksum of a file at `path`. async fn sha2_checksum(path: &Utf8Path) -> Result { let mut buf = vec![0u8; 65536]; @@ -382,14 +365,12 @@ async fn set_permissions(path: &Utf8Path, mode: u32) -> Result<()> { } enum ChecksumAlgorithm { - Md5, Sha2, } impl ChecksumAlgorithm { async fn checksum(&self, path: &Utf8Path) -> Result { match self { - ChecksumAlgorithm::Md5 => md5_checksum(path).await, ChecksumAlgorithm::Sha2 => sha2_checksum(path).await, } } @@ -459,7 +440,7 @@ impl<'a> Downloader<'a> { let checksums_path = self.versions_dir.join("clickhouse_checksums"); let [checksum] = get_values_from_file( - [&format!("CIDL_MD5_{}", os.env_name())], + [&format!("CIDL_SHA256_{}", os.env_name())], &checksums_path, ) .await?; @@ -491,7 +472,7 @@ impl<'a> Downloader<'a> { &self.log, &tarball_path, &tarball_url, - ChecksumAlgorithm::Md5, + ChecksumAlgorithm::Sha2, &checksum, ) .await?; diff --git a/flake.lock b/flake.lock index 7c6acc0815..5a70a42881 100644 --- a/flake.lock +++ b/flake.lock @@ -1,23 +1,5 @@ { "nodes": { - "flake-utils": { - "inputs": { - "systems": "systems" - }, - "locked": { - "lastModified": 1705309234, - "narHash": "sha256-uNRRNRKmJyCRC/8y1RqBkqWBLM034y4qN7EprSdmgyA=", - "owner": "numtide", - "repo": "flake-utils", - "rev": "1ef2e671c3b0c19053962c07dbda38332dcebf26", - "type": "github" - }, - "original": { - "owner": "numtide", - "repo": "flake-utils", - "type": "github" - } - }, "nixpkgs": { "locked": { "lastModified": 1712791164, @@ -42,17 +24,16 @@ }, "rust-overlay": { "inputs": { - "flake-utils": "flake-utils", "nixpkgs": [ "nixpkgs" ] }, "locked": { - "lastModified": 1712888034, - "narHash": "sha256-SmBeT3oxdwOzheSfxZmk+3xmv98Z3zlzjlnl9nBdOIE=", + "lastModified": 1719368303, + "narHash": "sha256-vhkKOUs9eOZgcPrA6wMw7a7J48pEjVuhzQfitVwVv1g=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "96fbdc73dec8eaa5a9d4a9b307b75c9a856e5dec", + "rev": "32415b22fd3b454e4a1385af64aa5cef9766ff4c", "type": "github" }, "original": { @@ -60,21 +41,6 @@ "repo": "rust-overlay", "type": "github" } - }, - "systems": { - "locked": { - "lastModified": 1681028828, - "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=", - "owner": "nix-systems", - "repo": "default", - "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e", - "type": "github" - }, - "original": { - "owner": "nix-systems", - "repo": "default", - "type": "github" - } } }, "root": "root", diff --git a/flake.nix b/flake.nix index 6828577403..831a0aaea2 100644 --- a/flake.nix +++ b/flake.nix @@ -255,12 +255,12 @@ let name = "clickhouse"; version = readVersionFile "${name}_version"; - # N.B. that unlike maghemite and dendrite, the Clickhouse hashes - # in `tools/clickhouse_checksums` are MD5 rather than SHA256, so we - # can't give Nix those hashes and must instead determine it ourselves. - # this means that we will have to update this SHA if the clickhouse - # version changes. - sha256 = "0wx8w9sdms5hsc9f835ivsissf15wjzdb9cvxr65xdi384i9pkzx"; + sha256 = + let + shaFile = builtins.readFile ./tools/${name}_checksums; + shas = lib.strings.splitString "\n" shaFile; + in + findSha shas "CIDL_SHA256_LINUX"; src = builtins.fetchurl { inherit sha256; diff --git a/tools/clickhouse_checksums b/tools/clickhouse_checksums index afddb15cab..54ffaddb3d 100644 --- a/tools/clickhouse_checksums +++ b/tools/clickhouse_checksums @@ -1,3 +1,3 @@ -CIDL_MD5_DARWIN="3e20c3284b7e6b0cfcfedf622ecf547a" -CIDL_MD5_LINUX="f6c30a25a86deac3bad6c50dcf758fd5" -CIDL_MD5_ILLUMOS="409222de8ecb59e5dd97dcc942ccdffe" +CIDL_SHA256_DARWIN="275843f5942bf84a27cfededa2314d70d4a3300d7762045e1f1b5cd93e97c6a1" +CIDL_SHA256_LINUX="fdcf9b224123b65e4cee9ba5d5bee42538ada3deb10ce412d3b0e8da74e2a873" +CIDL_SHA256_ILLUMOS="29ad158e985b088f6b8987b33dac96dcbaed9bdffc95ae0e880273cc125320b3" From b3a1a72951bd92f6fcfb04b21766a4535f0e1ddb Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 26 Jun 2024 12:57:15 -0700 Subject: [PATCH 10/13] Update Rust crate cargo_toml to v0.20.3 (#5957) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 53c7967778..b96d4e71a1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -843,9 +843,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.20.2" +version = "0.20.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8cb1d556b8b8f36e5ca74938008be3ac102f5dcb5b68a0477e4249ae2291cd3" +checksum = "4895c018bb228aa6b3ba1a0285543fcb4b704734c3fb1f72afaa75aa769500c1" dependencies = [ "serde", "toml 0.8.13", From 931e2d457c7bf9ad40e4475d78db1aa81938ea70 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 26 Jun 2024 21:22:16 +0100 Subject: [PATCH 11/13] VPC Subnet Routing [1/2] -- RPW and System Routers (#5777) This PR wires up all the backing machinery for VPC subnet routing, and automatically resolves and pushes updated rules to sleds using an RPW. This allows instances in all subnets of a VPC to talk with one another -- assuming no firewall rules have been configured otherwise. At a high level, this works by a few changes: * During the VPC create saga, we now push two rules explicitly to the system router -- default routes from `(0.0.0.0/0, ::/0) -> inetgw:outbound`. * Any CRUD operation on a VPC subnet will reconcile the set of VPC subnet routes within the system router to have one entry per subnet. This takes the form `subnet:{name} -> subnet:{name}` for each subnet, which are later resolved to both v4 and v6 entries. * Ports are created using route information known to sled-agent -- this defaults to an empty route set for instances/probes, and an internet gateway rule for services to enable early NTP sync. * Routes are sync'd with sleds using a new background task. Broadly, this asks each sled for the set of VPCs and subnets it has ports on, and a version for the current route set installed in each. The background task will use this information to determine which routes must be rebuilt, and will send updated versions out in response. The most immediate consequence in this PR is that hosts within a subnet -- on different VPCs -- will be able to talk with one another at last. The user facing API (#2116) will be re-enabled in a concurrent PR -- #5823 -- as will NIC spoof detection hole-punching. Depends on oxidecomputer/opte#490. Closes #2232, Fixes #1336. --- A few pieces will block tests passing & merge-readiness: - [x] Creation of a `lab-2.0-opte-0.32` image. - [x] Merge of oxidecomputer/maghemite#274 (and updating all the right SHAs in this PR). --- .github/buildomat/jobs/deploy.sh | 2 +- Cargo.lock | 25 +- Cargo.toml | 10 +- clients/ddm-admin-client/src/lib.rs | 9 +- clients/sled-agent-client/src/lib.rs | 5 + common/src/api/external/mod.rs | 3 + common/src/api/internal/shared.rs | 84 +- dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 11 + illumos-utils/src/opte/firewall_rules.rs | 19 +- illumos-utils/src/opte/mod.rs | 41 +- illumos-utils/src/opte/port.rs | 85 +- illumos-utils/src/opte/port_manager.rs | 319 ++++- nexus/db-fixed-data/src/vpc.rs | 21 +- nexus/db-fixed-data/src/vpc_subnet.rs | 21 + nexus/db-model/src/schema.rs | 2 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/vpc_route.rs | 42 +- nexus/db-model/src/vpc_router.rs | 9 +- nexus/db-model/src/vpc_subnet.rs | 3 + .../src/db/datastore/network_interface.rs | 42 +- nexus/db-queries/src/db/datastore/vpc.rs | 1130 ++++++++++++++++- nexus/src/app/background/init.rs | 18 + nexus/src/app/background/tasks/mod.rs | 1 + nexus/src/app/background/tasks/vpc_routes.rs | 283 +++++ nexus/src/app/instance.rs | 1 + nexus/src/app/sagas/vpc_create.rs | 88 +- nexus/src/app/vpc_router.rs | 63 +- nexus/src/app/vpc_subnet.rs | 31 +- nexus/src/external_api/http_entrypoints.rs | 2 +- nexus/tests/integration_tests/instances.rs | 114 +- .../tests/integration_tests/router_routes.rs | 65 +- nexus/types/src/external_api/views.rs | 5 +- openapi/nexus.json | 8 +- openapi/sled-agent.json | 263 ++++ package-manifest.toml | 14 +- schema/crdb/dbinit.sql | 15 +- schema/crdb/vpc-subnet-routing/up01.sql | 3 + schema/crdb/vpc-subnet-routing/up02.sql | 7 + schema/crdb/vpc-subnet-routing/up03.sql | 96 ++ sled-agent/src/http_entrypoints.rs | 32 +- sled-agent/src/instance.rs | 13 +- sled-agent/src/probe_manager.rs | 47 +- sled-agent/src/services.rs | 16 +- sled-agent/src/sim/http_entrypoints.rs | 30 +- sled-agent/src/sim/sled_agent.rs | 64 +- sled-agent/src/sled_agent.rs | 14 +- tools/maghemite_ddm_openapi_version | 4 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- tools/opte_version | 2 +- workspace-hack/Cargo.toml | 4 +- 52 files changed, 2912 insertions(+), 297 deletions(-) create mode 100644 nexus/src/app/background/tasks/vpc_routes.rs create mode 100644 schema/crdb/vpc-subnet-routing/up01.sql create mode 100644 schema/crdb/vpc-subnet-routing/up02.sql create mode 100644 schema/crdb/vpc-subnet-routing/up03.sql diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index b14486b651..e4f59aff5f 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.31" +#: target = "lab-2.0-opte-0.32" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/zone/oxz_*/root/var/svc/log/oxide-*.log*", diff --git a/Cargo.lock b/Cargo.lock index b96d4e71a1..30c6f861ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1604,8 +1604,9 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=5630887d0373857f77cb264f84aa19bdec720ce3#5630887d0373857f77cb264f84aa19bdec720ce3" +source = "git+https://github.com/oxidecomputer/maghemite?rev=3c3fa8482fe09a01da62fbd35efe124ea9cac9e7#3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" dependencies = [ + "oxnet", "percent-encoding", "progenitor", "reqwest", @@ -3468,7 +3469,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" [[package]] name = "illumos-utils" @@ -3882,7 +3883,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "quote", "syn 2.0.68", @@ -4011,7 +4012,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c2a198fb6b0eada2a8df47933734e6d35d350665a33a3593d7164fa52c75c19" dependencies = [ "cfg-if", - "windows-targets 0.52.5", + "windows-targets 0.48.5", ] [[package]] @@ -4294,7 +4295,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=5630887d0373857f77cb264f84aa19bdec720ce3#5630887d0373857f77cb264f84aa19bdec720ce3" +source = "git+https://github.com/oxidecomputer/maghemite?rev=3c3fa8482fe09a01da62fbd35efe124ea9cac9e7#3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" dependencies = [ "anyhow", "chrono", @@ -6104,7 +6105,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "cfg-if", "dyn-clone", @@ -6121,7 +6122,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -6133,7 +6134,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -6207,7 +6208,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=417f74e94978c23f3892ac328c3387f3ecd9bb29#417f74e94978c23f3892ac328c3387f3ecd9bb29" +source = "git+https://github.com/oxidecomputer/opte?rev=915975f6d1729db95619f752148974016912412f#915975f6d1729db95619f752148974016912412f" dependencies = [ "cfg-if", "illumos-sys-hdrs", @@ -6454,7 +6455,7 @@ dependencies = [ [[package]] name = "oxnet" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/oxnet?branch=main#42b4d3c77c7f5f2636cd6c4bbf37ac3eada047e0" +source = "git+https://github.com/oxidecomputer/oxnet#2612d2203effcfdcbf83778a77f1bfd03fe6ed24" dependencies = [ "ipnetwork", "schemars", @@ -8536,9 +8537,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.117" +version = "1.0.118" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +checksum = "d947f6b3163d8857ea16c4fa0dd4840d52f3041039a85decd46867eb1abef2e4" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index 60425a4239..640e76e59a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -333,8 +333,8 @@ macaddr = { version = "1.0.1", features = ["serde_std"] } maplit = "1.0.2" mockall = "0.12" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "5630887d0373857f77cb264f84aa19bdec720ce3" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" } multimap = "0.10.0" nexus-auth = { path = "nexus/auth" } nexus-client = { path = "clients/nexus-client" } @@ -354,7 +354,7 @@ omicron-certificates = { path = "certificates" } omicron-passwords = { path = "passwords" } omicron-workspace-hack = "0.1.0" oxlog = { path = "dev-tools/oxlog" } -oxnet = { git = "https://github.com/oxidecomputer/oxnet", branch = "main" } +oxnet = { git = "https://github.com/oxidecomputer/oxnet" } nexus-test-interface = { path = "nexus/test-interface" } nexus-test-utils-macros = { path = "nexus/test-utils-macros" } nexus-test-utils = { path = "nexus/test-utils" } @@ -372,14 +372,14 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.11.0" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "417f74e94978c23f3892ac328c3387f3ecd9bb29", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "915975f6d1729db95619f752148974016912412f", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" # must match samael's crate! openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "417f74e94978c23f3892ac328c3387f3ecd9bb29" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "915975f6d1729db95619f752148974016912412f" } oso = "0.27" owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs index b926ee2971..8cd9781e1d 100644 --- a/clients/ddm-admin-client/src/lib.rs +++ b/clients/ddm-admin-client/src/lib.rs @@ -12,7 +12,7 @@ pub use ddm_admin_client::types; pub use ddm_admin_client::Error; -use ddm_admin_client::types::{Ipv6Prefix, TunnelOrigin}; +use ddm_admin_client::types::TunnelOrigin; use ddm_admin_client::Client as InnerClient; use either::Either; use omicron_common::address::Ipv6Subnet; @@ -81,8 +81,7 @@ impl Client { pub fn advertise_prefix(&self, address: Ipv6Subnet) { let me = self.clone(); tokio::spawn(async move { - let prefix = - Ipv6Prefix { addr: address.net().prefix(), len: SLED_PREFIX }; + let prefix = address.net(); retry_notify(retry_policy_internal_service_aggressive(), || async { info!( me.log, "Sending prefix to ddmd for advertisement"; @@ -130,8 +129,8 @@ impl Client { let prefixes = self.inner.get_prefixes().await?.into_inner(); Ok(prefixes.into_iter().flat_map(|(_, prefixes)| { prefixes.into_iter().flat_map(|prefix| { - let mut segments = prefix.destination.addr.segments(); - if prefix.destination.len == BOOTSTRAP_MASK + let mut segments = prefix.destination.addr().segments(); + if prefix.destination.width() == BOOTSTRAP_MASK && segments[0] == BOOTSTRAP_PREFIX { Either::Left(interfaces.iter().map(move |interface| { diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 81b225b035..42eefaf8b5 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -51,6 +51,11 @@ progenitor::generate_api!( NetworkInterface = omicron_common::api::internal::shared::NetworkInterface, PortFec = omicron_common::api::internal::shared::PortFec, PortSpeed = omicron_common::api::internal::shared::PortSpeed, + RouterId = omicron_common::api::internal::shared::RouterId, + ResolvedVpcRoute = omicron_common::api::internal::shared::ResolvedVpcRoute, + ResolvedVpcRouteSet = omicron_common::api::internal::shared::ResolvedVpcRouteSet, + RouterTarget = omicron_common::api::internal::shared::RouterTarget, + RouterVersion = omicron_common::api::internal::shared::RouterVersion, SourceNatConfig = omicron_common::api::internal::shared::SourceNatConfig, SwitchLocation = omicron_common::api::external::SwitchLocation, TypedUuidForInstanceKind = omicron_uuid_kinds::InstanceUuid, diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 0af437bd99..58d45fecef 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1331,6 +1331,9 @@ pub enum RouteTarget { #[display("inetgw:{0}")] /// Forward traffic to an internet gateway InternetGateway(Name), + #[display("drop")] + /// Drop matching traffic + Drop, } /// A `RouteDestination` is used to match traffic with a routing rule, on the diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 3d710fc952..090b3c3058 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -6,13 +6,13 @@ use crate::{ address::NUM_SOURCE_NAT_PORTS, - api::external::{self, BfdMode, ImportExportPolicy, Name}, + api::external::{self, BfdMode, ImportExportPolicy, Name, Vni}, }; use oxnet::{IpNet, Ipv4Net, Ipv6Net}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::{ - collections::HashMap, + collections::{HashMap, HashSet}, fmt, net::{IpAddr, Ipv4Addr, Ipv6Addr}, str::FromStr, @@ -50,11 +50,11 @@ pub enum NetworkInterfaceKind { pub struct NetworkInterface { pub id: Uuid, pub kind: NetworkInterfaceKind, - pub name: external::Name, + pub name: Name, pub ip: IpAddr, pub mac: external::MacAddr, pub subnet: IpNet, - pub vni: external::Vni, + pub vni: Vni, pub primary: bool, pub slot: u8, } @@ -624,6 +624,82 @@ impl TryFrom<&[ipnetwork::IpNetwork]> for IpAllowList { } } +/// A VPC route resolved into a concrete target. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct ResolvedVpcRoute { + pub dest: IpNet, + pub target: RouterTarget, +} + +/// The target for a given router entry. +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case", content = "value")] +pub enum RouterTarget { + Drop, + InternetGateway, + Ip(IpAddr), + VpcSubnet(IpNet), +} + +/// Information on the current parent router (and version) of a route set +/// according to the control plane. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct RouterVersion { + pub router_id: Uuid, + pub version: u64, +} + +impl RouterVersion { + /// Return whether a new route set should be applied over the current + /// values. + /// + /// This will occur when seeing a new version and a matching parent, + /// or a new parent router on the control plane. + pub fn is_replaced_by(&self, other: &Self) -> bool { + (self.router_id != other.router_id) || self.version < other.version + } +} + +/// Identifier for a VPC and/or subnet. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct RouterId { + pub vni: Vni, + pub kind: RouterKind, +} + +/// The scope of a set of VPC router rules. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +#[serde(tag = "type", rename_all = "snake_case", content = "subnet")] +pub enum RouterKind { + System, + Custom(IpNet), +} + +/// Version information for routes on a given VPC subnet. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] +pub struct ResolvedVpcRouteState { + pub id: RouterId, + pub version: Option, +} + +/// An updated set of routes for a given VPC and/or subnet. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] +pub struct ResolvedVpcRouteSet { + pub id: RouterId, + pub version: Option, + pub routes: HashSet, +} + #[cfg(test)] mod tests { use crate::api::internal::shared::AllowedSourceIps; diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 23ab034f1c..252313e6c8 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -131,6 +131,10 @@ task: "v2p_manager" manages opte v2p mappings for vpc networking +task: "vpc_route_manager" + propagates updated VPC routes to all OPTE ports + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -259,6 +263,10 @@ task: "v2p_manager" manages opte v2p mappings for vpc networking +task: "vpc_route_manager" + propagates updated VPC routes to all OPTE ports + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. @@ -374,6 +382,10 @@ task: "v2p_manager" manages opte v2p mappings for vpc networking +task: "vpc_route_manager" + propagates updated VPC routes to all OPTE ports + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index ebebb657e5..032a574c8e 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -332,6 +332,10 @@ task: "v2p_manager" manages opte v2p mappings for vpc networking +task: "vpc_route_manager" + propagates updated VPC routes to all OPTE ports + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -545,6 +549,13 @@ task: "v2p_manager" started at (s ago) and ran for ms warning: unknown background task: "v2p_manager" (don't know how to interpret details: Object {}) +task: "vpc_route_manager" + configured period: every 30s + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms +warning: unknown background task: "vpc_route_manager" (don't know how to interpret details: Object {}) + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/illumos-utils/src/opte/firewall_rules.rs b/illumos-utils/src/opte/firewall_rules.rs index 1df0e7421a..4dcb390e9e 100644 --- a/illumos-utils/src/opte/firewall_rules.rs +++ b/illumos-utils/src/opte/firewall_rules.rs @@ -4,6 +4,7 @@ //! Convert Omicron VPC firewall rules to OPTE firewall rules. +use super::net_to_cidr; use crate::opte::params::VpcFirewallRule; use crate::opte::Vni; use macaddr::MacAddr6; @@ -18,11 +19,6 @@ use oxide_vpc::api::Filters; use oxide_vpc::api::FirewallAction; use oxide_vpc::api::FirewallRule; use oxide_vpc::api::IpAddr; -use oxide_vpc::api::IpCidr; -use oxide_vpc::api::Ipv4Cidr; -use oxide_vpc::api::Ipv4PrefixLen; -use oxide_vpc::api::Ipv6Cidr; -use oxide_vpc::api::Ipv6PrefixLen; use oxide_vpc::api::Ports; use oxide_vpc::api::ProtoFilter; use oxide_vpc::api::Protocol; @@ -68,21 +64,10 @@ impl FromVpcFirewallRule for VpcFirewallRule { HostIdentifier::Ip(IpNet::V4(net)) if net.is_host_net() => { Address::Ip(IpAddr::Ip4(net.addr().into())) } - HostIdentifier::Ip(IpNet::V4(net)) => { - Address::Subnet(IpCidr::Ip4(Ipv4Cidr::new( - net.addr().into(), - Ipv4PrefixLen::new(net.width()).unwrap(), - ))) - } HostIdentifier::Ip(IpNet::V6(net)) if net.is_host_net() => { Address::Ip(IpAddr::Ip6(net.addr().into())) } - HostIdentifier::Ip(IpNet::V6(net)) => { - Address::Subnet(IpCidr::Ip6(Ipv6Cidr::new( - net.addr().into(), - Ipv6PrefixLen::new(net.width()).unwrap(), - ))) - } + HostIdentifier::Ip(ip) => Address::Subnet(net_to_cidr(*ip)), HostIdentifier::Vpc(vni) => { Address::Vni(Vni::new(u32::from(*vni)).unwrap()) } diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index d06b6b26e5..d7fd96b0c0 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -18,15 +18,23 @@ mod port; mod port_manager; pub use firewall_rules::opte_firewall_rules; -pub use port::Port; -pub use port_manager::PortManager; -pub use port_manager::PortTicket; - use ipnetwork::IpNetwork; use macaddr::MacAddr6; +use omicron_common::api::internal::shared; pub use oxide_vpc::api::BoundaryServices; pub use oxide_vpc::api::DhcpCfg; +use oxide_vpc::api::IpCidr; +use oxide_vpc::api::Ipv4Cidr; +use oxide_vpc::api::Ipv4PrefixLen; +use oxide_vpc::api::Ipv6Cidr; +use oxide_vpc::api::Ipv6PrefixLen; +use oxide_vpc::api::RouterTarget; pub use oxide_vpc::api::Vni; +use oxnet::IpNet; +pub use port::Port; +pub use port_manager::PortCreateParams; +pub use port_manager::PortManager; +pub use port_manager::PortTicket; use std::net::IpAddr; /// Information about the gateway for an OPTE port @@ -63,3 +71,28 @@ impl Gateway { &self.ip } } + +/// Convert a nexus `IpNet` to an OPTE `IpCidr`. +fn net_to_cidr(net: IpNet) -> IpCidr { + match net { + IpNet::V4(net) => IpCidr::Ip4(Ipv4Cidr::new( + net.addr().into(), + Ipv4PrefixLen::new(net.width()).unwrap(), + )), + IpNet::V6(net) => IpCidr::Ip6(Ipv6Cidr::new( + net.addr().into(), + Ipv6PrefixLen::new(net.width()).unwrap(), + )), + } +} + +/// Convert a nexus `RouterTarget` to an OPTE `RouterTarget`. +fn router_target_opte(target: &shared::RouterTarget) -> RouterTarget { + use shared::RouterTarget::*; + match target { + Drop => RouterTarget::Drop, + InternetGateway => RouterTarget::InternetGateway, + Ip(ip) => RouterTarget::Ip((*ip).into()), + VpcSubnet(net) => RouterTarget::VpcSubnet(net_to_cidr(*net)), + } +} diff --git a/illumos-utils/src/opte/port.rs b/illumos-utils/src/opte/port.rs index 6fbb89c450..a692a02304 100644 --- a/illumos-utils/src/opte/port.rs +++ b/illumos-utils/src/opte/port.rs @@ -7,23 +7,30 @@ use crate::opte::Gateway; use crate::opte::Vni; use macaddr::MacAddr6; +use omicron_common::api::external; +use omicron_common::api::internal::shared::RouterId; +use omicron_common::api::internal::shared::RouterKind; +use oxnet::IpNet; use std::net::IpAddr; use std::sync::Arc; #[derive(Debug)] -struct PortInner { - // Name of the port as identified by OPTE - name: String, - // IP address within the VPC Subnet - ip: IpAddr, - // VPC-private MAC address - mac: MacAddr6, - // Emulated PCI slot for the guest NIC, passed to Propolis - slot: u8, - // Geneve VNI for the VPC - vni: Vni, - // Information about the virtual gateway, aka OPTE - gateway: Gateway, +pub struct PortData { + /// Name of the port as identified by OPTE + pub(crate) name: String, + /// IP address within the VPC Subnet + pub(crate) ip: IpAddr, + /// VPC-private MAC address + pub(crate) mac: MacAddr6, + /// Emulated PCI slot for the guest NIC, passed to Propolis + pub(crate) slot: u8, + /// Geneve VNI for the VPC + pub(crate) vni: Vni, + /// Subnet the port belong to within the VPC. + pub(crate) subnet: IpNet, + /// Information about the virtual gateway, aka OPTE + pub(crate) gateway: Gateway, + /// Name of the VNIC the OPTE port is bound to. // TODO-remove(#2932): Remove this once we can put Viona directly on top of an // OPTE port device. // @@ -33,7 +40,18 @@ struct PortInner { // https://github.com/oxidecomputer/opte/issues/178 for more details. This // can be changed back to a real VNIC when that is resolved, and the Drop // impl below can simplify to just call `drop(self.vnic)`. - vnic: String, + pub(crate) vnic: String, +} + +#[derive(Debug)] +struct PortInner(PortData); + +impl core::ops::Deref for PortInner { + type Target = PortData; + + fn deref(&self) -> &Self::Target { + &self.0 + } } #[cfg(target_os = "illumos")] @@ -83,26 +101,8 @@ pub struct Port { } impl Port { - pub fn new( - name: String, - ip: IpAddr, - mac: MacAddr6, - slot: u8, - vni: Vni, - gateway: Gateway, - vnic: String, - ) -> Self { - Self { - inner: Arc::new(PortInner { - name, - ip, - mac, - slot, - vni, - gateway, - vnic, - }), - } + pub fn new(data: PortData) -> Self { + Self { inner: Arc::new(PortInner(data)) } } pub fn ip(&self) -> &IpAddr { @@ -126,6 +126,10 @@ impl Port { &self.inner.vni } + pub fn subnet(&self) -> &IpNet { + &self.inner.subnet + } + pub fn vnic_name(&self) -> &str { &self.inner.vnic } @@ -133,4 +137,17 @@ impl Port { pub fn slot(&self) -> u8 { self.inner.slot } + + pub fn system_router_key(&self) -> RouterId { + // Unwrap safety: both of these VNI types represent validated u24s. + let vni = external::Vni::try_from(self.vni().as_u32()).unwrap(); + RouterId { vni, kind: RouterKind::System } + } + + pub fn custom_router_key(&self) -> RouterId { + RouterId { + kind: RouterKind::Custom(*self.subnet()), + ..self.system_router_key() + } + } } diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 726aa01a2a..caeda81217 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -7,6 +7,7 @@ use crate::opte::opte_firewall_rules; use crate::opte::params::VirtualNetworkInterfaceHost; use crate::opte::params::VpcFirewallRule; +use crate::opte::port::PortData; use crate::opte::Error; use crate::opte::Gateway; use crate::opte::Port; @@ -15,8 +16,15 @@ use ipnetwork::IpNetwork; use omicron_common::api::external; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_common::api::internal::shared::ResolvedVpcRoute; +use omicron_common::api::internal::shared::ResolvedVpcRouteSet; +use omicron_common::api::internal::shared::ResolvedVpcRouteState; +use omicron_common::api::internal::shared::RouterId; +use omicron_common::api::internal::shared::RouterTarget as ApiRouterTarget; +use omicron_common::api::internal::shared::RouterVersion; use omicron_common::api::internal::shared::SourceNatConfig; use oxide_vpc::api::AddRouterEntryReq; +use oxide_vpc::api::DelRouterEntryReq; use oxide_vpc::api::DhcpCfg; use oxide_vpc::api::ExternalIpCfg; use oxide_vpc::api::IpCfg; @@ -24,7 +32,7 @@ use oxide_vpc::api::IpCidr; use oxide_vpc::api::Ipv4Cfg; use oxide_vpc::api::Ipv6Cfg; use oxide_vpc::api::MacAddr; -use oxide_vpc::api::RouterTarget; +use oxide_vpc::api::RouterClass; use oxide_vpc::api::SNat4Cfg; use oxide_vpc::api::SNat6Cfg; use oxide_vpc::api::SetExternalIpsReq; @@ -34,6 +42,8 @@ use slog::error; use slog::info; use slog::Logger; use std::collections::BTreeMap; +use std::collections::HashMap; +use std::collections::HashSet; use std::net::IpAddr; use std::net::Ipv6Addr; use std::sync::atomic::AtomicU64; @@ -45,19 +55,30 @@ use uuid::Uuid; // Prefix used to identify xde data links. const XDE_LINK_PREFIX: &str = "opte"; +/// Stored routes (and usage count) for a given VPC/subnet. +#[derive(Debug, Clone)] +struct RouteSet { + version: Option, + routes: HashSet, + active_ports: usize, +} + #[derive(Debug)] struct PortManagerInner { log: Logger, - // Sequential identifier for each port on the system. + /// Sequential identifier for each port on the system. next_port_id: AtomicU64, - // IP address of the hosting sled on the underlay. + /// IP address of the hosting sled on the underlay. underlay_ip: Ipv6Addr, - // Map of all ports, keyed on the interface Uuid and its kind - // (which includes the Uuid of the parent instance or service) + /// Map of all ports, keyed on the interface Uuid and its kind + /// (which includes the Uuid of the parent instance or service) ports: Mutex>, + + /// Map of all current resolved routes. + routes: Mutex>, } impl PortManagerInner { @@ -70,6 +91,18 @@ impl PortManagerInner { } } +#[derive(Debug)] +/// Parameters needed to create and configure an OPTE port. +pub struct PortCreateParams<'a> { + pub nic: &'a NetworkInterface, + pub source_nat: Option, + pub ephemeral_ip: Option, + pub floating_ips: &'a [IpAddr], + pub firewall_rules: &'a [VpcFirewallRule], + pub dhcp_config: DhcpCfg, + pub is_service: bool, +} + /// The port manager controls all OPTE ports on a single host. #[derive(Debug, Clone)] pub struct PortManager { @@ -84,6 +117,7 @@ impl PortManager { next_port_id: AtomicU64::new(0), underlay_ip, ports: Mutex::new(BTreeMap::new()), + routes: Mutex::new(Default::default()), }); Self { inner } @@ -97,13 +131,18 @@ impl PortManager { #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))] pub fn create_port( &self, - nic: &NetworkInterface, - source_nat: Option, - ephemeral_ip: Option, - floating_ips: &[IpAddr], - firewall_rules: &[VpcFirewallRule], - dhcp_config: DhcpCfg, + params: PortCreateParams, ) -> Result<(Port, PortTicket), Error> { + let PortCreateParams { + nic, + source_nat, + ephemeral_ip, + floating_ips, + firewall_rules, + dhcp_config, + is_service, + } = params; + let mac = *nic.mac; let vni = Vni::new(nic.vni).unwrap(); let subnet = IpNetwork::from(nic.subnet); @@ -319,15 +358,16 @@ impl PortManager { let (port, ticket) = { let mut ports = self.inner.ports.lock().unwrap(); let ticket = PortTicket::new(nic.id, nic.kind, self.inner.clone()); - let port = Port::new( - port_name.clone(), - nic.ip, + let port = Port::new(PortData { + name: port_name.clone(), + ip: nic.ip, mac, - nic.slot, + slot: nic.slot, vni, + subnet: nic.subnet, gateway, vnic, - ); + }); let old = ports.insert((nic.id, nic.kind), port.clone()); assert!( old.is_none(), @@ -338,57 +378,67 @@ impl PortManager { (port, ticket) }; - // Add a router entry for this interface's subnet, directing traffic to the - // VPC subnet. - let route = AddRouterEntryReq { - port_name: port_name.clone(), - dest: vpc_subnet, - target: RouterTarget::VpcSubnet(vpc_subnet), - }; - #[cfg(target_os = "illumos")] - hdl.add_router_entry(&route)?; - debug!( - self.inner.log, - "Added VPC Subnet router entry"; - "port_name" => &port_name, - "route" => ?route, - ); + // Check locally to see whether we have any routes from the + // control plane for this port already installed. If not, + // create a record to show that we're interested in receiving + // those routes. + let mut routes = self.inner.routes.lock().unwrap(); + let system_routes = + routes.entry(port.system_router_key()).or_insert_with(|| { + let mut routes = HashSet::new(); + + // Services do not talk to one another via OPTE, but do need + // to reach out over the Internet *before* nexus is up to give + // us real rules. The easiest bet is to instantiate these here. + if is_service { + routes.insert(ResolvedVpcRoute { + dest: "0.0.0.0/0".parse().unwrap(), + target: ApiRouterTarget::InternetGateway, + }); + routes.insert(ResolvedVpcRoute { + dest: "::/0".parse().unwrap(), + target: ApiRouterTarget::InternetGateway, + }); + } - // TODO-remove - // - // See https://github.com/oxidecomputer/omicron/issues/1336 - // - // This is another part of the workaround, allowing reply traffic from - // the guest back out. Normally, OPTE would drop such traffic at the - // router layer, as it has no route for that external IP address. This - // allows such traffic through. - // - // Note that this exact rule will eventually be included, since it's one - // of the default routing rules in the VPC System Router. However, that - // will likely be communicated in a different way, or could be modified, - // and this specific call should be removed in favor of sending the - // routing rules the control plane provides. - // - // This rule sends all traffic that has no better match to the gateway. - let dest = match vpc_subnet { - IpCidr::Ip4(_) => "0.0.0.0/0", - IpCidr::Ip6(_) => "::/0", + RouteSet { version: None, routes, active_ports: 0 } + }); + system_routes.active_ports += 1; + // Clone is needed to get borrowck on our side, sadly. + let system_routes = system_routes.clone(); + + let custom_routes = routes + .entry(port.custom_router_key()) + .or_insert_with(|| RouteSet { + version: None, + routes: HashSet::default(), + active_ports: 0, + }); + custom_routes.active_ports += 1; + + for (class, routes) in [ + (RouterClass::System, &system_routes), + (RouterClass::Custom, custom_routes), + ] { + for route in &routes.routes { + let route = AddRouterEntryReq { + class, + port_name: port_name.clone(), + dest: super::net_to_cidr(route.dest), + target: super::router_target_opte(&route.target), + }; + + #[cfg(target_os = "illumos")] + hdl.add_router_entry(&route)?; + + debug!( + self.inner.log, + "Added router entry"; + "port_name" => &port_name, + "route" => ?route, + ); + } } - .parse() - .unwrap(); - let route = AddRouterEntryReq { - port_name: port_name.clone(), - dest, - target: RouterTarget::InternetGateway, - }; - #[cfg(target_os = "illumos")] - hdl.add_router_entry(&route)?; - debug!( - self.inner.log, - "Added default internet gateway route entry"; - "port_name" => &port_name, - "route" => ?route, - ); info!( self.inner.log, @@ -398,6 +448,122 @@ impl PortManager { Ok((port, ticket)) } + pub fn vpc_routes_list(&self) -> Vec { + let routes = self.inner.routes.lock().unwrap(); + routes + .iter() + .map(|(k, v)| ResolvedVpcRouteState { id: *k, version: v.version }) + .collect() + } + + pub fn vpc_routes_ensure( + &self, + new_routes: Vec, + ) -> Result<(), Error> { + let mut routes = self.inner.routes.lock().unwrap(); + let mut deltas = HashMap::new(); + for new in new_routes { + // Disregard any route information for a subnet we don't have. + let Some(old) = routes.get(&new.id) else { + continue; + }; + + // We have to handle subnet router changes, as well as + // spurious updates from multiple Nexus instances. + // If there's a UUID match, only update if vers increased, + // otherwise take the update verbatim (including loss of version). + let (to_add, to_delete): (HashSet<_>, HashSet<_>) = + match (old.version, new.version) { + (Some(old_vers), Some(new_vers)) + if !old_vers.is_replaced_by(&new_vers) => + { + continue; + } + _ => ( + new.routes.difference(&old.routes).cloned().collect(), + old.routes.difference(&new.routes).cloned().collect(), + ), + }; + deltas.insert(new.id, (to_add, to_delete)); + + let active_ports = old.active_ports; + routes.insert( + new.id, + RouteSet { + version: new.version, + routes: new.routes, + active_ports, + }, + ); + } + + // Note: We're deliberately holding both locks here + // to prevent several nexuses computng and applying deltas + // out of order. + let ports = self.inner.ports.lock().unwrap(); + #[cfg(target_os = "illumos")] + let hdl = opte_ioctl::OpteHdl::open(opte_ioctl::OpteHdl::XDE_CTL)?; + + // Propagate deltas out to all ports. + for port in ports.values() { + let system_id = port.system_router_key(); + let system_delta = deltas.get(&system_id); + + let custom_id = port.custom_router_key(); + let custom_delta = deltas.get(&custom_id); + + #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))] + for (class, delta) in [ + (RouterClass::System, system_delta), + (RouterClass::Custom, custom_delta), + ] { + let Some((to_add, to_delete)) = delta else { + continue; + }; + + for route in to_delete { + let route = DelRouterEntryReq { + class, + port_name: port.name().into(), + dest: super::net_to_cidr(route.dest), + target: super::router_target_opte(&route.target), + }; + + #[cfg(target_os = "illumos")] + hdl.del_router_entry(&route)?; + + debug!( + self.inner.log, + "Removed router entry"; + "port_name" => &port.name(), + "route" => ?route, + ); + } + + for route in to_add { + let route = AddRouterEntryReq { + class, + port_name: port.name().into(), + dest: super::net_to_cidr(route.dest), + target: super::router_target_opte(&route.target), + }; + + #[cfg(target_os = "illumos")] + hdl.add_router_entry(&route)?; + + debug!( + self.inner.log, + "Added router entry"; + "port_name" => &port.name(), + "route" => ?route, + ); + } + } + } + + Ok(()) + } + /// Ensure external IPs for an OPTE port are up to date. #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))] pub fn external_ips_ensure( @@ -739,6 +905,29 @@ impl PortTicket { ); return Err(Error::ReleaseMissingPort(self.id, self.kind)); }; + drop(ports); + + // Cleanup the set of subnets we want to receive routes for. + let mut routes = self.manager.routes.lock().unwrap(); + for key in [port.system_router_key(), port.custom_router_key()] { + let should_remove = routes + .get_mut(&key) + .map(|v| { + v.active_ports = v.active_ports.saturating_sub(1); + v.active_ports == 0 + }) + .unwrap_or_default(); + + if should_remove { + routes.remove(&key); + info!( + self.manager.log, + "Removed route set for subnet"; + "id" => ?&key, + ); + } + } + debug!( self.manager.log, "Removed OPTE port from manager"; diff --git a/nexus/db-fixed-data/src/vpc.rs b/nexus/db-fixed-data/src/vpc.rs index 25628a83b5..d5940a976e 100644 --- a/nexus/db-fixed-data/src/vpc.rs +++ b/nexus/db-fixed-data/src/vpc.rs @@ -23,12 +23,21 @@ pub static SERVICES_VPC_ROUTER_ID: Lazy = Lazy::new(|| { .expect("invalid uuid for builtin services vpc router id") }); -/// UUID of default route for built-in Services VPC. -pub static SERVICES_VPC_DEFAULT_ROUTE_ID: Lazy = Lazy::new(|| { - "001de000-074c-4000-8000-000000000002" - .parse() - .expect("invalid uuid for builtin services vpc default route id") -}); +/// UUID of default IPv4 route for built-in Services VPC. +pub static SERVICES_VPC_DEFAULT_V4_ROUTE_ID: Lazy = + Lazy::new(|| { + "001de000-074c-4000-8000-000000000002" + .parse() + .expect("invalid uuid for builtin services vpc default route id") + }); + +/// UUID of default IPv6 route for built-in Services VPC. +pub static SERVICES_VPC_DEFAULT_V6_ROUTE_ID: Lazy = + Lazy::new(|| { + "001de000-074c-4000-8000-000000000003" + .parse() + .expect("invalid uuid for builtin services vpc default route id") + }); /// Built-in VPC for internal services on the rack. pub static SERVICES_VPC: Lazy = Lazy::new(|| { diff --git a/nexus/db-fixed-data/src/vpc_subnet.rs b/nexus/db-fixed-data/src/vpc_subnet.rs index 622799b000..c91581ac13 100644 --- a/nexus/db-fixed-data/src/vpc_subnet.rs +++ b/nexus/db-fixed-data/src/vpc_subnet.rs @@ -31,6 +31,27 @@ pub static NTP_VPC_SUBNET_ID: Lazy = Lazy::new(|| { .expect("invalid uuid for builtin boundary ntp vpc subnet id") }); +/// UUID of built-in subnet route VPC Subnet route for External DNS. +pub static DNS_VPC_SUBNET_ROUTE_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000004" + .parse() + .expect("invalid uuid for builtin services vpc default route id") +}); + +/// UUID of built-in subnet route VPC Subnet route for Nexus. +pub static NEXUS_VPC_SUBNET_ROUTE_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000005" + .parse() + .expect("invalid uuid for builtin services vpc default route id") +}); + +/// UUID of built-in subnet route VPC Subnet route for Boundary NTP. +pub static NTP_VPC_SUBNET_ROUTE_ID: Lazy = Lazy::new(|| { + "001de000-c470-4000-8000-000000000006" + .parse() + .expect("invalid uuid for builtin services vpc default route id") +}); + /// Built-in VPC Subnet for External DNS. pub static DNS_VPC_SUBNET: Lazy = Lazy::new(|| { VpcSubnet::new( diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index d08a51edd4..72d01f094b 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1106,6 +1106,7 @@ table! { rcgen -> Int8, ipv4_block -> Inet, ipv6_block -> Inet, + custom_router_id -> Nullable, } } @@ -1120,6 +1121,7 @@ table! { kind -> crate::VpcRouterKindEnum, vpc_id -> Uuid, rcgen -> Int8, + resolved_version -> Int8, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 04fafe4f93..ec94221496 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(77, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(78, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(78, "vpc-subnet-routing"), KnownVersion::new(77, "remove-view-for-v2p-mappings"), KnownVersion::new(76, "lookup-region-snapshot-by-snapshot-id"), KnownVersion::new(75, "add-cockroach-zone-id-to-node-id"), diff --git a/nexus/db-model/src/vpc_route.rs b/nexus/db-model/src/vpc_route.rs index 168ed41cef..3015df691f 100644 --- a/nexus/db-model/src/vpc_route.rs +++ b/nexus/db-model/src/vpc_route.rs @@ -18,7 +18,7 @@ use std::io::Write; use uuid::Uuid; impl_enum_wrapper!( - #[derive(SqlType, Debug)] + #[derive(SqlType, Debug, QueryId)] #[diesel(postgres_type(name = "router_route_kind", schema = "public"))] pub struct RouterRouteKindEnum; @@ -127,6 +127,46 @@ impl RouterRoute { destination: RouteDestination::new(params.destination), } } + + /// Create a subnet routing rule for a VPC's system router. + /// + /// This defaults to use the same name as the subnet. If this would conflict + /// with the internet gateway rules, then the UUID is used instead (alongside + /// notice that a name conflict has occurred). + pub fn for_subnet( + route_id: Uuid, + system_router_id: Uuid, + subnet: Name, + ) -> Self { + let forbidden_names = ["default-v4", "default-v6"]; + + let name = if forbidden_names.contains(&subnet.as_str()) { + // unwrap safety: a uuid is not by itself a valid name + // so prepend it with another string. + // - length constraint is <63 chars, + // - a UUID is 36 chars including hyphens, + // - "{subnet}-" is 11 chars + // - "conflict-" is 9 chars + // = 56 chars + format!("conflict-{subnet}-{route_id}").parse().unwrap() + } else { + subnet.0.clone() + }; + + Self::new( + route_id, + system_router_id, + external::RouterRouteKind::VpcSubnet, + params::RouterRouteCreate { + identity: external::IdentityMetadataCreateParams { + name, + description: format!("VPC Subnet route for '{subnet}'"), + }, + target: external::RouteTarget::Subnet(subnet.0.clone()), + destination: external::RouteDestination::Subnet(subnet.0), + }, + ) + } } impl Into for RouterRoute { diff --git a/nexus/db-model/src/vpc_router.rs b/nexus/db-model/src/vpc_router.rs index 71c753e6aa..51409c38d5 100644 --- a/nexus/db-model/src/vpc_router.rs +++ b/nexus/db-model/src/vpc_router.rs @@ -44,6 +44,7 @@ pub struct VpcRouter { pub vpc_id: Uuid, pub kind: VpcRouterKind, pub rcgen: Generation, + pub resolved_version: i64, } impl VpcRouter { @@ -54,7 +55,13 @@ impl VpcRouter { params: params::VpcRouterCreate, ) -> Self { let identity = VpcRouterIdentity::new(router_id, params.identity); - Self { identity, vpc_id, kind, rcgen: Generation::new() } + Self { + identity, + vpc_id, + kind, + rcgen: Generation::new(), + resolved_version: 0, + } } } diff --git a/nexus/db-model/src/vpc_subnet.rs b/nexus/db-model/src/vpc_subnet.rs index f3c90a908e..eaa7c6e87d 100644 --- a/nexus/db-model/src/vpc_subnet.rs +++ b/nexus/db-model/src/vpc_subnet.rs @@ -39,6 +39,7 @@ pub struct VpcSubnet { pub rcgen: Generation, pub ipv4_block: Ipv4Net, pub ipv6_block: Ipv6Net, + pub custom_router_id: Option, } impl VpcSubnet { @@ -60,6 +61,7 @@ impl VpcSubnet { rcgen: Generation::new(), ipv4_block: Ipv4Net(ipv4_block), ipv6_block: Ipv6Net(ipv6_block), + custom_router_id: None, } } @@ -102,6 +104,7 @@ impl From for views::VpcSubnet { vpc_id: subnet.vpc_id, ipv4_block: subnet.ipv4_block.0, ipv6_block: subnet.ipv6_block.0, + custom_router_id: subnet.custom_router_id, } } } diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 3076afa39f..a9b406becf 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -136,11 +136,27 @@ impl DataStore { ), )); } - self.create_network_interface_raw(opctx, interface) + + let out = self + .create_network_interface_raw(opctx, interface) .await // Convert to `InstanceNetworkInterface` before returning; we know // this is valid as we've checked the condition on-entry. - .map(NetworkInterface::as_instance) + .map(NetworkInterface::as_instance)?; + + // `instance:xxx` targets in router rules resolve to the primary + // NIC of that instance. Accordingly, NIC create may cause dangling + // entries to re-resolve to a valid instance (even if it is not yet + // started). + // This will not trigger the route RPW directly, we still need to do + // so in e.g. the instance watcher task. + if out.primary { + self.vpc_increment_rpw_version(opctx, out.vpc_id) + .await + .map_err(|e| network_interface::InsertError::External(e))?; + } + + Ok(out) } /// List network interfaces associated with a given service. @@ -608,6 +624,28 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Retrieve the primary network interface for a given instance. + pub async fn instance_get_primary_network_interface( + &self, + opctx: &OpContext, + authz_instance: &authz::Instance, + ) -> LookupResult { + opctx.authorize(authz::Action::ListChildren, authz_instance).await?; + + use db::schema::instance_network_interface::dsl; + dsl::instance_network_interface + .filter(dsl::time_deleted.is_null()) + .filter(dsl::instance_id.eq(authz_instance.id())) + .filter(dsl::is_primary.eq(true)) + .select(InstanceNetworkInterface::as_select()) + .limit(1) + .first_async::( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Get network interface associated with a given probe. pub async fn probe_get_network_interface( &self, diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 5322e20dbf..89ee1c468e 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods on [`Vpc`]s. use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; @@ -20,6 +21,7 @@ use crate::db::model::InstanceNetworkInterface; use crate::db::model::Name; use crate::db::model::Project; use crate::db::model::RouterRoute; +use crate::db::model::RouterRouteKind; use crate::db::model::RouterRouteUpdate; use crate::db::model::Sled; use crate::db::model::Vni; @@ -33,6 +35,7 @@ use crate::db::model::VpcSubnetUpdate; use crate::db::model::VpcUpdate; use crate::db::model::{Ipv4Net, Ipv6Net}; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::queries::vpc::InsertVpcQuery; use crate::db::queries::vpc::VniSearchIter; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; @@ -43,6 +46,7 @@ use chrono::Utc; use diesel::prelude::*; use diesel::result::DatabaseErrorKind; use diesel::result::Error as DieselError; +use futures::stream::{self, StreamExt}; use ipnetwork::IpNetwork; use nexus_db_fixed_data::vpc::SERVICES_VPC_ID; use nexus_types::deployment::BlueprintZoneFilter; @@ -59,11 +63,16 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::api::external::RouteDestination; use omicron_common::api::external::RouteTarget; -use omicron_common::api::external::RouterRouteKind; +use omicron_common::api::external::RouterRouteKind as ExternalRouteKind; use omicron_common::api::external::UpdateResult; use omicron_common::api::external::Vni as ExternalVni; +use omicron_common::api::internal::shared::RouterTarget; +use oxnet::IpNet; use ref_cast::RefCast; use std::collections::BTreeMap; +use std::collections::HashMap; +use std::collections::HashSet; +use std::net::IpAddr; use uuid::Uuid; impl DataStore { @@ -74,7 +83,8 @@ impl DataStore { ) -> Result<(), Error> { use nexus_db_fixed_data::project::SERVICES_PROJECT_ID; use nexus_db_fixed_data::vpc::SERVICES_VPC; - use nexus_db_fixed_data::vpc::SERVICES_VPC_DEFAULT_ROUTE_ID; + use nexus_db_fixed_data::vpc::SERVICES_VPC_DEFAULT_V4_ROUTE_ID; + use nexus_db_fixed_data::vpc::SERVICES_VPC_DEFAULT_V6_ROUTE_ID; opctx.authorize(authz::Action::Modify, &authz::DATABASE).await?; @@ -135,35 +145,49 @@ impl DataStore { .map(|(authz_router, _)| authz_router)? }; - let route = RouterRoute::new( - *SERVICES_VPC_DEFAULT_ROUTE_ID, - SERVICES_VPC.system_router_id, - RouterRouteKind::Default, - nexus_types::external_api::params::RouterRouteCreate { - identity: IdentityMetadataCreateParams { - name: "default".parse().unwrap(), - description: - "Default internet gateway route for Oxide Services" - .to_string(), + // Unwrap safety: these are known valid CIDR blocks. + let default_ips = [ + ( + "default-v4", + "0.0.0.0/0".parse().unwrap(), + *SERVICES_VPC_DEFAULT_V4_ROUTE_ID, + ), + ( + "default-v6", + "::/0".parse().unwrap(), + *SERVICES_VPC_DEFAULT_V6_ROUTE_ID, + ), + ]; + + for (name, default, uuid) in default_ips { + let route = RouterRoute::new( + uuid, + SERVICES_VPC.system_router_id, + ExternalRouteKind::Default, + nexus_types::external_api::params::RouterRouteCreate { + identity: IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: + "Default internet gateway route for Oxide Services" + .to_string(), + }, + target: RouteTarget::InternetGateway( + "outbound".parse().unwrap(), + ), + destination: RouteDestination::IpNet(default), }, - target: RouteTarget::InternetGateway( - "outbound".parse().unwrap(), - ), - destination: RouteDestination::Vpc( - SERVICES_VPC.identity.name.clone().into(), - ), - }, - ); - self.router_create_route(opctx, &authz_router, route) - .await - .map(|_| ()) - .or_else(|e| match e { - Error::ObjectAlreadyExists { .. } => Ok(()), - _ => Err(e), - })?; + ); + self.router_create_route(opctx, &authz_router, route) + .await + .map(|_| ()) + .or_else(|e| match e { + Error::ObjectAlreadyExists { .. } => Ok(()), + _ => Err(e), + })?; + } self.load_builtin_vpc_fw_rules(opctx).await?; - self.load_builtin_vpc_subnets(opctx).await?; + self.load_builtin_vpc_subnets(opctx, &authz_router).await?; info!(opctx.log, "created built-in services vpc"); @@ -228,10 +252,15 @@ impl DataStore { async fn load_builtin_vpc_subnets( &self, opctx: &OpContext, + authz_router: &authz::VpcRouter, ) -> Result<(), Error> { + use nexus_db_fixed_data::vpc::SERVICES_VPC; use nexus_db_fixed_data::vpc_subnet::DNS_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::DNS_VPC_SUBNET_ROUTE_ID; use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET_ROUTE_ID; use nexus_db_fixed_data::vpc_subnet::NTP_VPC_SUBNET; + use nexus_db_fixed_data::vpc_subnet::NTP_VPC_SUBNET_ROUTE_ID; debug!(opctx.log, "attempting to create built-in VPC Subnets"); @@ -242,9 +271,11 @@ impl DataStore { .lookup_for(authz::Action::CreateChild) .await .internal_context("lookup built-in services vpc")?; - for vpc_subnet in - [&*DNS_VPC_SUBNET, &*NEXUS_VPC_SUBNET, &*NTP_VPC_SUBNET] - { + for (vpc_subnet, route_id) in [ + (&*DNS_VPC_SUBNET, *DNS_VPC_SUBNET_ROUTE_ID), + (&*NEXUS_VPC_SUBNET, *NEXUS_VPC_SUBNET_ROUTE_ID), + (&*NTP_VPC_SUBNET, *NTP_VPC_SUBNET_ROUTE_ID), + ] { if let Ok(_) = db::lookup::LookupPath::new(opctx, self) .vpc_subnet_id(vpc_subnet.id()) .fetch() @@ -260,6 +291,20 @@ impl DataStore { Error::ObjectAlreadyExists { .. } => Ok(()), _ => Err(e), })?; + + let route = RouterRoute::for_subnet( + route_id, + SERVICES_VPC.system_router_id, + vpc_subnet.name().clone().into(), + ); + + self.router_create_route(opctx, &authz_router, route) + .await + .map(|_| ()) + .or_else(|e| match e { + Error::ObjectAlreadyExists { .. } => Ok(()), + _ => Err(e), + })?; } info!(opctx.log, "created built-in services vpc subnets"); @@ -770,6 +815,9 @@ impl DataStore { assert_eq!(authz_vpc.id(), subnet.vpc_id); let db_subnet = self.vpc_create_subnet_raw(subnet).await?; + self.vpc_system_router_ensure_subnet_routes(opctx, authz_vpc.id()) + .await + .map_err(SubnetError::External)?; Ok(( authz::VpcSubnet::new( authz_vpc.clone(), @@ -850,6 +898,12 @@ impl DataStore { "deletion failed due to concurrent modification", )); } else { + self.vpc_system_router_ensure_subnet_routes( + opctx, + db_subnet.vpc_id, + ) + .await?; + Ok(()) } } @@ -863,7 +917,7 @@ impl DataStore { opctx.authorize(authz::Action::Modify, authz_subnet).await?; use db::schema::vpc_subnet::dsl; - diesel::update(dsl::vpc_subnet) + let out = diesel::update(dsl::vpc_subnet) .filter(dsl::time_deleted.is_null()) .filter(dsl::id.eq(authz_subnet.id())) .set(updates) @@ -875,7 +929,11 @@ impl DataStore { e, ErrorHandler::NotFoundByResource(authz_subnet), ) - }) + })?; + + self.vpc_system_router_ensure_subnet_routes(opctx, out.vpc_id).await?; + + Ok(out) } pub async fn subnet_list_instance_network_interfaces( @@ -994,6 +1052,29 @@ impl DataStore { ErrorHandler::NotFoundByResource(authz_router), ) })?; + + // All child routes are deleted. + use db::schema::router_route::dsl as rr; + let now = Utc::now(); + diesel::update(rr::router_route) + .filter(rr::time_deleted.is_null()) + .filter(rr::vpc_router_id.eq(authz_router.id())) + .set(rr::time_deleted.eq(now)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // Unlink all subnets from this router. + // XXX: We might this want to error out before the delete fires. + use db::schema::vpc_subnet::dsl as vpc; + diesel::update(vpc::vpc_subnet) + .filter(vpc::time_deleted.is_null()) + .filter(vpc::custom_router_id.eq(authz_router.id())) + .set(vpc::custom_router_id.eq(Option::::None)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(()) } @@ -1059,6 +1140,17 @@ impl DataStore { assert_eq!(authz_router.id(), route.vpc_router_id); opctx.authorize(authz::Action::CreateChild, authz_router).await?; + Self::router_create_route_on_connection( + route, + &*self.pool_connection_authorized(opctx).await?, + ) + .await + } + + pub async fn router_create_route_on_connection( + route: RouterRoute, + conn: &async_bb8_diesel::Connection, + ) -> CreateResult { use db::schema::router_route::dsl; let router_id = route.vpc_router_id; let name = route.name().clone(); @@ -1067,9 +1159,7 @@ impl DataStore { router_id, diesel::insert_into(dsl::router_route).values(route), ) - .insert_and_get_result_async( - &*self.pool_connection_authorized(opctx).await?, - ) + .insert_and_get_result_async(conn) .await .map_err(|e| match e { AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { @@ -1221,6 +1311,487 @@ impl DataStore { ) }) } + + /// Ensure the system router for a VPC has the correct set of subnet + /// routing rules, after any changes to a subnet. + pub async fn vpc_system_router_ensure_subnet_routes( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> Result<(), Error> { + // These rules are immutable from a user's perspective, and + // aren't something which they can meaningfully interact with, + // so uuid stability on e.g. VPC rename is not a primary concern. + // We make sure only to alter VPC subnet rules here: users may + // modify other system routes like internet gateways (which are + // `RouteKind::Default`). + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("vpc_subnet_route_reconcile") + .transaction(&conn, |conn| async move { + use db::schema::router_route::dsl; + use db::schema::vpc::dsl as vpc; + use db::schema::vpc_subnet::dsl as subnet; + + let system_router_id = vpc::vpc + .filter(vpc::id.eq(vpc_id)) + .filter(vpc::time_deleted.is_null()) + .select(vpc::system_router_id) + .limit(1) + .get_result_async(&conn) + .await?; + + let valid_subnets: Vec = subnet::vpc_subnet + .filter(subnet::vpc_id.eq(vpc_id)) + .filter(subnet::time_deleted.is_null()) + .select(VpcSubnet::as_select()) + .load_async(&conn) + .await?; + + let current_rules: Vec = dsl::router_route + .filter( + dsl::kind + .eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)), + ) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::vpc_router_id.eq(system_router_id)) + .select(RouterRoute::as_select()) + .load_async(&conn) + .await?; + + // Build the add/delete sets. + let expected_names: HashSet = valid_subnets + .iter() + .map(|v| v.identity.name.clone()) + .collect(); + + // This checks that we have rules which *point to* the named + // subnets, rather than working with rule names (even if these + // are set to match the subnet where possible). + // Rule names are effectively randomised when someone, e.g., + // names a subnet "default-v4"/"-v6", and this prevents us + // from repeatedly adding/deleting that route. + let mut found_names = HashSet::new(); + let mut invalid = Vec::new(); + for rule in current_rules { + let id = rule.id(); + match (rule.kind.0, rule.target.0) { + ( + ExternalRouteKind::VpcSubnet, + RouteTarget::Subnet(n), + ) if expected_names.contains(Name::ref_cast(&n)) => { + let _ = found_names.insert(n.into()); + } + _ => invalid.push(id), + } + } + + // Add/Remove routes. Retry if number is incorrect due to + // concurrent modification. + let now = Utc::now(); + let to_update = invalid.len(); + let updated_rows = diesel::update(dsl::router_route) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq_any(invalid)) + .set(dsl::time_deleted.eq(now)) + .execute_async(&conn) + .await?; + + if updated_rows != to_update { + return Err(DieselError::RollbackTransaction); + } + + // Duplicate rules are caught here using the UNIQUE constraint + // on names in a router. Only nexus can alter the system router, + // so there is no risk of collision with user-specified names. + // + // Subnets named "default-v4" or "default-v6" have their rules renamed + // to include the rule UUID. + for subnet in expected_names.difference(&found_names) { + let route_id = Uuid::new_v4(); + let route = db::model::RouterRoute::for_subnet( + route_id, + system_router_id, + subnet.clone(), + ); + + match Self::router_create_route_on_connection(route, &conn) + .await + { + Err(Error::Conflict { .. }) => { + return Err(DieselError::RollbackTransaction) + } + Err(_) => return Err(DieselError::NotFound), + _ => {} + } + } + + // Verify that route set is exactly as intended, and rollback otherwise. + let current_rules: Vec = dsl::router_route + .filter( + dsl::kind + .eq(RouterRouteKind(ExternalRouteKind::VpcSubnet)), + ) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::vpc_router_id.eq(system_router_id)) + .select(RouterRoute::as_select()) + .load_async(&conn) + .await?; + + if current_rules.len() != expected_names.len() { + return Err(DieselError::RollbackTransaction); + } + + for rule in current_rules { + match (rule.kind.0, rule.target.0) { + ( + ExternalRouteKind::VpcSubnet, + RouteTarget::Subnet(n), + ) if expected_names.contains(Name::ref_cast(&n)) => {} + _ => return Err(DieselError::RollbackTransaction), + } + } + + Ok(()) + }) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + self.vpc_increment_rpw_version(opctx, vpc_id).await + } + + /// Look up a VPC by VNI. + pub async fn vpc_get_system_router( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> LookupResult { + use db::schema::vpc::dsl as vpc_dsl; + use db::schema::vpc_router::dsl as router_dsl; + + vpc_dsl::vpc + .inner_join( + router_dsl::vpc_router + .on(router_dsl::id.eq(vpc_dsl::system_router_id)), + ) + .filter(vpc_dsl::time_deleted.is_null()) + .filter(vpc_dsl::id.eq(vpc_id)) + .filter(router_dsl::time_deleted.is_null()) + .filter(router_dsl::vpc_id.eq(vpc_id)) + .select(VpcRouter::as_select()) + .limit(1) + .first_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Vpc, + LookupType::ById(vpc_id), + ), + ) + }) + } + + /// Fetch all active custom routers (and their parent subnets) + /// in a VPC. + pub async fn vpc_get_active_custom_routers( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> ListResultVec<(VpcSubnet, VpcRouter)> { + use db::schema::vpc_router::dsl as router_dsl; + use db::schema::vpc_subnet::dsl as subnet_dsl; + + subnet_dsl::vpc_subnet + .inner_join( + router_dsl::vpc_router.on(router_dsl::id + .nullable() + .eq(subnet_dsl::custom_router_id)), + ) + .filter(subnet_dsl::time_deleted.is_null()) + .filter(subnet_dsl::vpc_id.eq(vpc_id)) + .filter(router_dsl::time_deleted.is_null()) + .filter(router_dsl::vpc_id.eq(vpc_id)) + .select((VpcSubnet::as_select(), VpcRouter::as_select())) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::Vpc, + LookupType::ById(vpc_id), + ), + ) + }) + } + + /// Resolve all targets in a router into concrete details. + pub async fn vpc_resolve_router_rules( + &self, + opctx: &OpContext, + vpc_router_id: Uuid, + ) -> Result, Error> { + // Get all rules in target router. + opctx.check_complex_operations_allowed()?; + + let (.., authz_project, authz_vpc, authz_router) = + db::lookup::LookupPath::new(opctx, self) + .vpc_router_id(vpc_router_id) + .lookup_for(authz::Action::Read) + .await + .internal_context("lookup router by id for rules")?; + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + let mut all_rules = vec![]; + while let Some(p) = paginator.next() { + let batch = self + .vpc_router_route_list( + opctx, + &authz_router, + &PaginatedBy::Id(p.current_pagparams()), + ) + .await?; + paginator = p + .found_batch(&batch, &|s: &nexus_db_model::RouterRoute| s.id()); + all_rules.extend(batch); + } + + // This is not in a transaction, because... + // We're not necessarily too concerned about getting partially + // updated state when resolving these names. See the header discussion + // in `nexus/src/app/background/vpc_routes.rs`: any state updates + // are followed by a version bump/notify, so we will be eventually + // consistent with route resolution. + let mut subnet_names = HashSet::new(); + let mut vpc_names = HashSet::new(); + let mut inetgw_names = HashSet::new(); + let mut instance_names = HashSet::new(); + for rule in &all_rules { + match &rule.target.0 { + RouteTarget::Vpc(n) => { + vpc_names.insert(n.clone()); + } + RouteTarget::Subnet(n) => { + subnet_names.insert(n.clone()); + } + RouteTarget::Instance(n) => { + instance_names.insert(n.clone()); + } + RouteTarget::InternetGateway(n) => { + inetgw_names.insert(n.clone()); + } + _ => {} + } + + match &rule.destination.0 { + RouteDestination::Vpc(n) => { + vpc_names.insert(n.clone()); + } + RouteDestination::Subnet(n) => { + subnet_names.insert(n.clone()); + } + _ => {} + } + } + + // TODO: This would be nice to solve in fewer queries. + let subnets = stream::iter(subnet_names) + .filter_map(|name| async { + db::lookup::LookupPath::new(opctx, self) + .vpc_id(authz_vpc.id()) + .vpc_subnet_name(Name::ref_cast(&name)) + .fetch() + .await + .ok() + .map(|(.., subnet)| (name, subnet)) + }) + .collect::>() + .await; + + // TODO: unused until VPC peering. + let _vpcs = stream::iter(vpc_names) + .filter_map(|name| async { + db::lookup::LookupPath::new(opctx, self) + .project_id(authz_project.id()) + .vpc_name(Name::ref_cast(&name)) + .fetch() + .await + .ok() + .map(|(.., vpc)| (name, vpc)) + }) + .collect::>() + .await; + + let instances = stream::iter(instance_names) + .filter_map(|name| async { + db::lookup::LookupPath::new(opctx, self) + .project_id(authz_project.id()) + .instance_name(Name::ref_cast(&name)) + .fetch() + .await + .ok() + .map(|(.., auth, inst)| (name, auth, inst)) + }) + .filter_map(|(name, authz_instance, instance)| async move { + // XXX: currently an instance can have one primary NIC, + // and it is not dual-stack (v4 + v6). We need + // to clarify what should be resolved in the v6 case. + self.instance_get_primary_network_interface( + opctx, + &authz_instance, + ) + .await + .ok() + .map(|primary_nic| (name, (instance, primary_nic))) + }) + .collect::>() + .await; + + // TODO: validate names of Internet Gateways. + + // See the discussion in `resolve_firewall_rules_for_sled_agent` on + // how we should resolve name misses in route resolution. + // This method adopts the same strategy: a lookup failure corresponds + // to a NO-OP rule. + let mut out = HashMap::new(); + for rule in all_rules { + // Some dests/targets (e.g., subnet) resolve to *several* specifiers + // to handle both v4 and v6. The user-facing API will prevent severe + // mistakes on naked IPs/CIDRs (mixed v4/6), but we need to be smarter + // around named entities here. + let (v4_dest, v6_dest) = match rule.destination.0 { + RouteDestination::Ip(ip @ IpAddr::V4(_)) => { + (Some(IpNet::host_net(ip)), None) + } + RouteDestination::Ip(ip @ IpAddr::V6(_)) => { + (None, Some(IpNet::host_net(ip))) + } + RouteDestination::IpNet(ip @ IpNet::V4(_)) => (Some(ip), None), + RouteDestination::IpNet(ip @ IpNet::V6(_)) => (None, Some(ip)), + RouteDestination::Subnet(n) => subnets + .get(&n) + .map(|s| { + ( + Some(s.ipv4_block.0.into()), + Some(s.ipv6_block.0.into()), + ) + }) + .unwrap_or_default(), + + // TODO: VPC peering. + RouteDestination::Vpc(_) => (None, None), + }; + + let (v4_target, v6_target) = match rule.target.0 { + RouteTarget::Ip(ip @ IpAddr::V4(_)) => { + (Some(RouterTarget::Ip(ip)), None) + } + RouteTarget::Ip(ip @ IpAddr::V6(_)) => { + (None, Some(RouterTarget::Ip(ip))) + } + RouteTarget::Subnet(n) => subnets + .get(&n) + .map(|s| { + ( + Some(RouterTarget::VpcSubnet( + s.ipv4_block.0.into(), + )), + Some(RouterTarget::VpcSubnet( + s.ipv6_block.0.into(), + )), + ) + }) + .unwrap_or_default(), + RouteTarget::Instance(n) => instances + .get(&n) + .map(|i| match i.1.ip { + // TODO: update for dual-stack v4/6. + ip @ IpNetwork::V4(_) => { + (Some(RouterTarget::Ip(ip.ip())), None) + } + ip @ IpNetwork::V6(_) => { + (None, Some(RouterTarget::Ip(ip.ip()))) + } + }) + .unwrap_or_default(), + RouteTarget::Drop => { + (Some(RouterTarget::Drop), Some(RouterTarget::Drop)) + } + + // TODO: Internet Gateways. + // The semantic here is 'name match => allow', + // as the other aspect they will control is SNAT + // IP allocation. Today, presence of this rule + // allows upstream regardless of name. + RouteTarget::InternetGateway(_n) => ( + Some(RouterTarget::InternetGateway), + Some(RouterTarget::InternetGateway), + ), + + // TODO: VPC Peering. + RouteTarget::Vpc(_) => (None, None), + }; + + // XXX: Is there another way we should be handling destination + // collisions within a router? 'first/last wins' is fairly + // arbitrary when lookups are sorted on UUID, but it's + // unpredictable. + // It would be really useful to raise collisions and + // misses to users, somehow. + if let (Some(dest), Some(target)) = (v4_dest, v4_target) { + out.insert(dest, target); + } + + if let (Some(dest), Some(target)) = (v6_dest, v6_target) { + out.insert(dest, target); + } + } + + Ok(out) + } + + /// Trigger an RPW version bump on a single VPC router in response + /// to CRUD operations on individual routes. + pub async fn vpc_router_increment_rpw_version( + &self, + opctx: &OpContext, + router_id: Uuid, + ) -> UpdateResult<()> { + // NOTE: this operation and `vpc_increment_rpw_version` do not + // have auth checks, as these can occur in connection with unrelated + // resources -- the current user may have access to those, but be unable + // to modify the entire set of VPC routers in a project. + + use db::schema::vpc_router::dsl; + diesel::update(dsl::vpc_router) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(router_id)) + .set(dsl::resolved_version.eq(dsl::resolved_version + 1)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } + + /// Trigger an RPW version bump on *all* routers within a VPC in + /// response to changes to named entities (e.g., subnets, instances). + pub async fn vpc_increment_rpw_version( + &self, + opctx: &OpContext, + vpc_id: Uuid, + ) -> UpdateResult<()> { + use db::schema::vpc_router::dsl; + diesel::update(dsl::vpc_router) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::vpc_id.eq(vpc_id)) + .set(dsl::resolved_version.eq(dsl::resolved_version + 1)) + .execute_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } } #[cfg(test)] @@ -1232,6 +1803,7 @@ mod tests { use crate::db::datastore::test_utils::IneligibleSleds; use crate::db::model::Project; use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; + use nexus_db_fixed_data::silo::DEFAULT_SILO; use nexus_db_fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use nexus_db_model::IncompleteNetworkInterface; use nexus_db_model::SledUpdate; @@ -1249,7 +1821,10 @@ mod tests { use omicron_common::api::external::Generation; use omicron_test_utils::dev; use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::InstanceUuid; use omicron_uuid_kinds::SledUuid; + use oxnet::IpNet; + use oxnet::Ipv4Net; use slog::info; // Test that we detect the right error condition and return None when we @@ -1748,4 +2323,487 @@ mod tests { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + async fn create_initial_vpc( + log: &slog::Logger, + opctx: &OpContext, + datastore: &DataStore, + ) -> (authz::Project, authz::Vpc, Vpc, authz::VpcRouter, VpcRouter) { + // Create a project and VPC. + let project_params = params::ProjectCreate { + identity: IdentityMetadataCreateParams { + name: "project".parse().unwrap(), + description: String::from("test project"), + }, + }; + let project = Project::new(DEFAULT_SILO.id(), project_params); + let (authz_project, _) = datastore + .project_create(&opctx, project) + .await + .expect("failed to create project"); + + let vpc_name: external::Name = "my-vpc".parse().unwrap(); + let description = String::from("test vpc"); + let mut incomplete_vpc = IncompleteVpc::new( + Uuid::new_v4(), + authz_project.id(), + Uuid::new_v4(), + params::VpcCreate { + identity: IdentityMetadataCreateParams { + name: vpc_name.clone(), + description: description.clone(), + }, + ipv6_prefix: None, + dns_name: vpc_name.clone(), + }, + ) + .expect("failed to create incomplete VPC"); + let this_vni = Vni(external::Vni::try_from(2048).unwrap()); + incomplete_vpc.vni = this_vni; + info!( + log, + "creating initial VPC"; + "vni" => ?this_vni, + ); + let query = InsertVpcQuery::new(incomplete_vpc); + let (authz_vpc, db_vpc) = datastore + .project_create_vpc_raw(&opctx, &authz_project, query) + .await + .expect("failed to create initial set of VPCs") + .expect("expected an actual VPC"); + info!( + log, + "created VPC"; + "vpc" => ?db_vpc, + ); + + // Now create the system router for this VPC. Subnet CRUD + // operations need this defined to succeed. + let router = VpcRouter::new( + db_vpc.system_router_id, + db_vpc.id(), + VpcRouterKind::System, + nexus_types::external_api::params::VpcRouterCreate { + identity: IdentityMetadataCreateParams { + name: "system".parse().unwrap(), + description: description.clone(), + }, + }, + ); + + let (authz_router, db_router) = datastore + .vpc_create_router(&opctx, &authz_vpc, router) + .await + .unwrap(); + + (authz_project, authz_vpc, db_vpc, authz_router, db_router) + } + + async fn new_subnet_ez( + opctx: &OpContext, + datastore: &DataStore, + db_vpc: &Vpc, + authz_vpc: &authz::Vpc, + name: &str, + ip: [u8; 4], + prefix_len: u8, + ) -> (authz::VpcSubnet, VpcSubnet) { + let ipv6_block = db_vpc + .ipv6_prefix + .random_subnet( + omicron_common::address::VPC_SUBNET_IPV6_PREFIX_LENGTH, + ) + .map(|block| block.0) + .unwrap(); + + datastore + .vpc_create_subnet( + &opctx, + &authz_vpc, + db::model::VpcSubnet::new( + Uuid::new_v4(), + db_vpc.id(), + IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: "A subnet...".into(), + }, + Ipv4Net::new(core::net::Ipv4Addr::from(ip), prefix_len) + .unwrap(), + ipv6_block, + ), + ) + .await + .unwrap() + } + + // Test to verify that subnet CRUD operations are correctly + // reflected in the nexus-managed system router attached to a VPC, + // and that these resolve to the v4/6 subnets of each. + #[tokio::test] + async fn test_vpc_system_router_sync_to_subnets() { + usdt::register_probes().unwrap(); + let logctx = + dev::test_setup_log("test_vpc_system_router_sync_to_subnets"); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let (_, authz_vpc, db_vpc, _, db_router) = + create_initial_vpc(log, &opctx, &datastore).await; + + // InternetGateway route creation is handled by the saga proper, + // so we'll only have subnet routes here. Initially, we start with none: + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[], + ) + .await; + + // Add a new subnet and we should get a new route. + let (authz_sub0, sub0) = new_subnet_ez( + &opctx, + &datastore, + &db_vpc, + &authz_vpc, + "s0", + [172, 30, 0, 0], + 22, + ) + .await; + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub0], + ) + .await; + + // Add another, and get another route. + let (authz_sub1, sub1) = new_subnet_ez( + &opctx, + &datastore, + &db_vpc, + &authz_vpc, + "s1", + [172, 31, 0, 0], + 22, + ) + .await; + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub0, &sub1], + ) + .await; + + // Rename one subnet, and our invariants should hold. + let sub0 = datastore + .vpc_update_subnet( + &opctx, + &authz_sub0, + VpcSubnetUpdate { + name: Some( + "a-new-name".parse::().unwrap().into(), + ), + description: None, + time_modified: Utc::now(), + }, + ) + .await + .unwrap(); + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub0, &sub1], + ) + .await; + + // Delete one, and routes should stay in sync. + datastore.vpc_delete_subnet(&opctx, &sub0, &authz_sub0).await.unwrap(); + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub1], + ) + .await; + + // If we use a reserved name, we should be able to update the table. + let sub1 = datastore + .vpc_update_subnet( + &opctx, + &authz_sub1, + VpcSubnetUpdate { + name: Some( + "default-v4".parse::().unwrap().into(), + ), + description: None, + time_modified: Utc::now(), + }, + ) + .await + .unwrap(); + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub1], + ) + .await; + + // Ditto for adding such a route. + let (_, sub0) = new_subnet_ez( + &opctx, + &datastore, + &db_vpc, + &authz_vpc, + "default-v6", + [172, 30, 0, 0], + 22, + ) + .await; + + verify_all_subnet_routes_in_router( + &opctx, + &datastore, + db_router.id(), + &[&sub0, &sub1], + ) + .await; + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + async fn verify_all_subnet_routes_in_router( + opctx: &OpContext, + datastore: &DataStore, + router_id: Uuid, + subnets: &[&VpcSubnet], + ) -> Vec { + let conn = datastore.pool_connection_authorized(opctx).await.unwrap(); + + use db::schema::router_route::dsl; + let routes = dsl::router_route + .filter(dsl::time_deleted.is_null()) + .filter(dsl::vpc_router_id.eq(router_id)) + .filter(dsl::kind.eq(RouterRouteKind(ExternalRouteKind::VpcSubnet))) + .select(RouterRoute::as_select()) + .load_async(&*conn) + .await + .unwrap(); + + // We should have exactly as many subnet routes as subnets. + assert_eq!(routes.len(), subnets.len()); + + let mut names: HashMap<_, _> = + subnets.iter().map(|s| (s.name().clone(), 0usize)).collect(); + + // Each should have a target+dest bound to a subnet by name. + for route in &routes { + let found_name = match &route.target.0 { + RouteTarget::Subnet(name) => name, + e => panic!("found target {e:?} instead of Subnet({{name}})"), + }; + + match &route.destination.0 { + RouteDestination::Subnet(name) => assert_eq!(name, found_name), + e => panic!("found dest {e:?} instead of Subnet({{name}})"), + } + + *names.get_mut(found_name).unwrap() += 1; + } + + // Each name should be used exactly once. + for (name, count) in names { + assert_eq!(count, 1, "subnet {name} should appear exactly once") + } + + // Resolve the routes: we should have two for each entry: + let resolved = datastore + .vpc_resolve_router_rules(&opctx, router_id) + .await + .unwrap(); + assert_eq!(resolved.len(), 2 * subnets.len()); + + // And each subnet generates a v4->v4 and v6->v6. + for subnet in subnets { + assert!(resolved.iter().any(|(k, v)| { + *k == subnet.ipv4_block.0.into() + && match v { + RouterTarget::VpcSubnet(ip) => { + *ip == subnet.ipv4_block.0.into() + } + _ => false, + } + })); + assert!(resolved.iter().any(|(k, v)| { + *k == subnet.ipv6_block.0.into() + && match v { + RouterTarget::VpcSubnet(ip) => { + *ip == subnet.ipv6_block.0.into() + } + _ => false, + } + })); + } + + routes + } + + // Test to verify that VPC routers resolve to the primary addr + // of an instance NIC. + #[tokio::test] + async fn test_vpc_router_rule_instance_resolve() { + usdt::register_probes().unwrap(); + let logctx = + dev::test_setup_log("test_vpc_router_rule_instance_resolve"); + let log = &logctx.log; + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let (authz_project, authz_vpc, db_vpc, authz_router, _) = + create_initial_vpc(log, &opctx, &datastore).await; + + // Create a subnet for an instance to live in. + let (authz_sub0, sub0) = new_subnet_ez( + &opctx, + &datastore, + &db_vpc, + &authz_vpc, + "s0", + [172, 30, 0, 0], + 22, + ) + .await; + + // Add a rule pointing to the instance before it is created. + // We're commiting some minor data integrity sins by putting + // these into a system router, but that's irrelevant to resolution. + let inst_name = "insty".parse::().unwrap(); + let _ = datastore + .router_create_route( + &opctx, + &authz_router, + RouterRoute::new( + Uuid::new_v4(), + authz_router.id(), + external::RouterRouteKind::Custom, + params::RouterRouteCreate { + identity: IdentityMetadataCreateParams { + name: "to-vpn".parse().unwrap(), + description: "A rule...".into(), + }, + target: external::RouteTarget::Instance( + inst_name.clone(), + ), + destination: external::RouteDestination::IpNet( + "192.168.0.0/16".parse().unwrap(), + ), + }, + ), + ) + .await + .unwrap(); + + // Resolve the rules: we will have two entries generated by the + // VPC subnet (v4, v6). + let routes = datastore + .vpc_resolve_router_rules(&opctx, authz_router.id()) + .await + .unwrap(); + + assert_eq!(routes.len(), 2); + + // Create an instance, this will have no effect for now as + // the instance lacks a NIC. + let db_inst = datastore + .project_create_instance( + &opctx, + &authz_project, + db::model::Instance::new( + InstanceUuid::new_v4(), + authz_project.id(), + ¶ms::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: inst_name.clone(), + description: "An instance...".into(), + }, + ncpus: external::InstanceCpuCount(1), + memory: 10.into(), + hostname: "insty".parse().unwrap(), + user_data: vec![], + network_interfaces: + params::InstanceNetworkInterfaceAttachment::None, + external_ips: vec![], + disks: vec![], + ssh_public_keys: None, + start: false, + }, + ), + ) + .await + .unwrap(); + let (.., authz_instance) = + db::lookup::LookupPath::new(&opctx, &datastore) + .instance_id(db_inst.id()) + .lookup_for(authz::Action::CreateChild) + .await + .unwrap(); + + let routes = datastore + .vpc_resolve_router_rules(&opctx, authz_router.id()) + .await + .unwrap(); + + assert_eq!(routes.len(), 2); + + // Create a primary NIC on the instance; the route can now resolve + // to the instance's IP. + let nic = datastore + .instance_create_network_interface( + &opctx, + &authz_sub0, + &authz_instance, + IncompleteNetworkInterface::new_instance( + Uuid::new_v4(), + InstanceUuid::from_untyped_uuid(db_inst.id()), + sub0, + IdentityMetadataCreateParams { + name: "nic".parse().unwrap(), + description: "A NIC...".into(), + }, + None, + ) + .unwrap(), + ) + .await + .unwrap(); + + let routes = datastore + .vpc_resolve_router_rules(&opctx, authz_router.id()) + .await + .unwrap(); + + // Verify we now have a route pointing at this instance. + assert_eq!(routes.len(), 3); + assert!(routes.iter().any(|(k, v)| (*k + == "192.168.0.0/16".parse::().unwrap()) + && match v { + RouterTarget::Ip(ip) => *ip == nic.ip.ip(), + _ => false, + })); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 3d98f80f4a..c5bef1f517 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -25,6 +25,7 @@ use super::tasks::service_firewall_rules; use super::tasks::sync_service_zone_nat::ServiceZoneNatTracker; use super::tasks::sync_switch_configuration::SwitchPortSettingsManager; use super::tasks::v2p_mappings::V2PManager; +use super::tasks::vpc_routes; use super::Driver; use super::TaskHandle; use crate::app::oximeter::PRODUCER_LEASE_DURATION; @@ -118,6 +119,9 @@ pub struct BackgroundTasks { /// task handle for deletion of database records for VMMs abandoned by their /// instances. pub task_abandoned_vmm_reaper: TaskHandle, + + /// task handle for propagation of VPC router rules to all OPTE ports + pub task_vpc_route_manager: TaskHandle, } impl BackgroundTasks { @@ -457,6 +461,19 @@ impl BackgroundTasks { vec![], ); + // Background task: OPTE port route propagation + let task_vpc_route_manager = { + let watcher = vpc_routes::VpcRouteManager::new(datastore.clone()); + driver.register( + "vpc_route_manager".to_string(), + "propagates updated VPC routes to all OPTE ports".into(), + config.switch_port_settings_manager.period_secs, + Box::new(watcher), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + // Background task: abandoned VMM reaping let task_abandoned_vmm_reaper = driver.register( String::from("abandoned_vmm_reaper"), @@ -495,6 +512,7 @@ impl BackgroundTasks { task_instance_watcher, task_service_firewall_propagation, task_abandoned_vmm_reaper, + task_vpc_route_manager, } } diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index 3886b43a30..cb2ab46c2a 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -26,3 +26,4 @@ pub mod service_firewall_rules; pub mod sync_service_zone_nat; pub mod sync_switch_configuration; pub mod v2p_mappings; +pub mod vpc_routes; diff --git a/nexus/src/app/background/tasks/vpc_routes.rs b/nexus/src/app/background/tasks/vpc_routes.rs new file mode 100644 index 0000000000..5ba428308b --- /dev/null +++ b/nexus/src/app/background/tasks/vpc_routes.rs @@ -0,0 +1,283 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for propagating VPC routes (system and custom) to sleds. + +use crate::app::background::BackgroundTask; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::{Sled, SledState, Vni}; +use nexus_db_queries::{context::OpContext, db::DataStore}; +use nexus_networking::sled_client_from_address; +use nexus_types::{ + deployment::SledFilter, external_api::views::SledPolicy, identity::Asset, + identity::Resource, +}; +use omicron_common::api::internal::shared::{ + ResolvedVpcRoute, ResolvedVpcRouteSet, RouterId, RouterKind, RouterVersion, +}; +use serde_json::json; +use std::collections::hash_map::Entry; +use std::{ + collections::{HashMap, HashSet}, + sync::Arc, +}; +use uuid::Uuid; + +pub struct VpcRouteManager { + datastore: Arc, +} + +impl VpcRouteManager { + pub fn new(datastore: Arc) -> Self { + Self { datastore } + } +} + +// This RPW doesn't concern itself overly much with resolved router targets +// and destinations being partial wrt. the current generation, in the same +// vein as how firewall rules are handled. Gating *pushing* this update on a +// generation number can be a bit more risky, but there's a sort of eventual +// consistency happening here that keeps this safe. +// +// Any location which updates name-resolvable state follows the pattern: +// * Update state. +// * Update (VPC-wide) router generation numbers. +// * Awaken this task. This might happen indirectly via e.g. instance start. +// +// As a result, any update which accidentally sees partial state will be followed +// by re-triggering this RPW with a higher generation number, giving us a re-resolved +// route set and pushing to any relevant sleds. +impl BackgroundTask for VpcRouteManager { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let log = &opctx.log; + + let sleds = match self + .datastore + .sled_list_all_batched(opctx, SledFilter::InService) + .await + { + Ok(v) => v, + Err(e) => { + let msg = format!("failed to enumerate sleds: {:#}", e); + error!(&log, "{msg}"); + return json!({"error": msg}); + } + } + .into_iter() + .filter(|sled| { + matches!(sled.state(), SledState::Active) + && matches!(sled.policy(), SledPolicy::InService { .. }) + }); + + // Map sled db records to sled-agent clients + let sled_clients: Vec<(Sled, sled_agent_client::Client)> = sleds + .map(|sled| { + let client = sled_client_from_address( + sled.id(), + sled.address(), + &log, + ); + (sled, client) + }) + .collect(); + + let mut known_rules: HashMap> = + HashMap::new(); + let mut db_routers = HashMap::new(); + let mut vni_to_vpc = HashMap::new(); + + for (sled, client) in sled_clients { + let Ok(route_sets) = client.list_vpc_routes().await else { + warn!( + log, + "failed to fetch current VPC route state from sled"; + "sled" => sled.serial_number(), + ); + continue; + }; + + let route_sets = route_sets.into_inner(); + + // Lookup all VPC<->Subnet<->Router associations we might need, + // based on the set of VNIs reported by this sled. + // These provide the versions we'll stick with -- in the worst + // case we push newer state to a sled with an older generation + // number, which will be fixed up on the next activation. + for set in &route_sets { + let db_vni = Vni(set.id.vni); + let maybe_vpc = vni_to_vpc.entry(set.id.vni); + let vpc = match maybe_vpc { + Entry::Occupied(_) => { + continue; + } + Entry::Vacant(v) => { + let Ok(vpc) = self + .datastore + .resolve_vni_to_vpc(opctx, db_vni) + .await + else { + error!( + log, + "failed to fetch VPC from VNI"; + "sled" => sled.serial_number(), + "vni" => ?db_vni + ); + continue; + }; + + v.insert(vpc) + } + }; + + let vpc_id = vpc.identity().id; + + let Ok(system_router) = self + .datastore + .vpc_get_system_router(opctx, vpc_id) + .await + else { + error!( + log, + "failed to fetch system router for VPC"; + "vpc" => vpc_id.to_string() + ); + continue; + }; + + let Ok(custom_routers) = self + .datastore + .vpc_get_active_custom_routers(opctx, vpc_id) + .await + else { + error!( + log, + "failed to fetch custom routers for VPC"; + "vpc" => vpc_id.to_string() + ); + continue; + }; + + db_routers.insert( + RouterId { vni: set.id.vni, kind: RouterKind::System }, + system_router, + ); + db_routers.extend(custom_routers.iter().map( + |(subnet, router)| { + ( + RouterId { + vni: set.id.vni, + kind: RouterKind::Custom( + subnet.ipv4_block.0.into(), + ), + }, + router.clone(), + ) + }, + )); + db_routers.extend(custom_routers.into_iter().map( + |(subnet, router)| { + ( + RouterId { + vni: set.id.vni, + kind: RouterKind::Custom( + subnet.ipv6_block.0.into(), + ), + }, + router, + ) + }, + )); + } + + let mut to_push = Vec::new(); + let mut set_rules = |id, version, routes| { + to_push.push(ResolvedVpcRouteSet { id, routes, version }); + }; + + // resolve into known_rules on an as-needed basis. + for set in &route_sets { + let Some(db_router) = db_routers.get(&set.id) else { + // The sled wants to know about rules for a VPC + // subnet with no custom router set. Send them + // the empty list, and unset its table version. + set_rules(set.id, None, HashSet::new()); + continue; + }; + + let router_id = db_router.id(); + let version = RouterVersion { + version: db_router.resolved_version as u64, + router_id, + }; + + // Only attempt to resolve/push a ruleset if we have a + // different router ID than the sled, or a higher version + // number. + match &set.version { + Some(v) if !v.is_replaced_by(&version) => { + continue; + } + _ => {} + } + + // We may have already resolved the rules for this + // router in a previous iteration. + if let Some(rules) = known_rules.get(&router_id) { + set_rules(set.id, Some(version), rules.clone()); + continue; + } + + match self + .datastore + .vpc_resolve_router_rules( + opctx, + db_router.identity().id, + ) + .await + { + Ok(rules) => { + let collapsed: HashSet<_> = rules + .into_iter() + .map(|(dest, target)| ResolvedVpcRoute { + dest, + target, + }) + .collect(); + set_rules(set.id, Some(version), collapsed.clone()); + known_rules.insert(router_id, collapsed); + } + Err(e) => { + error!( + &log, + "failed to compute subnet routes"; + "router" => router_id.to_string(), + "err" => e.to_string() + ); + } + } + } + + if !to_push.is_empty() { + if let Err(e) = client.set_vpc_routes(&to_push).await { + error!( + log, + "failed to push new VPC route state from sled"; + "sled" => sled.serial_number(), + "err" => ?e + ); + continue; + }; + } + } + + json!({}) + } + .boxed() + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index ee70b8bb06..6b4d87063a 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1551,6 +1551,7 @@ impl super::Nexus { self.v2p_notification_tx.clone(), ) .await?; + self.vpc_needed_notify_sleds(); Ok(()) } diff --git a/nexus/src/app/sagas/vpc_create.rs b/nexus/src/app/sagas/vpc_create.rs index cc40a8d43a..2b6615ad40 100644 --- a/nexus/src/app/sagas/vpc_create.rs +++ b/nexus/src/app/sagas/vpc_create.rs @@ -17,6 +17,7 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::RouteDestination; use omicron_common::api::external::RouteTarget; use omicron_common::api::external::RouterRouteKind; +use oxnet::IpNet; use serde::Deserialize; use serde::Serialize; use steno::ActionError; @@ -44,9 +45,13 @@ declare_saga_actions! { + svc_create_router - svc_create_router_undo } - VPC_CREATE_ROUTE -> "route" { - + svc_create_route - - svc_create_route_undo + VPC_CREATE_V4_ROUTE -> "route4" { + + svc_create_v4_route + - svc_create_v4_route_undo + } + VPC_CREATE_V6_ROUTE -> "route6" { + + svc_create_v6_route + - svc_create_v6_route_undo } VPC_CREATE_SUBNET -> "subnet" { + svc_create_subnet @@ -79,8 +84,13 @@ pub fn create_dag( ACTION_GENERATE_ID.as_ref(), )); builder.append(Node::action( - "default_route_id", - "GenerateDefaultRouteId", + "default_v4_route_id", + "GenerateDefaultV4RouteId", + ACTION_GENERATE_ID.as_ref(), + )); + builder.append(Node::action( + "default_v6_route_id", + "GenerateDefaultV6RouteId", ACTION_GENERATE_ID.as_ref(), )); builder.append(Node::action( @@ -90,7 +100,8 @@ pub fn create_dag( )); builder.append(vpc_create_vpc_action()); builder.append(vpc_create_router_action()); - builder.append(vpc_create_route_action()); + builder.append(vpc_create_v4_route_action()); + builder.append(vpc_create_v6_route_action()); builder.append(vpc_create_subnet_action()); builder.append(vpc_update_firewall_action()); builder.append(vpc_notify_sleds_action()); @@ -217,8 +228,45 @@ async fn svc_create_router_undo( Ok(()) } +async fn svc_create_v4_route( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let default_route_id = sagactx.lookup::("default_v4_route_id")?; + let default_route = + "0.0.0.0/0".parse().expect("known-valid specifier for a default route"); + svc_create_route(sagactx, default_route_id, default_route, "default-v4") + .await +} + +async fn svc_create_v4_route_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let route_id = sagactx.lookup::("default_v4_route_id")?; + svc_create_route_undo(sagactx, route_id).await +} + +async fn svc_create_v6_route( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let default_route_id = sagactx.lookup::("default_v6_route_id")?; + let default_route = + "::/0".parse().expect("known-valid specifier for a default route"); + svc_create_route(sagactx, default_route_id, default_route, "default-v6") + .await +} + +async fn svc_create_v6_route_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let route_id = sagactx.lookup::("default_v6_route_id")?; + svc_create_route_undo(sagactx, route_id).await +} + async fn svc_create_route( sagactx: NexusActionContext, + route_id: Uuid, + default_net: IpNet, + name: &str, ) -> Result<(), ActionError> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -226,23 +274,20 @@ async fn svc_create_route( &sagactx, ¶ms.serialized_authn, ); - let default_route_id = sagactx.lookup::("default_route_id")?; let system_router_id = sagactx.lookup::("system_router_id")?; let authz_router = sagactx.lookup::("router")?; let route = db::model::RouterRoute::new( - default_route_id, + route_id, system_router_id, RouterRouteKind::Default, params::RouterRouteCreate { identity: IdentityMetadataCreateParams { - name: "default".parse().unwrap(), + name: name.parse().unwrap(), description: "The default route of a vpc".to_string(), }, target: RouteTarget::InternetGateway("outbound".parse().unwrap()), - destination: RouteDestination::Vpc( - params.vpc_create.identity.name.clone(), - ), + destination: RouteDestination::IpNet(default_net), }, ); @@ -256,6 +301,7 @@ async fn svc_create_route( async fn svc_create_route_undo( sagactx: NexusActionContext, + route_id: Uuid, ) -> Result<(), anyhow::Error> { let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; @@ -264,7 +310,6 @@ async fn svc_create_route_undo( ¶ms.serialized_authn, ); let authz_router = sagactx.lookup::("router")?; - let route_id = sagactx.lookup::("default_route_id")?; let authz_route = authz::RouterRoute::new( authz_router, route_id, @@ -538,12 +583,25 @@ pub(crate) mod test { .await .expect("Failed to delete default Subnet"); - // Default route + // Default gateway routes + let (.., authz_route, _route) = LookupPath::new(&opctx, &datastore) + .project_id(project_id) + .vpc_name(&default_name.clone().into()) + .vpc_router_name(&system_name.clone().into()) + .router_route_name(&"default-v4".parse::().unwrap().into()) + .fetch() + .await + .expect("Failed to fetch default route"); + datastore + .router_delete_route(&opctx, &authz_route) + .await + .expect("Failed to delete default route"); + let (.., authz_route, _route) = LookupPath::new(&opctx, &datastore) .project_id(project_id) .vpc_name(&default_name.clone().into()) .vpc_router_name(&system_name.clone().into()) - .router_route_name(&default_name.clone().into()) + .router_route_name(&"default-v6".parse::().unwrap().into()) .fetch() .await .expect("Failed to fetch default route"); diff --git a/nexus/src/app/vpc_router.rs b/nexus/src/app/vpc_router.rs index 523a450bbd..40b4c1de0f 100644 --- a/nexus/src/app/vpc_router.rs +++ b/nexus/src/app/vpc_router.rs @@ -83,6 +83,10 @@ impl super::Nexus { .db_datastore .vpc_create_router(&opctx, &authz_vpc, router) .await?; + + // Note: we don't trigger the route RPW here as it's impossible + // for the router to be bound to a subnet at this point. + Ok(router) } @@ -114,9 +118,6 @@ impl super::Nexus { .await } - // TODO: When a router is deleted all its routes should be deleted - // TODO: When a router is deleted it should be unassociated w/ any subnets it may be associated with - // or trigger an error pub(crate) async fn vpc_delete_router( &self, opctx: &OpContext, @@ -131,7 +132,12 @@ impl super::Nexus { if db_router.kind == VpcRouterKind::System { return Err(Error::invalid_request("Cannot delete system router")); } - self.db_datastore.vpc_delete_router(opctx, &authz_router).await + let out = + self.db_datastore.vpc_delete_router(opctx, &authz_router).await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } // Routes @@ -198,6 +204,9 @@ impl super::Nexus { .db_datastore .router_create_route(&opctx, &authz_router, route) .await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + Ok(route) } @@ -220,7 +229,7 @@ impl super::Nexus { route_lookup: &lookup::RouterRoute<'_>, params: ¶ms::RouterRouteUpdate, ) -> UpdateResult { - let (.., vpc, _, authz_route, db_route) = + let (.., vpc, authz_router, authz_route, db_route) = route_lookup.fetch_for(authz::Action::Modify).await?; // TODO: Write a test for this once there's a way to test it (i.e. // subnets automatically register to the system router table) @@ -235,9 +244,14 @@ impl super::Nexus { ))); } } - self.db_datastore + let out = self + .db_datastore .router_update_route(&opctx, &authz_route, params.clone().into()) - .await + .await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + + Ok(out) } pub(crate) async fn router_delete_route( @@ -245,7 +259,7 @@ impl super::Nexus { opctx: &OpContext, route_lookup: &lookup::RouterRoute<'_>, ) -> DeleteResult { - let (.., authz_route, db_route) = + let (.., authz_router, authz_route, db_route) = route_lookup.fetch_for(authz::Action::Delete).await?; // Only custom routes can be deleted @@ -255,6 +269,37 @@ impl super::Nexus { "DELETE not allowed on system routes", )); } - self.db_datastore.router_delete_route(opctx, &authz_route).await + let out = + self.db_datastore.router_delete_route(opctx, &authz_route).await?; + + self.vpc_router_increment_rpw_version(opctx, &authz_router).await?; + + Ok(out) + } + + /// Trigger the VPC routing RPW in repsonse to a state change + /// or a new possible listener (e.g., instance/probe start, NIC + /// create). + pub(crate) fn vpc_needed_notify_sleds(&self) { + self.background_tasks + .activate(&self.background_tasks.task_vpc_route_manager) + } + + /// Trigger an RPW version bump on a single VPC router in response + /// to CRUD operations on individual routes. + /// + /// This will also awaken the VPC Router RPW. + pub(crate) async fn vpc_router_increment_rpw_version( + &self, + opctx: &OpContext, + authz_router: &authz::VpcRouter, + ) -> UpdateResult<()> { + self.datastore() + .vpc_router_increment_rpw_version(opctx, authz_router.id()) + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(()) } } diff --git a/nexus/src/app/vpc_subnet.rs b/nexus/src/app/vpc_subnet.rs index f081f351db..478e1af9f9 100644 --- a/nexus/src/app/vpc_subnet.rs +++ b/nexus/src/app/vpc_subnet.rs @@ -64,8 +64,7 @@ impl super::Nexus { )), } } - // TODO: When a subnet is created it should add a route entry into the VPC's - // system router + pub(crate) async fn vpc_create_subnet( &self, opctx: &OpContext, @@ -109,7 +108,7 @@ impl super::Nexus { // See for // details. let subnet_id = Uuid::new_v4(); - match params.ipv6_block { + let out = match params.ipv6_block { None => { const NUM_RETRIES: usize = 2; let mut retry = 0; @@ -213,7 +212,11 @@ impl super::Nexus { .map(|(.., subnet)| subnet) .map_err(SubnetError::into_external) } - } + }?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } pub(crate) async fn vpc_subnet_list( @@ -235,13 +238,16 @@ impl super::Nexus { ) -> UpdateResult { let (.., authz_subnet) = vpc_subnet_lookup.lookup_for(authz::Action::Modify).await?; - self.db_datastore + let out = self + .db_datastore .vpc_update_subnet(&opctx, &authz_subnet, params.clone().into()) - .await + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } - // TODO: When a subnet is deleted it should remove its entry from the VPC's - // system router. pub(crate) async fn vpc_delete_subnet( &self, opctx: &OpContext, @@ -249,9 +255,14 @@ impl super::Nexus { ) -> DeleteResult { let (.., authz_subnet, db_subnet) = vpc_subnet_lookup.fetch_for(authz::Action::Delete).await?; - self.db_datastore + let out = self + .db_datastore .vpc_delete_subnet(opctx, &db_subnet, &authz_subnet) - .await + .await?; + + self.vpc_needed_notify_sleds(); + + Ok(out) } pub(crate) async fn subnet_list_instance_network_interfaces( diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 350836441e..e814df2b61 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5704,7 +5704,7 @@ async fn vpc_router_route_view( .await } -/// Create router +/// Create route #[endpoint { method = POST, path = "/v1/vpc-router-routes", diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 4f7a1d1b77..75ddf847bf 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -18,6 +18,7 @@ use nexus_db_queries::context::OpContext; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO_ID; use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::DataStore; use nexus_test_interface::NexusServer; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; @@ -59,6 +60,9 @@ use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::Vni; +use omicron_common::api::internal::shared::ResolvedVpcRoute; +use omicron_common::api::internal::shared::RouterId; +use omicron_common::api::internal::shared::RouterKind; use omicron_nexus::app::MAX_MEMORY_BYTES_PER_INSTANCE; use omicron_nexus::app::MAX_VCPU_PER_INSTANCE; use omicron_nexus::app::MIN_MEMORY_BYTES_PER_INSTANCE; @@ -70,6 +74,7 @@ use omicron_uuid_kinds::PropolisUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use sled_agent_client::TestInterfaces as _; +use std::collections::HashSet; use std::convert::TryFrom; use std::net::Ipv4Addr; use std::sync::Arc; @@ -672,6 +677,30 @@ async fn test_instance_start_creates_networking_state( for agent in &sled_agents { assert_sled_v2p_mappings(agent, &nics[0], guest_nics[0].vni).await; } + + // Ensure that the target sled agent for our instance has received + // up-to-date VPC routes. + let with_vmm = datastore + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .unwrap(); + + let mut checked = false; + for agent in &sled_agents { + if Some(agent.id) == with_vmm.sled_id().map(SledUuid::into_untyped_uuid) + { + assert_sled_vpc_routes( + agent, + &opctx, + datastore, + nics[0].subnet_id, + guest_nics[0].vni, + ) + .await; + checked = true; + } + } + assert!(checked); } #[nexus_test] @@ -836,7 +865,9 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { } #[nexus_test] -async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { +async fn test_instance_migrate_v2p_and_routes( + cptestctx: &ControlPlaneTestContext, +) { let client = &cptestctx.external_client; let apictx = &cptestctx.server.server_context(); let nexus = &apictx.nexus; @@ -965,6 +996,15 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { if sled_agent.id != dst_sled_id.into_untyped_uuid() { assert_sled_v2p_mappings(sled_agent, &nics[0], guest_nics[0].vni) .await; + } else { + assert_sled_vpc_routes( + sled_agent, + &opctx, + datastore, + nics[0].subnet_id, + guest_nics[0].vni, + ) + .await; } } } @@ -4764,6 +4804,78 @@ async fn assert_sled_v2p_mappings( .expect("matching v2p mapping should be present"); } +/// Asserts that supplied sled agent's most recent VPC route sets +/// contain up-to-date routes for a known subnet. +pub async fn assert_sled_vpc_routes( + sled_agent: &Arc, + opctx: &OpContext, + datastore: &DataStore, + subnet_id: Uuid, + vni: Vni, +) { + let (.., authz_vpc, _, db_subnet) = LookupPath::new(opctx, datastore) + .vpc_subnet_id(subnet_id) + .fetch() + .await + .unwrap(); + + let custom_routes: HashSet<_> = + if let Some(router_id) = db_subnet.custom_router_id { + datastore + .vpc_resolve_router_rules(opctx, router_id) + .await + .unwrap() + .into_iter() + .map(|(dest, target)| ResolvedVpcRoute { dest, target }) + .collect() + } else { + Default::default() + }; + + let (.., vpc) = LookupPath::new(opctx, datastore) + .vpc_id(authz_vpc.id()) + .fetch() + .await + .unwrap(); + + let system_routes: HashSet<_> = datastore + .vpc_resolve_router_rules(opctx, vpc.system_router_id) + .await + .unwrap() + .into_iter() + .map(|(dest, target)| ResolvedVpcRoute { dest, target }) + .collect(); + + assert!(!system_routes.is_empty()); + + let condition = || async { + let vpc_routes = sled_agent.vpc_routes.lock().await; + let sys_routes_found = vpc_routes.iter().any(|(id, set)| { + *id == RouterId { vni, kind: RouterKind::System } + && set.routes == system_routes + }); + let custom_routes_found = vpc_routes.iter().any(|(id, set)| { + *id == RouterId { + vni, + kind: RouterKind::Custom(db_subnet.ipv4_block.0.into()), + } && set.routes == custom_routes + }); + + if sys_routes_found && custom_routes_found { + Ok(()) + } else { + Err(CondCheckError::NotYet::<()>) + } + }; + wait_for_condition( + condition, + &Duration::from_secs(1), + &Duration::from_secs(30), + ) + .await + .expect("matching vpc routes should be present"); +} + /// Simulate completion of an ongoing instance state transition. To do this, we /// have to look up the instance, then get the sled agent associated with that /// instance, and then tell it to finish simulating whatever async transition is diff --git a/nexus/tests/integration_tests/router_routes.rs b/nexus/tests/integration_tests/router_routes.rs index 10c594bba9..79a5db8eaf 100644 --- a/nexus/tests/integration_tests/router_routes.rs +++ b/nexus/tests/integration_tests/router_routes.rs @@ -10,10 +10,12 @@ use nexus_test_utils::identity_eq; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; +use omicron_common::api::external::SimpleIdentity; use omicron_common::api::external::{ IdentityMetadataCreateParams, IdentityMetadataUpdateParams, RouteDestination, RouteTarget, RouterRoute, RouterRouteKind, }; +use oxnet::IpNet; use std::net::IpAddr; use std::net::Ipv4Addr; @@ -59,27 +61,48 @@ async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { .await .items; - // The system should start with a single, pre-configured route - assert_eq!(system_router_routes.len(), 1); - - // That route should be the default route - let default_route = &system_router_routes[0]; - assert_eq!(default_route.kind, RouterRouteKind::Default); - - // It errors if you try to delete the default route - let error: dropshot::HttpErrorResponseBody = NexusRequest::expect_failure( - client, - StatusCode::BAD_REQUEST, - Method::DELETE, - get_route_url("system", "default").as_str(), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - assert_eq!(error.message, "DELETE not allowed on system routes"); + // The system should start with three preconfigured routes: + // - a default v4 gateway route + // - a default v6 gateway route + // - a managed subnet route for the 'default' subnet + assert_eq!(system_router_routes.len(), 3); + + let mut v4_route = None; + let mut v6_route = None; + let mut subnet_route = None; + for route in system_router_routes { + match (&route.kind, &route.destination, &route.target) { + (RouterRouteKind::Default, RouteDestination::IpNet(IpNet::V4(_)), RouteTarget::InternetGateway(_)) => {v4_route = Some(route);}, + (RouterRouteKind::Default, RouteDestination::IpNet(IpNet::V6(_)), RouteTarget::InternetGateway(_)) => {v6_route = Some(route);}, + (RouterRouteKind::VpcSubnet, RouteDestination::Subnet(n0), RouteTarget::Subnet(n1)) if n0 == n1 && n0.as_str() == "default" => {subnet_route = Some(route);}, + _ => panic!("unexpected system route {route:?} -- wanted gateway and subnet"), + } + } + + let v4_route = + v4_route.expect("no v4 gateway route found in system router"); + let v6_route = + v6_route.expect("no v6 gateway route found in system router"); + let subnet_route = + subnet_route.expect("no default subnet route found in system router"); + + // Deleting any default system route is disallowed. + for route in &[&v4_route, &v6_route, &subnet_route] { + let error: dropshot::HttpErrorResponseBody = + NexusRequest::expect_failure( + client, + StatusCode::BAD_REQUEST, + Method::DELETE, + get_route_url("system", route.name().as_str()).as_str(), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(error.message, "DELETE not allowed on system routes"); + } // Create a custom router create_router(&client, project_name, vpc_name, router_name).await; diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 394bef5d2f..8e2ee39c21 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -262,7 +262,7 @@ pub struct Vpc { } /// A VPC subnet represents a logical grouping for instances that allows network traffic between -/// them, within a IPv4 subnetwork or optionall an IPv6 subnetwork. +/// them, within a IPv4 subnetwork or optionally an IPv6 subnetwork. #[derive(ObjectIdentity, Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct VpcSubnet { /// common identifying metadata @@ -277,6 +277,9 @@ pub struct VpcSubnet { /// The IPv6 subnet CIDR block. pub ipv6_block: Ipv6Net, + + /// ID for an attached custom router. + pub custom_router_id: Option, } #[derive(Clone, Copy, Debug, Deserialize, Serialize, PartialEq, JsonSchema)] diff --git a/openapi/nexus.json b/openapi/nexus.json index a985a3e42c..f4cd1ce89e 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -19760,9 +19760,15 @@ ] }, "VpcSubnet": { - "description": "A VPC subnet represents a logical grouping for instances that allows network traffic between them, within a IPv4 subnetwork or optionall an IPv6 subnetwork.", + "description": "A VPC subnet represents a logical grouping for instances that allows network traffic between them, within a IPv4 subnetwork or optionally an IPv6 subnetwork.", "type": "object", "properties": { + "custom_router_id": { + "nullable": true, + "description": "ID for an attached custom router.", + "type": "string", + "format": "uuid" + }, "description": { "description": "human-readable free-form text about a resource", "type": "string" diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 05081d8298..3ac130c565 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -950,6 +950,63 @@ } } }, + "/vpc-routes": { + "get": { + "summary": "Get the current versions of VPC routing rules.", + "operationId": "list_vpc_routes", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_ResolvedVpcRouteState", + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRouteState" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "summary": "Update VPC routing rules.", + "operationId": "set_vpc_routes", + "requestBody": { + "content": { + "application/json": { + "schema": { + "title": "Array_of_ResolvedVpcRouteSet", + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRouteSet" + } + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/zones": { "get": { "summary": "List the zones that are currently managed by the sled agent.", @@ -4236,6 +4293,70 @@ "rack_subnet" ] }, + "ResolvedVpcRoute": { + "description": "A VPC route resolved into a concrete target.", + "type": "object", + "properties": { + "dest": { + "$ref": "#/components/schemas/IpNet" + }, + "target": { + "$ref": "#/components/schemas/RouterTarget" + } + }, + "required": [ + "dest", + "target" + ] + }, + "ResolvedVpcRouteSet": { + "description": "An updated set of routes for a given VPC and/or subnet.", + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/RouterId" + }, + "routes": { + "type": "array", + "items": { + "$ref": "#/components/schemas/ResolvedVpcRoute" + }, + "uniqueItems": true + }, + "version": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RouterVersion" + } + ] + } + }, + "required": [ + "id", + "routes" + ] + }, + "ResolvedVpcRouteState": { + "description": "Version information for routes on a given VPC subnet.", + "type": "object", + "properties": { + "id": { + "$ref": "#/components/schemas/RouterId" + }, + "version": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/RouterVersion" + } + ] + } + }, + "required": [ + "id" + ] + }, "RouteConfig": { "type": "object", "properties": { @@ -4266,6 +4387,148 @@ "nexthop" ] }, + "RouterId": { + "description": "Identifier for a VPC and/or subnet.", + "type": "object", + "properties": { + "kind": { + "$ref": "#/components/schemas/RouterKind" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "kind", + "vni" + ] + }, + "RouterKind": { + "description": "The scope of a set of VPC router rules.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "system" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "subnet": { + "$ref": "#/components/schemas/IpNet" + }, + "type": { + "type": "string", + "enum": [ + "custom" + ] + } + }, + "required": [ + "subnet", + "type" + ] + } + ] + }, + "RouterTarget": { + "description": "The target for a given router entry.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "drop" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "internet_gateway" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "vpc_subnet" + ] + }, + "value": { + "$ref": "#/components/schemas/IpNet" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, + "RouterVersion": { + "description": "Information on the current parent router (and version) of a route set according to the control plane.", + "type": "object", + "properties": { + "router_id": { + "type": "string", + "format": "uuid" + }, + "version": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "router_id", + "version" + ] + }, "SemverVersion": { "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" diff --git a/package-manifest.toml b/package-manifest.toml index d653f9d999..027ca52b48 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -563,10 +563,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" +source.commit = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm-gz.sha256.txt -source.sha256 = "33e3b09408551be860debac08de50a840909d4e6c6bed9aecaef63fe8bef2d69" +source.sha256 = "63b6c74584e32f52893730e3a567da29c7f93934c38882614aad59034bdd980d" output.type = "tarball" [package.mg-ddm] @@ -579,10 +579,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" +source.commit = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "81674afa17873f84bb49a800c8511938d1c2e871026cbb17e5eed2b645b1eb55" +source.sha256 = "b9908b81fee00d71b750f5b9a0f866c807adb0f924ab635295d28753538836f5" output.type = "zone" output.intermediate_only = true @@ -594,10 +594,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "c67f6ab49e0e8a49bcf84542500fceb6b9417ca4" +source.commit = "3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" # The SHA256 digest is automatically posted to: -# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "5e8bdd6774ef6041189621306577d0e0d174d596d216e53740ce6f035316c5af" +# https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mgd.sha256.txt +source.sha256 = "51f446933f0d8c426b15ea0845b66664da9b9a129893d12b25d7912b52f07362" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 905fd111c1..b6102c3a64 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1406,7 +1406,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.vpc_subnet ( /* Child resource creation generation number */ rcgen INT8 NOT NULL, ipv4_block INET NOT NULL, - ipv6_block INET NOT NULL + ipv6_block INET NOT NULL, + /* nullable FK to the `vpc_router` table. */ + custom_router_id UUID ); /* Subnet and network interface names are unique per VPC, not project */ @@ -1636,7 +1638,13 @@ CREATE TABLE IF NOT EXISTS omicron.public.vpc_router ( time_deleted TIMESTAMPTZ, kind omicron.public.vpc_router_kind NOT NULL, vpc_id UUID NOT NULL, - rcgen INT NOT NULL + rcgen INT NOT NULL, + /* + * version information used to trigger VPC router RPW. + * this is sensitive to CRUD on named resources beyond + * routers e.g. instances, subnets, ... + */ + resolved_version INT NOT NULL DEFAULT 0 ); CREATE UNIQUE INDEX IF NOT EXISTS lookup_router_by_vpc ON omicron.public.vpc_router ( @@ -1662,6 +1670,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.router_route ( /* Indicates that the object has been deleted */ time_deleted TIMESTAMPTZ, + /* FK to the `vpc_router` table. */ vpc_router_id UUID NOT NULL, kind omicron.public.router_route_kind NOT NULL, target STRING(128) NOT NULL, @@ -4098,7 +4107,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '77.0.0', NULL) + (TRUE, NOW(), NOW(), '78.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/vpc-subnet-routing/up01.sql b/schema/crdb/vpc-subnet-routing/up01.sql new file mode 100644 index 0000000000..d1869dd010 --- /dev/null +++ b/schema/crdb/vpc-subnet-routing/up01.sql @@ -0,0 +1,3 @@ +-- Each subnet may have a custom router attached. +ALTER TABLE omicron.public.vpc_subnet +ADD COLUMN IF NOT EXISTS custom_router_id UUID; diff --git a/schema/crdb/vpc-subnet-routing/up02.sql b/schema/crdb/vpc-subnet-routing/up02.sql new file mode 100644 index 0000000000..77e72961a3 --- /dev/null +++ b/schema/crdb/vpc-subnet-routing/up02.sql @@ -0,0 +1,7 @@ +/* + * version information used to trigger VPC router RPW. + * this is sensitive to CRUD on named resources beyond + * routers e.g. instances, subnets, ... + */ +ALTER TABLE omicron.public.vpc_router +ADD COLUMN IF NOT EXISTS resolved_version INT NOT NULL DEFAULT 0; diff --git a/schema/crdb/vpc-subnet-routing/up03.sql b/schema/crdb/vpc-subnet-routing/up03.sql new file mode 100644 index 0000000000..fb4fd2324a --- /dev/null +++ b/schema/crdb/vpc-subnet-routing/up03.sql @@ -0,0 +1,96 @@ +set local disallow_full_table_scans = off; + +-- We need to manually rebuild a compliant set of routes. +-- Remove everything that exists today. +DELETE FROM omicron.public.router_route WHERE 1=1; + +-- Insert gateway routes for all VPCs. +INSERT INTO omicron.public.router_route + ( + id, name, + description, + time_created, time_modified, + vpc_router_id, kind, + target, destination + ) +SELECT + gen_random_uuid(), 'default-v4', + 'The default route of a vpc', + now(), now(), + omicron.public.vpc_router.id, 'default', + 'inetgw:outbound', 'ipnet:0.0.0.0/0' +FROM + omicron.public.vpc_router +ON CONFLICT DO NOTHING; + +INSERT INTO omicron.public.router_route + ( + id, name, + description, + time_created, time_modified, + vpc_router_id, kind, + target, destination + ) +SELECT + gen_random_uuid(), 'default-v6', + 'The default route of a vpc', + now(), now(), + omicron.public.vpc_router.id, 'default', + 'inetgw:outbound', 'ipnet:::/0' +FROM + omicron.public.vpc_router +ON CONFLICT DO NOTHING; + +-- Insert subnet routes for every defined VPC subnet. +INSERT INTO omicron.public.router_route + ( + id, name, + description, + time_created, time_modified, + vpc_router_id, kind, + target, destination + ) +SELECT + gen_random_uuid(), vpc_subnet.name, + 'VPC Subnet route for ''' || vpc_subnet.name || '''', + now(), now(), + omicron.public.vpc_router.id, 'vpc_subnet', + 'subnet:' || vpc_subnet.name, 'subnet:' || vpc_subnet.name +FROM + (omicron.public.vpc_subnet JOIN omicron.public.vpc + ON vpc_subnet.vpc_id = vpc.id) JOIN omicron.public.vpc_router + ON vpc_router.vpc_id = vpc.id +ON CONFLICT DO NOTHING; + +-- Replace IDs of fixed_data routes for the services VPC. +-- This is done instead of an insert to match the initial +-- empty state of dbinit.sql. +WITH known_ids (new_id, new_name, new_description) AS ( + VALUES + ( + '001de000-074c-4000-8000-000000000002', 'default-v4', + 'Default internet gateway route for Oxide Services' + ), + ( + '001de000-074c-4000-8000-000000000003', 'default-v6', + 'Default internet gateway route for Oxide Services' + ), + ( + '001de000-c470-4000-8000-000000000004', 'external-dns', + 'Built-in VPC Subnet for Oxide service (external-dns)' + ), + ( + '001de000-c470-4000-8000-000000000005', 'nexus', + 'Built-in VPC Subnet for Oxide service (nexus)' + ), + ( + '001de000-c470-4000-8000-000000000006', 'boundary-ntp', + 'Built-in VPC Subnet for Oxide service (boundary-ntp)' + ) +) +UPDATE omicron.public.router_route +SET + id = CAST(new_id AS UUID), + description = new_description +FROM known_ids +WHERE vpc_router_id = '001de000-074c-4000-8000-000000000001' AND new_name = router_route.name; diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 6defd18a95..2d41e2860a 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -31,7 +31,9 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{ DiskRuntimeState, SledInstanceState, UpdateArtifactId, }; -use omicron_common::api::internal::shared::SwitchPorts; +use omicron_common::api::internal::shared::{ + ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, +}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -86,6 +88,8 @@ pub fn api() -> SledApiDescription { api.register(host_os_write_status_delete)?; api.register(inventory)?; api.register(bootstore_status)?; + api.register(list_vpc_routes)?; + api.register(set_vpc_routes)?; Ok(()) } @@ -1025,3 +1029,29 @@ async fn bootstore_status( .into(); Ok(HttpResponseOk(status)) } + +/// Get the current versions of VPC routing rules. +#[endpoint { + method = GET, + path = "/vpc-routes", +}] +async fn list_vpc_routes( + request_context: RequestContext, +) -> Result>, HttpError> { + let sa = request_context.context(); + Ok(HttpResponseOk(sa.list_vpc_routes())) +} + +/// Update VPC routing rules. +#[endpoint { + method = PUT, + path = "/vpc-routes", +}] +async fn set_vpc_routes( + request_context: RequestContext, + body: TypedBody>, +) -> Result { + let sa = request_context.context(); + sa.set_vpc_routes(body.into_inner())?; + Ok(HttpResponseUpdatedNoContent()) +} diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 04b68ef752..ec4d503e7b 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -27,7 +27,7 @@ use backoff::BackoffError; use chrono::Utc; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; -use illumos_utils::opte::{DhcpCfg, PortManager}; +use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::svc::wait_for_service; use illumos_utils::zone::PROPOLIS_ZONE_PREFIX; @@ -1327,14 +1327,15 @@ impl InstanceRunner { } else { (None, None, &[][..]) }; - let port = self.port_manager.create_port( + let port = self.port_manager.create_port(PortCreateParams { nic, - snat, + source_nat: snat, ephemeral_ip, floating_ips, - &self.firewall_rules, - self.dhcp_config.clone(), - )?; + firewall_rules: &self.firewall_rules, + dhcp_config: self.dhcp_config.clone(), + is_service: false, + })?; opte_ports.push(port); } diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 16559039a2..40af604645 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -3,10 +3,12 @@ use anyhow::{anyhow, Result}; use illumos_utils::dladm::Etherstub; use illumos_utils::link::VnicAllocator; use illumos_utils::opte::params::VpcFirewallRule; -use illumos_utils::opte::{DhcpCfg, PortManager}; +use illumos_utils::opte::{DhcpCfg, PortCreateParams, PortManager}; use illumos_utils::running_zone::{RunningZone, ZoneBuilderFactory}; use illumos_utils::zone::Zones; -use nexus_client::types::{ProbeExternalIp, ProbeInfo}; +use nexus_client::types::{ + BackgroundTasksActivateRequest, ProbeExternalIp, ProbeInfo, +}; use omicron_common::api::external::{ VpcFirewallRuleAction, VpcFirewallRuleDirection, VpcFirewallRulePriority, VpcFirewallRuleStatus, @@ -179,24 +181,44 @@ impl ProbeManagerInner { } }; - self.add(target.difference(¤t)).await; + let n_added = self.add(target.difference(¤t)).await; self.remove(current.difference(&target)).await; self.check(current.intersection(&target)).await; + + // If we have created some new probes, we may need the control plane + // to provide us with valid routes for the VPC the probe belongs to. + if n_added > 0 { + if let Err(e) = self + .nexus_client + .client() + .bgtask_activate(&BackgroundTasksActivateRequest { + bgtask_names: vec!["vpc_route_manager".into()], + }) + .await + { + error!(self.log, "get routes for probe: {e}"); + } + } } }) } /// Add a set of probes to this sled. - async fn add<'a, I>(self: &Arc, probes: I) + /// + /// Returns the number of inserted probes. + async fn add<'a, I>(self: &Arc, probes: I) -> usize where I: Iterator, { + let mut i = 0; for probe in probes { info!(self.log, "adding probe {}", probe.id); if let Err(e) = self.add_probe(probe).await { error!(self.log, "add probe: {e}"); } + i += 1; } + i } /// Add a probe to this sled. This sets up resources for the probe zone @@ -223,12 +245,12 @@ impl ProbeManagerInner { .get(0) .ok_or(anyhow!("expected an external ip"))?; - let port = self.port_manager.create_port( - &nic, - None, - Some(eip.ip), - &[], // floating ips - &[VpcFirewallRule { + let port = self.port_manager.create_port(PortCreateParams { + nic, + source_nat: None, + ephemeral_ip: Some(eip.ip), + floating_ips: &[], + firewall_rules: &[VpcFirewallRule { status: VpcFirewallRuleStatus::Enabled, direction: VpcFirewallRuleDirection::Inbound, targets: vec![nic.clone()], @@ -238,8 +260,9 @@ impl ProbeManagerInner { action: VpcFirewallRuleAction::Allow, priority: VpcFirewallRulePriority(100), }], - DhcpCfg::default(), - )?; + dhcp_config: DhcpCfg::default(), + is_service: false, + })?; let installed_zone = ZoneBuilderFactory::default() .builder() diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 5f53601681..fb57990f1b 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -48,7 +48,9 @@ use illumos_utils::dladm::{ Dladm, Etherstub, EtherstubVnic, GetSimnetError, PhysicalLink, }; use illumos_utils::link::{Link, VnicAllocator}; -use illumos_utils::opte::{DhcpCfg, Port, PortManager, PortTicket}; +use illumos_utils::opte::{ + DhcpCfg, Port, PortCreateParams, PortManager, PortTicket, +}; use illumos_utils::running_zone::{ EnsureAddressError, InstalledZone, RunCommandError, RunningZone, ZoneBuilderFactory, @@ -1162,11 +1164,19 @@ impl ServiceManager { // Create the OPTE port for the service. // Note we don't plumb any firewall rules at this point, - // Nexus will plumb them down later but the default OPTE + // Nexus will plumb them down later but services' default OPTE // config allows outbound access which is enough for // Boundary NTP which needs to come up before Nexus. let port = port_manager - .create_port(nic, snat, None, floating_ips, &[], DhcpCfg::default()) + .create_port(PortCreateParams { + nic, + source_nat: snat, + ephemeral_ip: None, + floating_ips, + firewall_rules: &[], + dhcp_config: DhcpCfg::default(), + is_service: true, + }) .map_err(|err| Error::ServicePortCreation { service: zone_type_str.clone(), err: Box::new(err), diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 012889c664..cfafaeea22 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -24,7 +24,9 @@ use illumos_utils::opte::params::VirtualNetworkInterfaceHost; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::UpdateArtifactId; -use omicron_common::api::internal::shared::SwitchPorts; +use omicron_common::api::internal::shared::{ + ResolvedVpcRouteSet, ResolvedVpcRouteState, SwitchPorts, +}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -64,6 +66,8 @@ pub fn api() -> SledApiDescription { api.register(omicron_zones_get)?; api.register(omicron_zones_put)?; api.register(sled_add)?; + api.register(list_vpc_routes)?; + api.register(set_vpc_routes)?; Ok(()) } @@ -508,3 +512,27 @@ async fn sled_add( ) -> Result { Ok(HttpResponseUpdatedNoContent()) } + +#[endpoint { + method = GET, + path = "/vpc-routes", +}] +async fn list_vpc_routes( + rqctx: RequestContext>, +) -> Result>, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.list_vpc_routes().await)) +} + +#[endpoint { + method = PUT, + path = "/vpc-routes", +}] +async fn set_vpc_routes( + rqctx: RequestContext>, + body: TypedBody>, +) -> Result { + let sa = rqctx.context(); + sa.set_vpc_routes(body.into_inner()).await; + Ok(HttpResponseUpdatedNoContent()) +} diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index f47d8a9100..9cb146531b 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -36,7 +36,10 @@ use omicron_common::api::internal::nexus::{ use omicron_common::api::internal::nexus::{ InstanceRuntimeState, VmmRuntimeState, }; -use omicron_common::api::internal::shared::RackNetworkConfig; +use omicron_common::api::internal::shared::{ + RackNetworkConfig, ResolvedVpcRoute, ResolvedVpcRouteSet, + ResolvedVpcRouteState, RouterId, RouterKind, RouterVersion, +}; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, ZpoolUuid}; use oxnet::Ipv6Net; @@ -77,6 +80,7 @@ pub struct SledAgent { Mutex>, PropolisClient)>>, /// lists of external IPs assigned to instances pub external_ips: Mutex>>, + pub vpc_routes: Mutex>, config: Config, fake_zones: Mutex, instance_ensure_state_error: Mutex>, @@ -182,6 +186,7 @@ impl SledAgent { disk_id_to_region_ids: Mutex::new(HashMap::new()), v2p_mappings: Mutex::new(HashSet::new()), external_ips: Mutex::new(HashMap::new()), + vpc_routes: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), config: config.clone(), fake_zones: Mutex::new(OmicronZonesConfig { @@ -360,6 +365,18 @@ impl SledAgent { self.map_disk_ids_to_region_ids(&vcr).await?; } + let mut routes = self.vpc_routes.lock().await; + for nic in &hardware.nics { + let my_routers = [ + RouterId { vni: nic.vni, kind: RouterKind::System }, + RouterId { vni: nic.vni, kind: RouterKind::Custom(nic.subnet) }, + ]; + + for router in my_routers { + routes.entry(router).or_default(); + } + } + Ok(instance_run_time_state) } @@ -879,4 +896,49 @@ impl SledAgent { pub async fn drop_dataset(&self, zpool_id: ZpoolUuid, dataset_id: Uuid) { self.storage.lock().await.drop_dataset(zpool_id, dataset_id) } + + pub async fn list_vpc_routes(&self) -> Vec { + let routes = self.vpc_routes.lock().await; + routes + .iter() + .map(|(k, v)| ResolvedVpcRouteState { id: *k, version: v.version }) + .collect() + } + + pub async fn set_vpc_routes(&self, new_routes: Vec) { + let mut routes = self.vpc_routes.lock().await; + for new in new_routes { + // Disregard any route information for a subnet we don't have. + let Some(old) = routes.get(&new.id) else { + continue; + }; + + // We have to handle subnet router changes, as well as + // spurious updates from multiple Nexus instances. + // If there's a UUID match, only update if vers increased, + // otherwise take the update verbatim (including loss of version). + match (old.version, new.version) { + (Some(old_vers), Some(new_vers)) + if !old_vers.is_replaced_by(&new_vers) => + { + continue; + } + _ => {} + }; + + routes.insert( + new.id, + RouteSet { version: new.version, routes: new.routes }, + ); + } + } +} + +/// Stored routes (and usage count) for a given VPC/subnet. +// NB: We aren't doing post count tracking here to unsubscribe +// from (VNI, subnet) pairs. +#[derive(Debug, Clone, Default)] +pub struct RouteSet { + pub version: Option, + pub routes: HashSet, } diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 993e5f6a94..7f05d55e60 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -49,7 +49,8 @@ use omicron_common::api::internal::nexus::{ SledInstanceState, VmmRuntimeState, }; use omicron_common::api::internal::shared::{ - HostPortConfig, RackNetworkConfig, + HostPortConfig, RackNetworkConfig, ResolvedVpcRouteSet, + ResolvedVpcRouteState, }; use omicron_common::api::{ internal::nexus::DiskRuntimeState, internal::nexus::InstanceRuntimeState, @@ -1096,6 +1097,17 @@ impl SledAgent { self.inner.bootstore.clone() } + pub fn list_vpc_routes(&self) -> Vec { + self.inner.port_manager.vpc_routes_list() + } + + pub fn set_vpc_routes( + &self, + routes: Vec, + ) -> Result<(), Error> { + self.inner.port_manager.vpc_routes_ensure(routes).map_err(Error::from) + } + /// Return the metric producer registry. pub fn metrics_registry(&self) -> &ProducerRegistry { self.inner.metrics_manager.registry() diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 9a93e6b556..569d3d7813 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="5630887d0373857f77cb264f84aa19bdec720ce3" -SHA2="004e873e4120aa26460271368485266b75b7f964e5ed4dbee8fb5db4519470d7" +COMMIT="3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" +SHA2="007bfb717ccbc077c0250dee3121aeb0c5bb0d1c16795429a514fa4f8635a5ef" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 22918c581a..de64133971 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="5630887d0373857f77cb264f84aa19bdec720ce3" -SHA2="fdb33ee7425923560534672264008ef8948d227afce948ab704de092ad72157c" +COMMIT="3c3fa8482fe09a01da62fbd35efe124ea9cac9e7" +SHA2="e4b42ab9daad90f0c561a830b62a9d17e294b4d0da0a6d44b4030929b0c37b7e" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index d2ad05383d..f9d4fd4491 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="6ae4bc3b332e91706c1c6633a7fc218aac65b7feff5643ee2dbbe79b841e0df3" -MGD_LINUX_SHA256="7930008cf8ce535a8b31043fc3edde0e825bd54d75f73234929bd0037ecc3a41" +CIDL_SHA256="51f446933f0d8c426b15ea0845b66664da9b9a129893d12b25d7912b52f07362" +MGD_LINUX_SHA256="736067394778cc4c38fecb1ca8647db3ca7ab1b5c4446f3ce2b5350379ba95b7" diff --git a/tools/opte_version b/tools/opte_version index 529b93110f..ff992938ae 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.31.262 +0.32.265 diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 0dca1a904e..7dfc9a1402 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -93,7 +93,7 @@ schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } serde = { version = "1.0.203", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.117", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.118", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -197,7 +197,7 @@ schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } semver = { version = "1.0.23", features = ["serde"] } serde = { version = "1.0.203", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.117", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.118", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.5.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 06743b0b3158db4360b775055279fc3b3c75212c Mon Sep 17 00:00:00 2001 From: David Crespo Date: Wed, 26 Jun 2024 15:48:30 -0500 Subject: [PATCH 12/13] Put max length in `Name` description (#5958) Closes #5940. This is hack around a current limitation of the docs site, namely that we do not show meta constraints like `maxLength` on types. Because the regex pattern for `Name` is gnarly and unreadable, we already rely on the description to give users a sense of the requirements. This change adds max length to that description. I updated everything but the Nexus OpenAPI schema manually with find/replace, so I will see from CI whether I got that right. image --- common/src/api/external/mod.rs | 4 ++-- common/tests/output/pagination-schema.txt | 4 ++-- openapi/bootstrap-agent.json | 4 ++-- openapi/nexus-internal.json | 4 ++-- openapi/nexus.json | 4 ++-- openapi/sled-agent.json | 2 +- openapi/wicketd.json | 2 +- schema/all-zones-requests.json | 2 +- schema/rss-service-plan-v2.json | 2 +- schema/rss-service-plan-v3.json | 2 +- schema/rss-sled-plan.json | 4 ++-- 11 files changed, 17 insertions(+), 17 deletions(-) diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 58d45fecef..158b087a5c 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -305,8 +305,8 @@ impl JsonSchema for Name { "Names must begin with a lower case ASCII letter, be \ composed exclusively of lowercase ASCII, uppercase \ ASCII, numbers, and '-', and may not end with a '-'. \ - Names cannot be a UUID though they may contain a UUID." - .to_string(), + Names cannot be a UUID, but they may contain a UUID. \ + They can be at most 63 characters long.".to_string(), ), ..Default::default() })), diff --git a/common/tests/output/pagination-schema.txt b/common/tests/output/pagination-schema.txt index 436e614994..9da2fffc26 100644 --- a/common/tests/output/pagination-schema.txt +++ b/common/tests/output/pagination-schema.txt @@ -135,7 +135,7 @@ schema for pagination parameters: page selector, scan by name only "definitions": { "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "maxLength": 63, "minLength": 1, @@ -224,7 +224,7 @@ schema for pagination parameters: page selector, scan by name or id "definitions": { "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "maxLength": 63, "minLength": 1, diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 6050939b94..370f0fb404 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -734,7 +734,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, @@ -1254,7 +1254,7 @@ ] }, "UserId": { - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", "type": "string" } }, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 72731e83e8..d479e2e20c 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3526,7 +3526,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, @@ -5031,7 +5031,7 @@ }, "UserId": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, diff --git a/openapi/nexus.json b/openapi/nexus.json index f4cd1ce89e..598f0453ff 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -15786,7 +15786,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, @@ -18868,7 +18868,7 @@ }, "UserId": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 3ac130c565..aa5163a964 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3559,7 +3559,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 555b8cf44c..7d50a38268 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1774,7 +1774,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", "minLength": 1, diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json index 526e41376f..210f1df2f9 100644 --- a/schema/all-zones-requests.json +++ b/schema/all-zones-requests.json @@ -105,7 +105,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "maxLength": 63, "minLength": 1, diff --git a/schema/rss-service-plan-v2.json b/schema/rss-service-plan-v2.json index ee0b21af81..e5aba43040 100644 --- a/schema/rss-service-plan-v2.json +++ b/schema/rss-service-plan-v2.json @@ -178,7 +178,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "maxLength": 63, "minLength": 1, diff --git a/schema/rss-service-plan-v3.json b/schema/rss-service-plan-v3.json index d1540ca351..9348774c35 100644 --- a/schema/rss-service-plan-v3.json +++ b/schema/rss-service-plan-v3.json @@ -219,7 +219,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "maxLength": 63, "minLength": 1, diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index a42a22efd8..cb3c5c8eeb 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -606,7 +606,7 @@ }, "Name": { "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.", "type": "string", "maxLength": 63, "minLength": 1, @@ -1017,7 +1017,7 @@ } }, "UserId": { - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID, but they may contain a UUID. They can be at most 63 characters long.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", "type": "string" } } From 97fe552507bf5cdfaaa85a3256e337df5b5b05c4 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 26 Jun 2024 23:23:32 +0100 Subject: [PATCH 13/13] VPC Subnet Routing [2/2] -- Custom Routers and NIC 'transit IP' lists (#5823) This PR builds on #5777 to provide the Custom routers for subnets as described in RFD21. This entails a few things: * We remove the `unpublished = true` tag from the user API for VPC routers and routes. * Custom routers may be attached/detached to a VPC subnet using the `custom_router` field in subnet `POST` and `PUT` requests. * NICs now individually have a `transit_ips` list, which denotes an additional set of CIDR blocks that a NIC is allowed to send and receive traffic on. This is set during `POST` and/or `PUT` on instances which are stopped. This is a key feature to enable software routing by instances, as today's default behaviour drops any packets not matching an assigned IP for an instance. * I suspect there will be some discussion over the shape of this API, so there isn't yet test coverage here until we know we're happy with it. * Revisited which router routes can be created by users, e.g., better validation on v4/v6 dest/target pairs. There are some allowances around currently non-existent features: * **Internet Gateways.** We allow unlimited use of one pseudo-gateway, `inetgw:outbound`, which appears in our existing rules. Using this target sends packets upstream as it does today. * **VPC peering.** VPCs as destinations/targets are currently disallowed in router routes. Closes #2116. --- Cargo.lock | 1 + common/src/api/external/mod.rs | 10 +- common/src/api/internal/shared.rs | 2 + illumos-utils/src/opte/port_manager.rs | 35 + nexus/db-model/src/collection.rs | 4 + nexus/db-model/src/network_interface.rs | 26 +- nexus/db-model/src/omicron_zone_config.rs | 1 + nexus/db-model/src/schema.rs | 2 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/vpc_router.rs | 18 +- nexus/db-queries/src/db/collection_attach.rs | 26 +- .../src/db/datastore/network_interface.rs | 3 + nexus/db-queries/src/db/datastore/rack.rs | 9 + nexus/db-queries/src/db/datastore/vpc.rs | 82 +- .../execution/src/external_networking.rs | 3 + .../planning/src/blueprint_builder/builder.rs | 1 + .../output/planner_nonprovisionable_2_2a.txt | 1 + nexus/src/app/vpc_router.rs | 71 +- nexus/src/app/vpc_subnet.rs | 93 +- nexus/src/external_api/http_entrypoints.rs | 10 - nexus/test-utils/Cargo.toml | 1 + nexus/test-utils/src/lib.rs | 2 + nexus/test-utils/src/resource_helpers.rs | 104 + nexus/tests/integration_tests/endpoints.rs | 3 + nexus/tests/integration_tests/instances.rs | 10 +- .../tests/integration_tests/router_routes.rs | 389 +++- .../integration_tests/subnet_allocation.rs | 1 + nexus/tests/integration_tests/vpc_routers.rs | 509 ++++- nexus/tests/integration_tests/vpc_subnets.rs | 5 + nexus/tests/output/nexus_tags.txt | 10 + .../output/unexpected-authz-endpoints.txt | 10 - nexus/types/src/external_api/params.rs | 21 + openapi/nexus-internal.json | 7 + openapi/nexus.json | 1780 ++++++++++++++--- openapi/sled-agent.json | 7 + schema/all-zones-requests.json | 7 + schema/crdb/dbinit.sql | 14 +- schema/crdb/nic-spoof-allow/up01.sql | 2 + schema/crdb/nic-spoof-allow/up02.sql | 1 + schema/crdb/nic-spoof-allow/up03.sql | 20 + schema/rss-service-plan-v3.json | 7 + sled-agent/src/rack_setup/plan/service.rs | 3 + sled-agent/src/sim/server.rs | 2 + 43 files changed, 2902 insertions(+), 414 deletions(-) create mode 100644 schema/crdb/nic-spoof-allow/up01.sql create mode 100644 schema/crdb/nic-spoof-allow/up02.sql create mode 100644 schema/crdb/nic-spoof-allow/up03.sql diff --git a/Cargo.lock b/Cargo.lock index 30c6f861ad..b91310a1ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4917,6 +4917,7 @@ dependencies = [ "oximeter", "oximeter-collector", "oximeter-producer", + "oxnet", "serde", "serde_json", "serde_urlencoded", diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 158b087a5c..2397cd15f8 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -1407,14 +1407,13 @@ pub struct RouterRoute { /// common identifying metadata #[serde(flatten)] pub identity: IdentityMetadata, - /// The ID of the VPC Router to which the route belongs pub vpc_router_id: Uuid, - /// Describes the kind of router. Set at creation. `read-only` pub kind: RouterRouteKind, - + /// The location that matched packets should be forwarded to. pub target: RouteTarget, + /// Selects which traffic this routing rule will apply to. pub destination: RouteDestination, } @@ -1979,6 +1978,11 @@ pub struct InstanceNetworkInterface { /// True if this interface is the primary for the instance to which it's /// attached. pub primary: bool, + + /// A set of additional networks that this interface may send and + /// receive traffic on. + #[serde(default)] + pub transit_ips: Vec, } #[derive( diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index 090b3c3058..884b4dc165 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -57,6 +57,8 @@ pub struct NetworkInterface { pub vni: Vni, pub primary: bool, pub slot: u8, + #[serde(default)] + pub transit_ips: Vec, } /// An IP address and port range used for source NAT, i.e., making diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index caeda81217..984e3c55fa 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -440,6 +440,41 @@ impl PortManager { } } + // If there are any transit IPs set, allow them through. + // TODO: Currently set only in initial state. + // This, external IPs, and cfg'able state + // (DHCP?) are probably worth being managed by an RPW. + for block in &nic.transit_ips { + #[cfg(target_os = "illumos")] + { + use oxide_vpc::api::Direction; + + // In principle if this were an operation on an existing + // port, we would explicitly undo the In addition if the + // Out addition fails. + // However, failure here will just destroy the port + // outright -- this should only happen if an excessive + // number of rules are specified. + hdl.allow_cidr( + &port_name, + super::net_to_cidr(*block), + Direction::In, + )?; + hdl.allow_cidr( + &port_name, + super::net_to_cidr(*block), + Direction::Out, + )?; + } + + debug!( + self.inner.log, + "Added CIDR to in/out allowlist"; + "port_name" => &port_name, + "cidr" => ?block, + ); + } + info!( self.inner.log, "Created OPTE port"; diff --git a/nexus/db-model/src/collection.rs b/nexus/db-model/src/collection.rs index b86e35d407..964aaad248 100644 --- a/nexus/db-model/src/collection.rs +++ b/nexus/db-model/src/collection.rs @@ -152,4 +152,8 @@ pub trait DatastoreAttachTargetConfig: type ResourceTimeDeletedColumn: Column::Table> + Default + ExpressionMethods; + + /// Controls whether a resource may be attached to a new collection without + /// first being explicitly detached from the previous one + const ALLOW_FROM_ATTACHED: bool = false; } diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index 6d347ecd37..79b16b5658 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -13,6 +13,7 @@ use chrono::DateTime; use chrono::Utc; use db_macros::Resource; use diesel::AsChangeset; +use ipnetwork::IpNetwork; use ipnetwork::NetworkSize; use nexus_types::external_api::params; use nexus_types::identity::Resource; @@ -64,11 +65,13 @@ pub struct NetworkInterface { // // If user requests an address of either kind, give exactly that and not the other. // If neither is specified, auto-assign one of each? - pub ip: ipnetwork::IpNetwork, + pub ip: IpNetwork, pub slot: SqlU8, #[diesel(column_name = is_primary)] pub primary: bool, + + pub transit_ips: Vec, } impl NetworkInterface { @@ -102,6 +105,7 @@ impl NetworkInterface { vni: external::Vni::try_from(0).unwrap(), primary: self.primary, slot: *self.slot, + transit_ips: self.transit_ips.into_iter().map(Into::into).collect(), } } } @@ -122,11 +126,13 @@ pub struct InstanceNetworkInterface { pub subnet_id: Uuid, pub mac: MacAddr, - pub ip: ipnetwork::IpNetwork, + pub ip: IpNetwork, pub slot: SqlU8, #[diesel(column_name = is_primary)] pub primary: bool, + + pub transit_ips: Vec, } /// Service Network Interface DB model. @@ -145,7 +151,7 @@ pub struct ServiceNetworkInterface { pub subnet_id: Uuid, pub mac: MacAddr, - pub ip: ipnetwork::IpNetwork, + pub ip: IpNetwork, pub slot: SqlU8, #[diesel(column_name = is_primary)] @@ -242,6 +248,7 @@ impl NetworkInterface { ip: self.ip, slot: self.slot, primary: self.primary, + transit_ips: self.transit_ips, } } @@ -290,6 +297,7 @@ impl From for NetworkInterface { ip: iface.ip, slot: iface.slot, primary: iface.primary, + transit_ips: iface.transit_ips, } } } @@ -313,6 +321,7 @@ impl From for NetworkInterface { ip: iface.ip, slot: iface.slot, primary: iface.primary, + transit_ips: vec![], } } } @@ -460,6 +469,7 @@ pub struct NetworkInterfaceUpdate { pub time_modified: DateTime, #[diesel(column_name = is_primary)] pub primary: Option, + pub transit_ips: Vec, } impl From for external::InstanceNetworkInterface { @@ -472,6 +482,11 @@ impl From for external::InstanceNetworkInterface { ip: iface.ip.ip(), mac: *iface.mac, primary: iface.primary, + transit_ips: iface + .transit_ips + .into_iter() + .map(Into::into) + .collect(), } } } @@ -484,6 +499,11 @@ impl From for NetworkInterfaceUpdate { description: params.identity.description, time_modified: Utc::now(), primary, + transit_ips: params + .transit_ips + .into_iter() + .map(Into::into) + .collect(), } } } diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs index c2258dba6c..3b18a749a7 100644 --- a/nexus/db-model/src/omicron_zone_config.rs +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -659,6 +659,7 @@ impl OmicronZoneNic { vni: omicron_common::api::external::Vni::try_from(*self.vni) .context("parsing VNI")?, subnet: self.subnet.into(), + transit_ips: vec![], }) } } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 72d01f094b..c716c930e3 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -511,6 +511,7 @@ table! { ip -> Inet, slot -> Int2, is_primary -> Bool, + transit_ips -> Array, } } @@ -529,6 +530,7 @@ table! { ip -> Inet, slot -> Int2, is_primary -> Bool, + transit_ips -> Array, } } joinable!(instance_network_interface -> instance (instance_id)); diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index ec94221496..0ca16498ba 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(78, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(79, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(79, "nic-spoof-allow"), KnownVersion::new(78, "vpc-subnet-routing"), KnownVersion::new(77, "remove-view-for-v2p-mappings"), KnownVersion::new(76, "lookup-region-snapshot-by-snapshot-id"), diff --git a/nexus/db-model/src/vpc_router.rs b/nexus/db-model/src/vpc_router.rs index 51409c38d5..ee8988ae69 100644 --- a/nexus/db-model/src/vpc_router.rs +++ b/nexus/db-model/src/vpc_router.rs @@ -4,7 +4,8 @@ use super::{impl_enum_type, Generation, Name, RouterRoute}; use crate::collection::DatastoreCollectionConfig; -use crate::schema::{router_route, vpc_router}; +use crate::schema::{router_route, vpc_router, vpc_subnet}; +use crate::{DatastoreAttachTargetConfig, VpcSubnet}; use chrono::{DateTime, Utc}; use db_macros::Resource; use nexus_types::external_api::params; @@ -41,8 +42,8 @@ pub struct VpcRouter { #[diesel(embed)] identity: VpcRouterIdentity, - pub vpc_id: Uuid, pub kind: VpcRouterKind, + pub vpc_id: Uuid, pub rcgen: Generation, pub resolved_version: i64, } @@ -99,3 +100,16 @@ impl From for VpcRouterUpdate { } } } + +impl DatastoreAttachTargetConfig for VpcRouter { + type Id = Uuid; + + type CollectionIdColumn = vpc_router::dsl::id; + type CollectionTimeDeletedColumn = vpc_router::dsl::time_deleted; + + type ResourceIdColumn = vpc_subnet::dsl::id; + type ResourceCollectionIdColumn = vpc_subnet::dsl::custom_router_id; + type ResourceTimeDeletedColumn = vpc_subnet::dsl::time_deleted; + + const ALLOW_FROM_ATTACHED: bool = true; +} diff --git a/nexus/db-queries/src/db/collection_attach.rs b/nexus/db-queries/src/db/collection_attach.rs index fccc1aa324..95e6afeb4b 100644 --- a/nexus/db-queries/src/db/collection_attach.rs +++ b/nexus/db-queries/src/db/collection_attach.rs @@ -232,12 +232,26 @@ pub trait DatastoreAttachTarget: .filter(collection_table().primary_key().eq(collection_id)) .filter(Self::CollectionTimeDeletedColumn::default().is_null()), ); - let resource_query = Box::new( - resource_query - .filter(resource_table().primary_key().eq(resource_id)) - .filter(Self::ResourceTimeDeletedColumn::default().is_null()) - .filter(Self::ResourceCollectionIdColumn::default().is_null()), - ); + let resource_query = if Self::ALLOW_FROM_ATTACHED { + Box::new( + resource_query + .filter(resource_table().primary_key().eq(resource_id)) + .filter( + Self::ResourceTimeDeletedColumn::default().is_null(), + ), + ) + } else { + Box::new( + resource_query + .filter(resource_table().primary_key().eq(resource_id)) + .filter( + Self::ResourceTimeDeletedColumn::default().is_null(), + ) + .filter( + Self::ResourceCollectionIdColumn::default().is_null(), + ), + ) + }; let update_resource_statement = update .into_boxed() diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index a9b406becf..c5a8992cd2 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -60,6 +60,7 @@ struct NicInfo { vni: db::model::Vni, primary: bool, slot: i16, + transit_ips: Vec, } impl From for omicron_common::api::internal::shared::NetworkInterface { @@ -92,6 +93,7 @@ impl From for omicron_common::api::internal::shared::NetworkInterface { vni: nic.vni.0, primary: nic.primary, slot: u8::try_from(nic.slot).unwrap(), + transit_ips: nic.transit_ips.iter().map(|v| (*v).into()).collect(), } } } @@ -502,6 +504,7 @@ impl DataStore { vpc::vni, network_interface::is_primary, network_interface::slot, + network_interface::transit_ips, )) .get_results_async::( &*self.pool_connection_authorized(opctx).await?, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 4af6bf7263..627f1f60ab 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -1390,6 +1390,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, }, ), @@ -1416,6 +1417,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, external_ip: OmicronZoneExternalSnatIp { id: ExternalIpUuid::new_v4(), @@ -1462,6 +1464,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, }, ), @@ -1488,6 +1491,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, external_ip: OmicronZoneExternalSnatIp { id: ExternalIpUuid::new_v4(), @@ -1715,6 +1719,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, }, ), @@ -1746,6 +1751,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, }, ), @@ -1984,6 +1990,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, }, ), @@ -2089,6 +2096,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, }, ), @@ -2120,6 +2128,7 @@ mod test { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, }, ), diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 89ee1c468e..fdb9c82fb5 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -9,6 +9,8 @@ use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; +use crate::db::collection_attach::AttachError; +use crate::db::collection_attach::DatastoreAttachTarget; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; @@ -936,6 +938,81 @@ impl DataStore { Ok(out) } + pub async fn vpc_subnet_set_custom_router( + &self, + opctx: &OpContext, + authz_subnet: &authz::VpcSubnet, + authz_router: &authz::VpcRouter, + ) -> Result { + opctx.authorize(authz::Action::Modify, authz_subnet).await?; + opctx.authorize(authz::Action::Read, authz_router).await?; + + use db::schema::vpc_router::dsl as router_dsl; + use db::schema::vpc_subnet::dsl as subnet_dsl; + + let query = VpcRouter::attach_resource( + authz_router.id(), + authz_subnet.id(), + router_dsl::vpc_router + .into_boxed() + .filter(router_dsl::kind.eq(VpcRouterKind::Custom)), + subnet_dsl::vpc_subnet.into_boxed(), + u32::MAX, + diesel::update(subnet_dsl::vpc_subnet).set(( + subnet_dsl::time_modified.eq(Utc::now()), + subnet_dsl::custom_router_id.eq(authz_router.id()), + )), + ); + + query + .attach_and_get_result_async( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map(|(_, resource)| resource) + .map_err(|e| match e { + AttachError::CollectionNotFound => Error::not_found_by_id( + ResourceType::VpcRouter, + &authz_router.id(), + ), + AttachError::ResourceNotFound => Error::not_found_by_id( + ResourceType::VpcSubnet, + &authz_subnet.id(), + ), + // The only other failure reason can be an attempt to use a system router. + AttachError::NoUpdate { .. } => Error::invalid_request( + "cannot attach a system router to a VPC subnet", + ), + AttachError::DatabaseError(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + }) + } + + pub async fn vpc_subnet_unset_custom_router( + &self, + opctx: &OpContext, + authz_subnet: &authz::VpcSubnet, + ) -> Result { + opctx.authorize(authz::Action::Modify, authz_subnet).await?; + + use db::schema::vpc_subnet::dsl; + + diesel::update(dsl::vpc_subnet) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(authz_subnet.id())) + .set(dsl::custom_router_id.eq(Option::::None)) + .returning(VpcSubnet::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource(authz_subnet), + ) + }) + } + pub async fn subnet_list_instance_network_interfaces( &self, opctx: &OpContext, @@ -1065,7 +1142,10 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; // Unlink all subnets from this router. - // XXX: We might this want to error out before the delete fires. + // This will temporarily leave some hanging subnet attachments. + // `vpc_get_active_custom_routers` will join and then filter, + // so such rows will be treated as though they have no custom router + // by the RPW. use db::schema::vpc_subnet::dsl as vpc; diesel::update(vpc::vpc_subnet) .filter(vpc::time_deleted.is_null()) diff --git a/nexus/reconfigurator/execution/src/external_networking.rs b/nexus/reconfigurator/execution/src/external_networking.rs index 13cf601135..3ac1de96d5 100644 --- a/nexus/reconfigurator/execution/src/external_networking.rs +++ b/nexus/reconfigurator/execution/src/external_networking.rs @@ -499,6 +499,7 @@ mod tests { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }; let dns_id = OmicronZoneUuid::new_v4(); @@ -524,6 +525,7 @@ mod tests { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }; // Boundary NTP: @@ -552,6 +554,7 @@ mod tests { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }; Self { diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index c93781a073..1609a6c0cd 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -828,6 +828,7 @@ impl<'a> BlueprintBuilder<'a> { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], } }; diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 4d366f849c..837cc56553 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -206,6 +206,7 @@ ERRORS: ), primary: true, slot: 0, + transit_ips: [], }, external_tls: false, external_dns_servers: [], diff --git a/nexus/src/app/vpc_router.rs b/nexus/src/app/vpc_router.rs index 40b4c1de0f..fdc834a14c 100644 --- a/nexus/src/app/vpc_router.rs +++ b/nexus/src/app/vpc_router.rs @@ -20,8 +20,12 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; +use omicron_common::api::external::RouteDestination; +use omicron_common::api::external::RouteTarget; use omicron_common::api::external::RouterRouteKind; use omicron_common::api::external::UpdateResult; +use oxnet::IpNet; +use std::net::IpAddr; use uuid::Uuid; impl super::Nexus { @@ -130,7 +134,7 @@ impl super::Nexus { // router kind cannot be changed, but it might be able to save us a // database round-trip. if db_router.kind == VpcRouterKind::System { - return Err(Error::invalid_request("Cannot delete system router")); + return Err(Error::invalid_request("cannot delete system router")); } let out = self.db_datastore.vpc_delete_router(opctx, &authz_router).await?; @@ -191,8 +195,47 @@ impl super::Nexus { kind: &RouterRouteKind, params: ¶ms::RouterRouteCreate, ) -> CreateResult { - let (.., authz_router) = - router_lookup.lookup_for(authz::Action::CreateChild).await?; + let (.., authz_router, db_router) = + router_lookup.fetch_for(authz::Action::CreateChild).await?; + + if db_router.kind == VpcRouterKind::System { + return Err(Error::invalid_request( + "user-provided routes cannot be added to a system router", + )); + } + + // Validate route destinations/targets at this stage: + // - mixed explicit v4 and v6 are disallowed. + // - users cannot specify 'Vpc' as a custom router dest/target. + // - users cannot specify 'Subnet' as a custom router target. + // - the only internet gateway we support today is 'outbound'. + match (¶ms.destination, ¶ms.target) { + (RouteDestination::Ip(IpAddr::V4(_)), RouteTarget::Ip(IpAddr::V4(_))) + | (RouteDestination::Ip(IpAddr::V6(_)), RouteTarget::Ip(IpAddr::V6(_))) + | (RouteDestination::IpNet(IpNet::V4(_)), RouteTarget::Ip(IpAddr::V4(_))) + | (RouteDestination::IpNet(IpNet::V6(_)), RouteTarget::Ip(IpAddr::V6(_))) => {}, + + (RouteDestination::Ip(_), RouteTarget::Ip(_)) + | (RouteDestination::IpNet(_), RouteTarget::Ip(_)) + => return Err(Error::invalid_request( + "cannot mix explicit IPv4 and IPv6 addresses between destination and target" + )), + + (RouteDestination::Vpc(_), _) | (_, RouteTarget::Vpc(_)) => return Err(Error::invalid_request( + "VPCs cannot be used as a destination or target in custom routers" + )), + + (_, RouteTarget::Subnet(_)) => return Err(Error::invalid_request( + "subnets cannot be used as a target in custom routers" + )), + + (_, RouteTarget::InternetGateway(n)) if n.as_str() != "outbound" => return Err(Error::invalid_request( + "'outbound' is currently the only valid internet gateway" + )), + + _ => {}, + }; + let id = Uuid::new_v4(); let route = db::model::RouterRoute::new( id, @@ -229,21 +272,31 @@ impl super::Nexus { route_lookup: &lookup::RouterRoute<'_>, params: ¶ms::RouterRouteUpdate, ) -> UpdateResult { - let (.., vpc, authz_router, authz_route, db_route) = + let (.., authz_router, authz_route, db_route) = route_lookup.fetch_for(authz::Action::Modify).await?; - // TODO: Write a test for this once there's a way to test it (i.e. - // subnets automatically register to the system router table) + match db_route.kind.0 { - RouterRouteKind::Custom | RouterRouteKind::Default => (), + // Default routes allow a constrained form of modification: + // only the target may change. + RouterRouteKind::Default if + params.identity.name.is_some() + || params.identity.description.is_some() + || params.destination != db_route.destination.0 => { + return Err(Error::invalid_request( + "the destination and metadata of a Default route cannot be changed", + ))}, + + RouterRouteKind::Custom | RouterRouteKind::Default => {}, + _ => { return Err(Error::invalid_request(format!( - "routes of type {} from the system table of VPC {:?} \ + "routes of type {} within the system router \ are not modifiable", db_route.kind.0, - vpc.id() ))); } } + let out = self .db_datastore .router_update_route(&opctx, &authz_route, params.clone().into()) diff --git a/nexus/src/app/vpc_subnet.rs b/nexus/src/app/vpc_subnet.rs index 478e1af9f9..ce0cd423f4 100644 --- a/nexus/src/app/vpc_subnet.rs +++ b/nexus/src/app/vpc_subnet.rs @@ -108,7 +108,7 @@ impl super::Nexus { // See for // details. let subnet_id = Uuid::new_v4(); - let out = match params.ipv6_block { + let mut out = match params.ipv6_block { None => { const NUM_RETRIES: usize = 2; let mut retry = 0; @@ -214,6 +214,23 @@ impl super::Nexus { } }?; + // XX: rollback the creation if this fails? + if let Some(custom_router) = ¶ms.custom_router { + let (.., authz_subnet) = LookupPath::new(opctx, &self.db_datastore) + .vpc_subnet_id(out.id()) + .lookup_for(authz::Action::Modify) + .await?; + + out = self + .vpc_subnet_update_custom_router( + opctx, + &authz_vpc, + &authz_subnet, + Some(custom_router), + ) + .await?; + } + self.vpc_needed_notify_sleds(); Ok(out) @@ -236,8 +253,18 @@ impl super::Nexus { vpc_subnet_lookup: &lookup::VpcSubnet<'_>, params: ¶ms::VpcSubnetUpdate, ) -> UpdateResult { - let (.., authz_subnet) = + let (.., authz_vpc, authz_subnet) = vpc_subnet_lookup.lookup_for(authz::Action::Modify).await?; + + // Updating the custom router is a separate action. + self.vpc_subnet_update_custom_router( + opctx, + &authz_vpc, + &authz_subnet, + params.custom_router.as_ref(), + ) + .await?; + let out = self .db_datastore .vpc_update_subnet(&opctx, &authz_subnet, params.clone().into()) @@ -248,6 +275,68 @@ impl super::Nexus { Ok(out) } + async fn vpc_subnet_update_custom_router( + &self, + opctx: &OpContext, + authz_vpc: &authz::Vpc, + authz_subnet: &authz::VpcSubnet, + custom_router: Option<&NameOrId>, + ) -> UpdateResult { + // Resolve the VPC router, if specified. + let router_lookup = match custom_router { + Some(key @ NameOrId::Name(_)) => self + .vpc_router_lookup( + opctx, + params::RouterSelector { + project: None, + vpc: Some(NameOrId::Id(authz_vpc.id())), + router: key.clone(), + }, + ) + .map(Some), + Some(key @ NameOrId::Id(_)) => self + .vpc_router_lookup( + opctx, + params::RouterSelector { + project: None, + vpc: None, + router: key.clone(), + }, + ) + .map(Some), + None => Ok(None), + }?; + + let router_lookup = if let Some(l) = router_lookup { + let (.., rtr_authz_vpc, authz_router) = + l.lookup_for(authz::Action::Read).await?; + + if authz_vpc.id() != rtr_authz_vpc.id() { + return Err(Error::invalid_request( + "router and subnet must belong to the same VPC", + )); + } + + Some(authz_router) + } else { + None + }; + + if let Some(authz_router) = router_lookup { + self.db_datastore + .vpc_subnet_set_custom_router( + opctx, + &authz_subnet, + &authz_router, + ) + .await + } else { + self.db_datastore + .vpc_subnet_unset_custom_router(opctx, &authz_subnet) + .await + } + } + pub(crate) async fn vpc_delete_subnet( &self, opctx: &OpContext, diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index e814df2b61..2678768b48 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5446,7 +5446,6 @@ async fn vpc_firewall_rules_update( method = GET, path = "/v1/vpc-routers", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_list( rqctx: RequestContext, @@ -5486,7 +5485,6 @@ async fn vpc_router_list( method = GET, path = "/v1/vpc-routers/{router}", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_view( rqctx: RequestContext, @@ -5520,7 +5518,6 @@ async fn vpc_router_view( method = POST, path = "/v1/vpc-routers", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_create( rqctx: RequestContext, @@ -5556,7 +5553,6 @@ async fn vpc_router_create( method = DELETE, path = "/v1/vpc-routers/{router}", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_delete( rqctx: RequestContext, @@ -5590,7 +5586,6 @@ async fn vpc_router_delete( method = PUT, path = "/v1/vpc-routers/{router}", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_update( rqctx: RequestContext, @@ -5630,7 +5625,6 @@ async fn vpc_router_update( method = GET, path = "/v1/vpc-router-routes", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_route_list( rqctx: RequestContext, @@ -5672,7 +5666,6 @@ async fn vpc_router_route_list( method = GET, path = "/v1/vpc-router-routes/{route}", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_route_view( rqctx: RequestContext, @@ -5709,7 +5702,6 @@ async fn vpc_router_route_view( method = POST, path = "/v1/vpc-router-routes", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_route_create( rqctx: RequestContext, @@ -5745,7 +5737,6 @@ async fn vpc_router_route_create( method = DELETE, path = "/v1/vpc-router-routes/{route}", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_route_delete( rqctx: RequestContext, @@ -5781,7 +5772,6 @@ async fn vpc_router_route_delete( method = PUT, path = "/v1/vpc-router-routes/{route}", tags = ["vpcs"], - unpublished = true, }] async fn vpc_router_route_update( rqctx: RequestContext, diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 0eab038f91..7732e00d70 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -37,6 +37,7 @@ omicron-uuid-kinds.workspace = true oximeter.workspace = true oximeter-collector.workspace = true oximeter-producer.workspace = true +oxnet.workspace = true serde.workspace = true serde_json.workspace = true serde_urlencoded.workspace = true diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 97fd66f949..7d69e6b3b0 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -688,6 +688,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { slot: 0, subnet: (*NEXUS_OPTE_IPV4_SUBNET).into(), vni: Vni::SERVICES_VNI, + transit_ips: vec![], }, }), }); @@ -1048,6 +1049,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { slot: 0, subnet: (*DNS_OPTE_IPV4_SUBNET).into(), vni: Vni::SERVICES_VNI, + transit_ips: vec![], }, }, ), diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 2aef32d37c..ccebffd197 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -26,6 +26,7 @@ use nexus_types::external_api::views::FloatingIp; use nexus_types::external_api::views::IpPool; use nexus_types::external_api::views::IpPoolRange; use nexus_types::external_api::views::User; +use nexus_types::external_api::views::VpcSubnet; use nexus_types::external_api::views::{Project, Silo, Vpc, VpcRouter}; use nexus_types::identity::Resource; use nexus_types::internal_api::params as internal_params; @@ -36,12 +37,17 @@ use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Instance; use omicron_common::api::external::InstanceCpuCount; use omicron_common::api::external::NameOrId; +use omicron_common::api::external::RouteDestination; +use omicron_common::api::external::RouteTarget; +use omicron_common::api::external::RouterRoute; use omicron_common::disk::DiskIdentity; use omicron_sled_agent::sim::SledAgent; use omicron_test_utils::dev::poll::wait_for_condition; use omicron_test_utils::dev::poll::CondCheckError; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::ZpoolUuid; +use oxnet::Ipv4Net; +use oxnet::Ipv6Net; use slog::debug; use std::net::IpAddr; use std::sync::Arc; @@ -559,6 +565,32 @@ pub async fn create_vpc_with_error( .unwrap() } +pub async fn create_vpc_subnet( + client: &ClientTestContext, + project_name: &str, + vpc_name: &str, + subnet_name: &str, + ipv4_block: Ipv4Net, + ipv6_block: Option, + custom_router: Option<&str>, +) -> VpcSubnet { + object_create( + &client, + &format!("/v1/vpc-subnets?project={project_name}&vpc={vpc_name}"), + ¶ms::VpcSubnetCreate { + identity: IdentityMetadataCreateParams { + name: subnet_name.parse().unwrap(), + description: "vpc description".to_string(), + }, + ipv4_block, + ipv6_block, + custom_router: custom_router + .map(|n| NameOrId::Name(n.parse().unwrap())), + }, + ) + .await +} + pub async fn create_router( client: &ClientTestContext, project_name: &str, @@ -584,6 +616,78 @@ pub async fn create_router( .unwrap() } +pub async fn create_route( + client: &ClientTestContext, + project_name: &str, + vpc_name: &str, + router_name: &str, + route_name: &str, + destination: RouteDestination, + target: RouteTarget, +) -> RouterRoute { + NexusRequest::objects_post( + &client, + format!( + "/v1/vpc-router-routes?project={}&vpc={}&router={}", + &project_name, &vpc_name, &router_name + ) + .as_str(), + ¶ms::RouterRouteCreate { + identity: IdentityMetadataCreateParams { + name: route_name.parse().unwrap(), + description: String::from("route description"), + }, + target, + destination, + }, + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} + +#[allow(clippy::too_many_arguments)] +pub async fn create_route_with_error( + client: &ClientTestContext, + project_name: &str, + vpc_name: &str, + router_name: &str, + route_name: &str, + destination: RouteDestination, + target: RouteTarget, + status: StatusCode, +) -> HttpErrorResponseBody { + NexusRequest::new( + RequestBuilder::new( + client, + Method::POST, + format!( + "/v1/vpc-router-routes?project={}&vpc={}&router={}", + &project_name, &vpc_name, &router_name + ) + .as_str(), + ) + .body(Some(¶ms::RouterRouteCreate { + identity: IdentityMetadataCreateParams { + name: route_name.parse().unwrap(), + description: String::from("route description"), + }, + target, + destination, + })) + .expect_status(Some(status)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} + pub async fn assert_ip_pool_utilization( client: &ClientTestContext, pool_name: &str, diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index ca46a8bf06..a8e12ae5d9 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -202,6 +202,7 @@ pub static DEMO_VPC_SUBNET_CREATE: Lazy = }, ipv4_block: "10.1.2.3/8".parse().unwrap(), ipv6_block: None, + custom_router: None, }); // VPC Router used for testing @@ -461,6 +462,7 @@ pub static DEMO_INSTANCE_NIC_PUT: Lazy = description: Some(String::from("an updated description")), }, primary: false, + transit_ips: vec![], }); pub static DEMO_CERTIFICATE_NAME: Lazy = @@ -1513,6 +1515,7 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { name: None, description: Some("different".to_string()) }, + custom_router: None, }).unwrap() ), AllowedMethod::Delete, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 75ddf847bf..f17fc3732a 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -1801,6 +1801,7 @@ async fn test_instance_with_new_custom_network_interfaces( }, ipv4_block: "172.31.0.0/24".parse().unwrap(), ipv6_block: None, + custom_router: None, }; let _response = NexusRequest::objects_post( client, @@ -1947,6 +1948,7 @@ async fn test_instance_create_delete_network_interface( }, ipv4_block: "172.31.0.0/24".parse().unwrap(), ipv6_block: None, + custom_router: None, }; let _response = NexusRequest::objects_post( client, @@ -2188,6 +2190,7 @@ async fn test_instance_update_network_interfaces( }, ipv4_block: "172.31.0.0/24".parse().unwrap(), ipv6_block: None, + custom_router: None, }; let _response = NexusRequest::objects_post( client, @@ -2287,6 +2290,7 @@ async fn test_instance_update_network_interfaces( description: Some(new_description.clone()), }, primary: false, + transit_ips: vec![], }; // Verify we fail to update the NIC when the instance is running @@ -2363,6 +2367,7 @@ async fn test_instance_update_network_interfaces( description: None, }, primary: true, + transit_ips: vec![], }; let updated_primary_iface1 = NexusRequest::object_put( client, @@ -2456,6 +2461,7 @@ async fn test_instance_update_network_interfaces( description: None, }, primary: true, + transit_ips: vec![], }; let new_primary_iface = NexusRequest::object_put( client, @@ -4812,7 +4818,7 @@ pub async fn assert_sled_vpc_routes( datastore: &DataStore, subnet_id: Uuid, vni: Vni, -) { +) -> (HashSet, HashSet) { let (.., authz_vpc, _, db_subnet) = LookupPath::new(opctx, datastore) .vpc_subnet_id(subnet_id) .fetch() @@ -4874,6 +4880,8 @@ pub async fn assert_sled_vpc_routes( ) .await .expect("matching vpc routes should be present"); + + (system_routes, custom_routes) } /// Simulate completion of an ongoing instance state transition. To do this, we diff --git a/nexus/tests/integration_tests/router_routes.rs b/nexus/tests/integration_tests/router_routes.rs index 79a5db8eaf..38f4ecec9a 100644 --- a/nexus/tests/integration_tests/router_routes.rs +++ b/nexus/tests/integration_tests/router_routes.rs @@ -2,14 +2,21 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use dropshot::test_util::ClientTestContext; use dropshot::Method; use http::StatusCode; +use itertools::Itertools; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::identity_eq; +use nexus_test_utils::resource_helpers::create_route; +use nexus_test_utils::resource_helpers::create_route_with_error; +use nexus_test_utils::resource_helpers::object_put; +use nexus_test_utils::resource_helpers::object_put_error; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; +use nexus_types::external_api::params::RouterRouteUpdate; use omicron_common::api::external::SimpleIdentity; use omicron_common::api::external::{ IdentityMetadataCreateParams, IdentityMetadataUpdateParams, @@ -23,40 +30,39 @@ use nexus_test_utils::resource_helpers::{ create_project, create_router, create_vpc, }; +use crate::integration_tests::vpc_routers::PROJECT_NAME; +use crate::integration_tests::vpc_routers::ROUTER_NAMES; +use crate::integration_tests::vpc_routers::VPC_NAME; + type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; -#[nexus_test] -async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - let project_name = "springfield-squidport"; - let vpc_name = "vpc1"; - let router_name = "router1"; - - let get_routes_url = |router_name: &str| -> String { - format!( - "/v1/vpc-router-routes?project={}&vpc={}&router={}", - project_name, vpc_name, router_name - ) - }; - - let get_route_url = |router_name: &str, route_name: &str| -> String { - format!( - "/v1/vpc-router-routes/{}?project={}&vpc={}&router={}", - route_name, project_name, vpc_name, router_name - ) - }; - - let _ = create_project(&client, project_name).await; +fn get_routes_url(vpc_name: &str, router_name: &str) -> String { + format!( + "/v1/vpc-router-routes?project={}&vpc={}&router={}", + PROJECT_NAME, vpc_name, router_name + ) +} - // Create a vpc - create_vpc(&client, project_name, vpc_name).await; +fn get_route_url( + vpc_name: &str, + router_name: &str, + route_name: &str, +) -> String { + format!( + "/v1/vpc-router-routes/{}?project={}&vpc={}&router={}", + route_name, PROJECT_NAME, vpc_name, router_name + ) +} +async fn get_system_routes( + client: &ClientTestContext, + vpc_name: &str, +) -> [RouterRoute; 3] { // Get the system router's routes let system_router_routes = objects_list_page_authz::( client, - get_routes_url("system").as_str(), + get_routes_url(vpc_name, "system").as_str(), ) .await .items; @@ -86,6 +92,27 @@ async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { let subnet_route = subnet_route.expect("no default subnet route found in system router"); + [v4_route, v6_route, subnet_route] +} + +#[nexus_test] +async fn test_router_routes_crud_operations( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + let vpc_name = "vpc1"; + let router_name = "router1"; + + let _ = create_project(&client, PROJECT_NAME).await; + + // Create a vpc + create_vpc(&client, PROJECT_NAME, vpc_name).await; + + // Get the system router's routes + let [v4_route, v6_route, subnet_route] = + get_system_routes(client, vpc_name).await; + // Deleting any default system route is disallowed. for route in &[&v4_route, &v6_route, &subnet_route] { let error: dropshot::HttpErrorResponseBody = @@ -93,7 +120,8 @@ async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { client, StatusCode::BAD_REQUEST, Method::DELETE, - get_route_url("system", route.name().as_str()).as_str(), + get_route_url(vpc_name, "system", route.name().as_str()) + .as_str(), ) .authn_as(AuthnMode::PrivilegedUser) .execute() @@ -105,12 +133,12 @@ async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { } // Create a custom router - create_router(&client, project_name, vpc_name, router_name).await; + create_router(&client, PROJECT_NAME, vpc_name, router_name).await; // Get routes list for custom router let routes = objects_list_page_authz::( client, - get_routes_url(router_name).as_str(), + get_routes_url(vpc_name, router_name).as_str(), ) .await .items; @@ -118,12 +146,12 @@ async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { assert_eq!(routes.len(), 0); let route_name = "custom-route"; - let route_url = get_route_url(router_name, route_name); + let route_url = get_route_url(vpc_name, router_name, route_name); // Create a new custom route let route_created: RouterRoute = NexusRequest::objects_post( client, - get_routes_url(router_name).as_str(), + get_routes_url(vpc_name, router_name).as_str(), ¶ms::RouterRouteCreate { identity: IdentityMetadataCreateParams { name: route_name.parse().unwrap(), @@ -205,10 +233,307 @@ async fn test_router_routes(cptestctx: &ControlPlaneTestContext) { client, StatusCode::NOT_FOUND, Method::GET, - get_route_url(router_name, route_name).as_str(), + get_route_url(vpc_name, router_name, route_name).as_str(), ) .authn_as(AuthnMode::PrivilegedUser) .execute() .await .unwrap(); } + +#[nexus_test] +async fn test_router_routes_disallow_mixed_v4_v6( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let _ = create_project(&client, PROJECT_NAME).await; + let _ = create_vpc(&client, PROJECT_NAME, VPC_NAME).await; + + let router_name = ROUTER_NAMES[0]; + let _router = + create_router(&client, PROJECT_NAME, VPC_NAME, router_name).await; + + // Some targets/strings refer to a mixed v4/v6 entity, e.g., + // subnet or instance. Others refer to one kind only (ipnet, ip). + // Users should not be able to mix v4 and v6 in these latter routes + // -- route resolution will ignore them, but a helpful error message + // is more useful. + let dest_set: [RouteDestination; 5] = [ + "ip:4.4.4.4".parse().unwrap(), + "ipnet:4.4.4.0/24".parse().unwrap(), + "ip:2001:4860:4860::8888".parse().unwrap(), + "ipnet:2001:4860:4860::/64".parse().unwrap(), + "subnet:named-subnet".parse().unwrap(), + ]; + + let target_set: [RouteTarget; 5] = [ + "ip:172.30.0.5".parse().unwrap(), + "ip:fd37:faf4:cc25::5".parse().unwrap(), + "instance:named-instance".parse().unwrap(), + "inetgw:outbound".parse().unwrap(), + "drop".parse().unwrap(), + ]; + + for (i, (dest, target)) in dest_set + .into_iter() + .cartesian_product(target_set.into_iter()) + .enumerate() + { + use RouteDestination as Rd; + use RouteTarget as Rt; + let allowed = match (&dest, &target) { + (Rd::Ip(IpAddr::V4(_)), Rt::Ip(IpAddr::V4(_))) + | (Rd::Ip(IpAddr::V6(_)), Rt::Ip(IpAddr::V6(_))) + | (Rd::IpNet(IpNet::V4(_)), Rt::Ip(IpAddr::V4(_))) + | (Rd::IpNet(IpNet::V6(_)), Rt::Ip(IpAddr::V6(_))) => true, + (Rd::Ip(_), Rt::Ip(_)) | (Rd::IpNet(_), Rt::Ip(_)) => false, + _ => true, + }; + + let route_name = format!("test-route-{i}"); + + if allowed { + create_route( + client, + PROJECT_NAME, + VPC_NAME, + router_name, + &route_name, + dest, + target, + ) + .await; + } else { + let err = create_route_with_error( + client, + PROJECT_NAME, + VPC_NAME, + router_name, + &route_name, + dest, + target, + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + err.message, + "cannot mix explicit IPv4 and IPv6 addresses between destination and target" + ); + } + } +} + +#[nexus_test] +async fn test_router_routes_modify_system_routes( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let _ = create_project(&client, PROJECT_NAME).await; + let _ = create_vpc(&client, PROJECT_NAME, VPC_NAME).await; + + // Attempting to add a new route to a system router should fail. + let err = create_route_with_error( + client, + PROJECT_NAME, + VPC_NAME, + "system", + "bad-route", + "ipnet:240.0.0.0/8".parse().unwrap(), + "inetgw:outbound".parse().unwrap(), + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + err.message, + "user-provided routes cannot be added to a system router" + ); + + // Get the system router's routes + let [v4_route, v6_route, subnet_route] = + get_system_routes(client, VPC_NAME).await; + + // Attempting to modify a VPC subnet route should fail. + // Deletes are tested above. + let err = object_put_error( + client, + &get_route_url(VPC_NAME, "system", subnet_route.name().as_str()) + .as_str(), + &RouterRouteUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + target: "drop".parse().unwrap(), + destination: "subnet:default".parse().unwrap(), + }, + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + err.message, + "routes of type VpcSubnet within the system router are not modifiable" + ); + + // Modifying the target of a Default (gateway) route should succeed. + let v4_route: RouterRoute = object_put( + client, + &get_route_url(VPC_NAME, "system", v4_route.name().as_str()).as_str(), + &RouterRouteUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + destination: v4_route.destination, + target: "drop".parse().unwrap(), + }, + ) + .await; + assert_eq!(v4_route.target, RouteTarget::Drop); + + let v6_route: RouterRoute = object_put( + client, + &get_route_url(VPC_NAME, "system", v6_route.name().as_str()).as_str(), + &RouterRouteUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + destination: v6_route.destination, + target: "drop".parse().unwrap(), + }, + ) + .await; + assert_eq!(v6_route.target, RouteTarget::Drop); + + // Modifying the *destination* should not. + let err = object_put_error( + client, + &get_route_url(VPC_NAME, "system", v4_route.name().as_str()).as_str(), + &RouterRouteUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + destination: "ipnet:10.0.0.0/8".parse().unwrap(), + target: "drop".parse().unwrap(), + }, + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + err.message, + "the destination and metadata of a Default route cannot be changed", + ); +} + +#[nexus_test] +async fn test_router_routes_internet_gateway_target( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let _ = create_project(&client, PROJECT_NAME).await; + let _ = create_vpc(&client, PROJECT_NAME, VPC_NAME).await; + let router_name = ROUTER_NAMES[0]; + let _router = + create_router(&client, PROJECT_NAME, VPC_NAME, router_name).await; + + // Internet gateways are not fully supported: only 'inetgw:outbound' + // is a valid choice. + let dest: RouteDestination = "ipnet:240.0.0.0/8".parse().unwrap(); + + let err = create_route_with_error( + client, + PROJECT_NAME, + VPC_NAME, + &router_name, + "bad-route", + dest.clone(), + "inetgw:not-a-real-gw".parse().unwrap(), + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + err.message, + "'outbound' is currently the only valid internet gateway" + ); + + // This can be used in a custom router, in addition + // to its default system spot. + let target: RouteTarget = "inetgw:outbound".parse().unwrap(); + let route = create_route( + client, + PROJECT_NAME, + VPC_NAME, + router_name, + "good-route", + dest.clone(), + target.clone(), + ) + .await; + assert_eq!(route.destination, dest); + assert_eq!(route.target, target); +} + +#[nexus_test] +async fn test_router_routes_disallow_custom_targets( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let _ = create_project(&client, PROJECT_NAME).await; + let _ = create_vpc(&client, PROJECT_NAME, VPC_NAME).await; + let router_name = ROUTER_NAMES[0]; + let _router = + create_router(&client, PROJECT_NAME, VPC_NAME, router_name).await; + + // Neither 'vpc:xxx' nor 'subnet:xxx' can be specified as route targets + // in custom routers. + let dest: RouteDestination = "ipnet:240.0.0.0/8".parse().unwrap(); + + let err = create_route_with_error( + client, + PROJECT_NAME, + VPC_NAME, + &router_name, + "bad-route", + dest.clone(), + "vpc:a-vpc-name-unknown".parse().unwrap(), + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + err.message, + "VPCs cannot be used as a destination or target in custom routers" + ); + + let err = create_route_with_error( + client, + PROJECT_NAME, + VPC_NAME, + &router_name, + "bad-route", + "vpc:a-vpc-name-unknown".parse().unwrap(), + "drop".parse().unwrap(), + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + err.message, + "VPCs cannot be used as a destination or target in custom routers" + ); + + let err = create_route_with_error( + client, + PROJECT_NAME, + VPC_NAME, + &router_name, + "bad-route", + dest.clone(), + "subnet:a-vpc-name-unknown".parse().unwrap(), + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!( + err.message, + "subnets cannot be used as a target in custom routers" + ); +} diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 794c769da4..8e1f5834c5 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -111,6 +111,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) { // Use the minimum subnet size ipv4_block: subnet, ipv6_block: None, + custom_router: None, }; NexusRequest::objects_post(client, &subnets_url, &Some(&subnet_create)) .authn_as(AuthnMode::PrivilegedUser) diff --git a/nexus/tests/integration_tests/vpc_routers.rs b/nexus/tests/integration_tests/vpc_routers.rs index 0b931efbd7..d85a8cba8e 100644 --- a/nexus/tests/integration_tests/vpc_routers.rs +++ b/nexus/tests/integration_tests/vpc_routers.rs @@ -2,50 +2,91 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use crate::integration_tests::instances::assert_sled_vpc_routes; +use crate::integration_tests::instances::instance_simulate; +use dropshot::test_util::ClientTestContext; use http::method::Method; use http::StatusCode; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::lookup::LookupPath; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::identity_eq; +use nexus_test_utils::resource_helpers::create_default_ip_pool; +use nexus_test_utils::resource_helpers::create_instance_with; +use nexus_test_utils::resource_helpers::create_route; use nexus_test_utils::resource_helpers::create_router; +use nexus_test_utils::resource_helpers::create_vpc_subnet; +use nexus_test_utils::resource_helpers::object_delete; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::{create_project, create_vpc}; +use nexus_test_utils::resource_helpers::{object_put, object_put_error}; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; +use nexus_types::external_api::params::InstanceNetworkInterfaceAttachment; +use nexus_types::external_api::params::InstanceNetworkInterfaceCreate; +use nexus_types::external_api::params::VpcSubnetUpdate; use nexus_types::external_api::views::VpcRouter; use nexus_types::external_api::views::VpcRouterKind; +use nexus_types::external_api::views::VpcSubnet; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::IdentityMetadataUpdateParams; +use omicron_common::api::external::NameOrId; +use omicron_common::api::external::SimpleIdentity; +use omicron_common::api::internal::shared::ResolvedVpcRoute; +use omicron_common::api::internal::shared::RouterTarget; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::InstanceUuid; +use std::collections::HashMap; + +pub const PROJECT_NAME: &str = "cartographer"; +pub const VPC_NAME: &str = "the-isles"; +pub const SUBNET_NAMES: &[&str] = &["scotia", "albion", "eire"]; +const INSTANCE_NAMES: &[&str] = &["glaschu", "londinium"]; +pub const ROUTER_NAMES: &[&str] = &["cycle-network", "motorways"]; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; #[nexus_test] -async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { +async fn test_vpc_routers_crud_operations(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; // Create a project that we'll use for testing. - let project_name = "springfield-squidport"; - let _ = create_project(&client, project_name).await; + let _ = create_project(&client, PROJECT_NAME).await; // Create a VPC. - let vpc_name = "vpc1"; - let vpc = create_vpc(&client, project_name, vpc_name).await; + let vpc = create_vpc(&client, PROJECT_NAME, VPC_NAME).await; let routers_url = - format!("/v1/vpc-routers?project={}&vpc={}", project_name, vpc_name); + format!("/v1/vpc-routers?project={}&vpc={}", PROJECT_NAME, VPC_NAME); // get routers should have only the system router created w/ the VPC - let routers = - objects_list_page_authz::(client, &routers_url).await.items; + let routers = list_routers(client, &VPC_NAME).await; assert_eq!(routers.len(), 1); assert_eq!(routers[0].kind, VpcRouterKind::System); - let router_name = "router1"; + // This router should not be deletable. + let system_router_url = format!("/v1/vpc-routers/{}", routers[0].id()); + let error: dropshot::HttpErrorResponseBody = NexusRequest::expect_failure( + client, + StatusCode::BAD_REQUEST, + Method::DELETE, + &system_router_url, + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(error.message, "cannot delete system router"); + + let router_name = ROUTER_NAMES[0]; let router_url = format!( "/v1/vpc-routers/{}?project={}&vpc={}", - router_name, project_name, vpc_name + router_name, PROJECT_NAME, VPC_NAME ); // fetching a particular router should 404 @@ -61,11 +102,14 @@ async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { .unwrap() .parsed_body() .unwrap(); - assert_eq!(error.message, "not found: vpc-router with name \"router1\""); + assert_eq!( + error.message, + format!("not found: vpc-router with name \"{router_name}\"") + ); // Create a VPC Router. let router = - create_router(&client, project_name, vpc_name, router_name).await; + create_router(&client, PROJECT_NAME, VPC_NAME, router_name).await; assert_eq!(router.identity.name, router_name); assert_eq!(router.identity.description, "router description"); assert_eq!(router.vpc_id, vpc.identity.id); @@ -82,7 +126,7 @@ async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { routers_eq(&router, &same_router); // routers list should now have the one in it - let routers = objects_list_page_authz(client, &routers_url).await.items; + let routers = list_routers(client, &VPC_NAME).await; assert_eq!(routers.len(), 2); routers_eq(&routers[0], &router); @@ -103,12 +147,15 @@ async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { .unwrap() .parsed_body() .unwrap(); - assert_eq!(error.message, "already exists: vpc-router \"router1\""); + assert_eq!( + error.message, + format!("already exists: vpc-router \"{router_name}\"") + ); - let router2_name = "router2"; + let router2_name = ROUTER_NAMES[1]; let router2_url = format!( "/v1/vpc-routers/{}?project={}&vpc={}", - router2_name, project_name, vpc_name + router2_name, PROJECT_NAME, VPC_NAME ); // second router 404s before it's created @@ -124,18 +171,20 @@ async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { .unwrap() .parsed_body() .unwrap(); - assert_eq!(error.message, "not found: vpc-router with name \"router2\""); + assert_eq!( + error.message, + format!("not found: vpc-router with name \"{router2_name}\"") + ); // create second custom router let router2 = - create_router(client, project_name, vpc_name, router2_name).await; + create_router(client, PROJECT_NAME, VPC_NAME, router2_name).await; assert_eq!(router2.identity.name, router2_name); assert_eq!(router2.vpc_id, vpc.identity.id); assert_eq!(router2.kind, VpcRouterKind::Custom); // routers list should now have two custom and one system - let routers = - objects_list_page_authz::(client, &routers_url).await.items; + let routers = list_routers(client, &VPC_NAME).await; assert_eq!(routers.len(), 3); routers_eq(&routers[0], &router); routers_eq(&routers[1], &router2); @@ -175,11 +224,14 @@ async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { .unwrap() .parsed_body() .unwrap(); - assert_eq!(error.message, "not found: vpc-router with name \"router1\""); + assert_eq!( + error.message, + format!("not found: vpc-router with name \"{router_name}\"") + ); let router_url = format!( "/v1/vpc-routers/new-name?project={}&vpc={}", - project_name, vpc_name + PROJECT_NAME, VPC_NAME ); // fetching by new name works @@ -191,14 +243,17 @@ async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { .unwrap() .parsed_body() .unwrap(); + routers_eq(&update, &updated_router); assert_eq!(&updated_router.identity.description, "another description"); // fetching list should show updated one - let routers = - objects_list_page_authz::(client, &routers_url).await.items; + let routers = list_routers(client, &VPC_NAME).await; assert_eq!(routers.len(), 3); - routers_eq(&routers[0], &updated_router); + routers_eq( + &routers.iter().find(|v| v.name().as_str() == "new-name").unwrap(), + &updated_router, + ); // delete first router NexusRequest::object_delete(&client, &router_url) @@ -208,8 +263,7 @@ async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { .unwrap(); // routers list should now have two again, one system and one custom - let routers = - objects_list_page_authz::(client, &routers_url).await.items; + let routers = list_routers(client, &VPC_NAME).await; assert_eq!(routers.len(), 2); routers_eq(&routers[0], &router2); @@ -245,14 +299,411 @@ async fn test_vpc_routers(cptestctx: &ControlPlaneTestContext) { // Creating a router with the same name in a different VPC is allowed let vpc2_name = "vpc2"; - let vpc2 = create_vpc(&client, project_name, vpc2_name).await; + let vpc2 = create_vpc(&client, PROJECT_NAME, vpc2_name).await; let router_same_name = - create_router(&client, project_name, vpc2_name, router2_name).await; + create_router(&client, PROJECT_NAME, vpc2_name, router2_name).await; assert_eq!(router_same_name.identity.name, router2_name); assert_eq!(router_same_name.vpc_id, vpc2.identity.id); } +#[nexus_test] +async fn test_vpc_routers_attach_to_subnet( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + // Create a project that we'll use for testing. + let _ = create_project(&client, PROJECT_NAME).await; + let _ = create_vpc(&client, PROJECT_NAME, VPC_NAME).await; + + let subnet_name = "default"; + + let subnets_url = + format!("/v1/vpc-subnets?project={}&vpc={}", PROJECT_NAME, VPC_NAME); + + // get routers should have only the system router created w/ the VPC + let routers = list_routers(client, VPC_NAME).await; + assert_eq!(routers.len(), 1); + assert_eq!(routers[0].kind, VpcRouterKind::System); + + // Create a custom router for later use. + let router_name = ROUTER_NAMES[0]; + let router = + create_router(&client, PROJECT_NAME, VPC_NAME, router_name).await; + assert_eq!(router.kind, VpcRouterKind::Custom); + + // Attaching a system router should fail. + let err = object_put_error( + client, + &format!( + "/v1/vpc-subnets/{subnet_name}?project={PROJECT_NAME}&vpc={VPC_NAME}" + ), + &VpcSubnetUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + custom_router: Some(routers[0].identity.id.into()), + }, + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!(err.message, "cannot attach a system router to a VPC subnet"); + + // Attaching a new custom router should succeed. + let default_subnet = set_custom_router( + client, + "default", + VPC_NAME, + Some(router.identity.id.into()), + ) + .await; + assert_eq!(default_subnet.custom_router_id, Some(router.identity.id)); + + // Attaching a custom router to another subnet (same VPC) should succeed: + // ... at create time. + let subnet2_name = SUBNET_NAMES[0]; + let subnet2 = create_vpc_subnet( + &client, + &PROJECT_NAME, + &VPC_NAME, + &subnet2_name, + "192.168.0.0/24".parse().unwrap(), + None, + Some(router_name), + ) + .await; + assert_eq!(subnet2.custom_router_id, Some(router.identity.id)); + + // ... and via update. + let subnet3_name = SUBNET_NAMES[1]; + let _ = create_vpc_subnet( + &client, + &PROJECT_NAME, + &VPC_NAME, + &subnet3_name, + "192.168.1.0/24".parse().unwrap(), + None, + None, + ) + .await; + + let subnet3 = set_custom_router( + client, + subnet3_name, + VPC_NAME, + Some(router.identity.id.into()), + ) + .await; + assert_eq!(subnet3.custom_router_id, Some(router.identity.id)); + + // Attaching a custom router to another VPC's subnet should fail. + create_vpc(&client, PROJECT_NAME, "vpc1").await; + let err = object_put_error( + client, + &format!("/v1/vpc-subnets/default?project={PROJECT_NAME}&vpc=vpc1"), + &VpcSubnetUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + custom_router: Some(router.identity.id.into()), + }, + StatusCode::BAD_REQUEST, + ) + .await; + assert_eq!(err.message, "router and subnet must belong to the same VPC"); + + // Detach (and double detach) should succeed without issue. + let subnet3 = set_custom_router(client, subnet3_name, VPC_NAME, None).await; + assert_eq!(subnet3.custom_router_id, None); + let subnet3 = set_custom_router(client, subnet3_name, VPC_NAME, None).await; + assert_eq!(subnet3.custom_router_id, None); + + // Assigning a new router should not require that we first detach the old one. + let router2_name = ROUTER_NAMES[1]; + let router2 = + create_router(&client, PROJECT_NAME, VPC_NAME, router2_name).await; + let subnet2 = set_custom_router( + client, + subnet2_name, + VPC_NAME, + Some(router2.identity.id.into()), + ) + .await; + assert_eq!(subnet2.custom_router_id, Some(router2.identity.id)); + + // Reset subnet2 back to our first router. + let subnet2 = set_custom_router( + client, + subnet2_name, + VPC_NAME, + Some(router.identity.id.into()), + ) + .await; + assert_eq!(subnet2.custom_router_id, Some(router.identity.id)); + + // Deleting a custom router should detach from remaining subnets. + object_delete( + &client, + &format!( + "/v1/vpc-routers/{router_name}?vpc={VPC_NAME}&project={PROJECT_NAME}", + ), + ) + .await; + + for subnet in + objects_list_page_authz::(client, &subnets_url).await.items + { + assert!(subnet.custom_router_id.is_none(), "{subnet:?}"); + } +} + +#[nexus_test] +async fn test_vpc_routers_custom_delivered_to_instance( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.server_context(); + let nexus = &apictx.nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create some instances, one per subnet, and a default pool etc. + create_default_ip_pool(client).await; + create_project(client, PROJECT_NAME).await; + + let vpc = create_vpc(&client, PROJECT_NAME, VPC_NAME).await; + + let mut subnets = vec![]; + let mut instances = vec![]; + let mut instance_nics = HashMap::new(); + for (i, (subnet_name, instance_name)) in + SUBNET_NAMES.iter().zip(INSTANCE_NAMES.iter()).enumerate() + { + let subnet = create_vpc_subnet( + &client, + PROJECT_NAME, + VPC_NAME, + subnet_name, + format!("192.168.{i}.0/24").parse().unwrap(), + None, + None, + ) + .await; + + let instance = create_instance_with( + client, + PROJECT_NAME, + instance_name, + &InstanceNetworkInterfaceAttachment::Create(vec![ + InstanceNetworkInterfaceCreate { + identity: IdentityMetadataCreateParams { + name: format!("nic-{i}").parse().unwrap(), + description: "".into(), + }, + vpc_name: vpc.name().clone(), + subnet_name: subnet_name.parse().unwrap(), + ip: Some(format!("192.168.{i}.10").parse().unwrap()), + }, + ]), + vec![], + vec![], + true, + ) + .await; + instance_simulate( + nexus, + &InstanceUuid::from_untyped_uuid(instance.identity.id), + ) + .await; + + let (.., authz_instance) = LookupPath::new(&opctx, &datastore) + .instance_id(instance.identity.id) + .lookup_for(nexus_db_queries::authz::Action::Read) + .await + .unwrap(); + + let guest_nics = datastore + .derive_guest_network_interface_info(&opctx, &authz_instance) + .await + .unwrap(); + + instance_nics.insert(*instance_name, guest_nics); + subnets.push(subnet); + instances.push(instance); + } + + let sled_agent = &cptestctx.sled_agent.sled_agent; + + // Create some routers! + let mut routers = vec![]; + for router_name in ROUTER_NAMES { + let router = + create_router(&client, PROJECT_NAME, VPC_NAME, router_name).await; + + routers.push(router); + } + + let vni = instance_nics[INSTANCE_NAMES[0]][0].vni; + + // Installing a custom router onto a subnet with a live instance + // should install routes at that sled. We should only have one sled. + // First, assert the default state. + for subnet in &subnets { + let (_system, custom) = assert_sled_vpc_routes( + &sled_agent, + &opctx, + &datastore, + subnet.id(), + vni, + ) + .await; + + assert!(custom.is_empty()); + } + + // Push a distinct route into each router and attach to each subnet. + for i in 0..2 { + create_route( + &client, + PROJECT_NAME, + VPC_NAME, + ROUTER_NAMES[i], + "a-sharp-drop", + format!("ipnet:24{i}.0.0.0/8").parse().unwrap(), + "drop".parse().unwrap(), + ) + .await; + + set_custom_router( + &client, + SUBNET_NAMES[i], + VPC_NAME, + Some(NameOrId::Name(ROUTER_NAMES[i].parse().unwrap())), + ) + .await; + } + + // Re-verify, assert that new routes are resolved correctly. + // Vec<(System, Custom)>. + let mut last_routes = vec![]; + for subnet in &subnets { + last_routes.push( + assert_sled_vpc_routes( + &sled_agent, + &opctx, + &datastore, + subnet.id(), + vni, + ) + .await, + ); + } + + assert!(last_routes[0].1.contains(&ResolvedVpcRoute { + dest: "240.0.0.0/8".parse().unwrap(), + target: RouterTarget::Drop + })); + assert!(last_routes[1].1.contains(&ResolvedVpcRoute { + dest: "241.0.0.0/8".parse().unwrap(), + target: RouterTarget::Drop + })); + + // Adding a new route should propagate that out to sleds. + create_route( + &client, + PROJECT_NAME, + VPC_NAME, + ROUTER_NAMES[0], + "ncn-74", + "ipnet:2.0.7.0/24".parse().unwrap(), + format!("instance:{}", INSTANCE_NAMES[1]).parse().unwrap(), + ) + .await; + + let (new_system, new_custom) = assert_sled_vpc_routes( + &sled_agent, + &opctx, + &datastore, + subnets[0].id(), + vni, + ) + .await; + + assert_eq!(last_routes[0].0, new_system); + assert!(new_custom.contains(&ResolvedVpcRoute { + dest: "2.0.7.0/24".parse().unwrap(), + target: RouterTarget::Ip(instance_nics[INSTANCE_NAMES[1]][0].ip) + })); + + // Swapping router should change the installed routes at that sled. + set_custom_router( + &client, + SUBNET_NAMES[0], + VPC_NAME, + Some(NameOrId::Name(ROUTER_NAMES[1].parse().unwrap())), + ) + .await; + let (new_system, new_custom) = assert_sled_vpc_routes( + &sled_agent, + &opctx, + &datastore, + subnets[0].id(), + vni, + ) + .await; + assert_eq!(last_routes[0].0, new_system); + assert_eq!(last_routes[1].1, new_custom); + + // Unsetting a router should remove affected non-system routes. + set_custom_router(&client, SUBNET_NAMES[0], VPC_NAME, None).await; + let (new_system, new_custom) = assert_sled_vpc_routes( + &sled_agent, + &opctx, + &datastore, + subnets[0].id(), + vni, + ) + .await; + assert_eq!(last_routes[0].0, new_system); + assert!(new_custom.is_empty()); +} + +async fn set_custom_router( + client: &ClientTestContext, + subnet_name: &str, + vpc_name: &str, + custom_router: Option, +) -> VpcSubnet { + object_put( + client, + &format!( + "/v1/vpc-subnets/{subnet_name}?project={PROJECT_NAME}&vpc={vpc_name}" + ), + &VpcSubnetUpdate { + identity: IdentityMetadataUpdateParams { + name: None, + description: None, + }, + custom_router, + }, + ) + .await +} + +async fn list_routers( + client: &ClientTestContext, + vpc_name: &str, +) -> Vec { + let routers_url = + format!("/v1/vpc-routers?project={}&vpc={}", PROJECT_NAME, vpc_name); + let out = objects_list_page_authz::(client, &routers_url).await; + out.items +} + fn routers_eq(sn1: &VpcRouter, sn2: &VpcRouter) { identity_eq(&sn1.identity, &sn2.identity); assert_eq!(sn1.vpc_id, sn2.vpc_id); diff --git a/nexus/tests/integration_tests/vpc_subnets.rs b/nexus/tests/integration_tests/vpc_subnets.rs index 81e7156e8e..b12c43aecc 100644 --- a/nexus/tests/integration_tests/vpc_subnets.rs +++ b/nexus/tests/integration_tests/vpc_subnets.rs @@ -179,6 +179,7 @@ async fn test_vpc_subnets(cptestctx: &ControlPlaneTestContext) { }, ipv4_block, ipv6_block: Some(ipv6_block), + custom_router: None, }; let subnet: VpcSubnet = NexusRequest::objects_post(client, &subnets_url, &new_subnet) @@ -230,6 +231,7 @@ async fn test_vpc_subnets(cptestctx: &ControlPlaneTestContext) { }, ipv4_block, ipv6_block: Some(ipv6_block), + custom_router: None, }; let expected_error = format!( "IP address range '{}' conflicts with an existing subnet", @@ -257,6 +259,7 @@ async fn test_vpc_subnets(cptestctx: &ControlPlaneTestContext) { }, ipv4_block: other_ipv4_block, ipv6_block: other_ipv6_block, + custom_router: None, }; let error: dropshot::HttpErrorResponseBody = NexusRequest::new( RequestBuilder::new(client, Method::POST, &subnets_url) @@ -301,6 +304,7 @@ async fn test_vpc_subnets(cptestctx: &ControlPlaneTestContext) { }, ipv4_block, ipv6_block: None, + custom_router: None, }; let subnet2: VpcSubnet = NexusRequest::objects_post(client, &subnets_url, &new_subnet) @@ -329,6 +333,7 @@ async fn test_vpc_subnets(cptestctx: &ControlPlaneTestContext) { name: Some("new-name".parse().unwrap()), description: Some("another description".to_string()), }, + custom_router: None, }; NexusRequest::object_put(client, &subnet_url, Some(&update_params)) .authn_as(AuthnMode::PrivilegedUser) diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index a32fe5c4b9..35d8c32561 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -232,6 +232,16 @@ vpc_delete DELETE /v1/vpcs/{vpc} vpc_firewall_rules_update PUT /v1/vpc-firewall-rules vpc_firewall_rules_view GET /v1/vpc-firewall-rules vpc_list GET /v1/vpcs +vpc_router_create POST /v1/vpc-routers +vpc_router_delete DELETE /v1/vpc-routers/{router} +vpc_router_list GET /v1/vpc-routers +vpc_router_route_create POST /v1/vpc-router-routes +vpc_router_route_delete DELETE /v1/vpc-router-routes/{route} +vpc_router_route_list GET /v1/vpc-router-routes +vpc_router_route_update PUT /v1/vpc-router-routes/{route} +vpc_router_route_view GET /v1/vpc-router-routes/{route} +vpc_router_update PUT /v1/vpc-routers/{router} +vpc_router_view GET /v1/vpc-routers/{router} vpc_subnet_create POST /v1/vpc-subnets vpc_subnet_delete DELETE /v1/vpc-subnets/{subnet} vpc_subnet_list GET /v1/vpc-subnets diff --git a/nexus/tests/output/unexpected-authz-endpoints.txt b/nexus/tests/output/unexpected-authz-endpoints.txt index e8bb60224a..cd05058762 100644 --- a/nexus/tests/output/unexpected-authz-endpoints.txt +++ b/nexus/tests/output/unexpected-authz-endpoints.txt @@ -1,13 +1,3 @@ API endpoints tested by unauthorized.rs but not found in the OpenAPI spec: -GET "/v1/vpc-routers?project=demo-project&vpc=demo-vpc" -POST "/v1/vpc-routers?project=demo-project&vpc=demo-vpc" -GET "/v1/vpc-routers/demo-vpc-router?project=demo-project&vpc=demo-vpc" -PUT "/v1/vpc-routers/demo-vpc-router?project=demo-project&vpc=demo-vpc" -DELETE "/v1/vpc-routers/demo-vpc-router?project=demo-project&vpc=demo-vpc" -GET "/v1/vpc-router-routes?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" -POST "/v1/vpc-router-routes?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" -GET "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" -PUT "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" -DELETE "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" PUT "/v1/system/update/repository?file_name=demo-repo.zip" GET "/v1/system/update/repository/1.0.0" diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index ac169a35ee..6d92f2b1ba 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -808,6 +808,11 @@ pub struct InstanceNetworkInterfaceUpdate { // for the instance, though not the name. #[serde(default)] pub primary: bool, + + /// A set of additional networks that this interface may send and + /// receive traffic on. + #[serde(default)] + pub transit_ips: Vec, } // CERTIFICATES @@ -1220,6 +1225,14 @@ pub struct VpcSubnetCreate { /// be assigned if one is not provided. It must not overlap with any /// existing subnet in the VPC. pub ipv6_block: Option, + + /// An optional router, used to direct packets sent from hosts in this subnet + /// to any destination address. + /// + /// Custom routers apply in addition to the VPC-wide *system* router, and have + /// higher priority than the system router for an otherwise + /// equal-prefix-length match. + pub custom_router: Option, } /// Updateable properties of a `VpcSubnet` @@ -1227,6 +1240,10 @@ pub struct VpcSubnetCreate { pub struct VpcSubnetUpdate { #[serde(flatten)] pub identity: IdentityMetadataUpdateParams, + + /// An optional router, used to direct packets sent from hosts in this subnet + /// to any destination address. + pub custom_router: Option, } // VPC ROUTERS @@ -1252,7 +1269,9 @@ pub struct VpcRouterUpdate { pub struct RouterRouteCreate { #[serde(flatten)] pub identity: IdentityMetadataCreateParams, + /// The location that matched packets should be forwarded to. pub target: RouteTarget, + /// Selects which traffic this routing rule will apply to. pub destination: RouteDestination, } @@ -1261,7 +1280,9 @@ pub struct RouterRouteCreate { pub struct RouterRouteUpdate { #[serde(flatten)] pub identity: IdentityMetadataUpdateParams, + /// The location that matched packets should be forwarded to. pub target: RouteTarget, + /// Selects which traffic this routing rule will apply to. pub destination: RouteDestination, } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index d479e2e20c..9d495a726c 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3564,6 +3564,13 @@ "subnet": { "$ref": "#/components/schemas/IpNet" }, + "transit_ips": { + "default": [], + "type": "array", + "items": { + "$ref": "#/components/schemas/IpNet" + } + }, "vni": { "$ref": "#/components/schemas/Vni" } diff --git a/openapi/nexus.json b/openapi/nexus.json index 598f0453ff..8521366b8b 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -8346,13 +8346,14 @@ } } }, - "/v1/vpc-subnets": { + "/v1/vpc-router-routes": { "get": { "tags": [ "vpcs" ], - "summary": "List subnets", - "operationId": "vpc_subnet_list", + "summary": "List routes", + "description": "List the routes associated with a router in a particular VPC.", + "operationId": "vpc_router_route_list", "parameters": [ { "in": "query", @@ -8382,6 +8383,14 @@ "$ref": "#/components/schemas/NameOrId" } }, + { + "in": "query", + "name": "router", + "description": "Name or ID of the router", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, { "in": "query", "name": "sort_by", @@ -8392,7 +8401,7 @@ { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC", + "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -8404,7 +8413,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcSubnetResultsPage" + "$ref": "#/components/schemas/RouterRouteResultsPage" } } } @@ -8418,7 +8427,7 @@ }, "x-dropshot-pagination": { "required": [ - "vpc" + "router" ] } }, @@ -8426,8 +8435,8 @@ "tags": [ "vpcs" ], - "summary": "Create subnet", - "operationId": "vpc_subnet_create", + "summary": "Create route", + "operationId": "vpc_router_route_create", "parameters": [ { "in": "query", @@ -8439,19 +8448,27 @@ }, { "in": "query", - "name": "vpc", - "description": "Name or ID of the VPC", + "name": "router", + "description": "Name or ID of the router", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } } ], "requestBody": { "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcSubnetCreate" + "$ref": "#/components/schemas/RouterRouteCreate" } } }, @@ -8463,7 +8480,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcSubnet" + "$ref": "#/components/schemas/RouterRoute" } } } @@ -8477,18 +8494,18 @@ } } }, - "/v1/vpc-subnets/{subnet}": { + "/v1/vpc-router-routes/{route}": { "get": { "tags": [ "vpcs" ], - "summary": "Fetch subnet", - "operationId": "vpc_subnet_view", + "summary": "Fetch route", + "operationId": "vpc_router_route_view", "parameters": [ { "in": "path", - "name": "subnet", - "description": "Name or ID of the subnet", + "name": "route", + "description": "Name or ID of the route", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -8502,10 +8519,19 @@ "$ref": "#/components/schemas/NameOrId" } }, + { + "in": "query", + "name": "router", + "description": "Name or ID of the router", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC", + "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -8517,7 +8543,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcSubnet" + "$ref": "#/components/schemas/RouterRoute" } } } @@ -8534,13 +8560,13 @@ "tags": [ "vpcs" ], - "summary": "Update subnet", - "operationId": "vpc_subnet_update", + "summary": "Update route", + "operationId": "vpc_router_route_update", "parameters": [ { "in": "path", - "name": "subnet", - "description": "Name or ID of the subnet", + "name": "route", + "description": "Name or ID of the route", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -8554,10 +8580,18 @@ "$ref": "#/components/schemas/NameOrId" } }, + { + "in": "query", + "name": "router", + "description": "Name or ID of the router", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC", + "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -8567,7 +8601,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcSubnetUpdate" + "$ref": "#/components/schemas/RouterRouteUpdate" } } }, @@ -8579,7 +8613,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcSubnet" + "$ref": "#/components/schemas/RouterRoute" } } } @@ -8596,13 +8630,13 @@ "tags": [ "vpcs" ], - "summary": "Delete subnet", - "operationId": "vpc_subnet_delete", + "summary": "Delete route", + "operationId": "vpc_router_route_delete", "parameters": [ { "in": "path", - "name": "subnet", - "description": "Name or ID of the subnet", + "name": "route", + "description": "Name or ID of the route", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -8616,10 +8650,18 @@ "$ref": "#/components/schemas/NameOrId" } }, + { + "in": "query", + "name": "router", + "description": "Name or ID of the router", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, { "in": "query", "name": "vpc", - "description": "Name or ID of the VPC", + "description": "Name or ID of the VPC, only required if `subnet` is provided as a `Name`", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -8638,23 +8680,14 @@ } } }, - "/v1/vpc-subnets/{subnet}/network-interfaces": { + "/v1/vpc-routers": { "get": { "tags": [ "vpcs" ], - "summary": "List network interfaces", - "operationId": "vpc_subnet_list_network_interfaces", + "summary": "List routers", + "operationId": "vpc_router_list", "parameters": [ - { - "in": "path", - "name": "subnet", - "description": "Name or ID of the subnet", - "required": true, - "schema": { - "$ref": "#/components/schemas/NameOrId" - } - }, { "in": "query", "name": "limit", @@ -8705,7 +8738,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/InstanceNetworkInterfaceResultsPage" + "$ref": "#/components/schemas/VpcRouterResultsPage" } } } @@ -8718,89 +8751,30 @@ } }, "x-dropshot-pagination": { - "required": [] + "required": [ + "vpc" + ] } - } - }, - "/v1/vpcs": { - "get": { + }, + "post": { "tags": [ "vpcs" ], - "summary": "List VPCs", - "operationId": "vpc_list", + "summary": "Create VPC router", + "operationId": "vpc_router_create", "parameters": [ - { - "in": "query", - "name": "limit", - "description": "Maximum number of items returned by a single call", - "schema": { - "nullable": true, - "type": "integer", - "format": "uint32", - "minimum": 1 - } - }, - { - "in": "query", - "name": "page_token", - "description": "Token returned by previous call to retrieve the subsequent page", - "schema": { - "nullable": true, - "type": "string" - } - }, { "in": "query", "name": "project", - "description": "Name or ID of the project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", "schema": { "$ref": "#/components/schemas/NameOrId" } }, { "in": "query", - "name": "sort_by", - "schema": { - "$ref": "#/components/schemas/NameOrIdSortMode" - } - } - ], - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/VpcResultsPage" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - }, - "x-dropshot-pagination": { - "required": [ - "project" - ] - } - }, - "post": { - "tags": [ - "vpcs" - ], - "summary": "Create VPC", - "operationId": "vpc_create", - "parameters": [ - { - "in": "query", - "name": "project", - "description": "Name or ID of the project", + "name": "vpc", + "description": "Name or ID of the VPC", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -8811,7 +8785,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcCreate" + "$ref": "#/components/schemas/VpcRouterCreate" } } }, @@ -8823,7 +8797,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/Vpc" + "$ref": "#/components/schemas/VpcRouter" } } } @@ -8837,18 +8811,18 @@ } } }, - "/v1/vpcs/{vpc}": { + "/v1/vpc-routers/{router}": { "get": { "tags": [ "vpcs" ], - "summary": "Fetch VPC", - "operationId": "vpc_view", + "summary": "Fetch router", + "operationId": "vpc_router_view", "parameters": [ { "in": "path", - "name": "vpc", - "description": "Name or ID of the VPC", + "name": "router", + "description": "Name or ID of the router", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -8857,7 +8831,15 @@ { "in": "query", "name": "project", - "description": "Name or ID of the project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -8869,7 +8851,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/Vpc" + "$ref": "#/components/schemas/VpcRouter" } } } @@ -8886,13 +8868,13 @@ "tags": [ "vpcs" ], - "summary": "Update a VPC", - "operationId": "vpc_update", + "summary": "Update router", + "operationId": "vpc_router_update", "parameters": [ { "in": "path", - "name": "vpc", - "description": "Name or ID of the VPC", + "name": "router", + "description": "Name or ID of the router", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -8901,7 +8883,15 @@ { "in": "query", "name": "project", - "description": "Name or ID of the project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -8911,7 +8901,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/VpcUpdate" + "$ref": "#/components/schemas/VpcRouterUpdate" } } }, @@ -8923,7 +8913,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/Vpc" + "$ref": "#/components/schemas/VpcRouter" } } } @@ -8940,13 +8930,13 @@ "tags": [ "vpcs" ], - "summary": "Delete VPC", - "operationId": "vpc_delete", + "summary": "Delete router", + "operationId": "vpc_router_delete", "parameters": [ { "in": "path", - "name": "vpc", - "description": "Name or ID of the VPC", + "name": "router", + "description": "Name or ID of the router", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" @@ -8955,7 +8945,15 @@ { "in": "query", "name": "project", - "description": "Name or ID of the project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", "schema": { "$ref": "#/components/schemas/NameOrId" } @@ -8973,36 +8971,664 @@ } } } - } - }, - "components": { - "schemas": { - "Address": { - "description": "An address tied to an address lot.", - "type": "object", - "properties": { - "address": { - "description": "The address and prefix length of this address.", - "allOf": [ - { - "$ref": "#/components/schemas/IpNet" - } - ] + }, + "/v1/vpc-subnets": { + "get": { + "tags": [ + "vpcs" + ], + "summary": "List subnets", + "operationId": "vpc_subnet_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } }, - "address_lot": { - "description": "The address lot this address is drawn from.", - "allOf": [ - { - "$ref": "#/components/schemas/NameOrId" - } - ] + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } }, - "vlan_id": { - "nullable": true, - "description": "Optional VLAN ID for this address", - "type": "integer", - "format": "uint16", - "minimum": 0 + { + "in": "query", + "name": "project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcSubnetResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [ + "vpc" + ] + } + }, + "post": { + "tags": [ + "vpcs" + ], + "summary": "Create subnet", + "operationId": "vpc_subnet_create", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcSubnetCreate" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcSubnet" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/vpc-subnets/{subnet}": { + "get": { + "tags": [ + "vpcs" + ], + "summary": "Fetch subnet", + "operationId": "vpc_subnet_view", + "parameters": [ + { + "in": "path", + "name": "subnet", + "description": "Name or ID of the subnet", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcSubnet" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "tags": [ + "vpcs" + ], + "summary": "Update subnet", + "operationId": "vpc_subnet_update", + "parameters": [ + { + "in": "path", + "name": "subnet", + "description": "Name or ID of the subnet", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcSubnetUpdate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcSubnet" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "vpcs" + ], + "summary": "Delete subnet", + "operationId": "vpc_subnet_delete", + "parameters": [ + { + "in": "path", + "name": "subnet", + "description": "Name or ID of the subnet", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/vpc-subnets/{subnet}/network-interfaces": { + "get": { + "tags": [ + "vpcs" + ], + "summary": "List network interfaces", + "operationId": "vpc_subnet_list_network_interfaces", + "parameters": [ + { + "in": "path", + "name": "subnet", + "description": "Name or ID of the subnet", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project, only required if `vpc` is provided as a `Name`", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + }, + { + "in": "query", + "name": "vpc", + "description": "Name or ID of the VPC", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceNetworkInterfaceResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, + "/v1/vpcs": { + "get": { + "tags": [ + "vpcs" + ], + "summary": "List VPCs", + "operationId": "vpc_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [ + "project" + ] + } + }, + "post": { + "tags": [ + "vpcs" + ], + "summary": "Create VPC", + "operationId": "vpc_create", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcCreate" + } + } + }, + "required": true + }, + "responses": { + "201": { + "description": "successful creation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Vpc" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/vpcs/{vpc}": { + "get": { + "tags": [ + "vpcs" + ], + "summary": "Fetch VPC", + "operationId": "vpc_view", + "parameters": [ + { + "in": "path", + "name": "vpc", + "description": "Name or ID of the VPC", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Vpc" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "tags": [ + "vpcs" + ], + "summary": "Update a VPC", + "operationId": "vpc_update", + "parameters": [ + { + "in": "path", + "name": "vpc", + "description": "Name or ID of the VPC", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/VpcUpdate" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Vpc" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "vpcs" + ], + "summary": "Delete VPC", + "operationId": "vpc_delete", + "parameters": [ + { + "in": "path", + "name": "vpc", + "description": "Name or ID of the VPC", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + } + }, + "components": { + "schemas": { + "Address": { + "description": "An address tied to an address lot.", + "type": "object", + "properties": { + "address": { + "description": "The address and prefix length of this address.", + "allOf": [ + { + "$ref": "#/components/schemas/IpNet" + } + ] + }, + "address_lot": { + "description": "The address lot this address is drawn from.", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + }, + "vlan_id": { + "nullable": true, + "description": "Optional VLAN ID for this address", + "type": "integer", + "format": "uint16", + "minimum": 0 } }, "required": [ @@ -14713,6 +15339,14 @@ "type": "string", "format": "date-time" }, + "transit_ips": { + "description": "A set of additional networks that this interface may send and receive traffic on.", + "default": [], + "type": "array", + "items": { + "$ref": "#/components/schemas/IpNet" + } + }, "vpc_id": { "description": "The VPC to which the interface belongs.", "type": "string", @@ -14871,6 +15505,14 @@ "description": "Make a secondary interface the instance's primary interface.\n\nIf applied to a secondary interface, that interface will become the primary on the next reboot of the instance. Note that this may have implications for routing between instances, as the new primary interface will be on a distinct subnet from the previous primary interface.\n\nNote that this can only be used to select a new primary interface for an instance. Requests to change the primary interface into a secondary will return an error.", "default": false, "type": "boolean" + }, + "transit_ips": { + "description": "A set of additional networks that this interface may send and receive traffic on.", + "default": [], + "type": "array", + "items": { + "$ref": "#/components/schemas/IpNet" + } } } }, @@ -15845,6 +16487,13 @@ "subnet": { "$ref": "#/components/schemas/IpNet" }, + "transit_ips": { + "default": [], + "type": "array", + "items": { + "$ref": "#/components/schemas/IpNet" + } + }, "vni": { "$ref": "#/components/schemas/Vni" } @@ -16382,110 +17031,481 @@ "description": "Roles directly assigned on this resource", "type": "array", "items": { - "$ref": "#/components/schemas/ProjectRoleRoleAssignment" + "$ref": "#/components/schemas/ProjectRoleRoleAssignment" + } + } + }, + "required": [ + "role_assignments" + ] + }, + "ProjectRoleRoleAssignment": { + "description": "Describes the assignment of a particular role on a particular resource to a particular identity (user, group, etc.)\n\nThe resource is not part of this structure. Rather, `RoleAssignment`s are put into a `Policy` and that Policy is applied to a particular resource.", + "type": "object", + "properties": { + "identity_id": { + "type": "string", + "format": "uuid" + }, + "identity_type": { + "$ref": "#/components/schemas/IdentityType" + }, + "role_name": { + "$ref": "#/components/schemas/ProjectRole" + } + }, + "required": [ + "identity_id", + "identity_type", + "role_name" + ] + }, + "ProjectUpdate": { + "description": "Updateable properties of a `Project`", + "type": "object", + "properties": { + "description": { + "nullable": true, + "type": "string" + }, + "name": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + } + } + }, + "Quantile": { + "description": "Structure for estimating the p-quantile of a population.\n\nThis is based on the P² algorithm for estimating quantiles using constant space.\n\nThe algorithm consists of maintaining five markers: the minimum, the p/2-, p-, and (1 + p)/2 quantiles, and the maximum.", + "type": "object", + "properties": { + "desired_marker_positions": { + "description": "The desired marker positions.", + "type": "array", + "items": { + "type": "number", + "format": "double" + }, + "minItems": 5, + "maxItems": 5 + }, + "marker_heights": { + "description": "The heights of the markers.", + "type": "array", + "items": { + "type": "number", + "format": "double" + }, + "minItems": 5, + "maxItems": 5 + }, + "marker_positions": { + "description": "The positions of the markers.\n\nWe track sample size in the 5th position, as useful observations won't start until we've filled the heights at the 6th sample anyway This does deviate from the paper, but it's a more useful representation that works according to the paper's algorithm.", + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "minItems": 5, + "maxItems": 5 + }, + "p": { + "description": "The p value for the quantile.", + "type": "number", + "format": "double" + } + }, + "required": [ + "desired_marker_positions", + "marker_heights", + "marker_positions", + "p" + ] + }, + "Rack": { + "description": "View of an Rack", + "type": "object", + "properties": { + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "id", + "time_created", + "time_modified" + ] + }, + "RackResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/Rack" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "Role": { + "description": "View of a Role", + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "name": { + "$ref": "#/components/schemas/RoleName" + } + }, + "required": [ + "description", + "name" + ] + }, + "RoleName": { + "title": "A name for a built-in role", + "description": "Role names consist of two string components separated by dot (\".\").", + "type": "string", + "pattern": "[a-z-]+\\.[a-z-]+", + "maxLength": 63 + }, + "RoleResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/Role" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "Route": { + "description": "A route to a destination network through a gateway address.", + "type": "object", + "properties": { + "dst": { + "description": "The route destination.", + "allOf": [ + { + "$ref": "#/components/schemas/IpNet" + } + ] + }, + "gw": { + "description": "The route gateway.", + "type": "string", + "format": "ip" + }, + "vid": { + "nullable": true, + "description": "VLAN id the gateway is reachable over.", + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "dst", + "gw" + ] + }, + "RouteConfig": { + "description": "Route configuration data associated with a switch port configuration.", + "type": "object", + "properties": { + "routes": { + "description": "The set of routes assigned to a switch port.", + "type": "array", + "items": { + "$ref": "#/components/schemas/Route" } } }, "required": [ - "role_assignments" + "routes" ] }, - "ProjectRoleRoleAssignment": { - "description": "Describes the assignment of a particular role on a particular resource to a particular identity (user, group, etc.)\n\nThe resource is not part of this structure. Rather, `RoleAssignment`s are put into a `Policy` and that Policy is applied to a particular resource.", - "type": "object", - "properties": { - "identity_id": { - "type": "string", - "format": "uuid" + "RouteDestination": { + "description": "A `RouteDestination` is used to match traffic with a routing rule, on the destination of that traffic.\n\nWhen traffic is to be sent to a destination that is within a given `RouteDestination`, the corresponding `RouterRoute` applies, and traffic will be forward to the `RouteTarget` for that rule.", + "oneOf": [ + { + "description": "Route applies to traffic destined for a specific IP address", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] }, - "identity_type": { - "$ref": "#/components/schemas/IdentityType" + { + "description": "Route applies to traffic destined for a specific IP subnet", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip_net" + ] + }, + "value": { + "$ref": "#/components/schemas/IpNet" + } + }, + "required": [ + "type", + "value" + ] }, - "role_name": { - "$ref": "#/components/schemas/ProjectRole" + { + "description": "Route applies to traffic destined for the given VPC.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "vpc" + ] + }, + "value": { + "$ref": "#/components/schemas/Name" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "description": "Route applies to traffic", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "subnet" + ] + }, + "value": { + "$ref": "#/components/schemas/Name" + } + }, + "required": [ + "type", + "value" + ] } - }, - "required": [ - "identity_id", - "identity_type", - "role_name" ] }, - "ProjectUpdate": { - "description": "Updateable properties of a `Project`", - "type": "object", - "properties": { - "description": { - "nullable": true, - "type": "string" + "RouteTarget": { + "description": "A `RouteTarget` describes the possible locations that traffic matching a route destination can be sent.", + "oneOf": [ + { + "description": "Forward traffic to a particular IP address.", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] }, - "name": { - "nullable": true, - "allOf": [ - { + { + "description": "Forward traffic to a VPC", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "vpc" + ] + }, + "value": { "$ref": "#/components/schemas/Name" } + }, + "required": [ + "type", + "value" ] - } - } - }, - "Quantile": { - "description": "Structure for estimating the p-quantile of a population.\n\nThis is based on the P² algorithm for estimating quantiles using constant space.\n\nThe algorithm consists of maintaining five markers: the minimum, the p/2-, p-, and (1 + p)/2 quantiles, and the maximum.", - "type": "object", - "properties": { - "desired_marker_positions": { - "description": "The desired marker positions.", - "type": "array", - "items": { - "type": "number", - "format": "double" + }, + { + "description": "Forward traffic to a VPC Subnet", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "subnet" + ] + }, + "value": { + "$ref": "#/components/schemas/Name" + } }, - "minItems": 5, - "maxItems": 5 + "required": [ + "type", + "value" + ] }, - "marker_heights": { - "description": "The heights of the markers.", - "type": "array", - "items": { - "type": "number", - "format": "double" + { + "description": "Forward traffic to a specific instance", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "instance" + ] + }, + "value": { + "$ref": "#/components/schemas/Name" + } }, - "minItems": 5, - "maxItems": 5 + "required": [ + "type", + "value" + ] }, - "marker_positions": { - "description": "The positions of the markers.\n\nWe track sample size in the 5th position, as useful observations won't start until we've filled the heights at the 6th sample anyway This does deviate from the paper, but it's a more useful representation that works according to the paper's algorithm.", - "type": "array", - "items": { - "type": "integer", - "format": "uint64", - "minimum": 0 + { + "description": "Forward traffic to an internet gateway", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "internet_gateway" + ] + }, + "value": { + "$ref": "#/components/schemas/Name" + } }, - "minItems": 5, - "maxItems": 5 + "required": [ + "type", + "value" + ] }, - "p": { - "description": "The p value for the quantile.", - "type": "number", - "format": "double" + { + "description": "Drop matching traffic", + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "drop" + ] + } + }, + "required": [ + "type" + ] } - }, - "required": [ - "desired_marker_positions", - "marker_heights", - "marker_positions", - "p" ] }, - "Rack": { - "description": "View of an Rack", + "RouterRoute": { + "description": "A route defines a rule that governs where traffic should be sent based on its destination.", "type": "object", "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "destination": { + "description": "Selects which traffic this routing rule will apply to.", + "allOf": [ + { + "$ref": "#/components/schemas/RouteDestination" + } + ] + }, "id": { "description": "unique, immutable, system-controlled identifier for each resource", "type": "string", "format": "uuid" }, + "kind": { + "description": "Describes the kind of router. Set at creation. `read-only`", + "allOf": [ + { + "$ref": "#/components/schemas/RouterRouteKind" + } + ] + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "target": { + "description": "The location that matched packets should be forwarded to.", + "allOf": [ + { + "$ref": "#/components/schemas/RouteTarget" + } + ] + }, "time_created": { "description": "timestamp when this resource was created", "type": "string", @@ -16495,59 +17515,93 @@ "description": "timestamp when this resource was last modified", "type": "string", "format": "date-time" + }, + "vpc_router_id": { + "description": "The ID of the VPC Router to which the route belongs", + "type": "string", + "format": "uuid" } }, "required": [ + "description", + "destination", "id", + "kind", + "name", + "target", "time_created", - "time_modified" - ] - }, - "RackResultsPage": { - "description": "A single page of results", - "type": "object", - "properties": { - "items": { - "description": "list of items on this page of results", - "type": "array", - "items": { - "$ref": "#/components/schemas/Rack" - } - }, - "next_page": { - "nullable": true, - "description": "token used to fetch the next page of results (if any)", - "type": "string" - } - }, - "required": [ - "items" + "time_modified", + "vpc_router_id" ] }, - "Role": { - "description": "View of a Role", + "RouterRouteCreate": { + "description": "Create-time parameters for a `RouterRoute`", "type": "object", "properties": { "description": { "type": "string" }, + "destination": { + "description": "Selects which traffic this routing rule will apply to.", + "allOf": [ + { + "$ref": "#/components/schemas/RouteDestination" + } + ] + }, "name": { - "$ref": "#/components/schemas/RoleName" + "$ref": "#/components/schemas/Name" + }, + "target": { + "description": "The location that matched packets should be forwarded to.", + "allOf": [ + { + "$ref": "#/components/schemas/RouteTarget" + } + ] } }, "required": [ "description", - "name" + "destination", + "name", + "target" ] }, - "RoleName": { - "title": "A name for a built-in role", - "description": "Role names consist of two string components separated by dot (\".\").", - "type": "string", - "pattern": "[a-z-]+\\.[a-z-]+", - "maxLength": 63 + "RouterRouteKind": { + "description": "The kind of a `RouterRoute`\n\nThe kind determines certain attributes such as if the route is modifiable and describes how or where the route was created.", + "oneOf": [ + { + "description": "Determines the default destination of traffic, such as whether it goes to the internet or not.\n\n`Destination: An Internet Gateway` `Modifiable: true`", + "type": "string", + "enum": [ + "default" + ] + }, + { + "description": "Automatically added for each VPC Subnet in the VPC\n\n`Destination: A VPC Subnet` `Modifiable: false`", + "type": "string", + "enum": [ + "vpc_subnet" + ] + }, + { + "description": "Automatically added when VPC peering is established\n\n`Destination: A different VPC` `Modifiable: false`", + "type": "string", + "enum": [ + "vpc_peering" + ] + }, + { + "description": "Created by a user; see `RouteTarget`\n\n`Destination: User defined` `Modifiable: true`", + "type": "string", + "enum": [ + "custom" + ] + } + ] }, - "RoleResultsPage": { + "RouterRouteResultsPage": { "description": "A single page of results", "type": "object", "properties": { @@ -16555,7 +17609,7 @@ "description": "list of items on this page of results", "type": "array", "items": { - "$ref": "#/components/schemas/Role" + "$ref": "#/components/schemas/RouterRoute" } }, "next_page": { @@ -16568,50 +17622,42 @@ "items" ] }, - "Route": { - "description": "A route to a destination network through a gateway address.", + "RouterRouteUpdate": { + "description": "Updateable properties of a `RouterRoute`", "type": "object", "properties": { - "dst": { - "description": "The route destination.", + "description": { + "nullable": true, + "type": "string" + }, + "destination": { + "description": "Selects which traffic this routing rule will apply to.", "allOf": [ { - "$ref": "#/components/schemas/IpNet" + "$ref": "#/components/schemas/RouteDestination" } ] }, - "gw": { - "description": "The route gateway.", - "type": "string", - "format": "ip" - }, - "vid": { + "name": { "nullable": true, - "description": "VLAN id the gateway is reachable over.", - "type": "integer", - "format": "uint16", - "minimum": 0 - } - }, - "required": [ - "dst", - "gw" - ] - }, - "RouteConfig": { - "description": "Route configuration data associated with a switch port configuration.", - "type": "object", - "properties": { - "routes": { - "description": "The set of routes assigned to a switch port.", - "type": "array", - "items": { - "$ref": "#/components/schemas/Route" - } + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "target": { + "description": "The location that matched packets should be forwarded to.", + "allOf": [ + { + "$ref": "#/components/schemas/RouteTarget" + } + ] } }, "required": [ - "routes" + "destination", + "target" ] }, "SamlIdentityProvider": { @@ -19759,6 +20805,118 @@ "items" ] }, + "VpcRouter": { + "description": "A VPC router defines a series of rules that indicate where traffic should be sent depending on its destination.", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "kind": { + "$ref": "#/components/schemas/VpcRouterKind" + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + }, + "vpc_id": { + "description": "The VPC to which the router belongs.", + "type": "string", + "format": "uuid" + } + }, + "required": [ + "description", + "id", + "kind", + "name", + "time_created", + "time_modified", + "vpc_id" + ] + }, + "VpcRouterCreate": { + "description": "Create-time parameters for a `VpcRouter`", + "type": "object", + "properties": { + "description": { + "type": "string" + }, + "name": { + "$ref": "#/components/schemas/Name" + } + }, + "required": [ + "description", + "name" + ] + }, + "VpcRouterKind": { + "type": "string", + "enum": [ + "system", + "custom" + ] + }, + "VpcRouterResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/VpcRouter" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "VpcRouterUpdate": { + "description": "Updateable properties of a `VpcRouter`", + "type": "object", + "properties": { + "description": { + "nullable": true, + "type": "string" + }, + "name": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + } + } + }, "VpcSubnet": { "description": "A VPC subnet represents a logical grouping for instances that allows network traffic between them, within a IPv4 subnetwork or optionally an IPv6 subnetwork.", "type": "object", @@ -19833,6 +20991,15 @@ "description": "Create-time parameters for a `VpcSubnet`", "type": "object", "properties": { + "custom_router": { + "nullable": true, + "description": "An optional router, used to direct packets sent from hosts in this subnet to any destination address.\n\nCustom routers apply in addition to the VPC-wide *system* router, and have higher priority than the system router for an otherwise equal-prefix-length match.", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + }, "description": { "type": "string" }, @@ -19888,6 +21055,15 @@ "description": "Updateable properties of a `VpcSubnet`", "type": "object", "properties": { + "custom_router": { + "nullable": true, + "description": "An optional router, used to direct packets sent from hosts in this subnet to any destination address.", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + }, "description": { "nullable": true, "type": "string" diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index aa5163a964..be13ba7a8b 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3597,6 +3597,13 @@ "subnet": { "$ref": "#/components/schemas/IpNet" }, + "transit_ips": { + "default": [], + "type": "array", + "items": { + "$ref": "#/components/schemas/IpNet" + } + }, "vni": { "$ref": "#/components/schemas/Vni" } diff --git a/schema/all-zones-requests.json b/schema/all-zones-requests.json index 210f1df2f9..20b99b2064 100644 --- a/schema/all-zones-requests.json +++ b/schema/all-zones-requests.json @@ -154,6 +154,13 @@ "subnet": { "$ref": "#/definitions/IpNet" }, + "transit_ips": { + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/IpNet" + } + }, "vni": { "$ref": "#/definitions/Vni" } diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b6102c3a64..fcb02af8cf 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1473,7 +1473,14 @@ CREATE TABLE IF NOT EXISTS omicron.public.network_interface ( * The primary interface appears in DNS and its address is used for external * connectivity. */ - is_primary BOOL NOT NULL + is_primary BOOL NOT NULL, + + /* + * A supplementary list of addresses/CIDR blocks which a NIC is + * *allowed* to send/receive traffic on, in addition to its + * assigned address. + */ + transit_ips INET[] NOT NULL DEFAULT ARRAY[] ); /* A view of the network_interface table for just instance-kind records. */ @@ -1491,7 +1498,8 @@ SELECT mac, ip, slot, - is_primary + is_primary, + transit_ips FROM omicron.public.network_interface WHERE @@ -4107,7 +4115,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '78.0.0', NULL) + (TRUE, NOW(), NOW(), '79.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/nic-spoof-allow/up01.sql b/schema/crdb/nic-spoof-allow/up01.sql new file mode 100644 index 0000000000..2ca13e0a38 --- /dev/null +++ b/schema/crdb/nic-spoof-allow/up01.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.network_interface +ADD COLUMN IF NOT EXISTS transit_ips INET[] NOT NULL DEFAULT ARRAY[]; diff --git a/schema/crdb/nic-spoof-allow/up02.sql b/schema/crdb/nic-spoof-allow/up02.sql new file mode 100644 index 0000000000..68ab39567d --- /dev/null +++ b/schema/crdb/nic-spoof-allow/up02.sql @@ -0,0 +1 @@ +DROP VIEW IF EXISTS omicron.public.instance_network_interface; diff --git a/schema/crdb/nic-spoof-allow/up03.sql b/schema/crdb/nic-spoof-allow/up03.sql new file mode 100644 index 0000000000..ac3cfe6b32 --- /dev/null +++ b/schema/crdb/nic-spoof-allow/up03.sql @@ -0,0 +1,20 @@ +CREATE VIEW IF NOT EXISTS omicron.public.instance_network_interface AS +SELECT + id, + name, + description, + time_created, + time_modified, + time_deleted, + parent_id AS instance_id, + vpc_id, + subnet_id, + mac, + ip, + slot, + is_primary, + transit_ips +FROM + omicron.public.network_interface +WHERE + kind = 'instance'; diff --git a/schema/rss-service-plan-v3.json b/schema/rss-service-plan-v3.json index 9348774c35..481c92cc36 100644 --- a/schema/rss-service-plan-v3.json +++ b/schema/rss-service-plan-v3.json @@ -268,6 +268,13 @@ "subnet": { "$ref": "#/definitions/IpNet" }, + "transit_ips": { + "default": [], + "type": "array", + "items": { + "$ref": "#/definitions/IpNet" + } + }, "vni": { "$ref": "#/definitions/Vni" } diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 8499a0000c..f13c15723c 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -1042,6 +1042,7 @@ impl ServicePortBuilder { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }; Some((nic, external_ip)) @@ -1082,6 +1083,7 @@ impl ServicePortBuilder { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }; Ok((nic, external_ip)) @@ -1139,6 +1141,7 @@ impl ServicePortBuilder { vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }; Ok((nic, snat_cfg)) diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 7ce34473e7..5b66342a1a 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -410,6 +410,7 @@ pub async fn run_standalone_server( vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, external_tls: false, external_dns_servers: vec![], @@ -453,6 +454,7 @@ pub async fn run_standalone_server( vni: Vni::SERVICES_VNI, primary: true, slot: 0, + transit_ips: vec![], }, }, });