From 9c33a6268dc5c3ddfab4696b47f90260878e821c Mon Sep 17 00:00:00 2001 From: bnaecker Date: Wed, 15 Nov 2023 11:22:35 -0800 Subject: [PATCH 01/10] Send oximeter all existing producer assignments on registration (#4500) - Fixes #4498 - When `oximeter` registers with `nexus`, we send it any producer assignments so that it can continue collecting after a restart. The list of assignments is paginated from the database, but we previously only sent the first page. This ensures we send all records, and adds a regression test. --- nexus/src/app/oximeter.rs | 54 ++++++++------ nexus/tests/integration_tests/oximeter.rs | 90 +++++++++++++++++++++++ 2 files changed, 122 insertions(+), 22 deletions(-) diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index 03f833b087..7dfa2fb68b 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -87,32 +87,43 @@ impl super::Nexus { "address" => oximeter_info.address, ); - // Regardless, notify the collector of any assigned metric producers. This should be empty - // if this Oximeter collector is registering for the first time, but may not be if the - // service is re-registering after failure. - let pagparams = DataPageParams { - marker: None, - direction: PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(100).unwrap(), - }; - let producers = self - .db_datastore - .producers_list_by_oximeter_id( - oximeter_info.collector_id, - &pagparams, - ) - .await?; - if !producers.is_empty() { + // Regardless, notify the collector of any assigned metric producers. + // + // This should be empty if this Oximeter collector is registering for + // the first time, but may not be if the service is re-registering after + // failure. + let client = self.build_oximeter_client( + &oximeter_info.collector_id, + oximeter_info.address, + ); + let mut last_producer_id = None; + loop { + let pagparams = DataPageParams { + marker: last_producer_id.as_ref(), + direction: PaginationOrder::Ascending, + limit: std::num::NonZeroU32::new(100).unwrap(), + }; + let producers = self + .db_datastore + .producers_list_by_oximeter_id( + oximeter_info.collector_id, + &pagparams, + ) + .await?; + if producers.is_empty() { + return Ok(()); + } debug!( self.log, - "registered oximeter collector that is already assigned producers, re-assigning them to the collector"; + "re-assigning existing metric producers to a collector"; "n_producers" => producers.len(), "collector_id" => ?oximeter_info.collector_id, ); - let client = self.build_oximeter_client( - &oximeter_info.collector_id, - oximeter_info.address, - ); + // Be sure to continue paginating from the last producer. + // + // Safety: We check just above if the list is empty, so there is a + // last element. + last_producer_id.replace(producers.last().unwrap().id()); for producer in producers.into_iter() { let producer_info = oximeter_client::types::ProducerEndpoint { id: producer.id(), @@ -132,7 +143,6 @@ impl super::Nexus { .map_err(Error::from)?; } } - Ok(()) } /// Register as a metric producer with the oximeter metric collection server. diff --git a/nexus/tests/integration_tests/oximeter.rs b/nexus/tests/integration_tests/oximeter.rs index 2cda594e18..65aaa18642 100644 --- a/nexus/tests/integration_tests/oximeter.rs +++ b/nexus/tests/integration_tests/oximeter.rs @@ -4,11 +4,17 @@ //! Integration tests for oximeter collectors and producers. 
+use dropshot::Method;
+use http::StatusCode;
 use nexus_test_interface::NexusServer;
 use nexus_test_utils_macros::nexus_test;
+use omicron_common::api::internal::nexus::ProducerEndpoint;
 use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError};
 use oximeter_db::DbWrite;
+use std::collections::BTreeSet;
 use std::net;
+use std::net::Ipv6Addr;
+use std::net::SocketAddr;
 use std::time::Duration;
 use uuid::Uuid;
 
@@ -332,3 +338,87 @@ async fn test_oximeter_reregistration() {
     );
     context.teardown().await;
 }
+
+// A regression test for https://github.com/oxidecomputer/omicron/issues/4498
+#[tokio::test]
+async fn test_oximeter_collector_reregistration_gets_all_assignments() {
+    let mut context = nexus_test_utils::test_setup::<omicron_nexus::Server>(
+        "test_oximeter_collector_reregistration_gets_all_assignments",
+    )
+    .await;
+    let oximeter_id = nexus_test_utils::OXIMETER_UUID.parse().unwrap();
+
+    // Create a bunch of producer records.
+    //
+    // Note that the actual count is arbitrary, but it should be larger than the
+    // internal pagination limit used in `Nexus::upsert_oximeter_collector()`,
+    // which is currently 100.
+    const N_PRODUCERS: usize = 150;
+    let mut ids = BTreeSet::new();
+    for _ in 0..N_PRODUCERS {
+        let id = Uuid::new_v4();
+        ids.insert(id);
+        let info = ProducerEndpoint {
+            id,
+            address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 12345),
+            base_route: String::from("/collect"),
+            interval: Duration::from_secs(1),
+        };
+        context
+            .internal_client
+            .make_request(
+                Method::POST,
+                "/metrics/producers",
+                Some(&info),
+                StatusCode::NO_CONTENT,
+            )
+            .await
+            .expect("failed to register test producer");
+    }
+
+    // Check that `oximeter` has these registered.
+    let producers =
+        context.oximeter.list_producers(None, N_PRODUCERS * 2).await;
+    let actual_ids: BTreeSet<_> =
+        producers.iter().map(|info| info.id).collect();
+
+    // There is an additional producer that's created as part of the normal test
+    // setup, so we'll check that all of the new producers exist, and that
+    // there's exactly 1 additional one.
+    assert!(
+        ids.is_subset(&actual_ids),
+        "oximeter did not get the right set of producers"
+    );
+    assert_eq!(
+        ids.len(),
+        actual_ids.len() - 1,
+        "oximeter did not get the right set of producers"
+    );
+
+    // Drop and restart oximeter, which should result in the exact same set of
+    // producers again.
+    drop(context.oximeter);
+    context.oximeter = nexus_test_utils::start_oximeter(
+        context.logctx.log.new(o!("component" => "oximeter")),
+        context.server.get_http_server_internal_address().await,
+        context.clickhouse.port(),
+        oximeter_id,
+    )
+    .await
+    .expect("failed to restart oximeter");
+
+    let producers =
+        context.oximeter.list_producers(None, N_PRODUCERS * 2).await;
+    let actual_ids: BTreeSet<_> =
+        producers.iter().map(|info| info.id).collect();
+    assert!(
+        ids.is_subset(&actual_ids),
+        "oximeter did not get the right set of producers after re-registering"
+    );
+    assert_eq!(
+        ids.len(),
+        actual_ids.len() - 1,
+        "oximeter did not get the right set of producers after re-registering"
+    );
+    context.teardown().await;
+}

From cc5c40e03f9c52e54cb56c228b21d4cbcad49d4f Mon Sep 17 00:00:00 2001
From: Patrick Mooney
Date: Wed, 15 Nov 2023 22:23:29 -0600
Subject: [PATCH 02/10] Pare down nexus-client dependencies

Adding sled-hardware and sled-storage as dependencies to nexus-client
brings in a large portion of unrelated crates, all in service of some
(albeit convenient) type conversion routines.
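
The replacement is a small trait local to sled-agent; roughly (this
sketch matches the definition added in sled-agent/src/nexus.rs below):

    pub(crate) trait ConvertInto<T>: Sized {
        fn convert(self) -> T;
    }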
For the sake of nexus-client consumers outside of omicron, we can
manually implement those few conversions in sled-agent where they are
consumed.
---
 Cargo.lock                           |  2 --
 clients/nexus-client/Cargo.toml      |  2 --
 clients/nexus-client/src/lib.rs      | 33 ------------------
 sled-agent/src/nexus.rs              | 48 ++++++++++++++++++++++++++++
 sled-agent/src/rack_setup/service.rs |  4 +--
 sled-agent/src/sled_agent.rs         |  6 ++--
 sled-agent/src/storage_monitor.rs    |  6 ++--
 7 files changed, 55 insertions(+), 46 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 6b3007b86e..f98f7c06ba 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3934,8 +3934,6 @@ dependencies = [
  "schemars",
  "serde",
  "serde_json",
- "sled-hardware",
- "sled-storage",
  "slog",
  "uuid",
 ]
diff --git a/clients/nexus-client/Cargo.toml b/clients/nexus-client/Cargo.toml
index 239cb77789..2734142f9f 100644
--- a/clients/nexus-client/Cargo.toml
+++ b/clients/nexus-client/Cargo.toml
@@ -10,8 +10,6 @@ futures.workspace = true
 ipnetwork.workspace = true
 omicron-common.workspace = true
 omicron-passwords.workspace = true
-sled-hardware.workspace = true
-sled-storage.workspace = true
 progenitor.workspace = true
 regress.workspace = true
 reqwest = { workspace = true, features = ["rustls-tls", "stream"] }
diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs
index 9f81492d10..23ceb114fc 100644
--- a/clients/nexus-client/src/lib.rs
+++ b/clients/nexus-client/src/lib.rs
@@ -388,36 +388,3 @@ impl From
         }
     }
 }
-
-impl From<sled_hardware::DiskVariant> for types::PhysicalDiskKind {
-    fn from(value: sled_hardware::DiskVariant) -> Self {
-        match value {
-            sled_hardware::DiskVariant::U2 => types::PhysicalDiskKind::U2,
-            sled_hardware::DiskVariant::M2 => types::PhysicalDiskKind::M2,
-        }
-    }
-}
-
-impl From<sled_hardware::Baseboard> for types::Baseboard {
-    fn from(b: sled_hardware::Baseboard) -> types::Baseboard {
-        types::Baseboard {
-            serial_number: b.identifier().to_string(),
-            part_number: b.model().to_string(),
-            revision: b.revision(),
-        }
-    }
-}
-
-impl From<sled_storage::dataset::DatasetKind> for types::DatasetKind {
-    fn from(k: sled_storage::dataset::DatasetKind) -> Self {
-        use sled_storage::dataset::DatasetKind::*;
-        match k {
-            CockroachDb => Self::Cockroach,
-            Crucible => Self::Crucible,
-            Clickhouse => Self::Clickhouse,
-            ClickhouseKeeper => Self::ClickhouseKeeper,
-            ExternalDns => Self::ExternalDns,
-            InternalDns => Self::InternalDns,
-        }
-    }
-}
diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs
index 2af6fa0023..cc715f4010 100644
--- a/sled-agent/src/nexus.rs
+++ b/sled-agent/src/nexus.rs
@@ -154,3 +154,51 @@ fn d2n_record(
         }
     }
 }
+
+// Although it is a bit awkward to define these conversions here, it frees us
+// from depending on sled_storage/sled_hardware in the nexus_client crate.
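+//
+// As an illustration only (a hypothetical call site; the real call sites
+// are in the sled-agent changes further down in this patch), the
+// conversions read as:
+//
+//     let kind: nexus_client::types::PhysicalDiskKind =
+//         sled_hardware::DiskVariant::U2.convert();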
+
+pub(crate) trait ConvertInto<T>: Sized {
+    fn convert(self) -> T;
+}
+
+impl ConvertInto<nexus_client::types::PhysicalDiskKind>
+    for sled_hardware::DiskVariant
+{
+    fn convert(self) -> nexus_client::types::PhysicalDiskKind {
+        use nexus_client::types::PhysicalDiskKind;
+
+        match self {
+            sled_hardware::DiskVariant::U2 => PhysicalDiskKind::U2,
+            sled_hardware::DiskVariant::M2 => PhysicalDiskKind::M2,
+        }
+    }
+}
+
+impl ConvertInto<nexus_client::types::Baseboard> for sled_hardware::Baseboard {
+    fn convert(self) -> nexus_client::types::Baseboard {
+        nexus_client::types::Baseboard {
+            serial_number: self.identifier().to_string(),
+            part_number: self.model().to_string(),
+            revision: self.revision(),
+        }
+    }
+}
+
+impl ConvertInto<nexus_client::types::DatasetKind>
+    for sled_storage::dataset::DatasetKind
+{
+    fn convert(self) -> nexus_client::types::DatasetKind {
+        use nexus_client::types::DatasetKind;
+        use sled_storage::dataset::DatasetKind::*;
+
+        match self {
+            CockroachDb => DatasetKind::Cockroach,
+            Crucible => DatasetKind::Crucible,
+            Clickhouse => DatasetKind::Clickhouse,
+            ClickhouseKeeper => DatasetKind::ClickhouseKeeper,
+            ExternalDns => DatasetKind::ExternalDns,
+            InternalDns => DatasetKind::InternalDns,
+        }
+    }
+}
diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs
index 34d5e06cfe..7dcbfa7045 100644
--- a/sled-agent/src/rack_setup/service.rs
+++ b/sled-agent/src/rack_setup/service.rs
@@ -63,7 +63,7 @@ use crate::bootstrap::early_networking::{
 use crate::bootstrap::params::BootstrapAddressDiscovery;
 use crate::bootstrap::params::StartSledAgentRequest;
 use crate::bootstrap::rss_handle::BootstrapAgentHandle;
-use crate::nexus::d2n_params;
+use crate::nexus::{d2n_params, ConvertInto};
 use crate::params::{
     AutonomousServiceOnlyError, ServiceType, ServiceZoneRequest,
     ServiceZoneService, TimeSync, ZoneType,
@@ -564,7 +564,7 @@ impl ServiceInner {
                         dataset_id: dataset.id,
                         request: NexusTypes::DatasetPutRequest {
                             address: dataset.service_address.to_string(),
-                            kind: dataset.name.dataset().clone().into(),
+                            kind: dataset.name.dataset().clone().convert(),
                         },
                     })
                 }
diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs
index aec64a1349..9826a987d4 100644
--- a/sled-agent/src/sled_agent.rs
+++ b/sled-agent/src/sled_agent.rs
@@ -13,7 +13,7 @@ use crate::config::Config;
 use crate::instance_manager::{InstanceManager, ReservoirMode};
 use crate::long_running_tasks::LongRunningTaskHandles;
 use crate::metrics::MetricsManager;
-use crate::nexus::{NexusClientWithResolver, NexusRequestQueue};
+use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue};
 use crate::params::{
     DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams,
     InstancePutStateResponse, InstanceStateRequested,
@@ -607,9 +607,7 @@ impl SledAgent {
         let nexus_client = self.inner.nexus_client.clone();
         let sled_address = self.inner.sled_address();
         let is_scrimlet = self.inner.hardware.is_scrimlet();
-        let baseboard = nexus_client::types::Baseboard::from(
-            self.inner.hardware.baseboard(),
-        );
+        let baseboard = self.inner.hardware.baseboard().convert();
         let usable_hardware_threads =
             self.inner.hardware.online_processor_count();
         let usable_physical_ram =
diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs
index f552fdfd86..0c9b287396 100644
--- a/sled-agent/src/storage_monitor.rs
+++ b/sled-agent/src/storage_monitor.rs
@@ -7,7 +7,7 @@
 //! code.
use crate::dump_setup::DumpSetup; -use crate::nexus::NexusClientWithResolver; +use crate::nexus::{ConvertInto, NexusClientWithResolver}; use derive_more::From; use futures::stream::FuturesOrdered; use futures::FutureExt; @@ -338,7 +338,7 @@ fn compute_resource_diffs( model: disk_id.model.clone(), serial: disk_id.serial.clone(), vendor: disk_id.vendor.clone(), - variant: updated_disk.variant().into(), + variant: updated_disk.variant().convert(), }); } if pool != updated_pool { @@ -363,7 +363,7 @@ fn compute_resource_diffs( model: disk_id.model.clone(), serial: disk_id.serial.clone(), vendor: disk_id.vendor.clone(), - variant: updated_disk.variant().into(), + variant: updated_disk.variant().convert(), }); put_pool(disk_id, updated_pool); } From 9ef3f328a1331970492070d87a385f9f246ee251 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Thu, 16 Nov 2023 07:47:35 -0800 Subject: [PATCH 03/10] Update Crucible (51a3121) and Propolis (8ad2d4f) (#4499) Crucible changes: test-crudd can collect more info, test_up can be gentle (#997) Decrypt without holding the downstairs lock (#1021) Add raw file backend (#991) Don't hold the Downstairs lock while doing encryption (#1019) Antagonize the Crucible Agent (#1011) The Pantry should reject non-block sized writes (#1013) Propolis changes: make headroom for linux virtio/9p client impl (#565) Guarantee Tokio access for Entity methods --------- Co-authored-by: Alan Hanson --- Cargo.lock | 16 ++++++++-------- Cargo.toml | 12 ++++++------ package-manifest.toml | 12 ++++++------ 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f98f7c06ba..58d0653728 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -447,7 +447,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=5ed82315541271e2734746a9ca79e39f35c12283#5ed82315541271e2734746a9ca79e39f35c12283" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "bhyve_api_sys", "libc", @@ -457,7 +457,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=5ed82315541271e2734746a9ca79e39f35c12283#5ed82315541271e2734746a9ca79e39f35c12283" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "libc", "strum", @@ -1270,7 +1270,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" +source = "git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" dependencies = [ "anyhow", "chrono", @@ -1286,7 +1286,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" +source = "git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" dependencies = [ "anyhow", "chrono", @@ -1303,7 +1303,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = 
"git+https://github.com/oxidecomputer/crucible?rev=da534e73380f3cc53ca0de073e1ea862ae32109b#da534e73380f3cc53ca0de073e1ea862ae32109b" +source = "git+https://github.com/oxidecomputer/crucible?rev=51a3121c8318fc7ac97d74f917ce1d37962e785f#51a3121c8318fc7ac97d74f917ce1d37962e785f" dependencies = [ "crucible-workspace-hack", "libc", @@ -6095,7 +6095,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=5ed82315541271e2734746a9ca79e39f35c12283#5ed82315541271e2734746a9ca79e39f35c12283" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "async-trait", "base64 0.21.5", @@ -6116,7 +6116,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=5ed82315541271e2734746a9ca79e39f35c12283#5ed82315541271e2734746a9ca79e39f35c12283" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "anyhow", "atty", @@ -6146,7 +6146,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=5ed82315541271e2734746a9ca79e39f35c12283#5ed82315541271e2734746a9ca79e39f35c12283" +source = "git+https://github.com/oxidecomputer/propolis?rev=54398875a2125227d13827d4236dce943c019b1c#54398875a2125227d13827d4236dce943c019b1c" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index c51ac069a9..82bca496a5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -169,9 +169,9 @@ cookie = "0.16" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "da534e73380f3cc53ca0de073e1ea862ae32109b" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "51a3121c8318fc7ac97d74f917ce1d37962e785f" } curve25519-dalek = "4" datatest-stable = "0.2.3" display-error-chain = "0.2.0" @@ -290,9 +290,9 @@ pretty-hex = "0.3.0" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "5ed82315541271e2734746a9ca79e39f35c12283" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "5ed82315541271e2734746a9ca79e39f35c12283" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "5ed82315541271e2734746a9ca79e39f35c12283" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "54398875a2125227d13827d4236dce943c019b1c" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = 
"54398875a2125227d13827d4236dce943c019b1c" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "54398875a2125227d13827d4236dce943c019b1c" } proptest = "1.3.1" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index 61c90a3e75..c65c5b6933 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -384,10 +384,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "da534e73380f3cc53ca0de073e1ea862ae32109b" +source.commit = "51a3121c8318fc7ac97d74f917ce1d37962e785f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "572ac3b19e51b4e476266a62c2b7e06eff81c386cb48247c4b9f9b1e2ee81895" +source.sha256 = "897d0fd6c0b82db42256a63a13c228152e1117434afa2681f649b291e3c6f46d" output.type = "zone" [package.crucible-pantry] @@ -395,10 +395,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "da534e73380f3cc53ca0de073e1ea862ae32109b" +source.commit = "51a3121c8318fc7ac97d74f917ce1d37962e785f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "812269958e18f54d72bc10bb4fb81f26c084cf762da7fd98e63d58c689be9ad1" +source.sha256 = "fe545de7ac4f15454d7827927149c5f0fc68ce9545b4f1ef96aac9ac8039805a" output.type = "zone" # Refer to @@ -409,10 +409,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "4019eb10fc2f4ba9bf210d0461dc6292b68309c2" +source.commit = "54398875a2125227d13827d4236dce943c019b1c" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "aa1d9dc5c9117c100f9636901e8eec6679d7dfbf869c46b7f2873585f94a1b89" +source.sha256 = "01b8563db6626f90ee3fb6d97e7921b0a680373d843c1bea7ebf46fcea4f7b28" output.type = "zone" [package.mg-ddm-gz] From ca9d90a79c2a7a9ffdca12a705d007d7f28ce61f Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Thu, 16 Nov 2023 17:02:11 +0000 Subject: [PATCH 04/10] Update maghemite to `12b392b` (#4505) --- package-manifest.toml | 12 ++++++------ tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 4 ++-- tools/maghemite_mgd_checksums | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index c65c5b6933..f320215a13 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -425,10 +425,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" +source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "d871406ed926571efebdab248de08d4f1ca6c31d4f9a691ce47b186474165c57" +source.sha256 = "38851c79c85d53e997db748520fb27c82299ce7e58a550e35646a548498f1271" output.type = "tarball" [package.mg-ddm] @@ -441,10 +441,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" +source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "85ec05a8726989b5cb0a567de6b0855f6f84b6f3409ac99ccaf372be5821e45d" +source.sha256 = "8cd94e9a6f6175081ce78f0281085a08a5306cde453d8e21deb28050945b1d88" output.type = "zone" output.intermediate_only = true @@ -456,10 +456,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" +source.commit = "12b392be94ff93abc3017bf2610a3b18e2174a2d" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "aa7241cd35976f28f25aaf3ce2ce2af14dae1da9d67585c7de3b724dbcc55e60" +source.sha256 = "c4a7a626c84a28de3d2c6bfd85592bda2abad8cf5b41b2ce90b9c03904ccd3df" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 40db886f69..76bdb9ca92 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" +COMMIT="12b392be94ff93abc3017bf2610a3b18e2174a2d" SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index ad88fef13e..d6d1788cbc 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="aefdfd3a57e5ca1949d4a913b8e35ce8cd7dfa8b" -SHA2="b3f55fe24e54530fdf96c22a033f9edc0bad9c0a5e3344763a23e52b251d5113" +COMMIT="12b392be94ff93abc3017bf2610a3b18e2174a2d" +SHA2="6c1fab8d5028b52a161d8bf02aae47844699cdc5f7b28e1ac519fc4ec1ab3971" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 7c1644b031..9657147159 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="aa7241cd35976f28f25aaf3ce2ce2af14dae1da9d67585c7de3b724dbcc55e60" -MGD_LINUX_SHA256="a39387c361ff2c2d0701d66c00b10e43c72fb5ddd1a5900b59ecccb832c80731" \ No newline at end of file +CIDL_SHA256="c4a7a626c84a28de3d2c6bfd85592bda2abad8cf5b41b2ce90b9c03904ccd3df" +MGD_LINUX_SHA256="81231b30872fa1c581aa22c101f32d11f33f335758ac1fd2653436fbc7aab93f" \ No newline at end of file From 2585b88d3b74976792d188fea6e79f4badd20077 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 17 Nov 2023 09:06:58 -0800 Subject: [PATCH 05/10] [db-queries] Avoid interactive 
transaction for all authz checks (#4506)

While working on
https://github.com/oxidecomputer/customer-support/issues/46, I noticed
that we run an interactive transaction for every authz check performed
by the database.

This PR rewrites this code to avoid using an interactive transaction
altogether.
---
 nexus/db-queries/src/db/datastore/role.rs | 69 +++++++++++------------
 1 file changed, 34 insertions(+), 35 deletions(-)

diff --git a/nexus/db-queries/src/db/datastore/role.rs b/nexus/db-queries/src/db/datastore/role.rs
index b2ad441475..3a57ffc44c 100644
--- a/nexus/db-queries/src/db/datastore/role.rs
+++ b/nexus/db-queries/src/db/datastore/role.rs
@@ -127,7 +127,8 @@ impl DataStore {
         resource_type: ResourceType,
         resource_id: Uuid,
     ) -> Result<Vec<RoleAssignment>, Error> {
-        use db::schema::role_assignment::dsl;
+        use db::schema::role_assignment::dsl as role_dsl;
+        use db::schema::silo_group_membership::dsl as group_dsl;
 
         // There is no resource-specific authorization check because all
         // authenticated users need to be able to list their own roles --
@@ -140,41 +141,39 @@ impl DataStore {
         // into some hurt by assigning loads of roles to someone and having that
         // person attempt to access anything.
 
-        self.pool_connection_authorized(opctx).await?
-            .transaction_async(|conn| async move {
-                let mut role_assignments = dsl::role_assignment
-                    .filter(dsl::identity_type.eq(identity_type.clone()))
-                    .filter(dsl::identity_id.eq(identity_id))
-                    .filter(dsl::resource_type.eq(resource_type.to_string()))
-                    .filter(dsl::resource_id.eq(resource_id))
-                    .select(RoleAssignment::as_select())
-                    .load_async::<RoleAssignment>(&conn)
-                    .await?;
-
-                // Return the roles that a silo user has from their group memberships
-                if identity_type == IdentityType::SiloUser {
-                    use db::schema::silo_group_membership;
-
-                    let mut group_role_assignments = dsl::role_assignment
-                        .filter(dsl::identity_type.eq(IdentityType::SiloGroup))
-                        .filter(dsl::identity_id.eq_any(
-                            silo_group_membership::dsl::silo_group_membership
-                                .filter(silo_group_membership::dsl::silo_user_id.eq(identity_id))
-                                .select(silo_group_membership::dsl::silo_group_id)
-                        ))
-                        .filter(dsl::resource_type.eq(resource_type.to_string()))
-                        .filter(dsl::resource_id.eq(resource_id))
-                        .select(RoleAssignment::as_select())
-                        .load_async::<RoleAssignment>(&conn)
-                        .await?;
-
-                    role_assignments.append(&mut group_role_assignments);
-                }
+        let direct_roles_query = role_dsl::role_assignment
+            .filter(role_dsl::identity_type.eq(identity_type.clone()))
+            .filter(role_dsl::identity_id.eq(identity_id))
+            .filter(role_dsl::resource_type.eq(resource_type.to_string()))
+            .filter(role_dsl::resource_id.eq(resource_id))
+            .select(RoleAssignment::as_select());
+
+        let roles_from_groups_query = role_dsl::role_assignment
+            .filter(role_dsl::identity_type.eq(IdentityType::SiloGroup))
+            .filter(
+                role_dsl::identity_id.eq_any(
+                    group_dsl::silo_group_membership
+                        .filter(group_dsl::silo_user_id.eq(identity_id))
+                        .select(group_dsl::silo_group_id),
+                ),
+            )
+            .filter(role_dsl::resource_type.eq(resource_type.to_string()))
+            .filter(role_dsl::resource_id.eq(resource_id))
+            .select(RoleAssignment::as_select());
 
-                Ok(role_assignments)
-            })
-            .await
-            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+        let conn = self.pool_connection_authorized(opctx).await?;
+        if identity_type == IdentityType::SiloUser {
+            direct_roles_query
+                .union(roles_from_groups_query)
+                .load_async::<RoleAssignment>(&*conn)
+                .await
+                .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+        } else {
+            direct_roles_query
+                .load_async::<RoleAssignment>(&*conn)
+                .await
+                
.map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
+        }
     }
 
     /// Fetches all of the externally-visible role assignments for the specified

From 781ed1236489bb536506fb03ba1be39b34ed81a5 Mon Sep 17 00:00:00 2001
From: Kyle Simpson
Date: Fri, 17 Nov 2023 17:30:02 +0000
Subject: [PATCH 06/10] Update `how-to-run.adoc` post-BGP (#4404)

Small edits needed to bring this in line with how routes are configured
now. `gateway_ip` is no longer explicitly named as such, so I've added
an explicit comment to signpost how $GATEWAY_IP should be used w.r.t.
the default route.
---
 docs/how-to-run.adoc | 40 +++++++++++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 7 deletions(-)

diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc
index 04d274da8b..f6d780ad72 100644
--- a/docs/how-to-run.adoc
+++ b/docs/how-to-run.adoc
@@ -266,26 +266,52 @@ last = "192.168.1.29"
 This is a range of IP addresses on your external network that Omicron can assign
 to externally-facing services (like DNS and the API). You'll need to change
 these if you've picked different addresses for your external network. See
 <<_external_networking>> above for more on this.
 
+You will also need to update route information if your `$GATEWAY_IP` differs from the default.
+The below example demonstrates a single static gateway route; in-depth explanations for testing with BGP can be found https://docs.oxide.computer/guides/system/network-preparations#_rack_switch_configuration_with_bgp[in the Network Preparations guide] and https://docs.oxide.computer/guides/operator/configuring-bgp[the Configuring BGP guide]:
+
 [source,toml]
 ----
 # Configuration to bring up boundary services and make Nexus reachable from the
 # outside. This block assumes that you're following option (2) above: putting
 # your Oxide system on an existing network that you control.
 [rack_network_config]
-# The gateway for the external network
-gateway_ip = "192.168.1.199"
+# An internal-only IPv6 address block which contains AZ-wide services.
+# This does not need to be changed.
+rack_subnet = "fd00:1122:3344:01::/56"
 # A range of IP addresses used by Boundary Services on the network. In a real
 # system, these would be addresses of the uplink ports on the Sidecar. With
 # softnpu, only one address is used.
 infra_ip_first = "192.168.1.30"
 infra_ip_last = "192.168.1.30"
-# Name of the port. This should always be "qsfp0" when using softnpu.
-uplink_port = "qsfp0"
-uplink_port_speed = "40G"
-uplink_port_fec="none"
+
+# Configurations for BGP routers to run on the scrimlets.
+# This array can typically be safely left empty for home/local use,
+# otherwise this is a list of { asn: u32, originate: ["<prefix>"] }
+# structs which will be inserted when Nexus is started by sled-agent.
+# See the 'Network Preparations' guide linked above.
+bgp = []
+
+[[rack_network_config.ports]]
+# Routes associated with this port.
+# NOTE: The below `nexthop` should be set to $GATEWAY_IP for your configuration
+routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}]
+# Addresses associated with this port.
 # For softnpu, an address within the "infra" block above that will be used for
 # the softnpu uplink port. You can just pick the first address in that pool.
-uplink_ip = "192.168.1.30"
+addresses = ["192.168.1.30/32"]
+# Name of the uplink port. This should always be "qsfp0" when using softnpu.
+port = "qsfp0"
+# The speed of this port.
+uplink_port_speed = "40G"
+# The forward error correction mode for this port.
+uplink_port_fec="none"
+# Switch to use for the uplink. 
For single-rack deployments this can be +# "switch0" (upper slot) or "switch1" (lower slot). For single-node softnpu +# and dendrite stub environments, use "switch0" +switch = "switch0" +# Neighbors we expect to peer with over BGP on this port. +# see: common/src/api/internal/shared.rs – BgpPeerConfig +bgp_peers = [] ---- In some configurations (not the one described here), it may be necessary to update `smf/sled-agent/$MACHINE/config.toml`: From 1860621199e1af8fd1e47a42a898d8ea9def5737 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 17 Nov 2023 15:48:28 -0800 Subject: [PATCH 07/10] [ci] switch GHA to using the PR tip by default (#4462) Fixes #4461 (see that issue for more). --- .github/workflows/check-opte-ver.yml | 2 ++ .github/workflows/check-workspace-deps.yml | 2 ++ .github/workflows/hakari.yml | 2 ++ .github/workflows/rust.yml | 8 ++++++++ .github/workflows/validate-openapi-spec.yml | 2 ++ 5 files changed, 16 insertions(+) diff --git a/.github/workflows/check-opte-ver.yml b/.github/workflows/check-opte-ver.yml index 9fc390277b..42ef1dda11 100644 --- a/.github/workflows/check-opte-ver.yml +++ b/.github/workflows/check-opte-ver.yml @@ -10,6 +10,8 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + with: + ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - name: Install jq run: sudo apt-get install -y jq - name: Install toml-cli diff --git a/.github/workflows/check-workspace-deps.yml b/.github/workflows/check-workspace-deps.yml index 7ba0c66566..f94ed32fde 100644 --- a/.github/workflows/check-workspace-deps.yml +++ b/.github/workflows/check-workspace-deps.yml @@ -11,5 +11,7 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + with: + ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - name: Check Workspace Dependencies run: cargo xtask check-workspace-deps diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 6f2dc04b91..07b7124f73 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -18,6 +18,8 @@ jobs: RUSTFLAGS: -D warnings steps: - uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4 + with: + ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1 with: toolchain: stable diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index f2581845d9..6239add88f 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -10,6 +10,8 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + with: + ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - name: Report cargo version run: cargo --version - name: Report rustfmt version @@ -30,6 +32,8 @@ jobs: - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + with: + ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version @@ -58,6 +62,8 @@ jobs: - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + with: + ref: ${{ 
github.event.pull_request.head.sha }} # see omicron#4461 - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version @@ -86,6 +92,8 @@ jobs: - name: Disable packages.microsoft.com repo run: sudo rm -f /etc/apt/sources.list.d/microsoft-prod.list - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + with: + ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - uses: Swatinem/rust-cache@3cf7f8cc28d1b4e7d01e3783be10a97d55d483c8 # v2.7.1 if: ${{ github.ref != 'refs/heads/main' }} - name: Report cargo version diff --git a/.github/workflows/validate-openapi-spec.yml b/.github/workflows/validate-openapi-spec.yml index 2716c0571f..ea77ed9497 100644 --- a/.github/workflows/validate-openapi-spec.yml +++ b/.github/workflows/validate-openapi-spec.yml @@ -11,6 +11,8 @@ jobs: runs-on: ubuntu-22.04 steps: - uses: actions/checkout@f43a0e5ff2bd294095638e18286ca9a3d1956744 # v3.6.0 + with: + ref: ${{ github.event.pull_request.head.sha }} # see omicron#4461 - uses: actions/setup-node@1a4442cacd436585916779262731d5b162bc6ec7 # v3.8.2 with: node-version: '18' From d13a0dc8f8609daf57be6725bc1c5467ea7e1fe4 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 17 Nov 2023 17:02:02 -0800 Subject: [PATCH 08/10] [wicket] produce a better error message if MGS isn't available (#4510) Currently, we just time out in this case. Produce a better error message by waiting for 80% of `WICKETD_TIMEOUT`. --- wicket-common/src/lib.rs | 7 ++++++ wicket/src/cli/rack_update.rs | 3 ++- wicket/src/wicketd.rs | 5 +---- wicketd/src/http_entrypoints.rs | 40 +++++++++++++++++++++++++-------- 4 files changed, 41 insertions(+), 14 deletions(-) diff --git a/wicket-common/src/lib.rs b/wicket-common/src/lib.rs index 9e92d20c0a..aea0634ce7 100644 --- a/wicket-common/src/lib.rs +++ b/wicket-common/src/lib.rs @@ -4,6 +4,13 @@ // Copyright 2023 Oxide Computer Company +use std::time::Duration; + pub mod rack_setup; pub mod rack_update; pub mod update_events; + +// WICKETD_TIMEOUT used to be 1 second, but that might be too short (and in +// particular might be responsible for +// https://github.com/oxidecomputer/omicron/issues/3103). 
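+//
+// The constant lives in wicket-common so that wicket and wicketd share the
+// same value; wicketd waits for 80% of it when fetching cached inventory
+// (see the http_entrypoints.rs change below).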
+pub const WICKETD_TIMEOUT: Duration = Duration::from_secs(5); diff --git a/wicket/src/cli/rack_update.rs b/wicket/src/cli/rack_update.rs index f539c22c35..fa41fa7b8c 100644 --- a/wicket/src/cli/rack_update.rs +++ b/wicket/src/cli/rack_update.rs @@ -22,6 +22,7 @@ use update_engine::{ }; use wicket_common::{ rack_update::ClearUpdateStateResponse, update_events::EventReport, + WICKETD_TIMEOUT, }; use wicketd_client::types::{ClearUpdateStateParams, StartUpdateParams}; @@ -31,7 +32,7 @@ use crate::{ parse_event_report_map, ComponentId, CreateClearUpdateStateOptions, CreateStartUpdateOptions, }, - wicketd::{create_wicketd_client, WICKETD_TIMEOUT}, + wicketd::create_wicketd_client, }; use super::command::CommandOutput; diff --git a/wicket/src/wicketd.rs b/wicket/src/wicketd.rs index ec1130a594..a951bf428b 100644 --- a/wicket/src/wicketd.rs +++ b/wicket/src/wicketd.rs @@ -10,6 +10,7 @@ use std::net::SocketAddrV6; use tokio::sync::mpsc::{self, Sender, UnboundedSender}; use tokio::time::{interval, Duration, MissedTickBehavior}; use wicket_common::rack_update::{SpIdentifier, SpType}; +use wicket_common::WICKETD_TIMEOUT; use wicketd_client::types::{ AbortUpdateOptions, ClearUpdateStateOptions, ClearUpdateStateParams, GetInventoryParams, GetInventoryResponse, GetLocationResponse, @@ -38,10 +39,6 @@ impl From for SpIdentifier { } const WICKETD_POLL_INTERVAL: Duration = Duration::from_millis(500); -// WICKETD_TIMEOUT used to be 1 second, but that might be too short (and in -// particular might be responsible for -// https://github.com/oxidecomputer/omicron/issues/3103). -pub(crate) const WICKETD_TIMEOUT: Duration = Duration::from_secs(5); // Assume that these requests are periodic on the order of seconds or the // result of human interaction. In either case, this buffer should be plenty diff --git a/wicketd/src/http_entrypoints.rs b/wicketd/src/http_entrypoints.rs index d6cb6ebd6d..dbd3e31072 100644 --- a/wicketd/src/http_entrypoints.rs +++ b/wicketd/src/http_entrypoints.rs @@ -51,6 +51,7 @@ use std::time::Duration; use tokio::io::AsyncWriteExt; use wicket_common::rack_setup::PutRssUserConfigInsensitive; use wicket_common::update_events::EventReport; +use wicket_common::WICKETD_TIMEOUT; use crate::ServerContext; @@ -896,21 +897,42 @@ async fn post_start_update( // 1. We haven't pulled its state in our inventory (most likely cause: the // cubby is empty; less likely cause: the SP is misbehaving, which will // make updating it very unlikely to work anyway) - // 2. We have pulled its state but our hardware manager says we can't update - // it (most likely cause: the target is the sled we're currently running - // on; less likely cause: our hardware manager failed to get our local - // identifying information, and it refuses to update this target out of - // an abundance of caution). + // 2. We have pulled its state but our hardware manager says we can't + // update it (most likely cause: the target is the sled we're currently + // running on; less likely cause: our hardware manager failed to get our + // local identifying information, and it refuses to update this target + // out of an abundance of caution). // - // First, get our most-recently-cached inventory view. - let inventory = match rqctx.mgs_handle.get_cached_inventory().await { - Ok(inventory) => inventory, - Err(ShutdownInProgress) => { + // First, get our most-recently-cached inventory view. (Only wait 80% of + // WICKETD_TIMEOUT for this: if even a cached inventory isn't available, + // it's because we've never established contact with MGS. 
In that case, we + // should produce a useful error message rather than timing out on the + // client.) + let inventory = match tokio::time::timeout( + WICKETD_TIMEOUT.mul_f32(0.8), + rqctx.mgs_handle.get_cached_inventory(), + ) + .await + { + Ok(Ok(inventory)) => inventory, + Ok(Err(ShutdownInProgress)) => { return Err(HttpError::for_unavail( None, "Server is shutting down".into(), )); } + Err(_) => { + // Have to construct an HttpError manually because + // HttpError::for_unavail doesn't accept an external message. + let message = + "Rack inventory not yet available (is MGS alive?)".to_owned(); + return Err(HttpError { + status_code: http::StatusCode::SERVICE_UNAVAILABLE, + error_code: None, + external_message: message.clone(), + internal_message: message, + }); + } }; // Error cases. From 14f8f3159d40c7d044d12f1424d209cbc100a9cb Mon Sep 17 00:00:00 2001 From: Levon Tarver <11586085+internet-diglett@users.noreply.github.com> Date: Fri, 17 Nov 2023 22:22:17 -0600 Subject: [PATCH 09/10] NAT RPW (#3804) * Add db table for tracking nat entries * Add endpoint for retrieving changesets * Update instance sagas to update table and trigger RPW * Periodically cleanup soft-deleted entries that no longer need to be sync'd by dendrite. The other half of the RPW lives in Dendrite. It will periodically check for a changeset, or check for a changeset when the trigger endpoint is called by the relevant saga / nexus operation. --- common/src/api/external/mod.rs | 1 + common/src/nexus_config.rs | 17 +- dev-tools/omdb/src/bin/omdb/nexus.rs | 1 + dev-tools/omdb/tests/env.out | 15 + dev-tools/omdb/tests/successes.out | 12 + nexus/db-model/src/ipv4_nat_entry.rs | 81 ++++ nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema.rs | 28 +- .../src/db/datastore/ipv4_nat_entry.rs | 440 ++++++++++++++++++ nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 24 + nexus/src/app/background/mod.rs | 1 + nexus/src/app/background/nat_cleanup.rs | 111 +++++ nexus/src/app/instance_network.rs | 239 ++++++---- nexus/src/app/mod.rs | 1 + nexus/src/internal_api/http_entrypoints.rs | 51 ++ nexus/tests/config.test.toml | 1 + openapi/nexus-internal.json | 104 +++++ package-manifest.toml | 12 +- schema/crdb/{10.0.0 => 11.0.0}/README.md | 0 schema/crdb/{10.0.0 => 11.0.0}/up01.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up02.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up03.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up04.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up05.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up06.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up07.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up08.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up09.sql | 0 schema/crdb/11.0.0/up1.sql | 1 + schema/crdb/{10.0.0 => 11.0.0}/up10.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up11.sql | 0 schema/crdb/{10.0.0 => 11.0.0}/up12.sql | 0 schema/crdb/11.0.0/up2.sql | 13 + schema/crdb/11.0.0/up3.sql | 13 + schema/crdb/11.0.0/up4.sql | 5 + schema/crdb/11.0.0/up5.sql | 1 + schema/crdb/11.0.0/up6.sql | 13 + schema/crdb/dbinit.sql | 80 +++- smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + tools/dendrite_openapi_version | 4 +- tools/dendrite_stub_checksums | 6 +- 44 files changed, 1168 insertions(+), 113 deletions(-) create mode 100644 nexus/db-model/src/ipv4_nat_entry.rs create mode 100644 nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs create mode 100644 nexus/src/app/background/nat_cleanup.rs rename schema/crdb/{10.0.0 => 11.0.0}/README.md (100%) rename 
schema/crdb/{10.0.0 => 11.0.0}/up01.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up02.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up03.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up04.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up05.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up06.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up07.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up08.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up09.sql (100%) create mode 100644 schema/crdb/11.0.0/up1.sql rename schema/crdb/{10.0.0 => 11.0.0}/up10.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up11.sql (100%) rename schema/crdb/{10.0.0 => 11.0.0}/up12.sql (100%) create mode 100644 schema/crdb/11.0.0/up2.sql create mode 100644 schema/crdb/11.0.0/up3.sql create mode 100644 schema/crdb/11.0.0/up4.sql create mode 100644 schema/crdb/11.0.0/up5.sql create mode 100644 schema/crdb/11.0.0/up6.sql diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index fcea57220d..adf661516a 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -750,6 +750,7 @@ pub enum ResourceType { UserBuiltin, Zpool, Vmm, + Ipv4NatEntry, } // IDENTITY METADATA diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 4e821e2676..94c39b4436 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -335,6 +335,8 @@ pub struct BackgroundTaskConfig { pub dns_external: DnsTasksConfig, /// configuration for external endpoint list watcher pub external_endpoints: ExternalEndpointsConfig, + /// configuration for nat table garbage collector + pub nat_cleanup: NatCleanupConfig, /// configuration for inventory tasks pub inventory: InventoryConfig, } @@ -371,6 +373,14 @@ pub struct ExternalEndpointsConfig { // allow/disallow wildcard certs, don't serve expired certs, etc.) 
} +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct NatCleanupConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct InventoryConfig { @@ -498,7 +508,7 @@ mod test { BackgroundTaskConfig, Config, ConfigDropshotWithTls, ConsoleConfig, Database, DeploymentConfig, DnsTasksConfig, DpdConfig, ExternalEndpointsConfig, InternalDns, InventoryConfig, LoadError, - LoadErrorKind, MgdConfig, PackageConfig, SchemeName, + LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig, SchemeName, TimeseriesDbConfig, Tunables, UpdatesConfig, }; use crate::address::{Ipv6Subnet, RACK_PREFIX}; @@ -649,6 +659,7 @@ mod test { dns_external.period_secs_propagation = 7 dns_external.max_concurrent_server_updates = 8 external_endpoints.period_secs = 9 + nat_cleanup.period_secs = 30 inventory.period_secs = 10 inventory.nkeep = 11 inventory.disable = false @@ -746,6 +757,9 @@ mod test { external_endpoints: ExternalEndpointsConfig { period_secs: Duration::from_secs(9), }, + nat_cleanup: NatCleanupConfig { + period_secs: Duration::from_secs(30), + }, inventory: InventoryConfig { period_secs: Duration::from_secs(10), nkeep: 11, @@ -804,6 +818,7 @@ mod test { dns_external.period_secs_propagation = 7 dns_external.max_concurrent_server_updates = 8 external_endpoints.period_secs = 9 + nat_cleanup.period_secs = 30 inventory.period_secs = 10 inventory.nkeep = 3 inventory.disable = false diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 128d4315f2..9f91d38504 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -159,6 +159,7 @@ async fn cmd_nexus_background_tasks_show( "dns_config_external", "dns_servers_external", "dns_propagation_external", + "nat_v4_garbage_collector", ] { if let Some(bgtask) = tasks.remove(name) { print_task(&bgtask); diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 7949c1eb61..fd50d80c81 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -61,6 +61,11 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "nat_v4_garbage_collector" + prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a + predetermined retention policy + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -121,6 +126,11 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "nat_v4_garbage_collector" + prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a + predetermined retention policy + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. @@ -168,6 +178,11 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "nat_v4_garbage_collector" + prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a + predetermined retention policy + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. 
diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out
index 8162b6d9de..6bc3a85e8a 100644
--- a/dev-tools/omdb/tests/successes.out
+++ b/dev-tools/omdb/tests/successes.out
@@ -255,6 +255,11 @@ task: "inventory_collection"
     collects hardware and software inventory data from the whole system
 
 
+task: "nat_v4_garbage_collector"
+    prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a
+    predetermined retention policy
+
+
 ---------------------------------------------
 stderr:
 note: using Nexus URL http://127.0.0.1:REDACTED_PORT/
@@ -319,6 +324,13 @@ task: "dns_propagation_external"
     [::1]:REDACTED_PORT   success
 
 
+task: "nat_v4_garbage_collector"
+  configured period: every 30s
+  currently executing: no
+  last completed activation: iter 2, triggered by an explicit signal
+    started at <REDACTED_TIMESTAMP> (<REDACTED DURATION>s ago) and ran for <REDACTED DURATION>ms
+warning: unknown background task: "nat_v4_garbage_collector" (don't know how to interpret details: Null)
+
 task: "external_endpoints"
   configured period: every 1m
   currently executing: no
diff --git a/nexus/db-model/src/ipv4_nat_entry.rs b/nexus/db-model/src/ipv4_nat_entry.rs
new file mode 100644
index 0000000000..570a46b5e9
--- /dev/null
+++ b/nexus/db-model/src/ipv4_nat_entry.rs
@@ -0,0 +1,81 @@
+use std::net::{Ipv4Addr, Ipv6Addr};
+
+use super::MacAddr;
+use crate::{schema::ipv4_nat_entry, Ipv4Net, Ipv6Net, SqlU16, Vni};
+use chrono::{DateTime, Utc};
+use omicron_common::api::external;
+use schemars::JsonSchema;
+use serde::Serialize;
+use uuid::Uuid;
+
+/// Values used to create an Ipv4NatEntry
+#[derive(Insertable, Debug, Clone)]
+#[diesel(table_name = ipv4_nat_entry)]
+pub struct Ipv4NatValues {
+    pub external_address: Ipv4Net,
+    pub first_port: SqlU16,
+    pub last_port: SqlU16,
+    pub sled_address: Ipv6Net,
+    pub vni: Vni,
+    pub mac: MacAddr,
+}
+
+/// Database representation of an Ipv4 NAT Entry.
+#[derive(Queryable, Debug, Clone, Selectable)]
+#[diesel(table_name = ipv4_nat_entry)]
+pub struct Ipv4NatEntry {
+    pub id: Uuid,
+    pub external_address: Ipv4Net,
+    pub first_port: SqlU16,
+    pub last_port: SqlU16,
+    pub sled_address: Ipv6Net,
+    pub vni: Vni,
+    pub mac: MacAddr,
+    pub version_added: i64,
+    pub version_removed: Option<i64>,
+    pub time_created: DateTime<Utc>,
+    pub time_deleted: Option<DateTime<Utc>>,
+}
+
+impl Ipv4NatEntry {
+    pub fn first_port(&self) -> u16 {
+        self.first_port.into()
+    }
+
+    pub fn last_port(&self) -> u16 {
+        self.last_port.into()
+    }
+}
+
+/// NAT Record
+#[derive(Clone, Debug, Serialize, JsonSchema)]
+pub struct Ipv4NatEntryView {
+    pub external_address: Ipv4Addr,
+    pub first_port: u16,
+    pub last_port: u16,
+    pub sled_address: Ipv6Addr,
+    pub vni: external::Vni,
+    pub mac: external::MacAddr,
+    pub gen: i64,
+    pub deleted: bool,
+}
+
+impl From<Ipv4NatEntry> for Ipv4NatEntryView {
+    fn from(value: Ipv4NatEntry) -> Self {
+        let (gen, deleted) = match value.version_removed {
+            Some(gen) => (gen, true),
+            None => (value.version_added, false),
+        };
+
+        Self {
+            external_address: value.external_address.ip(),
+            first_port: value.first_port(),
+            last_port: value.last_port(),
+            sled_address: value.sled_address.ip(),
+            vni: value.vni.0,
+            mac: *value.mac,
+            gen,
+            deleted,
+        }
+    }
+}
diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index 7aa8a6b076..6b65eb87ec 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -53,6 +53,7 @@ mod system_update;
 // These actually represent subqueries, not real tables.
 // However, they must be defined in the same crate as our tables
 // for join-based marker trait generation.
+mod ipv4_nat_entry;
 pub mod queries;
 mod rack;
 mod region;
@@ -124,6 +125,7 @@ pub use instance_cpu_count::*;
 pub use instance_state::*;
 pub use inventory::*;
 pub use ip_pool::*;
+pub use ipv4_nat_entry::*;
 pub use ipv4net::*;
 pub use ipv6::*;
 pub use ipv6net::*;
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 7c6b8bbd0a..4844f2a33f 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -489,6 +489,32 @@
     }
 }
 
+table! {
+    ipv4_nat_entry (id) {
+        id -> Uuid,
+        external_address -> Inet,
+        first_port -> Int4,
+        last_port -> Int4,
+        sled_address -> Inet,
+        vni -> Int4,
+        mac -> Int8,
+        version_added -> Int8,
+        version_removed -> Nullable<Int8>,
+        time_created -> Timestamptz,
+        time_deleted -> Nullable<Timestamptz>,
+    }
+}
+
+// This is the sequence used for the version number
+// in ipv4_nat_entry.
+table! {
+    ipv4_nat_version (last_value) {
+        last_value -> Int8,
+        log_cnt -> Int8,
+        is_called -> Bool,
+    }
+}
+
 table! {
     external_ip (id) {
         id -> Uuid,
@@ -1243,7 +1269,7 @@ table! {
 ///
 /// This should be updated whenever the schema is changed. For more details,
 /// refer to: schema/crdb/README.adoc
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(10, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(11, 0, 0);
 
 allow_tables_to_appear_in_same_query!(
     system_update,
diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
new file mode 100644
index 0000000000..274937b299
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
@@ -0,0 +1,440 @@
+use super::DataStore;
+use crate::context::OpContext;
+use crate::db;
+use crate::db::error::public_error_from_diesel;
+use crate::db::error::ErrorHandler;
+use crate::db::model::{Ipv4NatEntry, Ipv4NatValues};
+use async_bb8_diesel::AsyncRunQueryDsl;
+use chrono::{DateTime, Utc};
+use diesel::prelude::*;
+use diesel::sql_types::BigInt;
+use nexus_db_model::ExternalIp;
+use nexus_db_model::Ipv4NatEntryView;
+use omicron_common::api::external::CreateResult;
+use omicron_common::api::external::DeleteResult;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupResult;
+use omicron_common::api::external::LookupType;
+use omicron_common::api::external::ResourceType;
+
+impl DataStore {
+    pub async fn ensure_ipv4_nat_entry(
+        &self,
+        opctx: &OpContext,
+        nat_entry: Ipv4NatValues,
+    ) -> CreateResult<()> {
+        use db::schema::ipv4_nat_entry::dsl;
+        use diesel::sql_types;
+
+        // Look up any NAT entries that already have the exact parameters
+        // we're trying to INSERT.
+        let matching_entry_subquery = dsl::ipv4_nat_entry
+            .filter(dsl::external_address.eq(nat_entry.external_address))
+            .filter(dsl::first_port.eq(nat_entry.first_port))
+            .filter(dsl::last_port.eq(nat_entry.last_port))
+            .filter(dsl::sled_address.eq(nat_entry.sled_address))
+            .filter(dsl::vni.eq(nat_entry.vni))
+            .filter(dsl::mac.eq(nat_entry.mac))
+            .select((
+                dsl::external_address,
+                dsl::first_port,
+                dsl::last_port,
+                dsl::sled_address,
+                dsl::vni,
+                dsl::mac,
+            ));
+
+        // SELECT exactly the values we're trying to INSERT, but only
+        // if it does not already exist.
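+        //
+        // Together with the INSERT below, this is the usual Diesel encoding
+        // of `INSERT INTO ... SELECT ... WHERE NOT EXISTS (...)`: the
+        // sub-SELECT yields either one row (the values to insert) or zero
+        // rows (an identical entry already exists), so the insert is
+        // idempotent and needs no interactive transaction.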
+        let new_entry_subquery = diesel::dsl::select((
+            nat_entry.external_address.into_sql::<sql_types::Inet>(),
+            nat_entry.first_port.into_sql::<sql_types::Int4>(),
+            nat_entry.last_port.into_sql::<sql_types::Int4>(),
+            nat_entry.sled_address.into_sql::<sql_types::Inet>(),
+            nat_entry.vni.into_sql::<sql_types::Int4>(),
+            nat_entry.mac.into_sql::<sql_types::BigInt>(),
+        ))
+        .filter(diesel::dsl::not(diesel::dsl::exists(matching_entry_subquery)));
+
+        diesel::insert_into(dsl::ipv4_nat_entry)
+            .values(new_entry_subquery)
+            .into_columns((
+                dsl::external_address,
+                dsl::first_port,
+                dsl::last_port,
+                dsl::sled_address,
+                dsl::vni,
+                dsl::mac,
+            ))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
+
+    pub async fn ipv4_nat_delete(
+        &self,
+        opctx: &OpContext,
+        nat_entry: &Ipv4NatEntry,
+    ) -> DeleteResult {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        let updated_rows = diesel::update(dsl::ipv4_nat_entry)
+            .set((
+                dsl::version_removed.eq(ipv4_nat_next_version().nullable()),
+                dsl::time_deleted.eq(Utc::now()),
+            ))
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::version_removed.is_null())
+            .filter(dsl::id.eq(nat_entry.id))
+            .filter(dsl::version_added.eq(nat_entry.version_added))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        if updated_rows == 0 {
+            return Err(Error::ObjectNotFound {
+                type_name: ResourceType::Ipv4NatEntry,
+                lookup_type: LookupType::ByCompositeId(
+                    "id, version_added".to_string(),
+                ),
+            });
+        }
+        Ok(())
+    }
+
+    pub async fn ipv4_nat_find_by_id(
+        &self,
+        opctx: &OpContext,
+        id: uuid::Uuid,
+    ) -> LookupResult<Ipv4NatEntry> {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        let result = dsl::ipv4_nat_entry
+            .filter(dsl::id.eq(id))
+            .select(Ipv4NatEntry::as_select())
+            .limit(1)
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        if let Some(nat_entry) = result.first() {
+            Ok(nat_entry.clone())
+        } else {
+            Err(Error::InvalidRequest {
+                message: "no matching records".to_string(),
+            })
+        }
+    }
+
+    pub async fn ipv4_nat_delete_by_external_ip(
+        &self,
+        opctx: &OpContext,
+        external_ip: &ExternalIp,
+    ) -> DeleteResult {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        let updated_rows = diesel::update(dsl::ipv4_nat_entry)
+            .set((
+                dsl::version_removed.eq(ipv4_nat_next_version().nullable()),
+                dsl::time_deleted.eq(Utc::now()),
+            ))
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::version_removed.is_null())
+            .filter(dsl::external_address.eq(external_ip.ip))
+            .filter(dsl::first_port.eq(external_ip.first_port))
+            .filter(dsl::last_port.eq(external_ip.last_port))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
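+            // (Editor's note) This is a soft delete: rather than removing the
+            // row, we stamp version_removed from the shared sequence and set
+            // time_deleted, so dpd consumers observe the removal as a new
+            // change in the changeset.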
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        if updated_rows == 0 {
+            return Err(Error::ObjectNotFound {
+                type_name: ResourceType::Ipv4NatEntry,
+                lookup_type: LookupType::ByCompositeId(
+                    "external_ip, first_port, last_port".to_string(),
+                ),
+            });
+        }
+        Ok(())
+    }
+
+    pub async fn ipv4_nat_find_by_values(
+        &self,
+        opctx: &OpContext,
+        values: Ipv4NatValues,
+    ) -> LookupResult<Ipv4NatEntry> {
+        use db::schema::ipv4_nat_entry::dsl;
+        let result = dsl::ipv4_nat_entry
+            .filter(dsl::external_address.eq(values.external_address))
+            .filter(dsl::first_port.eq(values.first_port))
+            .filter(dsl::last_port.eq(values.last_port))
+            .filter(dsl::mac.eq(values.mac))
+            .filter(dsl::sled_address.eq(values.sled_address))
+            .filter(dsl::vni.eq(values.vni))
+            .filter(dsl::time_deleted.is_null())
+            .select(Ipv4NatEntry::as_select())
+            .limit(1)
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        if let Some(nat_entry) = result.first() {
+            Ok(nat_entry.clone())
+        } else {
+            Err(Error::InvalidRequest {
+                message: "no matching records".to_string(),
+            })
+        }
+    }
+
+    pub async fn ipv4_nat_list_since_version(
+        &self,
+        opctx: &OpContext,
+        version: i64,
+        limit: u32,
+    ) -> ListResultVec<Ipv4NatEntry> {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        let list = dsl::ipv4_nat_entry
+            .filter(
+                dsl::version_added
+                    .gt(version)
+                    .or(dsl::version_removed.gt(version)),
+            )
+            .limit(limit as i64)
+            .select(Ipv4NatEntry::as_select())
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(list)
+    }
+
+    pub async fn ipv4_nat_changeset(
+        &self,
+        opctx: &OpContext,
+        version: i64,
+        limit: u32,
+    ) -> ListResultVec<Ipv4NatEntryView> {
+        let nat_entries =
+            self.ipv4_nat_list_since_version(opctx, version, limit).await?;
+        let nat_entries: Vec<Ipv4NatEntryView> =
+            nat_entries.iter().map(|e| e.clone().into()).collect();
+        Ok(nat_entries)
+    }
+
+    pub async fn ipv4_nat_current_version(
+        &self,
+        opctx: &OpContext,
+    ) -> LookupResult<i64> {
+        use db::schema::ipv4_nat_version::dsl;
+
+        let latest: Option<i64> = dsl::ipv4_nat_version
+            .select(diesel::dsl::max(dsl::last_value))
+            .first_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        match latest {
+            Some(value) => Ok(value),
+            None => Err(Error::InvalidRequest {
+                message: "sequence table is empty!".to_string(),
+            }),
+        }
+    }
+
+    pub async fn ipv4_nat_cleanup(
+        &self,
+        opctx: &OpContext,
+        version: i64,
+        before_timestamp: DateTime<Utc>,
+    ) -> DeleteResult {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        diesel::delete(dsl::ipv4_nat_entry)
+            .filter(dsl::version_removed.lt(version))
+            .filter(dsl::time_deleted.lt(before_timestamp))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
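+            // (Editor's note) Unlike the soft deletes above, this is a hard
+            // DELETE: a row is reclaimed only when both guards pass, i.e.
+            // version_removed < version AND time_deleted < before_timestamp.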
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
+}
+
+fn ipv4_nat_next_version() -> diesel::expression::SqlLiteral<BigInt> {
+    diesel::dsl::sql::<BigInt>("nextval('omicron.public.ipv4_nat_version')")
+}
+
+#[cfg(test)]
+mod test {
+    use std::str::FromStr;
+
+    use crate::db::datastore::datastore_test;
+    use chrono::Utc;
+    use nexus_db_model::{Ipv4NatValues, MacAddr, Vni};
+    use nexus_test_utils::db::test_setup_database;
+    use omicron_common::api::external;
+    use omicron_test_utils::dev;
+
+    // Test our ability to track additions and deletions since a given version number
+    #[tokio::test]
+    async fn nat_version_tracking() {
+        let logctx = dev::test_setup_log("test_nat_version_tracking");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        // We should not have any NAT entries at this moment
+        let initial_state =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        assert!(initial_state.is_empty());
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            0
+        );
+
+        // Each change (creation / deletion) to the NAT table should increment the
+        // version number of the row in the NAT table
+        let external_address = external::Ipv4Net(
+            ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(),
+        );
+
+        let sled_address = external::Ipv6Net(
+            ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(),
+        );
+
+        // Add a nat entry.
+        let nat1 = Ipv4NatValues {
+            external_address: external_address.into(),
+            first_port: 0.into(),
+            last_port: 999.into(),
+            sled_address: sled_address.into(),
+            vni: Vni(external::Vni::random()),
+            mac: MacAddr(
+                external::MacAddr::from_str("A8:40:25:F5:EB:2A").unwrap(),
+            ),
+        };
+
+        datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap();
+        let first_entry =
+            datastore.ipv4_nat_find_by_values(&opctx, nat1).await.unwrap();
+
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        // The NAT table has undergone one change. One entry has been added,
+        // none deleted, so we should be at version 1.
+        assert_eq!(nat_entries.len(), 1);
+        assert_eq!(nat_entries.last().unwrap().version_added, 1);
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            1
+        );
+
+        // Add another nat entry.
+        let nat2 = Ipv4NatValues {
+            external_address: external_address.into(),
+            first_port: 1000.into(),
+            last_port: 1999.into(),
+            sled_address: sled_address.into(),
+            vni: Vni(external::Vni::random()),
+            mac: MacAddr(
+                external::MacAddr::from_str("A8:40:25:F5:EB:2B").unwrap(),
+            ),
+        };
+
+        datastore.ensure_ipv4_nat_entry(&opctx, nat2).await.unwrap();
+
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        // The NAT table has undergone two changes. Two entries have been
+        // added, none deleted, so we should be at version 2.
+        let nat_entry =
+            nat_entries.iter().find(|e| e.version_added == 2).unwrap();
+        assert_eq!(nat_entries.len(), 2);
+        assert_eq!(nat_entry.version_added, 2);
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            2
+        );
+
+        // Test Cleanup logic
+        // Cleanup should only perma-delete entries that are older than a
+        // specified version number and whose `time_deleted` field is
+        // older than a specified age.
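+        // (Editor's note) Concretely, cleanup removes a row only when
+        // version_removed < version AND time_deleted < before_timestamp;
+        // the calls below probe each side of that conjunction.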
+        let time_cutoff = Utc::now();
+        datastore.ipv4_nat_cleanup(&opctx, 2, time_cutoff).await.unwrap();
+
+        // Nothing should have changed (no records currently marked for deletion)
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        assert_eq!(nat_entries.len(), 2);
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            2
+        );
+
+        // Delete the first nat entry. It should show up as a later version number.
+        datastore.ipv4_nat_delete(&opctx, &first_entry).await.unwrap();
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        // The NAT table has undergone three changes. Two entries have been
+        // added, one deleted, so we should be at version 3. Since the
+        // first entry was marked for deletion (and it was the third change),
+        // the first entry's version number should now be 3.
+        let nat_entry =
+            nat_entries.iter().find(|e| e.version_removed.is_some()).unwrap();
+        assert_eq!(nat_entries.len(), 2);
+        assert_eq!(nat_entry.version_removed, Some(3));
+        assert_eq!(nat_entry.id, first_entry.id);
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            3
+        );
+
+        // Try cleaning up with the old version and time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 2, time_cutoff).await.unwrap();
+
+        // Try cleaning up with a greater version and old time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 6, time_cutoff).await.unwrap();
+
+        // Try cleaning up with an older version and newer time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 2, Utc::now()).await.unwrap();
+
+        // Both records should still exist (the soft-deleted record is newer
+        // than the cutoff values)
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        assert_eq!(nat_entries.len(), 2);
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            3
+        );
+
+        // Try cleaning up with both cutoff values increased
+        datastore.ipv4_nat_cleanup(&opctx, 4, Utc::now()).await.unwrap();
+
+        // The soft-deleted NAT entry should be removed from the table
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        assert_eq!(nat_entries.len(), 1);
+
+        // The version should be unchanged
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            3
+        );
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+}
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 91373f6875..7385970fb1 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -63,6 +63,7 @@ mod image;
 mod instance;
 mod inventory;
 mod ip_pool;
+mod ipv4_nat_entry;
 mod network_interface;
 mod oximeter;
 mod physical_disk;
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml
index efc9aa9c27..3679fa8196 100644
--- a/nexus/examples/config.toml
+++ b/nexus/examples/config.toml
@@ -92,6 +92,7 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+nat_cleanup.period_secs = 30

 # How frequently to collect hardware/software inventory from the whole system
 # (even if we don't have reason to believe anything has changed).
 inventory.period_secs = 600
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs
index b000dd9bda..d27248ffdc 100644
--- a/nexus/src/app/background/init.rs
+++ b/nexus/src/app/background/init.rs
@@ -10,12 +10,15 @@ use super::dns_propagation;
 use super::dns_servers;
 use super::external_endpoints;
 use super::inventory_collection;
+use super::nat_cleanup;
 use nexus_db_model::DnsGroup;
 use nexus_db_queries::context::OpContext;
 use nexus_db_queries::db::DataStore;
+use omicron_common::api::internal::shared::SwitchLocation;
 use omicron_common::nexus_config::BackgroundTaskConfig;
 use omicron_common::nexus_config::DnsTasksConfig;
 use std::collections::BTreeMap;
+use std::collections::HashMap;
 use std::sync::Arc;
 use uuid::Uuid;

@@ -44,6 +47,8 @@ pub struct BackgroundTasks {
     pub external_endpoints: tokio::sync::watch::Receiver<
         Option<external_endpoints::ExternalEndpoints>,
     >,
+    /// task handle for the ipv4 nat entry garbage collector
+    pub nat_cleanup: common::TaskHandle,

     /// task handle for the task that collects inventory
     pub task_inventory_collection: common::TaskHandle,
@@ -55,6 +60,7 @@ impl BackgroundTasks {
         opctx: &OpContext,
         datastore: Arc<DataStore>,
         config: &BackgroundTaskConfig,
+        dpd_clients: &HashMap<SwitchLocation, Arc<dpd_client::Client>>,
         nexus_id: Uuid,
         resolver: internal_dns::resolver::Resolver,
     ) -> BackgroundTasks {
@@ -96,6 +102,23 @@ impl BackgroundTasks {
             (task, watcher_channel)
         };

+        let nat_cleanup = {
+            driver.register(
+                "nat_v4_garbage_collector".to_string(),
+                String::from(
+                    "prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table \
+                    based on a predetermined retention policy",
+                ),
+                config.nat_cleanup.period_secs,
+                Box::new(nat_cleanup::Ipv4NatGarbageCollector::new(
+                    datastore.clone(),
+                    dpd_clients.values().map(|client| client.clone()).collect(),
+                )),
+                opctx.child(BTreeMap::new()),
+                vec![],
+            )
+        };
+
         // Background task: inventory collector
         let task_inventory_collection = {
             let collector = inventory_collection::InventoryCollector::new(
@@ -128,6 +151,7 @@ impl BackgroundTasks {
             task_external_dns_servers,
             task_external_endpoints,
             external_endpoints,
+            nat_cleanup,
             task_inventory_collection,
         }
     }
diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs
index e1f474b41a..954207cb3c 100644
--- a/nexus/src/app/background/mod.rs
+++ b/nexus/src/app/background/mod.rs
@@ -11,6 +11,7 @@ mod dns_servers;
 mod external_endpoints;
 mod init;
 mod inventory_collection;
+mod nat_cleanup;
 mod status;

 pub use common::Driver;
diff --git a/nexus/src/app/background/nat_cleanup.rs b/nexus/src/app/background/nat_cleanup.rs
new file mode 100644
index 0000000000..1691d96a4b
--- /dev/null
+++ b/nexus/src/app/background/nat_cleanup.rs
@@ -0,0 +1,111 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Background task for garbage collecting ipv4_nat_entry table.
+//! Responsible for cleaning up soft deleted entries once they
+//! have been propagated to running dpd instances.
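+//!
+//! (Editor's sketch of the logic below) The task reads the current database
+//! generation, folds in the generation reported by every dpd instance using
+//! min(), and prunes only entries soft-deleted before that minimum and older
+//! than the retention window, so no switch can miss a removal.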
+
+use super::common::BackgroundTask;
+use chrono::{Duration, Utc};
+use futures::future::BoxFuture;
+use futures::FutureExt;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use serde_json::json;
+use std::sync::Arc;
+
+/// Background task that periodically prunes soft-deleted entries
+/// from ipv4_nat_entry table
+pub struct Ipv4NatGarbageCollector {
+    datastore: Arc<DataStore>,
+    dpd_clients: Vec<Arc<dpd_client::Client>>,
+}
+
+impl Ipv4NatGarbageCollector {
+    pub fn new(
+        datastore: Arc<DataStore>,
+        dpd_clients: Vec<Arc<dpd_client::Client>>,
+    ) -> Ipv4NatGarbageCollector {
+        Ipv4NatGarbageCollector { datastore, dpd_clients }
+    }
+}
+
+impl BackgroundTask for Ipv4NatGarbageCollector {
+    fn activate<'a, 'b, 'c>(
+        &'a mut self,
+        opctx: &'b OpContext,
+    ) -> BoxFuture<'c, serde_json::Value>
+    where
+        'a: 'c,
+        'b: 'c,
+    {
+        async {
+            let log = &opctx.log;
+
+            let result = self.datastore.ipv4_nat_current_version(opctx).await;
+
+            let mut min_gen = match result {
+                Ok(gen) => gen,
+                Err(error) => {
+                    warn!(
+                        &log,
+                        "failed to read generation of database";
+                        "error" => format!("{:#}", error)
+                    );
+                    return json!({
+                        "error":
+                            format!(
+                                "failed to read generation of database: \
+                                {:#}",
+                                error
+                            )
+                    });
+                }
+            };
+
+            for client in &self.dpd_clients {
+                let response = client.ipv4_nat_generation().await;
+                match response {
+                    Ok(gen) => min_gen = std::cmp::min(min_gen, *gen),
+                    Err(error) => {
+                        warn!(
+                            &log,
+                            "failed to read generation of dpd";
+                            "error" => format!("{:#}", error)
+                        );
+                        return json!({
+                            "error":
+                                format!(
+                                    "failed to read generation of dpd: \
+                                    {:#}",
+                                    error
+                                )
+                        });
+                    }
+                }
+            }
+
+            let retention_threshold = Utc::now() - Duration::weeks(2);
+
+            let result = self
+                .datastore
+                .ipv4_nat_cleanup(opctx, min_gen, retention_threshold)
+                .await
+                .unwrap();
+
+            let rv = serde_json::to_value(&result).unwrap_or_else(|error| {
+                json!({
+                    "error":
+                        format!(
+                            "failed to serialize final value: {:#}",
+                            error
+                        )
+                })
+            });
+
+            rv
+        }
+        .boxed()
+    }
+}
diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs
index 0f52cbd260..abb8c744e1 100644
--- a/nexus/src/app/instance_network.rs
+++ b/nexus/src/app/instance_network.rs
@@ -5,6 +5,10 @@
 //! Routines that manage instance-related networking state.
 use crate::app::sagas::retry_until_known_result;
+use ipnetwork::IpNetwork;
+use ipnetwork::Ipv6Network;
+use nexus_db_model::Ipv4NatValues;
+use nexus_db_model::Vni as DbVni;
 use nexus_db_queries::authz;
 use nexus_db_queries::context::OpContext;
 use nexus_db_queries::db;
@@ -12,6 +16,8 @@ use nexus_db_queries::db::identity::Asset;
 use nexus_db_queries::db::lookup::LookupPath;
 use omicron_common::api::external::DataPageParams;
 use omicron_common::api::external::Error;
+use omicron_common::api::external::Ipv4Net;
+use omicron_common::api::external::Ipv6Net;
 use omicron_common::api::internal::nexus;
 use omicron_common::api::internal::shared::SwitchLocation;
 use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost;
@@ -330,8 +336,6 @@ impl super::Nexus {
             ))
         })?;

-        let vni: u32 = network_interface.vni.into();
-
         info!(log, "looking up instance's external IPs";
               "instance_id" => %instance_id);

@@ -349,6 +353,9 @@ impl super::Nexus {
             }
         }

+        let sled_address =
+            Ipv6Net(Ipv6Network::new(*sled_ip_address.ip(), 128).unwrap());
+
         for target_ip in ips
             .iter()
             .enumerate()
@@ -361,29 +368,58 @@ impl super::Nexus {
             })
             .map(|(_, ip)| ip)
         {
-            retry_until_known_result(log, || async {
-                dpd_client
-                    .ensure_nat_entry(
-                        &log,
-                        target_ip.ip,
-                        dpd_client::types::MacAddr {
-                            a: mac_address.into_array(),
-                        },
-                        *target_ip.first_port,
-                        *target_ip.last_port,
-                        vni,
-                        sled_ip_address.ip(),
-                    )
-                    .await
-            })
-            .await
-            .map_err(|e| {
-                Error::internal_error(&format!(
-                    "failed to ensure dpd entry: {e}"
-                ))
-            })?;
+            // For each external ip, add a nat entry to the database
+            self.ensure_nat_entry(
+                target_ip,
+                sled_address,
+                &network_interface,
+                mac_address,
+                opctx,
+            )
+            .await?;
         }

+        // Notify dendrite that there are changes for it to reconcile.
+        // In the event of a failure to notify dendrite, we'll log an error
+        // and rely on dendrite's RPW timer to catch it up.
+        if let Err(e) = dpd_client.ipv4_nat_trigger_update().await {
+            error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e);
+        };
+
+        Ok(())
+    }
+
+    async fn ensure_nat_entry(
+        &self,
+        target_ip: &nexus_db_model::ExternalIp,
+        sled_address: Ipv6Net,
+        network_interface: &sled_agent_client::types::NetworkInterface,
+        mac_address: macaddr::MacAddr6,
+        opctx: &OpContext,
+    ) -> Result<(), Error> {
+        match target_ip.ip {
+            IpNetwork::V4(v4net) => {
+                let nat_entry = Ipv4NatValues {
+                    external_address: Ipv4Net(v4net).into(),
+                    first_port: target_ip.first_port,
+                    last_port: target_ip.last_port,
+                    sled_address: sled_address.into(),
+                    vni: DbVni(network_interface.vni.clone().into()),
+                    mac: nexus_db_model::MacAddr(
+                        omicron_common::api::external::MacAddr(mac_address),
+                    ),
+                };
+                self.db_datastore
+                    .ensure_ipv4_nat_entry(opctx, nat_entry)
+                    .await?;
+            }
+            IpNetwork::V6(_v6net) => {
+                // TODO: implement handling of v6 nat.
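+                // (Editor's note) The rework in this file swaps direct dpd
+                // calls for the RPW pattern: Nexus records NAT intent in the
+                // database, pokes dendrite via ipv4_nat_trigger_update(), and
+                // dendrite pulls the changeset and reconciles. Until v6 NAT
+                // support lands, a v6 external address fails loudly here
+                // rather than being silently skipped.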
+                return Err(Error::InternalError {
+                    internal_message: "ipv6 nat is not yet implemented".into(),
+                });
+            }
+        };
         Ok(())
     }

@@ -419,55 +455,54 @@
         let mut errors = vec![];
         for entry in external_ips {
-            for switch in &boundary_switches {
-                debug!(log, "deleting instance nat mapping";
-                       "instance_id" => %instance_id,
-                       "switch" => switch.to_string(),
-                       "entry" => #?entry);
-
-                let client_result =
-                    self.dpd_clients.get(switch).ok_or_else(|| {
-                        Error::internal_error(&format!(
-                            "unable to find dendrite client for {switch}"
-                        ))
-                    });
-
-                let dpd_client = match client_result {
-                    Ok(client) => client,
-                    Err(new_error) => {
-                        errors.push(new_error);
-                        continue;
+            // Soft delete the NAT entry
+            match self
+                .db_datastore
+                .ipv4_nat_delete_by_external_ip(&opctx, &entry)
+                .await
+            {
+                Ok(_) => Ok(()),
+                Err(err) => match err {
+                    Error::ObjectNotFound { .. } => {
+                        warn!(log, "no matching nat entries to soft delete");
+                        Ok(())
                     }
-                };
+                    _ => {
+                        let message = format!(
+                            "failed to delete nat entry due to error: {err:?}"
+                        );
+                        error!(log, "{}", message);
+                        Err(Error::internal_error(&message))
+                    }
+                },
+            }?;
+        }

-                let result = retry_until_known_result(log, || async {
-                    dpd_client
-                        .ensure_nat_entry_deleted(
-                            log,
-                            entry.ip,
-                            *entry.first_port,
-                        )
-                        .await
-                })
-                .await;
-
-                if let Err(e) = result {
-                    let e = Error::internal_error(&format!(
-                        "failed to delete nat entry via dpd: {e}"
-                    ));
-
-                    error!(log, "error deleting nat mapping: {e:#?}";
-                           "instance_id" => %instance_id,
-                           "switch" => switch.to_string(),
-                           "entry" => #?entry);
-                    errors.push(e);
-                } else {
-                    debug!(log, "deleting nat mapping successful";
-                           "instance_id" => %instance_id,
-                           "switch" => switch.to_string(),
-                           "entry" => #?entry);
+        for switch in &boundary_switches {
+            debug!(&self.log, "notifying dendrite of updates";
+                   "instance_id" => %authz_instance.id(),
+                   "switch" => switch.to_string());
+
+            let client_result = self.dpd_clients.get(switch).ok_or_else(|| {
+                Error::internal_error(&format!(
+                    "unable to find dendrite client for {switch}"
+                ))
+            });
+
+            let dpd_client = match client_result {
+                Ok(client) => client,
+                Err(new_error) => {
+                    errors.push(new_error);
+                    continue;
                 }
-            }
+            };
+
+            // Notify dendrite that there are changes for it to reconcile.
+            // In the event of a failure to notify dendrite, we'll log an error
+            // and rely on dendrite's RPW timer to catch it up.
+            if let Err(e) = dpd_client.ipv4_nat_trigger_update().await {
+                error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e);
+            };
         }

         if let Some(e) = errors.into_iter().nth(0) {
@@ -496,32 +531,48 @@
         let boundary_switches = self.boundary_switches(opctx).await?;

         for external_ip in external_ips {
-            for switch in &boundary_switches {
-                debug!(&self.log, "deleting instance nat mapping";
+            match self
+                .db_datastore
+                .ipv4_nat_delete_by_external_ip(&opctx, &external_ip)
+                .await
+            {
+                Ok(_) => Ok(()),
+                Err(err) => match err {
+                    Error::ObjectNotFound { .. } => {
+                        warn!(
+                            self.log,
+                            "no matching nat entries to soft delete"
+                        );
+                        Ok(())
+                    }
+                    _ => {
+                        let message = format!(
+                            "failed to delete nat entry due to error: {err:?}"
+                        );
+                        error!(self.log, "{}", message);
+                        Err(Error::internal_error(&message))
+                    }
+                },
+            }?;
+        }
+
+        for switch in &boundary_switches {
+            debug!(&self.log, "notifying dendrite of updates";
                    "instance_id" => %authz_instance.id(),
-                   "switch" => switch.to_string(),
-                   "entry" => #?external_ip);
-
-                let dpd_client =
-                    self.dpd_clients.get(switch).ok_or_else(|| {
-                        Error::internal_error(&format!(
-                            "unable to find dendrite client for {switch}"
-                        ))
-                    })?;
-
-                dpd_client
-                    .ensure_nat_entry_deleted(
-                        &self.log,
-                        external_ip.ip,
-                        *external_ip.first_port,
-                    )
-                    .await
-                    .map_err(|e| {
-                        Error::internal_error(&format!(
-                            "failed to delete nat entry via dpd: {e}"
-                        ))
-                    })?;
-            }
+                   "switch" => switch.to_string());
+
+            let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| {
+                Error::internal_error(&format!(
+                    "unable to find dendrite client for {switch}"
+                ))
+            })?;
+
+            // Notify dendrite that there are changes for it to reconcile.
+            // In the event of a failure to notify dendrite, we'll log an error
+            // and rely on dendrite's RPW timer to catch it up.
+            if let Err(e) = dpd_client.ipv4_nat_trigger_update().await {
+                error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e);
+            };
         }

         Ok(())
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index ef8132451a..18c9dae841 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -349,6 +349,7 @@ impl Nexus {
             &background_ctx,
             Arc::clone(&db_datastore),
             &config.pkg.background_tasks,
+            &dpd_clients,
             config.deployment.id,
             resolver.clone(),
         );
diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs
index ebb21feb40..9a20911893 100644
--- a/nexus/src/internal_api/http_entrypoints.rs
+++ b/nexus/src/internal_api/http_entrypoints.rs
@@ -24,6 +24,7 @@ use dropshot::RequestContext;
 use dropshot::ResultsPage;
 use dropshot::TypedBody;
 use hyper::Body;
+use nexus_db_model::Ipv4NatEntryView;
 use nexus_types::internal_api::params::SwitchPutRequest;
 use nexus_types::internal_api::params::SwitchPutResponse;
 use nexus_types::internal_api::views::to_list;
@@ -68,6 +69,8 @@ pub(crate) fn internal_api() -> NexusApiDescription {
     api.register(saga_list)?;
     api.register(saga_view)?;

+    api.register(ipv4_nat_changeset)?;
+
     api.register(bgtask_list)?;
     api.register(bgtask_view)?;

@@ -540,3 +543,51 @@ async fn bgtask_view(
     };
     apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }
+
+// NAT RPW internal APIs
+
+/// Path parameters for NAT ChangeSet
+#[derive(Deserialize, JsonSchema)]
+struct RpwNatPathParam {
+    /// which change number to start generating
+    /// the change set from
+    from_gen: i64,
+}
+
+/// Query parameters for NAT ChangeSet
+#[derive(Deserialize, JsonSchema)]
+struct RpwNatQueryParam {
+    limit: u32,
+}
+
+/// Fetch NAT ChangeSet
+///
+/// Caller provides their generation as `from_gen`, along with a query
+/// parameter for the page size (`limit`). Endpoint will return changes
+/// that have occurred since the caller's generation number up to the latest
+/// change or until the `limit` is reached. If there are no changes, an
+/// empty vec is returned.
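+///
+/// (Editor's illustration, hypothetical values) A consumer holding generation
+/// 5 might request:
+///
+///   GET /nat/ipv4/changeset/5?limit=100
+///
+/// It would receive every entry whose `gen` exceeds 5 (sorted by `gen`),
+/// apply the changes in order, and remember the highest `gen` seen as the
+/// starting point for its next poll.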
+#[endpoint {
+    method = GET,
+    path = "/nat/ipv4/changeset/{from_gen}"
+}]
+async fn ipv4_nat_changeset(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<RpwNatPathParam>,
+    query_params: Query<RpwNatQueryParam>,
+) -> Result<HttpResponseOk<Vec<Ipv4NatEntryView>>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
+        let nexus = &apictx.nexus;
+        let path = path_params.into_inner();
+        let query = query_params.into_inner();
+        let mut changeset = nexus
+            .datastore()
+            .ipv4_nat_changeset(&opctx, path.from_gen, query.limit)
+            .await?;
+        changeset.sort_by_key(|e| e.gen);
+        Ok(HttpResponseOk(changeset))
+    };
+    apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml
index 54f7e03eef..fbed9aed8e 100644
--- a/nexus/tests/config.test.toml
+++ b/nexus/tests/config.test.toml
@@ -90,6 +90,7 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+nat_cleanup.period_secs = 30

 # How frequently to collect hardware/software inventory from the whole system
 # (even if we don't have reason to believe anything has changed).
 inventory.period_secs = 600
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index f83cf68a8a..fcb285d9eb 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -323,6 +323,57 @@
       }
     }
   },
+    "/nat/ipv4/changeset/{from_gen}": {
+      "get": {
+        "summary": "Fetch NAT ChangeSet",
+        "description": "Caller provides their generation as `from_gen`, along with a query parameter for the page size (`limit`). Endpoint will return changes that have occurred since the caller's generation number up to the latest change or until the `limit` is reached. If there are no changes, an empty vec is returned.",
+        "operationId": "ipv4_nat_changeset",
+        "parameters": [
+          {
+            "in": "path",
+            "name": "from_gen",
+            "description": "which change number to start generating the change set from",
+            "required": true,
+            "schema": {
+              "type": "integer",
+              "format": "int64"
+            }
+          },
+          {
+            "in": "query",
+            "name": "limit",
+            "required": true,
+            "schema": {
+              "type": "integer",
+              "format": "uint32",
+              "minimum": 0
+            }
+          }
+        ],
+        "responses": {
+          "200": {
+            "description": "successful operation",
+            "content": {
+              "application/json": {
+                "schema": {
+                  "title": "Array_of_Ipv4NatEntryView",
+                  "type": "array",
+                  "items": {
+                    "$ref": "#/components/schemas/Ipv4NatEntryView"
+                  }
+                }
+              }
+            }
+          },
+          "4XX": {
+            "$ref": "#/components/responses/Error"
+          },
+          "5XX": {
+            "$ref": "#/components/responses/Error"
+          }
+        }
+      }
+    },
     "/physical-disk": {
       "put": {
         "summary": "Report that a physical disk for the specified sled has come online.",
@@ -3763,6 +3814,53 @@
           }
         ]
       },
+      "Ipv4NatEntryView": {
+        "description": "NAT Record",
+        "type": "object",
+        "properties": {
+          "deleted": {
+            "type": "boolean"
+          },
+          "external_address": {
+            "type": "string",
+            "format": "ipv4"
+          },
+          "first_port": {
+            "type": "integer",
+            "format": "uint16",
+            "minimum": 0
+          },
+          "gen": {
+            "type": "integer",
+            "format": "int64"
+          },
+          "last_port": {
+            "type": "integer",
+            "format": "uint16",
+            "minimum": 0
+          },
+          "mac": {
+            "$ref": "#/components/schemas/MacAddr"
+          },
+          "sled_address": {
+            "type": "string",
+            "format": "ipv6"
+          },
+          "vni": {
+            "$ref": "#/components/schemas/Vni"
+          }
+        },
+        "required": [
+          "deleted",
+          "external_address",
+          "first_port",
+          "gen",
+          "last_port",
+          "mac",
+          "sled_address",
+          "vni"
+        ]
+      },
       "Ipv4Network": {
        "type": "string",
        "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$"
@@ -5335,6 +5433,12 @@
           "time_updated"
         ]
       },
+      "Vni": {
+        "description": "A Geneve Virtual Network Identifier",
+        "type": "integer",
+        "format": "uint32",
+        "minimum": 0
+      },
       "ZpoolPutRequest": {
         "description": "Sent by a sled agent on startup to Nexus to request further instruction",
         "type": "object",
diff --git a/package-manifest.toml b/package-manifest.toml
index f320215a13..ca96341f2a 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -476,8 +476,8 @@ only_for_targets.image = "standard"
 # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out
 source.type = "prebuilt"
 source.repo = "dendrite"
-source.commit = "147b03901aa8305b5271e0133a09f628b8140949"
-source.sha256 = "14fe7f904f963b50188d6e060106b63df6d061ca64238f7b21623c432b5944e3"
+source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4"
+source.sha256 = "c00e79f55e0bdf048069b2d18a4d009ddfef46e7e5d846887cf96e843a8884bd"
 output.type = "zone"
 output.intermediate_only = true
@@ -501,8 +501,8 @@ only_for_targets.image = "standard"
 # 2. Copy the output zone image from dendrite/out to omicron/out
 source.type = "prebuilt"
 source.repo = "dendrite"
-source.commit = "147b03901aa8305b5271e0133a09f628b8140949"
-source.sha256 = "f3aa685e4096f8f6e2ea6c169f391dbb88707abcbf1d2bde29163d81736e8ec6"
+source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4"
+source.sha256 = "428cce1e9aa399b1b49c04e7fd0bc1cb0e3f3fae6fda96055892a42e010c9d6f"
 output.type = "zone"
 output.intermediate_only = true
@@ -519,8 +519,8 @@ only_for_targets.image = "standard"
 # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz
 source.type = "prebuilt"
 source.repo = "dendrite"
-source.commit = "147b03901aa8305b5271e0133a09f628b8140949"
-source.sha256 = "dece729ce4127216fba48e9cfed90ec2e5a57ee4ca6c4afc5fa770de6ea636bf"
+source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4"
+source.sha256 = "5dd3534bec5eb4f857d0bf3994b26650288f650d409eec6aaa29860a2f481c37"
 output.type = "zone"
 output.intermediate_only = true
diff --git a/schema/crdb/10.0.0/README.md b/schema/crdb/11.0.0/README.md
similarity index 100%
rename from schema/crdb/10.0.0/README.md
rename to schema/crdb/11.0.0/README.md
diff --git a/schema/crdb/10.0.0/up01.sql b/schema/crdb/11.0.0/up01.sql
similarity index 100%
rename from schema/crdb/10.0.0/up01.sql
rename to schema/crdb/11.0.0/up01.sql
diff --git a/schema/crdb/10.0.0/up02.sql b/schema/crdb/11.0.0/up02.sql
similarity index 100%
rename from schema/crdb/10.0.0/up02.sql
rename to schema/crdb/11.0.0/up02.sql
diff --git a/schema/crdb/10.0.0/up03.sql b/schema/crdb/11.0.0/up03.sql
similarity index 100%
rename from schema/crdb/10.0.0/up03.sql
rename to schema/crdb/11.0.0/up03.sql
diff --git a/schema/crdb/10.0.0/up04.sql b/schema/crdb/11.0.0/up04.sql
similarity index 100%
rename from schema/crdb/10.0.0/up04.sql
rename to schema/crdb/11.0.0/up04.sql
diff --git a/schema/crdb/10.0.0/up05.sql b/schema/crdb/11.0.0/up05.sql
similarity index 100%
rename from schema/crdb/10.0.0/up05.sql
rename to schema/crdb/11.0.0/up05.sql
diff --git a/schema/crdb/10.0.0/up06.sql b/schema/crdb/11.0.0/up06.sql
similarity index 100%
rename from schema/crdb/10.0.0/up06.sql
rename to schema/crdb/11.0.0/up06.sql
diff --git a/schema/crdb/10.0.0/up07.sql b/schema/crdb/11.0.0/up07.sql
similarity index 100%
rename from schema/crdb/10.0.0/up07.sql
rename to schema/crdb/11.0.0/up07.sql
diff --git a/schema/crdb/10.0.0/up08.sql b/schema/crdb/11.0.0/up08.sql
similarity index 100%
rename from schema/crdb/10.0.0/up08.sql
rename to schema/crdb/11.0.0/up08.sql
diff --git a/schema/crdb/10.0.0/up09.sql b/schema/crdb/11.0.0/up09.sql
similarity index 100%
rename from schema/crdb/10.0.0/up09.sql
rename to schema/crdb/11.0.0/up09.sql
diff --git a/schema/crdb/11.0.0/up1.sql b/schema/crdb/11.0.0/up1.sql
new file mode 100644
index 0000000000..a4d31edd71
--- /dev/null
+++ b/schema/crdb/11.0.0/up1.sql
@@ -0,0 +1 @@
+CREATE SEQUENCE IF NOT EXISTS omicron.public.ipv4_nat_version START 1 INCREMENT 1;
diff --git a/schema/crdb/10.0.0/up10.sql b/schema/crdb/11.0.0/up10.sql
similarity index 100%
rename from schema/crdb/10.0.0/up10.sql
rename to schema/crdb/11.0.0/up10.sql
diff --git a/schema/crdb/10.0.0/up11.sql b/schema/crdb/11.0.0/up11.sql
similarity index 100%
rename from schema/crdb/10.0.0/up11.sql
rename to schema/crdb/11.0.0/up11.sql
diff --git a/schema/crdb/10.0.0/up12.sql b/schema/crdb/11.0.0/up12.sql
similarity index 100%
rename from schema/crdb/10.0.0/up12.sql
rename to schema/crdb/11.0.0/up12.sql
diff --git a/schema/crdb/11.0.0/up2.sql b/schema/crdb/11.0.0/up2.sql
new file mode 100644
index 0000000000..b92d4c73d3
--- /dev/null
+++ b/schema/crdb/11.0.0/up2.sql
@@ -0,0 +1,13 @@
+CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    external_address INET NOT NULL,
+    first_port INT4 NOT NULL,
+    last_port INT4 NOT NULL,
+    sled_address INET NOT NULL,
+    vni INT4 NOT NULL,
+    mac INT8 NOT NULL,
+    version_added INT8 NOT NULL DEFAULT nextval('omicron.public.ipv4_nat_version'),
+    version_removed INT8,
+    time_created TIMESTAMPTZ NOT NULL DEFAULT now(),
+    time_deleted TIMESTAMPTZ
+);
diff --git a/schema/crdb/11.0.0/up3.sql b/schema/crdb/11.0.0/up3.sql
new file mode 100644
index 0000000000..1247aad693
--- /dev/null
+++ b/schema/crdb/11.0.0/up3.sql
@@ -0,0 +1,13 @@
+CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_version_added ON omicron.public.ipv4_nat_entry (
+    version_added
+)
+STORING (
+    external_address,
+    first_port,
+    last_port,
+    sled_address,
+    vni,
+    mac,
+    time_created,
+    time_deleted
+);
diff --git a/schema/crdb/11.0.0/up4.sql b/schema/crdb/11.0.0/up4.sql
new file mode 100644
index 0000000000..b9cfe305d2
--- /dev/null
+++ b/schema/crdb/11.0.0/up4.sql
@@ -0,0 +1,5 @@
+CREATE UNIQUE INDEX IF NOT EXISTS overlapping_ipv4_nat_entry ON omicron.public.ipv4_nat_entry (
+    external_address,
+    first_port,
+    last_port
+) WHERE time_deleted IS NULL;
diff --git a/schema/crdb/11.0.0/up5.sql b/schema/crdb/11.0.0/up5.sql
new file mode 100644
index 0000000000..dce2211eae
--- /dev/null
+++ b/schema/crdb/11.0.0/up5.sql
@@ -0,0 +1 @@
+CREATE INDEX IF NOT EXISTS ipv4_nat_lookup ON omicron.public.ipv4_nat_entry (external_address, first_port, last_port, sled_address, vni, mac);
diff --git a/schema/crdb/11.0.0/up6.sql b/schema/crdb/11.0.0/up6.sql
new file mode 100644
index 0000000000..e4958eb352
--- /dev/null
+++ b/schema/crdb/11.0.0/up6.sql
@@ -0,0 +1,13 @@
+CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_version_removed ON omicron.public.ipv4_nat_entry (
+    version_removed
+)
+STORING (
+    external_address,
+    first_port,
+    last_port,
+    sled_address,
+    vni,
+    mac,
+    time_created,
+    time_deleted
+);
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 875877ee96..a74cabfe6e 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -2738,12 +2738,24 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_caboose (
 COMMIT;
 BEGIN;

-/*******************************************************************/
+CREATE TABLE IF NOT EXISTS omicron.public.db_metadata (
+    -- There should only be one row of this table for the whole DB.
+    -- It's a little goofy, but filter on "singleton = true" before querying
+    -- or applying updates, and you'll access the singleton row.
+    --
+    -- We also add a constraint on this table to ensure it's not possible to
+    -- access the version of this table with "singleton = false".
+    singleton BOOL NOT NULL PRIMARY KEY,
+    time_created TIMESTAMPTZ NOT NULL,
+    time_modified TIMESTAMPTZ NOT NULL,
+    -- Semver representation of the DB version
+    version STRING(64) NOT NULL,

-/*
- * Metadata for the schema itself. This version number isn't great, as there's
- * nothing to ensure it gets bumped when it should be, but it's a start.
- */
+    -- (Optional) Semver representation of the DB version to which we're upgrading
+    target_version STRING(64),
+
+    CHECK (singleton = true)
+);

 -- Per-VMM state.
 CREATE TABLE IF NOT EXISTS omicron.public.vmm (
@@ -2812,6 +2824,62 @@ CREATE TYPE IF NOT EXISTS omicron.public.switch_link_speed AS ENUM (
 ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS fec omicron.public.switch_link_fec;
 ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS speed omicron.public.switch_link_speed;

+CREATE SEQUENCE IF NOT EXISTS omicron.public.ipv4_nat_version START 1 INCREMENT 1;
+
+CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry (
+    id UUID PRIMARY KEY DEFAULT gen_random_uuid(),
+    external_address INET NOT NULL,
+    first_port INT4 NOT NULL,
+    last_port INT4 NOT NULL,
+    sled_address INET NOT NULL,
+    vni INT4 NOT NULL,
+    mac INT8 NOT NULL,
+    version_added INT8 NOT NULL DEFAULT nextval('omicron.public.ipv4_nat_version'),
+    version_removed INT8,
+    time_created TIMESTAMPTZ NOT NULL DEFAULT now(),
+    time_deleted TIMESTAMPTZ
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_version_added ON omicron.public.ipv4_nat_entry (
+    version_added
+)
+STORING (
+    external_address,
+    first_port,
+    last_port,
+    sled_address,
+    vni,
+    mac,
+    time_created,
+    time_deleted
+);
+
+CREATE UNIQUE INDEX IF NOT EXISTS overlapping_ipv4_nat_entry ON omicron.public.ipv4_nat_entry (
+    external_address,
+    first_port,
+    last_port
+) WHERE time_deleted IS NULL;
+
+CREATE INDEX IF NOT EXISTS ipv4_nat_lookup ON omicron.public.ipv4_nat_entry (external_address, first_port, last_port, sled_address, vni, mac);
+
+CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_version_removed ON omicron.public.ipv4_nat_entry (
+    version_removed
+)
+STORING (
+    external_address,
+    first_port,
+    last_port,
+    sled_address,
+    vni,
+    mac,
+    time_created,
+    time_deleted
+);
+
+/*
+ * Metadata for the schema itself. This version number isn't great, as there's
+ * nothing to ensure it gets bumped when it should be, but it's a start.
+ */
 CREATE TABLE IF NOT EXISTS omicron.public.db_metadata (
     -- There should only be one row of this table for the whole DB.
     -- It's a little goofy, but filter on "singleton = true" before querying
@@ -2838,7 +2906,7 @@ INSERT INTO omicron.public.db_metadata (
     version,
     target_version
 ) VALUES
-    ( TRUE, NOW(), NOW(), '10.0.0', NULL)
+    ( TRUE, NOW(), NOW(), '11.0.0', NULL)
 ON CONFLICT DO NOTHING;

 COMMIT;
diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml
index cae1f650c9..94c8f5572e 100644
--- a/smf/nexus/multi-sled/config-partial.toml
+++ b/smf/nexus/multi-sled/config-partial.toml
@@ -38,6 +38,7 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+nat_cleanup.period_secs = 30

 # How frequently to collect hardware/software inventory from the whole system
 # (even if we don't have reason to believe anything has changed).
 inventory.period_secs = 600
diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml
index be8683be54..fcaa6176a8 100644
--- a/smf/nexus/single-sled/config-partial.toml
+++ b/smf/nexus/single-sled/config-partial.toml
@@ -38,6 +38,7 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+nat_cleanup.period_secs = 30

 # How frequently to collect hardware/software inventory from the whole system
 # (even if we don't have reason to believe anything has changed).
 inventory.period_secs = 600
diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version
index aadf68da1b..ba4b5a5722 100644
--- a/tools/dendrite_openapi_version
+++ b/tools/dendrite_openapi_version
@@ -1,2 +1,2 @@
-COMMIT="147b03901aa8305b5271e0133a09f628b8140949"
-SHA2="82437c74afd4894aa5b9ea800d5777793e8777fe87471321dd22ad1a1c9c9ef3"
+COMMIT="8ff834e7d0a6adb263240edd40537f2c0768f1a4"
+SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0"
diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums
index 81a957323c..619a6bf287 100644
--- a/tools/dendrite_stub_checksums
+++ b/tools/dendrite_stub_checksums
@@ -1,3 +1,3 @@
-CIDL_SHA256_ILLUMOS="14fe7f904f963b50188d6e060106b63df6d061ca64238f7b21623c432b5944e3"
-CIDL_SHA256_LINUX_DPD="fff6c7484bbb06aa644e3fe41b200e4f7f8d7f65d067cbecd851c834c15fe2ec"
-CIDL_SHA256_LINUX_SWADM="0449383a57468aec3b5a4ad26962cfc9e9a121bd13e777329e8a70767e6d9aae"
+CIDL_SHA256_ILLUMOS="c00e79f55e0bdf048069b2d18a4d009ddfef46e7e5d846887cf96e843a8884bd"
+CIDL_SHA256_LINUX_DPD="b5d829b4628759ac374106f3c56c29074b29577fd0ff72f61c3b8289fea430fe"
+CIDL_SHA256_LINUX_SWADM="afc68828f54dc57b32dc1556fc588baeab12341c30e96cc0fadb49f401b4b48f"

From 7adc3c0184b27328a3949b4e7a9809bde19ab834 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]"
 <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Sat, 18 Nov 2023 16:33:55 -0800
Subject: [PATCH 10/10] Update Rust crate vsss-rs to 3.3.1 (#4478)

---
 bootstore/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bootstore/Cargo.toml b/bootstore/Cargo.toml
index 18e3e3876b..93eb6a3c48 100644
--- a/bootstore/Cargo.toml
+++ b/bootstore/Cargo.toml
@@ -27,7 +27,7 @@ slog.workspace = true
 thiserror.workspace = true
 tokio.workspace = true
 uuid.workspace = true
-vsss-rs = { version = "3.2.0", features = ["std", "curve25519"] }
+vsss-rs = { version = "3.3.1", features = ["std", "curve25519"] }
 zeroize.workspace = true

 # See omicron-rpaths for more about the "pq-sys" dependency.