From 5f61f27629b943cb8820eb3b40711d2f230f2219 Mon Sep 17 00:00:00 2001 From: Levon Tarver Date: Mon, 31 Jul 2023 22:57:55 +0000 Subject: [PATCH] NAT RPW for instance networking * Add db table for tracking nat entries * Add endpoint for retrieving changesets * Update instance sagas to update table and trigger RPW * Periodically cleanup soft-deleted entries that no longer need to be sync'd by dendrite. The other half of the RPW lives in Dendrite. It will periodically check for a changeset, or check for a changeset when the trigger endpoint is called by the relevant saga / nexus operation. --- Cargo.lock | 2 +- common/src/api/external/mod.rs | 1 + common/src/nexus_config.rs | 19 +- nexus/db-model/src/ipv4_nat_entry.rs | 116 +++++ nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema.rs | 27 +- .../src/db/datastore/ipv4_nat_entry.rs | 467 ++++++++++++++++++ nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 25 +- nexus/src/app/background/mod.rs | 1 + nexus/src/app/background/nat_cleanup.rs | 109 ++++ nexus/src/app/instance.rs | 46 +- nexus/src/app/mod.rs | 1 + nexus/src/app/sagas/instance_create.rs | 38 +- nexus/src/app/sagas/instance_delete.rs | 50 +- nexus/src/internal_api/http_entrypoints.rs | 79 +++ nexus/tests/config.test.toml | 1 + openapi/nexus-internal.json | 144 ++++++ package-manifest.toml | 12 +- schema/crdb/4.0.1/up.sql | 111 +++++ schema/crdb/dbinit.sql | 40 +- smf/nexus/config-partial.toml | 1 + tools/dendrite_openapi_version | 4 +- tools/dendrite_stub_checksums | 6 +- 25 files changed, 1225 insertions(+), 79 deletions(-) create mode 100644 nexus/db-model/src/ipv4_nat_entry.rs create mode 100644 nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs create mode 100644 nexus/src/app/background/nat_cleanup.rs create mode 100644 schema/crdb/4.0.1/up.sql diff --git a/Cargo.lock b/Cargo.lock index 2fdadbe0ea7..5545b69819a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6568,7 +6568,7 @@ dependencies = [ "indoc 2.0.3", "itertools 0.11.0", "paste", - "strum 0.25.0", + "strum", "unicode-segmentation", "unicode-width", ] diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 06a06ea52a1..4e279081216 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -732,6 +732,7 @@ pub enum ResourceType { UpdateableComponent, UserBuiltin, Zpool, + Ipv4NatEntry, } // IDENTITY METADATA diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 73ccec996cb..56c4bbc14c2 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -311,6 +311,8 @@ pub struct BackgroundTaskConfig { pub dns_external: DnsTasksConfig, /// configuration for external endpoint list watcher pub external_endpoints: ExternalEndpointsConfig, + /// configuration for nat table garbage collector + pub nat_cleanup: NatCleanupConfig, } #[serde_as] @@ -345,6 +347,14 @@ pub struct ExternalEndpointsConfig { // allow/disallow wildcard certs, don't serve expired certs, etc.) 
} +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct NatCleanupConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -448,7 +458,7 @@ mod test { use crate::nexus_config::{ BackgroundTaskConfig, ConfigDropshotWithTls, Database, DeploymentConfig, DnsTasksConfig, DpdConfig, ExternalEndpointsConfig, - InternalDns, LoadErrorKind, + InternalDns, LoadErrorKind, NatCleanupConfig, }; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; @@ -594,6 +604,7 @@ mod test { dns_external.period_secs_propagation = 7 dns_external.max_concurrent_server_updates = 8 external_endpoints.period_secs = 9 + nat_cleanup.period_secs = 30 "##, ) .unwrap(); @@ -675,7 +686,10 @@ mod test { }, external_endpoints: ExternalEndpointsConfig { period_secs: Duration::from_secs(9), - } + }, + nat_cleanup: NatCleanupConfig { + period_secs: Duration::from_secs(30), + }, }, }, } @@ -724,6 +738,7 @@ mod test { dns_external.period_secs_propagation = 7 dns_external.max_concurrent_server_updates = 8 external_endpoints.period_secs = 9 + nat_cleanup.period_secs = 30 "##, ) .unwrap(); diff --git a/nexus/db-model/src/ipv4_nat_entry.rs b/nexus/db-model/src/ipv4_nat_entry.rs new file mode 100644 index 00000000000..8ff214c92d0 --- /dev/null +++ b/nexus/db-model/src/ipv4_nat_entry.rs @@ -0,0 +1,116 @@ +use std::net::{Ipv4Addr, Ipv6Addr}; + +use super::MacAddr; +use crate::{ + schema::{ipv4_nat_entry, nat_gen}, + SqlU16, SqlU32, Vni, +}; +use chrono::{DateTime, Utc}; +use omicron_common::api::external; +use schemars::JsonSchema; +use serde::Serialize; +use uuid::Uuid; + +// TODO correctness +// If we're not going to store ipv4 and ipv6 +// NAT entries in the same table, and we don't +// need any of the special properties of the IpNetwork +// column type, does it make sense to use a different +// column type? +/// Database representation of an Ipv4 NAT Entry. +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = ipv4_nat_entry)] +pub struct Ipv4NatValues { + pub external_address: ipnetwork::IpNetwork, + pub first_port: SqlU16, + pub last_port: SqlU16, + pub sled_address: ipnetwork::IpNetwork, + pub vni: Vni, + pub mac: MacAddr, +} + +// TODO correctness +// If we're not going to store ipv4 and ipv6 +// NAT entries in the same table, we should probably +// make the types more restrictive to prevent an +// accidental ipv6 entry from being created. 
+#[derive(Queryable, Debug, Clone, Selectable)]
+#[diesel(table_name = ipv4_nat_entry)]
+pub struct Ipv4NatEntry {
+    pub id: Uuid,
+    pub external_address: ipnetwork::IpNetwork,
+    pub first_port: SqlU16,
+    pub last_port: SqlU16,
+    pub sled_address: ipnetwork::IpNetwork,
+    pub vni: Vni,
+    pub mac: MacAddr,
+    pub gen: SqlU32,
+    pub time_created: DateTime<Utc>,
+    pub time_deleted: Option<DateTime<Utc>>,
+}
+
+impl Ipv4NatEntry {
+    pub fn first_port(&self) -> u16 {
+        self.first_port.into()
+    }
+
+    pub fn last_port(&self) -> u16 {
+        self.last_port.into()
+    }
+
+    pub fn gen(&self) -> u32 {
+        self.gen.into()
+    }
+}
+
+#[derive(Queryable, Debug, Clone, Selectable)]
+#[diesel(table_name = nat_gen)]
+pub struct Ipv4NatGen {
+    pub last_value: SqlU32,
+    pub log_cnt: SqlU32,
+    pub is_called: bool,
+}
+
+/// NAT Record
+#[derive(Clone, Debug, Serialize, JsonSchema)]
+pub struct Ipv4NatEntryView {
+    pub external_address: Ipv4Addr,
+    pub first_port: u16,
+    pub last_port: u16,
+    pub sled_address: Ipv6Addr,
+    pub vni: external::Vni,
+    pub mac: external::MacAddr,
+    pub gen: u32,
+    pub deleted: bool,
+}
+
+impl From<Ipv4NatEntry> for Ipv4NatEntryView {
+    fn from(value: Ipv4NatEntry) -> Self {
+        let external_address = match value.external_address.ip() {
+            std::net::IpAddr::V4(a) => a,
+            std::net::IpAddr::V6(_) => unreachable!(),
+        };
+
+        let sled_address = match value.sled_address.ip() {
+            std::net::IpAddr::V4(_) => unreachable!(),
+            std::net::IpAddr::V6(a) => a,
+        };
+
+        Self {
+            external_address,
+            first_port: value.first_port(),
+            last_port: value.last_port(),
+            sled_address,
+            vni: value.vni.0,
+            mac: *value.mac,
+            gen: value.gen(),
+            deleted: value.time_deleted.is_some(),
+        }
+    }
+}
+
+/// NAT Generation
+#[derive(Clone, Debug, Serialize, JsonSchema)]
+pub struct Ipv4NatGenView {
+    pub gen: u32,
+}
diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index 334dedad9ff..ddf2e8841bc 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -51,6 +51,7 @@ mod system_update;
 // These actually represent subqueries, not real table.
 // However, they must be defined in the same crate as our tables
 // for join-based marker trait generation.
+mod ipv4_nat_entry;
 pub mod queries;
 mod rack;
 mod region;
@@ -119,6 +120,7 @@ pub use instance::*;
 pub use instance_cpu_count::*;
 pub use instance_state::*;
 pub use ip_pool::*;
+pub use ipv4_nat_entry::*;
 pub use ipv4net::*;
 pub use ipv6::*;
 pub use ipv6net::*;
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 3fde9ee715a..9ef5f3b96eb 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -471,6 +471,31 @@ table! {
     }
 }
 
+table! {
+    ipv4_nat_entry (id) {
+        id -> Uuid,
+        external_address -> Inet,
+        first_port -> Int4,
+        last_port -> Int4,
+        sled_address -> Inet,
+        vni -> Int4,
+        mac -> Int8,
+        gen -> Int8,
+        time_created -> Timestamptz,
+        time_deleted -> Nullable<Timestamptz>,
+    }
+}
+
+// This is the sequence used for the generation number
+// in ipv4_nat_entry.
+table! {
+    nat_gen (last_value) {
+        last_value -> Int8,
+        log_cnt -> Int8,
+        is_called -> Bool,
+    }
+}
+
 table! {
     external_ip (id) {
         id -> Uuid,
@@ -1130,7 +1155,7 @@ table! {
 ///
 /// This should be updated whenever the schema is changed. For more details,
 /// refer to: schema/crdb/README.adoc
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(4, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(4, 0, 1);
 
 allow_tables_to_appear_in_same_query!(
     system_update,
diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
new file mode 100644
index 00000000000..6f6f65c0070
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
@@ -0,0 +1,467 @@
+use super::DataStore;
+use crate::context::OpContext;
+use crate::db;
+use crate::db::error::public_error_from_diesel_pool;
+use crate::db::error::ErrorHandler;
+use crate::db::model::{Ipv4NatEntry, Ipv4NatValues};
+use async_bb8_diesel::AsyncRunQueryDsl;
+use chrono::{DateTime, Utc};
+use diesel::prelude::*;
+use diesel::sql_query;
+use diesel::sql_types::BigInt;
+use diesel::sql_types::Inet;
+use diesel::sql_types::Integer;
+use nexus_db_model::ExternalIp;
+use nexus_db_model::Ipv4NatEntryView;
+use nexus_db_model::SqlU32;
+use omicron_common::api::external::CreateResult;
+use omicron_common::api::external::DeleteResult;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupResult;
+use omicron_common::api::external::LookupType;
+use omicron_common::api::external::ResourceType;
+use uuid::Uuid;
+
+impl DataStore {
+    pub async fn ensure_ipv4_nat_entry(
+        &self,
+        opctx: &OpContext,
+        nat_entry: Ipv4NatValues,
+    ) -> CreateResult<()> {
+        // TODO: What authorization do we need?
+        // TODO: What additional validation do we need?
+
+        // Ensure that the record with the parameters we want exists in the
+        // database. If an entry exists with the same external ip but
+        // different target sled / vpc / mac, we will violate a uniqueness
+        // constraint (which is desired in this case).
+        sql_query(
+            "
+            INSERT INTO omicron.public.ipv4_nat_entry (
+                external_address,
+                first_port,
+                last_port,
+                sled_address,
+                vni,
+                mac
+            )
+            SELECT external_address, first_port, last_port, sled_address, vni, mac
+            FROM ( VALUES ($1, $2, $3, $4, $5, $6) ) AS data (external_address, first_port, last_port, sled_address, vni, mac)
+            WHERE NOT EXISTS (
+                SELECT external_address, first_port, last_port, sled_address, vni, mac
+                FROM omicron.public.ipv4_nat_entry as entry
+                WHERE data.external_address = entry.external_address
+                AND data.first_port = entry.first_port
+                AND data.last_port = entry.last_port
+                AND data.sled_address = entry.sled_address
+                AND data.vni = entry.vni
+                AND data.mac = entry.mac
+            )
+            ",
+        )
+        .bind::<Inet, _>(nat_entry.external_address)
+        .bind::<Integer, _>(nat_entry.first_port)
+        .bind::<Integer, _>(nat_entry.last_port)
+        .bind::<Inet, _>(nat_entry.sled_address)
+        .bind::<Integer, _>(nat_entry.vni)
+        .bind::<BigInt, _>(nat_entry.mac)
+        .execute_async(self.pool_authorized(opctx).await?)
+        .await
+        .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
+
+    pub async fn ipv4_nat_delete(
+        &self,
+        opctx: &OpContext,
+        nat_entry: &Ipv4NatEntry,
+    ) -> DeleteResult {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        // TODO: What authorization do we need?
+
+        let now = Utc::now();
+        let updated_rows = diesel::update(dsl::ipv4_nat_entry)
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::id.eq(nat_entry.id))
+            // matching on generation is how we detect races
+            .filter(dsl::gen.eq(nat_entry.gen))
+            .set(dsl::time_deleted.eq(now))
+            .execute_async(self.pool_authorized(opctx).await?)
+ .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + if updated_rows == 0 { + return Err(Error::ObjectNotFound { + type_name: ResourceType::Ipv4NatEntry, + lookup_type: LookupType::ByCompositeId("id, gen".to_string()), + }); + } + Ok(()) + } + + pub async fn ipv4_nat_cancel_delete( + &self, + opctx: &OpContext, + nat_entry: &Ipv4NatEntry, + ) -> DeleteResult { + use db::schema::ipv4_nat_entry::dsl; + + // TODO: What authorization do we need? + + let updated_rows = diesel::update(dsl::ipv4_nat_entry) + .filter(dsl::time_deleted.is_not_null()) + .filter(dsl::id.eq(nat_entry.id)) + .filter(dsl::gen.eq(nat_entry.gen)) + .set(dsl::time_deleted.eq(Option::>::None)) + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + if updated_rows == 0 { + return Err(Error::InvalidRequest { + message: "no matching records".to_string(), + }); + } + Ok(()) + } + + pub async fn ipv4_nat_find_by_id( + &self, + opctx: &OpContext, + id: Uuid, + ) -> LookupResult { + use db::schema::ipv4_nat_entry::dsl; + + // TODO: What authorization do we need? + + let result = dsl::ipv4_nat_entry + .filter(dsl::id.eq(id)) + .select(Ipv4NatEntry::as_select()) + .limit(1) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + if let Some(nat_entry) = result.first() { + Ok(nat_entry.clone()) + } else { + Err(Error::InvalidRequest { + message: "no matching records".to_string(), + }) + } + } + + pub async fn ipv4_nat_delete_by_external_ip( + &self, + opctx: &OpContext, + external_ip: &ExternalIp, + ) -> DeleteResult { + use db::schema::ipv4_nat_entry::dsl; + let now = Utc::now(); + let updated_rows = diesel::update(dsl::ipv4_nat_entry) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::external_address.eq(external_ip.ip)) + .filter(dsl::first_port.eq(external_ip.first_port)) + .filter(dsl::last_port.eq(external_ip.last_port)) + .set(dsl::time_deleted.eq(now)) + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + if updated_rows == 0 { + return Err(Error::ObjectNotFound { + type_name: ResourceType::Ipv4NatEntry, + lookup_type: LookupType::ByCompositeId( + "external_ip, first_port, last_port".to_string(), + ), + }); + } + Ok(()) + } + + pub async fn ipv4_nat_find_by_values( + &self, + opctx: &OpContext, + values: Ipv4NatValues, + ) -> LookupResult { + use db::schema::ipv4_nat_entry::dsl; + let result = dsl::ipv4_nat_entry + .filter(dsl::external_address.eq(values.external_address)) + .filter(dsl::first_port.eq(values.first_port)) + .filter(dsl::last_port.eq(values.last_port)) + .filter(dsl::mac.eq(values.mac)) + .filter(dsl::sled_address.eq(values.sled_address)) + .filter(dsl::vni.eq(values.vni)) + .filter(dsl::time_deleted.is_null()) + .select(Ipv4NatEntry::as_select()) + .limit(1) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + if let Some(nat_entry) = result.first() { + Ok(nat_entry.clone()) + } else { + Err(Error::InvalidRequest { + message: "no matching records".to_string(), + }) + } + } + + pub async fn ipv4_nat_list_since_gen( + &self, + opctx: &OpContext, + gen: u32, + limit: u32, + ) -> ListResultVec { + use db::schema::ipv4_nat_entry::dsl; + let gen: SqlU32 = gen.into(); + + // TODO: What authorization do we need? 
+ + let list = dsl::ipv4_nat_entry + .filter(dsl::gen.gt(gen)) + .order_by(dsl::gen) + .limit(limit as i64) + .select(Ipv4NatEntry::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + Ok(list) + } + + pub async fn ipv4_nat_changeset( + &self, + opctx: &OpContext, + gen: u32, + limit: u32, + ) -> ListResultVec { + let nat_entries = + self.ipv4_nat_list_since_gen(opctx, gen, limit).await?; + let nat_entries: Vec = + nat_entries.iter().map(|e| e.clone().into()).collect(); + Ok(nat_entries) + } + + pub async fn ipv4_nat_current_gen( + &self, + opctx: &OpContext, + ) -> LookupResult { + use db::schema::nat_gen::dsl; + + // TODO: What authorization do we need? + + let latest: Option = dsl::nat_gen + .select(diesel::dsl::max(dsl::last_value)) + .first_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + match latest { + Some(value) => Ok(*value), + None => Err(Error::InvalidRequest { + message: "sequence table is empty!".to_string(), + }), + } + } + + pub async fn ipv4_nat_cleanup( + &self, + opctx: &OpContext, + before_gen: u32, + before_timestamp: DateTime, + ) -> DeleteResult { + use db::schema::ipv4_nat_entry::dsl; + let gen: SqlU32 = before_gen.into(); + // TODO: What authorization do we need? + + diesel::delete(dsl::ipv4_nat_entry) + .filter(dsl::gen.lt(gen)) + .filter(dsl::time_deleted.lt(before_timestamp)) + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use std::str::FromStr; + + use crate::db::datastore::datastore_test; + use chrono::Utc; + use nexus_db_model::{Ipv4NatValues, MacAddr, Vni}; + use nexus_test_utils::db::test_setup_database; + use omicron_common::api::external; + use omicron_test_utils::dev; + + // Test our ability to track additions and deletions since a given generation number + #[tokio::test] + async fn nat_generation_tracking() { + let logctx = dev::test_setup_log("test_nat_generation_tracking"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // We should not have any NAT entries at this moment + let initial_state = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + assert!(initial_state.is_empty()); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 0); + + // Each change (creation / deletion) to the NAT table should increment the + // generation number of the row in the NAT table + let external_address = + ipnetwork::IpNetwork::try_from("10.0.0.100").unwrap(); + + let sled_address = + ipnetwork::IpNetwork::try_from("fd00:1122:3344:104::1").unwrap(); + + // Add a nat entry. + let nat1 = Ipv4NatValues { + external_address, + first_port: 0.into(), + last_port: 999.into(), + sled_address, + vni: Vni(external::Vni::random()), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2A").unwrap(), + ), + }; + + datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap(); + let first_entry = + datastore.ipv4_nat_find_by_values(&opctx, nat1).await.unwrap(); + + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone one change. One entry has been added, + // none deleted, so we should be at generation 1. 
+ assert_eq!(nat_entries.len(), 1); + assert_eq!(nat_entries.last().unwrap().gen(), 1); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 1); + + // Add another nat entry. + let nat2 = Ipv4NatValues { + external_address, + first_port: 1000.into(), + last_port: 1999.into(), + sled_address, + vni: Vni(external::Vni::random()), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2B").unwrap(), + ), + }; + + datastore.ensure_ipv4_nat_entry(&opctx, nat2).await.unwrap(); + + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone two changes. Two entries have been + // added, none deleted, so we should be at generation 2. + assert_eq!(nat_entries.len(), 2); + assert_eq!(nat_entries.last().unwrap().gen(), 2); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 2); + + // Delete the first nat entry. It should show up as a later generation number. + datastore.ipv4_nat_delete(&opctx, &first_entry).await.unwrap(); + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone three changes. Two entries have been + // added, one deleted, so we should be at generation 3. Since the + // first entry was marked for deletion (and it was the third change), + // the first entry's generation number should now be 3. + let nat_entry = nat_entries.last().unwrap(); + assert_eq!(nat_entries.len(), 2); + assert_eq!(nat_entry.gen(), 3); + assert_eq!(nat_entry.id, first_entry.id); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 3); + + // Cancel deletion of NAT entry + datastore.ipv4_nat_cancel_delete(&opctx, nat_entry).await.unwrap(); + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone four changes. + let nat_entry = nat_entries.last().unwrap(); + assert_eq!(nat_entries.len(), 2); + assert_eq!(nat_entry.gen(), 4); + assert_eq!(nat_entry.id, first_entry.id); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 4); + + // Test Cleanup logic + // Cleanup should only perma-delete entries that are older than a + // specified generation number and whose `time_deleted` field is + // older than a specified age. 
+        let time_cutoff = Utc::now();
+        datastore.ipv4_nat_cleanup(&opctx, 4, time_cutoff).await.unwrap();
+
+        // Nothing should have changed (no records currently marked for deletion)
+        let nat_entries =
+            datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap();
+
+        assert_eq!(nat_entries.len(), 2);
+        assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 4);
+
+        // Soft delete a record
+        let nat_entry = nat_entries.last().unwrap();
+        datastore.ipv4_nat_delete(&opctx, nat_entry).await.unwrap();
+
+        // Try cleaning up with the old gen and time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 4, time_cutoff).await.unwrap();
+
+        // Try cleaning up with a greater gen and old time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 6, time_cutoff).await.unwrap();
+
+        // Try cleaning up with an older gen and newer time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 4, Utc::now()).await.unwrap();
+
+        // Both records should still exist (the soft-deleted record is newer
+        // than the cutoff values)
+        let nat_entries =
+            datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap();
+
+        assert_eq!(nat_entries.len(), 2);
+        assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 5);
+
+        // Try cleaning up with both cutoff values increased
+        datastore.ipv4_nat_cleanup(&opctx, 6, Utc::now()).await.unwrap();
+
+        // Soft deleted NAT entry should be removed from the table
+        let nat_entries =
+            datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap();
+
+        assert_eq!(nat_entries.len(), 1);
+
+        // Generation should be unchanged
+        assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 5);
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+}
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 13fb132abb6..d1a9d3ed26f 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -60,6 +60,7 @@ mod identity_provider;
 mod image;
 mod instance;
 mod ip_pool;
+mod ipv4_nat_entry;
 mod network_interface;
 mod oximeter;
 mod physical_disk;
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml
index f1b20c32a10..f2016839a51 100644
--- a/nexus/examples/config.toml
+++ b/nexus/examples/config.toml
@@ -92,3 +92,4 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
external_endpoints.period_secs = 60 +nat_cleanup.period_secs = 30 diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index c8665943faf..2e8d6f67986 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -9,12 +9,15 @@ use super::dns_config; use super::dns_propagation; use super::dns_servers; use super::external_endpoints; +use super::nat_cleanup; use nexus_db_model::DnsGroup; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; +use omicron_common::api::internal::shared::SwitchLocation; use omicron_common::nexus_config::BackgroundTaskConfig; use omicron_common::nexus_config::DnsTasksConfig; use std::collections::BTreeMap; +use std::collections::HashMap; use std::sync::Arc; /// Describes ongoing background tasks and provides interfaces for working with @@ -42,6 +45,8 @@ pub struct BackgroundTasks { pub external_endpoints: tokio::sync::watch::Receiver< Option, >, + /// task handle for the ipv4 nat entry garbage collector + pub nat_cleanup: common::TaskHandle, } impl BackgroundTasks { @@ -50,6 +55,7 @@ impl BackgroundTasks { opctx: &OpContext, datastore: Arc, config: &BackgroundTaskConfig, + dpd_clients: &HashMap>, ) -> BackgroundTasks { let mut driver = common::Driver::new(); @@ -70,8 +76,9 @@ impl BackgroundTasks { // Background task: External endpoints list watcher let (task_external_endpoints, external_endpoints) = { - let watcher = - external_endpoints::ExternalEndpointsWatcher::new(datastore); + let watcher = external_endpoints::ExternalEndpointsWatcher::new( + datastore.clone(), + ); let watcher_channel = watcher.watcher(); let task = driver.register( "external_endpoints".to_string(), @@ -83,6 +90,19 @@ impl BackgroundTasks { (task, watcher_channel) }; + let nat_cleanup = { + driver.register( + "nat_garbage_collector".to_string(), + config.nat_cleanup.period_secs, + Box::new(nat_cleanup::Ipv4NatGarbageCollector::new( + datastore, + dpd_clients.values().map(|client| client.clone()).collect(), + )), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + BackgroundTasks { driver, task_internal_dns_config, @@ -91,6 +111,7 @@ impl BackgroundTasks { task_external_dns_servers, task_external_endpoints, external_endpoints, + nat_cleanup, } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index a3bf8efb298..4bdf3339782 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -10,6 +10,7 @@ mod dns_propagation; mod dns_servers; mod external_endpoints; mod init; +mod nat_cleanup; pub use common::Driver; pub use common::TaskHandle; diff --git a/nexus/src/app/background/nat_cleanup.rs b/nexus/src/app/background/nat_cleanup.rs new file mode 100644 index 00000000000..d1c1f4e0a01 --- /dev/null +++ b/nexus/src/app/background/nat_cleanup.rs @@ -0,0 +1,109 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for garbage collecting ipv4_nat_entry table. +//! Responsible for cleaning up soft deleted entries once they +//! have been propagated to running dpd instances. 
+ +use super::common::BackgroundTask; +use chrono::Utc; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use serde_json::json; +use std::sync::Arc; + +/// Background task that periodically prunes soft-deleted entries +/// from ipv4_nat_entry table +pub struct Ipv4NatGarbageCollector { + datastore: Arc, + dpd_clients: Vec>, +} + +impl Ipv4NatGarbageCollector { + pub fn new( + datastore: Arc, + dpd_clients: Vec>, + ) -> Ipv4NatGarbageCollector { + Ipv4NatGarbageCollector { datastore, dpd_clients } + } +} + +impl BackgroundTask for Ipv4NatGarbageCollector { + fn activate<'a, 'b, 'c>( + &'a mut self, + opctx: &'b OpContext, + ) -> BoxFuture<'c, serde_json::Value> + where + 'a: 'c, + 'b: 'c, + { + async { + let log = &opctx.log; + + let result = self.datastore.ipv4_nat_current_gen(opctx).await; + + let mut min_gen = match result { + Ok(gen) => gen, + Err(error) => { + warn!( + &log, + "failed to read generation of database"; + "error" => format!("{:#}", error) + ); + return json!({ + "error": + format!( + "failed to read generation of database: \ + {:#}", + error + ) + }); + } + }; + + for client in &self.dpd_clients { + let response = client.ipv4_nat_generation().await; + match response { + Ok(gen) => min_gen = std::cmp::min(min_gen, *gen), + Err(error) => { + warn!( + &log, + "failed to read generation of dpd"; + "error" => format!("{:#}", error) + ); + return json!({ + "error": + format!( + "failed to read generation of dpd: \ + {:#}", + error + ) + }); + } + } + } + + let result = self + .datastore + .ipv4_nat_cleanup(opctx, min_gen, Utc::now()) + .await + .unwrap(); + + let rv = serde_json::to_value(&result).unwrap_or_else(|error| { + json!({ + "error": + format!( + "failed to serialize final value: {:#}", + error + ) + }) + }); + + rv + } + .boxed() + } +} diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1ac63d19f67..814de1f0d48 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -11,13 +11,15 @@ use super::MAX_NICS_PER_INSTANCE; use super::MAX_VCPU_PER_INSTANCE; use super::MIN_MEMORY_BYTES_PER_INSTANCE; use crate::app::sagas; -use crate::app::sagas::retry_until_known_result; use crate::cidata::InstanceCiData; use crate::external_api::params; use cancel_safe_futures::prelude::*; use futures::future::Fuse; use futures::{FutureExt, SinkExt, StreamExt}; +use ipnetwork::IpNetwork; use nexus_db_model::IpKind; +use nexus_db_model::Ipv4NatValues; +use nexus_db_model::Vni as DbVni; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::authz::ApiResource; @@ -55,6 +57,7 @@ use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; use sled_agent_client::Client as SledAgentClient; use std::collections::HashSet; +use std::net::IpAddr; use std::net::SocketAddr; use std::str::FromStr; use std::sync::Arc; @@ -1315,8 +1318,6 @@ impl super::Nexus { )) })?; - let vni: u32 = network_interface.vni.into(); - info!(log, "looking up instance's external IPs"; "instance_id" => %instance_id); @@ -1346,29 +1347,26 @@ impl super::Nexus { }) .map(|(_, ip)| ip) { - retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry( - &log, - target_ip.ip, - dpd_client::types::MacAddr { - a: mac_address.into_array(), - }, - *target_ip.first_port, - *target_ip.last_port, - vni, - sled_ip_address.ip(), - ) - .await - }) - .await - .map_err(|e| { - Error::internal_error(&format!( - "failed to ensure dpd entry: 
{e}" - )) - })?; + let nat_entry = Ipv4NatValues { + external_address: target_ip.ip, + first_port: target_ip.first_port, + last_port: target_ip.last_port, + sled_address: IpNetwork::new( + IpAddr::V6(*sled_ip_address.ip()), + 128, + ) + .unwrap(), + vni: DbVni(network_interface.vni.clone().into()), + mac: nexus_db_model::MacAddr( + omicron_common::api::external::MacAddr(mac_address), + ), + }; + self.db_datastore.ensure_ipv4_nat_entry(opctx, nat_entry).await?; } + dpd_client.ipv4_nat_trigger_update().await.map_err(|e| { + Error::internal_error(&format!("failed to ensure dpd entry: {e}")) + })?; Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 4dd93f7707e..f07cea52de4 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -282,6 +282,7 @@ impl Nexus { &background_ctx, Arc::clone(&db_datastore), &config.pkg.background_tasks, + &dpd_clients, ); let external_resolver = { diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index d5af080381a..4b189f0b9c2 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -6,7 +6,6 @@ use super::{NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID}; use crate::app::instance::WriteBackUpdatedInstance; use crate::app::sagas::declare_saga_actions; use crate::app::sagas::disk_create::{self, SagaDiskCreate}; -use crate::app::sagas::retry_until_known_result; use crate::app::{ MAX_DISKS_PER_INSTANCE, MAX_EXTERNAL_IPS_PER_INSTANCE, MAX_NICS_PER_INSTANCE, @@ -438,6 +437,7 @@ async fn sic_remove_network_config( ¶ms.serialized_authn, ); let osagactx = sagactx.user_data(); + let dpd_client = osagactx.nexus().dpd_clients.get(&switch).ok_or_else(|| { Error::internal_error(&format!( @@ -463,23 +463,29 @@ async fn sic_remove_network_config( debug!(log, "deleting nat mapping for entry: {target_ip:#?}"); - let result = retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry_deleted(log, target_ip.ip, *target_ip.first_port) - .await - }) - .await; - - match result { - Ok(_) => { - debug!(log, "deletion of nat entry successful for: {target_ip:#?}"); - Ok(()) - } - Err(e) => Err(Error::internal_error(&format!( - "failed to delete nat entry via dpd: {e}" - ))), + // TODO - separate into separate saga node + // Ideally we will execute this step once in a separate saga node so + // we don't get an error for a missing nat entry when deleting entries + // for multiple switches + match datastore.ipv4_nat_delete_by_external_ip(&opctx, target_ip).await { + Ok(_) => Ok(()), + Err(err) => match err { + Error::ObjectNotFound { .. 
} => { + warn!(log, "no matching nat entries to soft delete"); + Ok(()) + } + _ => { + let message = + format!("failed to delete nat entry due to error: {err:?}"); + error!(log, "{}", message); + Err(ActionError::action_failed(Error::internal_error(&message))) + } + }, }?; + // it is ok to not check result since we have a periodic job to reconcile + let _ = dpd_client.ipv4_nat_trigger_update().await; + Ok(()) } diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 88a5823ad05..70b6e9e8c1a 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -8,7 +8,6 @@ use super::ActionRegistry; use super::NexusActionContext; use super::NexusSaga; use crate::app::sagas::declare_saga_actions; -use crate::app::sagas::retry_until_known_result; use nexus_db_queries::db; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::{authn, authz}; @@ -164,11 +163,38 @@ async fn sid_delete_network_config( .map_err(ActionError::action_failed)?; let mut errors: Vec = vec![]; + // TODO - there was a reason we did this in a loop instead of separate + // saga nodes, but I don't remember why. Since we're performing + // a soft-deletion of each entry via the db, it is possible for us to update + // all of the records in a single db transaction, so maybe we should break + // this out into a separate saga node and do that instead. // Here we are attempting to delete every existing NAT entry while deferring // any error handling. If we don't defer error handling, we might end up // bailing out before we've attempted deletion of all entries. for entry in external_ips { + let db_result = + match datastore.ipv4_nat_delete_by_external_ip(&opctx, entry).await + { + Ok(_) => Ok(()), + Err(err) => match err { + Error::ObjectNotFound { .. 
} => {
+                        warn!(log, "no matching nat entries to soft delete");
+                        Ok(())
+                    }
+                    _ => Err(ActionError::action_failed(
+                        Error::internal_error(&format!(
+                            "failed to delete nat entry due to error: {err:?}"
+                        )),
+                    )),
+                },
+            };
+
+        if let Err(err) = db_result {
+            errors.push(err);
+            continue;
+        }
+
         for switch in &params.boundary_switches {
             debug!(log, "deleting nat mapping"; "switch" => switch.to_string(), "entry" => #?entry);
@@ -187,26 +213,8 @@
                 }
             };
 
-            let result = retry_until_known_result(log, || async {
-                dpd_client
-                    .ensure_nat_entry_deleted(log, entry.ip, *entry.first_port)
-                    .await
-            })
-            .await;
-
-            match result {
-                Ok(_) => {
-                    debug!(log, "deleting nat mapping successful"; "switch" => switch.to_string(), "entry" => format!("{entry:#?}"));
-                }
-                Err(e) => {
-                    let new_error =
-                        ActionError::action_failed(Error::internal_error(
-                            &format!("failed to delete nat entry via dpd: {e}"),
-                        ));
-                    error!(log, "{new_error:#?}");
-                    errors.push(new_error);
-                }
-            }
+            // it is ok to not check result since we have a periodic job to reconcile
+            let _ = dpd_client.ipv4_nat_trigger_update().await;
         }
     }
 
diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs
index 6efc0d46e34..e3d66cdac87 100644
--- a/nexus/src/internal_api/http_entrypoints.rs
+++ b/nexus/src/internal_api/http_entrypoints.rs
@@ -24,6 +24,8 @@ use dropshot::RequestContext;
 use dropshot::ResultsPage;
 use dropshot::TypedBody;
 use hyper::Body;
+use nexus_db_model::Ipv4NatEntryView;
+use nexus_db_model::Ipv4NatGenView;
 use nexus_types::internal_api::params::SwitchPutRequest;
 use nexus_types::internal_api::params::SwitchPutResponse;
 use nexus_types::internal_api::views::to_list;
@@ -65,6 +67,8 @@ pub(crate) fn internal_api() -> NexusApiDescription {
     api.register(saga_list)?;
     api.register(saga_view)?;
 
+    api.register(ipv4_nat_changeset)?;
+    api.register(ipv4_nat_gen)?;
     Ok(())
 }
@@ -484,3 +488,78 @@ async fn saga_view(
     };
     apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }
+
+// NAT RPW internal APIs
+
+/// Path parameters for NAT ChangeSet
+#[derive(Deserialize, JsonSchema)]
+struct RpwNatPathParam {
+    /// which change number to start generating
+    /// the change set from
+    from_gen: u32,
+}
+
+/// Query parameters for NAT ChangeSet
+#[derive(Deserialize, JsonSchema)]
+struct RpwNatQueryParam {
+    limit: u32,
+}
+
+/// Fetch NAT ChangeSet
+///
+/// Caller provides their generation as `from_gen`, along with a query
+/// parameter for the page size (`limit`). Endpoint will return changes
+/// that have occurred since the caller's generation number up to the latest
+/// change or until the `limit` is reached. If there are no changes, an
+/// empty vec is returned.
+#[endpoint {
+    method = GET,
+    path = "/rpw/nat/ipv4/changeset/{from_gen}"
+}]
+async fn ipv4_nat_changeset(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<RpwNatPathParam>,
+    query_params: Query<RpwNatQueryParam>,
+) -> Result<HttpResponseOk<Vec<Ipv4NatEntryView>>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
+        let nexus = &apictx.nexus;
+        let path = path_params.into_inner();
+        let query = query_params.into_inner();
+        let changeset = nexus
+            .datastore()
+            .ipv4_nat_changeset(&opctx, path.from_gen, query.limit)
+            .await?;
+        Ok(HttpResponseOk(changeset))
+    };
+    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
+// TODO: Remove (maybe)
+// This endpoint may be more trouble than it's worth.
+// The reason this endpoint may be a headache is that the underlying SEQUENCE
+// in the DB will advance, even on failed transactions. This is not a big
+// deal, but it will lead to callers trying to catch up to entries that don't
+// exist if they check this endpoint first. As an alternative, we could just
+// page through the changesets until we get nothing back, then revert to
+// periodic polling of the changeset endpoint.
+
+/// Fetch NAT generation
+#[endpoint {
+    method = GET,
+    path = "/rpw/nat/ipv4/gen"
+}]
+async fn ipv4_nat_gen(
+    rqctx: RequestContext<Arc<ServerContext>>,
+) -> Result<HttpResponseOk<Ipv4NatGenView>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
+        let nexus = &apictx.nexus;
+        let gen = nexus.datastore().ipv4_nat_current_gen(&opctx).await?;
+        let view = Ipv4NatGenView { gen };
+        Ok(HttpResponseOk(view))
+    };
+    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml
index 6eeacceaedd..c56abe88088 100644
--- a/nexus/tests/config.test.toml
+++ b/nexus/tests/config.test.toml
@@ -89,3 +89,4 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+nat_cleanup.period_secs = 30
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index 12043d80960..048c692ec59 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -357,6 +357,82 @@
         }
       }
     },
+    "/rpw/nat/ipv4/changeset/{from_gen}": {
+      "get": {
+        "summary": "Fetch NAT ChangeSet",
+        "description": "Caller provides their generation as `from_gen`, along with a query parameter for the page size (`limit`). Endpoint will return changes that have occurred since the caller's generation number up to the latest change or until the `limit` is reached.
If there are no changes, an empty vec is returned.", + "operationId": "ipv4_nat_changeset", + "parameters": [ + { + "in": "path", + "name": "from_gen", + "description": "which change number to start generating the change set from", + "required": true, + "schema": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "in": "query", + "name": "limit", + "required": true, + "schema": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Ipv4NatEntryView", + "type": "array", + "items": { + "$ref": "#/components/schemas/Ipv4NatEntryView" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/rpw/nat/ipv4/gen": { + "get": { + "summary": "Fetch NAT generation", + "operationId": "ipv4_nat_gen", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Ipv4NatGenView" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/sagas": { "get": { "summary": "List sagas", @@ -2041,6 +2117,68 @@ } ] }, + "Ipv4NatEntryView": { + "description": "NAT Record", + "type": "object", + "properties": { + "deleted": { + "type": "boolean" + }, + "external_address": { + "type": "string", + "format": "ipv4" + }, + "first_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "gen": { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "last_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "sled_address": { + "type": "string", + "format": "ipv6" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "deleted", + "external_address", + "first_port", + "gen", + "last_port", + "mac", + "sled_address", + "vni" + ] + }, + "Ipv4NatGenView": { + "description": "NAT Generation", + "type": "object", + "properties": { + "gen": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + "required": [ + "gen" + ] + }, "Ipv4Network": { "type": "string", "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$" @@ -3326,6 +3464,12 @@ "minLength": 1, "maxLength": 63 }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "ZpoolPutRequest": { "description": "Sent by a sled agent on startup to Nexus to request further instruction", "type": "object", diff --git a/package-manifest.toml b/package-manifest.toml index b2e1552f3cd..e1dca890316 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -451,8 +451,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "2dc34eaac7eb9d320594f3ac125df6a601fe020e0b3c7f16eb0a5ebddc8e18b9" +source.commit = "9529c76ab8cde2cd50a7afba0d7e6bb6071f5dd9" +source.sha256 = "80fe3a3d9bf406d0072e9f73997fc28ae973938cb19e335d64d9e5055659af99" output.type = "zone" output.intermediate_only = true @@ -476,8 +476,8 @@ only_for_targets.image = "standard" # 2. 
Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "1616eb25ab3d3a8b678b6cf3675af7ba61d455c3e6c2ba2a2d35a663861bc8e8" +source.commit = "9529c76ab8cde2cd50a7afba0d7e6bb6071f5dd9" +source.sha256 = "e2979b01dba442ed3802b303bdd8e7a45cae807e81fff256ad39de852cec9c29" output.type = "zone" output.intermediate_only = true @@ -494,8 +494,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "a045e6dbb84dbceaf3a8a7dc33d283449fbeaf081442d0ae14ce8d8ffcdda4e9" +source.commit = "9529c76ab8cde2cd50a7afba0d7e6bb6071f5dd9" +source.sha256 = "a6dbf2a58bf560af6f1ca7b9cf6726ff445a091a2b5cee25d5e2742a97d40921" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/4.0.1/up.sql b/schema/crdb/4.0.1/up.sql new file mode 100644 index 00000000000..f6fe3d4087e --- /dev/null +++ b/schema/crdb/4.0.1/up.sql @@ -0,0 +1,111 @@ +-- CRDB documentation recommends the following: +-- "Execute schema changes either as single statements (as an implicit transaction), +-- or in an explicit transaction consisting of the single schema change statement." +-- +-- For each schema change, we transactionally: +-- 1. Check the current version +-- 2. Apply the idempotent update + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +CREATE SEQUENCE IF NOT EXISTS omicron.public.nat_gen START 1 INCREMENT 1; +COMMIT; + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + external_address INET NOT NULL, + first_port INT4 NOT NULL, + last_port INT4 NOT NULL, + sled_address INET NOT NULL, + vni INT4 NOT NULL, + mac INT8 NOT NULL, + gen INT8 NOT NULL DEFAULT nextval('omicron.public.nat_gen') ON UPDATE nextval('omicron.public.nat_gen'), + time_created TIMESTAMPTZ NOT NULL DEFAULT now(), + time_deleted TIMESTAMPTZ +); +COMMIT; + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_gen ON omicron.public.ipv4_nat_entry ( + gen +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + vni, + mac, + time_created, + time_deleted +); +COMMIT; + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +CREATE UNIQUE INDEX IF NOT EXISTS overlapping_ipv4_nat_entry ON omicron.public.ipv4_nat_entry ( + external_address, + first_port, + last_port +) WHERE time_deleted IS NULL; + +COMMIT; + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting 
version for schema change' + ) AS BOOL +); + +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup ON omicron.public.ipv4_nat_entry (external_address, first_port, last_port, sled_address, vni, mac); + +COMMIT; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 4b38b7dfe48..ed717f9e37f 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2562,7 +2562,45 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '4.0.0', NULL) + ( TRUE, NOW(), NOW(), '4.0.1', NULL) ON CONFLICT DO NOTHING; + +CREATE SEQUENCE IF NOT EXISTS omicron.public.nat_gen START 1 INCREMENT 1; + +CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + external_address INET NOT NULL, + first_port INT4 NOT NULL, + last_port INT4 NOT NULL, + sled_address INET NOT NULL, + vni INT4 NOT NULL, + mac INT8 NOT NULL, + gen INT8 NOT NULL DEFAULT nextval('omicron.public.nat_gen') ON UPDATE nextval('omicron.public.nat_gen'), + time_created TIMESTAMPTZ NOT NULL DEFAULT now(), + time_deleted TIMESTAMPTZ +); + +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_gen ON omicron.public.ipv4_nat_entry ( + gen +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + vni, + mac, + time_created, + time_deleted +); + +CREATE UNIQUE INDEX IF NOT EXISTS overlapping_ipv4_nat_entry ON omicron.public.ipv4_nat_entry ( + external_address, + first_port, + last_port +) WHERE time_deleted IS NULL; + +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup ON omicron.public.ipv4_nat_entry (external_address, first_port, last_port, sled_address, vni, mac); + COMMIT; diff --git a/smf/nexus/config-partial.toml b/smf/nexus/config-partial.toml index b29727c4aa5..ec8ada9fd02 100644 --- a/smf/nexus/config-partial.toml +++ b/smf/nexus/config-partial.toml @@ -38,3 +38,4 @@ dns_external.max_concurrent_server_updates = 5 # certificates it will take _other_ Nexus instances to notice and stop serving # them (on a sunny day). external_endpoints.period_secs = 60 +nat_cleanup.period_secs = 30 diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index cbdbca76622..24e68ba05f7 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="363e365135cfa46d7f7558d8670f35aa8fe412e9" -SHA2="4da5edf1571a550a90aa8679a25c1535d2b02154dfb6034f170e421c2633bc31" +COMMIT="9529c76ab8cde2cd50a7afba0d7e6bb6071f5dd9" +SHA2="53458bec594a3b653d5c8a3876b84e39c1aa7ee38b972bdad1c33d98807ebd15" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index acff400104e..c0737185983 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="2dc34eaac7eb9d320594f3ac125df6a601fe020e0b3c7f16eb0a5ebddc8e18b9" -CIDL_SHA256_LINUX_DPD="5a976d1e43031f4790d1cd2f42d226b47c1be9c998917666f21cfaa3a7b13939" -CIDL_SHA256_LINUX_SWADM="38680e69364ffbfc43fea524786580d151ff45ce2f1802bd5179599f7c80e5f8" +CIDL_SHA256_ILLUMOS="80fe3a3d9bf406d0072e9f73997fc28ae973938cb19e335d64d9e5055659af99" +CIDL_SHA256_LINUX_DPD="8598e1169e4038cc44df3464c1022239ce94875199c1c8cb5d7a1fea4569d145" +CIDL_SHA256_LINUX_SWADM="5b8d6ebd8773ea4661419bd0300108bd5b6cd26aa0d15c328d1dcf4426aefa9d"