diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index fcea57220d..adf661516a 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -750,6 +750,7 @@ pub enum ResourceType { UserBuiltin, Zpool, Vmm, + Ipv4NatEntry, } // IDENTITY METADATA diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index 4e821e2676..94c39b4436 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -335,6 +335,8 @@ pub struct BackgroundTaskConfig { pub dns_external: DnsTasksConfig, /// configuration for external endpoint list watcher pub external_endpoints: ExternalEndpointsConfig, + /// configuration for nat table garbage collector + pub nat_cleanup: NatCleanupConfig, /// configuration for inventory tasks pub inventory: InventoryConfig, } @@ -371,6 +373,14 @@ pub struct ExternalEndpointsConfig { // allow/disallow wildcard certs, don't serve expired certs, etc.) } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct NatCleanupConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct InventoryConfig { @@ -498,7 +508,7 @@ mod test { BackgroundTaskConfig, Config, ConfigDropshotWithTls, ConsoleConfig, Database, DeploymentConfig, DnsTasksConfig, DpdConfig, ExternalEndpointsConfig, InternalDns, InventoryConfig, LoadError, - LoadErrorKind, MgdConfig, PackageConfig, SchemeName, + LoadErrorKind, MgdConfig, NatCleanupConfig, PackageConfig, SchemeName, TimeseriesDbConfig, Tunables, UpdatesConfig, }; use crate::address::{Ipv6Subnet, RACK_PREFIX}; @@ -649,6 +659,7 @@ mod test { dns_external.period_secs_propagation = 7 dns_external.max_concurrent_server_updates = 8 external_endpoints.period_secs = 9 + nat_cleanup.period_secs = 30 inventory.period_secs = 10 inventory.nkeep = 11 inventory.disable = false @@ -746,6 +757,9 @@ mod test { external_endpoints: ExternalEndpointsConfig { period_secs: Duration::from_secs(9), }, + nat_cleanup: NatCleanupConfig { + period_secs: Duration::from_secs(30), + }, inventory: InventoryConfig { period_secs: Duration::from_secs(10), nkeep: 11, @@ -804,6 +818,7 @@ mod test { dns_external.period_secs_propagation = 7 dns_external.max_concurrent_server_updates = 8 external_endpoints.period_secs = 9 + nat_cleanup.period_secs = 30 inventory.period_secs = 10 inventory.nkeep = 3 inventory.disable = false diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 128d4315f2..9f91d38504 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -159,6 +159,7 @@ async fn cmd_nexus_background_tasks_show( "dns_config_external", "dns_servers_external", "dns_propagation_external", + "nat_v4_garbage_collector", ] { if let Some(bgtask) = tasks.remove(name) { print_task(&bgtask); diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 7949c1eb61..fd50d80c81 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -61,6 +61,11 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "nat_v4_garbage_collector" + prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a + predetermined retention policy + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -121,6 +126,11 @@ 
task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "nat_v4_garbage_collector" + prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a + predetermined retention policy + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. @@ -168,6 +178,11 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "nat_v4_garbage_collector" + prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a + predetermined retention policy + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 8162b6d9de..6bc3a85e8a 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -255,6 +255,11 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "nat_v4_garbage_collector" + prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a + predetermined retention policy + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -319,6 +324,13 @@ task: "dns_propagation_external" [::1]:REDACTED_PORT success +task: "nat_v4_garbage_collector" + configured period: every 30s + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms +warning: unknown background task: "nat_v4_garbage_collector" (don't know how to interpret details: Null) + task: "external_endpoints" configured period: every 1m currently executing: no diff --git a/nexus/db-model/src/ipv4_nat_entry.rs b/nexus/db-model/src/ipv4_nat_entry.rs new file mode 100644 index 0000000000..570a46b5e9 --- /dev/null +++ b/nexus/db-model/src/ipv4_nat_entry.rs @@ -0,0 +1,81 @@ +use std::net::{Ipv4Addr, Ipv6Addr}; + +use super::MacAddr; +use crate::{schema::ipv4_nat_entry, Ipv4Net, Ipv6Net, SqlU16, Vni}; +use chrono::{DateTime, Utc}; +use omicron_common::api::external; +use schemars::JsonSchema; +use serde::Serialize; +use uuid::Uuid; + +/// Values used to create an Ipv4NatEntry +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = ipv4_nat_entry)] +pub struct Ipv4NatValues { + pub external_address: Ipv4Net, + pub first_port: SqlU16, + pub last_port: SqlU16, + pub sled_address: Ipv6Net, + pub vni: Vni, + pub mac: MacAddr, +} + +/// Database representation of an Ipv4 NAT Entry. 
+#[derive(Queryable, Debug, Clone, Selectable)]
+#[diesel(table_name = ipv4_nat_entry)]
+pub struct Ipv4NatEntry {
+    pub id: Uuid,
+    pub external_address: Ipv4Net,
+    pub first_port: SqlU16,
+    pub last_port: SqlU16,
+    pub sled_address: Ipv6Net,
+    pub vni: Vni,
+    pub mac: MacAddr,
+    pub version_added: i64,
+    pub version_removed: Option<i64>,
+    pub time_created: DateTime<Utc>,
+    pub time_deleted: Option<DateTime<Utc>>,
+}
+
+impl Ipv4NatEntry {
+    pub fn first_port(&self) -> u16 {
+        self.first_port.into()
+    }
+
+    pub fn last_port(&self) -> u16 {
+        self.last_port.into()
+    }
+}
+
+/// NAT Record
+#[derive(Clone, Debug, Serialize, JsonSchema)]
+pub struct Ipv4NatEntryView {
+    pub external_address: Ipv4Addr,
+    pub first_port: u16,
+    pub last_port: u16,
+    pub sled_address: Ipv6Addr,
+    pub vni: external::Vni,
+    pub mac: external::MacAddr,
+    pub gen: i64,
+    pub deleted: bool,
+}
+
+impl From<Ipv4NatEntry> for Ipv4NatEntryView {
+    fn from(value: Ipv4NatEntry) -> Self {
+        let (gen, deleted) = match value.version_removed {
+            Some(gen) => (gen, true),
+            None => (value.version_added, false),
+        };
+
+        Self {
+            external_address: value.external_address.ip(),
+            first_port: value.first_port(),
+            last_port: value.last_port(),
+            sled_address: value.sled_address.ip(),
+            vni: value.vni.0,
+            mac: *value.mac,
+            gen,
+            deleted,
+        }
+    }
+}
diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs
index 7aa8a6b076..6b65eb87ec 100644
--- a/nexus/db-model/src/lib.rs
+++ b/nexus/db-model/src/lib.rs
@@ -53,6 +53,7 @@ mod system_update;
 // These actually represent subqueries, not real table.
 // However, they must be defined in the same crate as our tables
 // for join-based marker trait generation.
+mod ipv4_nat_entry;
 pub mod queries;
 mod rack;
 mod region;
@@ -124,6 +125,7 @@ pub use instance_cpu_count::*;
 pub use instance_state::*;
 pub use inventory::*;
 pub use ip_pool::*;
+pub use ipv4_nat_entry::*;
 pub use ipv4net::*;
 pub use ipv6::*;
 pub use ipv6net::*;
diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs
index 7c6b8bbd0a..4844f2a33f 100644
--- a/nexus/db-model/src/schema.rs
+++ b/nexus/db-model/src/schema.rs
@@ -489,6 +489,32 @@
     }
 }
 
+table! {
+    ipv4_nat_entry (id) {
+        id -> Uuid,
+        external_address -> Inet,
+        first_port -> Int4,
+        last_port -> Int4,
+        sled_address -> Inet,
+        vni -> Int4,
+        mac -> Int8,
+        version_added -> Int8,
+        version_removed -> Nullable<Int8>,
+        time_created -> Timestamptz,
+        time_deleted -> Nullable<Timestamptz>,
+    }
+}
+
+// This is the sequence used for the version number
+// in ipv4_nat_entry.
+table! {
+    ipv4_nat_version (last_value) {
+        last_value -> Int8,
+        log_cnt -> Int8,
+        is_called -> Bool,
+    }
+}
+
 table! {
     external_ip (id) {
         id -> Uuid,
@@ -1243,7 +1269,7 @@ table! {
 ///
 /// This should be updated whenever the schema is changed. For more details,
 /// refer to: schema/crdb/README.adoc
-pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(10, 0, 0);
+pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(11, 0, 0);
 
 allow_tables_to_appear_in_same_query!(
     system_update,
diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
new file mode 100644
index 0000000000..274937b299
--- /dev/null
+++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs
@@ -0,0 +1,440 @@
+use super::DataStore;
+use crate::context::OpContext;
+use crate::db;
+use crate::db::error::public_error_from_diesel;
+use crate::db::error::ErrorHandler;
+use crate::db::model::{Ipv4NatEntry, Ipv4NatValues};
+use async_bb8_diesel::AsyncRunQueryDsl;
+use chrono::{DateTime, Utc};
+use diesel::prelude::*;
+use diesel::sql_types::BigInt;
+use nexus_db_model::ExternalIp;
+use nexus_db_model::Ipv4NatEntryView;
+use omicron_common::api::external::CreateResult;
+use omicron_common::api::external::DeleteResult;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupResult;
+use omicron_common::api::external::LookupType;
+use omicron_common::api::external::ResourceType;
+
+impl DataStore {
+    pub async fn ensure_ipv4_nat_entry(
+        &self,
+        opctx: &OpContext,
+        nat_entry: Ipv4NatValues,
+    ) -> CreateResult<()> {
+        use db::schema::ipv4_nat_entry::dsl;
+        use diesel::sql_types;
+
+        // Look up any NAT entries that already have the exact parameters
+        // we're trying to INSERT.
+        let matching_entry_subquery = dsl::ipv4_nat_entry
+            .filter(dsl::external_address.eq(nat_entry.external_address))
+            .filter(dsl::first_port.eq(nat_entry.first_port))
+            .filter(dsl::last_port.eq(nat_entry.last_port))
+            .filter(dsl::sled_address.eq(nat_entry.sled_address))
+            .filter(dsl::vni.eq(nat_entry.vni))
+            .filter(dsl::mac.eq(nat_entry.mac))
+            .select((
+                dsl::external_address,
+                dsl::first_port,
+                dsl::last_port,
+                dsl::sled_address,
+                dsl::vni,
+                dsl::mac,
+            ));
+
+        // SELECT exactly the values we're trying to INSERT, but only
+        // if it does not already exist.
+        let new_entry_subquery = diesel::dsl::select((
+            nat_entry.external_address.into_sql::<sql_types::Inet>(),
+            nat_entry.first_port.into_sql::<sql_types::Int4>(),
+            nat_entry.last_port.into_sql::<sql_types::Int4>(),
+            nat_entry.sled_address.into_sql::<sql_types::Inet>(),
+            nat_entry.vni.into_sql::<sql_types::Int4>(),
+            nat_entry.mac.into_sql::<sql_types::Int8>(),
+        ))
+        .filter(diesel::dsl::not(diesel::dsl::exists(matching_entry_subquery)));
+
+        diesel::insert_into(dsl::ipv4_nat_entry)
+            .values(new_entry_subquery)
+            .into_columns((
+                dsl::external_address,
+                dsl::first_port,
+                dsl::last_port,
+                dsl::sled_address,
+                dsl::vni,
+                dsl::mac,
+            ))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
+
+    pub async fn ipv4_nat_delete(
+        &self,
+        opctx: &OpContext,
+        nat_entry: &Ipv4NatEntry,
+    ) -> DeleteResult {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        let updated_rows = diesel::update(dsl::ipv4_nat_entry)
+            .set((
+                dsl::version_removed.eq(ipv4_nat_next_version().nullable()),
+                dsl::time_deleted.eq(Utc::now()),
+            ))
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::version_removed.is_null())
+            .filter(dsl::id.eq(nat_entry.id))
+            .filter(dsl::version_added.eq(nat_entry.version_added))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        if updated_rows == 0 {
+            return Err(Error::ObjectNotFound {
+                type_name: ResourceType::Ipv4NatEntry,
+                lookup_type: LookupType::ByCompositeId(
+                    "id, version_added".to_string(),
+                ),
+            });
+        }
+        Ok(())
+    }
+
+    pub async fn ipv4_nat_find_by_id(
+        &self,
+        opctx: &OpContext,
+        id: uuid::Uuid,
+    ) -> LookupResult<Ipv4NatEntry> {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        let result = dsl::ipv4_nat_entry
+            .filter(dsl::id.eq(id))
+            .select(Ipv4NatEntry::as_select())
+            .limit(1)
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        if let Some(nat_entry) = result.first() {
+            Ok(nat_entry.clone())
+        } else {
+            Err(Error::InvalidRequest {
+                message: "no matching records".to_string(),
+            })
+        }
+    }
+
+    pub async fn ipv4_nat_delete_by_external_ip(
+        &self,
+        opctx: &OpContext,
+        external_ip: &ExternalIp,
+    ) -> DeleteResult {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        let updated_rows = diesel::update(dsl::ipv4_nat_entry)
+            .set((
+                dsl::version_removed.eq(ipv4_nat_next_version().nullable()),
+                dsl::time_deleted.eq(Utc::now()),
+            ))
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::version_removed.is_null())
+            .filter(dsl::external_address.eq(external_ip.ip))
+            .filter(dsl::first_port.eq(external_ip.first_port))
+            .filter(dsl::last_port.eq(external_ip.last_port))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        if updated_rows == 0 {
+            return Err(Error::ObjectNotFound {
+                type_name: ResourceType::Ipv4NatEntry,
+                lookup_type: LookupType::ByCompositeId(
+                    "external_ip, first_port, last_port".to_string(),
+                ),
+            });
+        }
+        Ok(())
+    }
+
+    pub async fn ipv4_nat_find_by_values(
+        &self,
+        opctx: &OpContext,
+        values: Ipv4NatValues,
+    ) -> LookupResult<Ipv4NatEntry> {
+        use db::schema::ipv4_nat_entry::dsl;
+        let result = dsl::ipv4_nat_entry
+            .filter(dsl::external_address.eq(values.external_address))
+            .filter(dsl::first_port.eq(values.first_port))
+            .filter(dsl::last_port.eq(values.last_port))
+            .filter(dsl::mac.eq(values.mac))
+            .filter(dsl::sled_address.eq(values.sled_address))
+            .filter(dsl::vni.eq(values.vni))
+            .filter(dsl::time_deleted.is_null())
+            .select(Ipv4NatEntry::as_select())
+            .limit(1)
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        if let Some(nat_entry) = result.first() {
+            Ok(nat_entry.clone())
+        } else {
+            Err(Error::InvalidRequest {
+                message: "no matching records".to_string(),
+            })
+        }
+    }
+
+    pub async fn ipv4_nat_list_since_version(
+        &self,
+        opctx: &OpContext,
+        version: i64,
+        limit: u32,
+    ) -> ListResultVec<Ipv4NatEntry> {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        let list = dsl::ipv4_nat_entry
+            .filter(
+                dsl::version_added
+                    .gt(version)
+                    .or(dsl::version_removed.gt(version)),
+            )
+            .limit(limit as i64)
+            .select(Ipv4NatEntry::as_select())
+            .load_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(list)
+    }
+
+    pub async fn ipv4_nat_changeset(
+        &self,
+        opctx: &OpContext,
+        version: i64,
+        limit: u32,
+    ) -> ListResultVec<Ipv4NatEntryView> {
+        let nat_entries =
+            self.ipv4_nat_list_since_version(opctx, version, limit).await?;
+        let nat_entries: Vec<Ipv4NatEntryView> =
+            nat_entries.iter().map(|e| e.clone().into()).collect();
+        Ok(nat_entries)
+    }
+
+    pub async fn ipv4_nat_current_version(
+        &self,
+        opctx: &OpContext,
+    ) -> LookupResult<i64> {
+        use db::schema::ipv4_nat_version::dsl;
+
+        let latest: Option<i64> = dsl::ipv4_nat_version
+            .select(diesel::dsl::max(dsl::last_value))
+            .first_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        match latest {
+            Some(value) => Ok(value),
+            None => Err(Error::InvalidRequest {
+                message: "sequence table is empty!".to_string(),
+            }),
+        }
+    }
+
+    pub async fn ipv4_nat_cleanup(
+        &self,
+        opctx: &OpContext,
+        version: i64,
+        before_timestamp: DateTime<Utc>,
+    ) -> DeleteResult {
+        use db::schema::ipv4_nat_entry::dsl;
+
+        diesel::delete(dsl::ipv4_nat_entry)
+            .filter(dsl::version_removed.lt(version))
+            .filter(dsl::time_deleted.lt(before_timestamp))
+            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(())
+    }
+}
+
+fn ipv4_nat_next_version() -> diesel::expression::SqlLiteral<BigInt> {
+    diesel::dsl::sql::<BigInt>("nextval('omicron.public.ipv4_nat_version')")
+}
+
+#[cfg(test)]
+mod test {
+    use std::str::FromStr;
+
+    use crate::db::datastore::datastore_test;
+    use chrono::Utc;
+    use nexus_db_model::{Ipv4NatValues, MacAddr, Vni};
+    use nexus_test_utils::db::test_setup_database;
+    use omicron_common::api::external;
+    use omicron_test_utils::dev;
+
+    // Test our ability to track additions and deletions since a given version number
+    #[tokio::test]
+    async fn nat_version_tracking() {
+        let logctx = dev::test_setup_log("test_nat_version_tracking");
+        let mut db = test_setup_database(&logctx.log).await;
+        let (opctx, datastore) = datastore_test(&logctx, &db).await;
+
+        // We should not have any NAT entries at this moment
+        let initial_state =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        assert!(initial_state.is_empty());
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            0
+        );
+
+        // Each change (creation / deletion) to the NAT table should increment the
+        // version number of the row in the NAT table
+        let external_address = external::Ipv4Net(
+            ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(),
+        );
+
+        let sled_address = external::Ipv6Net(
+            ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(),
+        );
+
+        // Add a nat entry.
+        let nat1 = Ipv4NatValues {
+            external_address: external_address.into(),
+            first_port: 0.into(),
+            last_port: 999.into(),
+            sled_address: sled_address.into(),
+            vni: Vni(external::Vni::random()),
+            mac: MacAddr(
+                external::MacAddr::from_str("A8:40:25:F5:EB:2A").unwrap(),
+            ),
+        };
+
+        datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap();
+        let first_entry =
+            datastore.ipv4_nat_find_by_values(&opctx, nat1).await.unwrap();
+
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        // The NAT table has undergone one change. One entry has been added,
+        // none deleted, so we should be at version 1.
+ assert_eq!(nat_entries.len(), 1); + assert_eq!(nat_entries.last().unwrap().version_added, 1); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 1 + ); + + // Add another nat entry. + let nat2 = Ipv4NatValues { + external_address: external_address.into(), + first_port: 1000.into(), + last_port: 1999.into(), + sled_address: sled_address.into(), + vni: Vni(external::Vni::random()), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2B").unwrap(), + ), + }; + + datastore.ensure_ipv4_nat_entry(&opctx, nat2).await.unwrap(); + + let nat_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone two changes. Two entries have been + // added, none deleted, so we should be at version 2. + let nat_entry = + nat_entries.iter().find(|e| e.version_added == 2).unwrap(); + assert_eq!(nat_entries.len(), 2); + assert_eq!(nat_entry.version_added, 2); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 2 + ); + + // Test Cleanup logic + // Cleanup should only perma-delete entries that are older than a + // specified version number and whose `time_deleted` field is + // older than a specified age. + let time_cutoff = Utc::now(); + datastore.ipv4_nat_cleanup(&opctx, 2, time_cutoff).await.unwrap(); + + // Nothing should have changed (no records currently marked for deletion) + let nat_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(nat_entries.len(), 2); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 2 + ); + + // Delete the first nat entry. It should show up as a later version number. + datastore.ipv4_nat_delete(&opctx, &first_entry).await.unwrap(); + let nat_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone three changes. Two entries have been + // added, one deleted, so we should be at version 3. Since the + // first entry was marked for deletion (and it was the third change), + // the first entry's version number should now be 3. 
+        let nat_entry =
+            nat_entries.iter().find(|e| e.version_removed.is_some()).unwrap();
+        assert_eq!(nat_entries.len(), 2);
+        assert_eq!(nat_entry.version_removed, Some(3));
+        assert_eq!(nat_entry.id, first_entry.id);
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            3
+        );
+
+        // Try cleaning up with the old version and time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 2, time_cutoff).await.unwrap();
+
+        // Try cleaning up with a greater version and old time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 6, time_cutoff).await.unwrap();
+
+        // Try cleaning up with an older version and newer time cutoff values
+        datastore.ipv4_nat_cleanup(&opctx, 2, Utc::now()).await.unwrap();
+
+        // Both records should still exist (the soft-deleted record is newer
+        // than the cutoff values)
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        assert_eq!(nat_entries.len(), 2);
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            3
+        );
+
+        // Try cleaning up with both cutoff values increased
+        datastore.ipv4_nat_cleanup(&opctx, 4, Utc::now()).await.unwrap();
+
+        // The soft-deleted NAT entry should be removed from the table
+        let nat_entries =
+            datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap();
+
+        assert_eq!(nat_entries.len(), 1);
+
+        // The version should be unchanged
+        assert_eq!(
+            datastore.ipv4_nat_current_version(&opctx).await.unwrap(),
+            3
+        );
+
+        db.cleanup().await.unwrap();
+        logctx.cleanup_successful();
+    }
+}
diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs
index 91373f6875..7385970fb1 100644
--- a/nexus/db-queries/src/db/datastore/mod.rs
+++ b/nexus/db-queries/src/db/datastore/mod.rs
@@ -63,6 +63,7 @@ mod image;
 mod instance;
 mod inventory;
 mod ip_pool;
+mod ipv4_nat_entry;
 mod network_interface;
 mod oximeter;
 mod physical_disk;
diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml
index efc9aa9c27..3679fa8196 100644
--- a/nexus/examples/config.toml
+++ b/nexus/examples/config.toml
@@ -92,6 +92,7 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+nat_cleanup.period_secs = 30
 # How frequently to collect hardware/software inventory from the whole system
 # (even if we don't have reason to believe anything has changed).
 inventory.period_secs = 600
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs
index b000dd9bda..d27248ffdc 100644
--- a/nexus/src/app/background/init.rs
+++ b/nexus/src/app/background/init.rs
@@ -10,12 +10,15 @@ use super::dns_propagation;
 use super::dns_servers;
 use super::external_endpoints;
 use super::inventory_collection;
+use super::nat_cleanup;
 use nexus_db_model::DnsGroup;
 use nexus_db_queries::context::OpContext;
 use nexus_db_queries::db::DataStore;
+use omicron_common::api::internal::shared::SwitchLocation;
 use omicron_common::nexus_config::BackgroundTaskConfig;
 use omicron_common::nexus_config::DnsTasksConfig;
 use std::collections::BTreeMap;
+use std::collections::HashMap;
 use std::sync::Arc;
 use uuid::Uuid;
 
@@ -44,6 +47,8 @@ pub struct BackgroundTasks {
     pub external_endpoints: tokio::sync::watch::Receiver<
         Option<external_endpoints::ExternalEndpoints>,
     >,
+    /// task handle for the ipv4 nat entry garbage collector
+    pub nat_cleanup: common::TaskHandle,
 
     /// task handle for the task that collects inventory
     pub task_inventory_collection: common::TaskHandle,
@@ -55,6 +60,7 @@ impl BackgroundTasks {
         opctx: &OpContext,
         datastore: Arc<DataStore>,
         config: &BackgroundTaskConfig,
+        dpd_clients: &HashMap<SwitchLocation, Arc<dpd_client::Client>>,
         nexus_id: Uuid,
         resolver: internal_dns::resolver::Resolver,
     ) -> BackgroundTasks {
@@ -96,6 +102,23 @@
             (task, watcher_channel)
         };
 
+        let nat_cleanup = {
+            driver.register(
+                "nat_v4_garbage_collector".to_string(),
+                String::from(
+                    "prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table \
+                    based on a predetermined retention policy",
+                ),
+                config.nat_cleanup.period_secs,
+                Box::new(nat_cleanup::Ipv4NatGarbageCollector::new(
+                    datastore.clone(),
+                    dpd_clients.values().cloned().collect(),
+                )),
+                opctx.child(BTreeMap::new()),
+                vec![],
+            )
+        };
+
         // Background task: inventory collector
         let task_inventory_collection = {
             let collector = inventory_collection::InventoryCollector::new(
@@ -128,6 +151,7 @@
             task_external_dns_servers,
             task_external_endpoints,
             external_endpoints,
+            nat_cleanup,
             task_inventory_collection,
         }
     }
diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs
index e1f474b41a..954207cb3c 100644
--- a/nexus/src/app/background/mod.rs
+++ b/nexus/src/app/background/mod.rs
@@ -11,6 +11,7 @@ mod dns_servers;
 mod external_endpoints;
 mod init;
 mod inventory_collection;
+mod nat_cleanup;
 mod status;
 
 pub use common::Driver;
diff --git a/nexus/src/app/background/nat_cleanup.rs b/nexus/src/app/background/nat_cleanup.rs
new file mode 100644
index 0000000000..1691d96a4b
--- /dev/null
+++ b/nexus/src/app/background/nat_cleanup.rs
@@ -0,0 +1,111 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Background task for garbage collecting the ipv4_nat_entry table.
+//! Responsible for cleaning up soft-deleted entries once they
+//! have been propagated to running dpd instances.
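+//!
+//! An entry counts as propagated once every dpd instance reports an IPv4
+//! NAT generation at or above the entry's `version_removed`: the task
+//! takes the minimum of Nexus's current NAT generation and each
+//! generation reported by dpd, and prunes only entries strictly below
+//! that minimum which are also older than the retention window.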
+
+use super::common::BackgroundTask;
+use chrono::{Duration, Utc};
+use futures::future::BoxFuture;
+use futures::FutureExt;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::DataStore;
+use serde_json::json;
+use std::sync::Arc;
+
+/// Background task that periodically prunes soft-deleted entries
+/// from the ipv4_nat_entry table
+pub struct Ipv4NatGarbageCollector {
+    datastore: Arc<DataStore>,
+    dpd_clients: Vec<Arc<dpd_client::Client>>,
+}
+
+impl Ipv4NatGarbageCollector {
+    pub fn new(
+        datastore: Arc<DataStore>,
+        dpd_clients: Vec<Arc<dpd_client::Client>>,
+    ) -> Ipv4NatGarbageCollector {
+        Ipv4NatGarbageCollector { datastore, dpd_clients }
+    }
+}
+
+impl BackgroundTask for Ipv4NatGarbageCollector {
+    fn activate<'a, 'b, 'c>(
+        &'a mut self,
+        opctx: &'b OpContext,
+    ) -> BoxFuture<'c, serde_json::Value>
+    where
+        'a: 'c,
+        'b: 'c,
+    {
+        async {
+            let log = &opctx.log;
+
+            let result = self.datastore.ipv4_nat_current_version(opctx).await;
+
+            let mut min_gen = match result {
+                Ok(gen) => gen,
+                Err(error) => {
+                    warn!(
+                        &log,
+                        "failed to read generation of database";
+                        "error" => format!("{:#}", error)
+                    );
+                    return json!({
+                        "error":
+                            format!(
+                                "failed to read generation of database: \
+                                {:#}",
+                                error
+                            )
+                    });
+                }
+            };
+
+            for client in &self.dpd_clients {
+                let response = client.ipv4_nat_generation().await;
+                match response {
+                    Ok(gen) => min_gen = std::cmp::min(min_gen, *gen),
+                    Err(error) => {
+                        warn!(
+                            &log,
+                            "failed to read generation of dpd";
+                            "error" => format!("{:#}", error)
+                        );
+                        return json!({
+                            "error":
+                                format!(
+                                    "failed to read generation of dpd: \
+                                    {:#}",
+                                    error
+                                )
+                        });
+                    }
+                }
+            }
+
+            let retention_threshold = Utc::now() - Duration::weeks(2);
+
+            let result = match self
+                .datastore
+                .ipv4_nat_cleanup(opctx, min_gen, retention_threshold)
+                .await
+            {
+                Ok(result) => result,
+                Err(error) => {
+                    warn!(
+                        &log,
+                        "failed to clean up soft-deleted NAT entries";
+                        "error" => format!("{:#}", error)
+                    );
+                    return json!({
+                        "error":
+                            format!(
+                                "failed to clean up soft-deleted NAT entries: \
+                                {:#}",
+                                error
+                            )
+                    });
+                }
+            };
+
+            let rv = serde_json::to_value(&result).unwrap_or_else(|error| {
+                json!({
+                    "error":
+                        format!(
+                            "failed to serialize final value: {:#}",
+                            error
+                        )
+                })
+            });
+
+            rv
+        }
+        .boxed()
+    }
+}
diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs
index 0f52cbd260..abb8c744e1 100644
--- a/nexus/src/app/instance_network.rs
+++ b/nexus/src/app/instance_network.rs
@@ -5,6 +5,10 @@
 //! Routines that manage instance-related networking state.
use crate::app::sagas::retry_until_known_result; +use ipnetwork::IpNetwork; +use ipnetwork::Ipv6Network; +use nexus_db_model::Ipv4NatValues; +use nexus_db_model::Vni as DbVni; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; @@ -12,6 +16,8 @@ use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; +use omicron_common::api::external::Ipv4Net; +use omicron_common::api::external::Ipv6Net; use omicron_common::api::internal::nexus; use omicron_common::api::internal::shared::SwitchLocation; use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost; @@ -330,8 +336,6 @@ impl super::Nexus { )) })?; - let vni: u32 = network_interface.vni.into(); - info!(log, "looking up instance's external IPs"; "instance_id" => %instance_id); @@ -349,6 +353,9 @@ impl super::Nexus { } } + let sled_address = + Ipv6Net(Ipv6Network::new(*sled_ip_address.ip(), 128).unwrap()); + for target_ip in ips .iter() .enumerate() @@ -361,29 +368,58 @@ impl super::Nexus { }) .map(|(_, ip)| ip) { - retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry( - &log, - target_ip.ip, - dpd_client::types::MacAddr { - a: mac_address.into_array(), - }, - *target_ip.first_port, - *target_ip.last_port, - vni, - sled_ip_address.ip(), - ) - .await - }) - .await - .map_err(|e| { - Error::internal_error(&format!( - "failed to ensure dpd entry: {e}" - )) - })?; + // For each external ip, add a nat entry to the database + self.ensure_nat_entry( + target_ip, + sled_address, + &network_interface, + mac_address, + opctx, + ) + .await?; } + // Notify dendrite that there are changes for it to reconcile. + // In the event of a failure to notify dendrite, we'll log an error + // and rely on dendrite's RPW timer to catch it up. + if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { + error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); + }; + + Ok(()) + } + + async fn ensure_nat_entry( + &self, + target_ip: &nexus_db_model::ExternalIp, + sled_address: Ipv6Net, + network_interface: &sled_agent_client::types::NetworkInterface, + mac_address: macaddr::MacAddr6, + opctx: &OpContext, + ) -> Result<(), Error> { + match target_ip.ip { + IpNetwork::V4(v4net) => { + let nat_entry = Ipv4NatValues { + external_address: Ipv4Net(v4net).into(), + first_port: target_ip.first_port, + last_port: target_ip.last_port, + sled_address: sled_address.into(), + vni: DbVni(network_interface.vni.clone().into()), + mac: nexus_db_model::MacAddr( + omicron_common::api::external::MacAddr(mac_address), + ), + }; + self.db_datastore + .ensure_ipv4_nat_entry(opctx, nat_entry) + .await?; + } + IpNetwork::V6(_v6net) => { + // TODO: implement handling of v6 nat. 
+ return Err(Error::InternalError { + internal_message: "ipv6 nat is not yet implemented".into(), + }); + } + }; Ok(()) } @@ -419,55 +455,54 @@ impl super::Nexus { let mut errors = vec![]; for entry in external_ips { - for switch in &boundary_switches { - debug!(log, "deleting instance nat mapping"; - "instance_id" => %instance_id, - "switch" => switch.to_string(), - "entry" => #?entry); - - let client_result = - self.dpd_clients.get(switch).ok_or_else(|| { - Error::internal_error(&format!( - "unable to find dendrite client for {switch}" - )) - }); - - let dpd_client = match client_result { - Ok(client) => client, - Err(new_error) => { - errors.push(new_error); - continue; + // Soft delete the NAT entry + match self + .db_datastore + .ipv4_nat_delete_by_external_ip(&opctx, &entry) + .await + { + Ok(_) => Ok(()), + Err(err) => match err { + Error::ObjectNotFound { .. } => { + warn!(log, "no matching nat entries to soft delete"); + Ok(()) } - }; + _ => { + let message = format!( + "failed to delete nat entry due to error: {err:?}" + ); + error!(log, "{}", message); + Err(Error::internal_error(&message)) + } + }, + }?; + } - let result = retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry_deleted( - log, - entry.ip, - *entry.first_port, - ) - .await - }) - .await; - - if let Err(e) = result { - let e = Error::internal_error(&format!( - "failed to delete nat entry via dpd: {e}" - )); - - error!(log, "error deleting nat mapping: {e:#?}"; - "instance_id" => %instance_id, - "switch" => switch.to_string(), - "entry" => #?entry); - errors.push(e); - } else { - debug!(log, "deleting nat mapping successful"; - "instance_id" => %instance_id, - "switch" => switch.to_string(), - "entry" => #?entry); + for switch in &boundary_switches { + debug!(&self.log, "notifying dendrite of updates"; + "instance_id" => %authz_instance.id(), + "switch" => switch.to_string()); + + let client_result = self.dpd_clients.get(switch).ok_or_else(|| { + Error::internal_error(&format!( + "unable to find dendrite client for {switch}" + )) + }); + + let dpd_client = match client_result { + Ok(client) => client, + Err(new_error) => { + errors.push(new_error); + continue; } - } + }; + + // Notify dendrite that there are changes for it to reconcile. + // In the event of a failure to notify dendrite, we'll log an error + // and rely on dendrite's RPW timer to catch it up. + if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { + error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); + }; } if let Some(e) = errors.into_iter().nth(0) { @@ -496,32 +531,48 @@ impl super::Nexus { let boundary_switches = self.boundary_switches(opctx).await?; for external_ip in external_ips { - for switch in &boundary_switches { - debug!(&self.log, "deleting instance nat mapping"; + match self + .db_datastore + .ipv4_nat_delete_by_external_ip(&opctx, &external_ip) + .await + { + Ok(_) => Ok(()), + Err(err) => match err { + Error::ObjectNotFound { .. 
} => {
+                        warn!(
+                            self.log,
+                            "no matching nat entries to soft delete"
+                        );
+                        Ok(())
+                    }
+                    _ => {
+                        let message = format!(
+                            "failed to delete nat entry due to error: {err:?}"
+                        );
+                        error!(self.log, "{}", message);
+                        Err(Error::internal_error(&message))
+                    }
+                },
+            }?;
+        }
+
+        for switch in &boundary_switches {
+            debug!(&self.log, "notifying dendrite of updates";
                    "instance_id" => %authz_instance.id(),
-                   "switch" => switch.to_string(),
-                   "entry" => #?external_ip);
-
-            let dpd_client =
-                self.dpd_clients.get(switch).ok_or_else(|| {
-                    Error::internal_error(&format!(
-                        "unable to find dendrite client for {switch}"
-                    ))
-                })?;
-
-            dpd_client
-                .ensure_nat_entry_deleted(
-                    &self.log,
-                    external_ip.ip,
-                    *external_ip.first_port,
-                )
-                .await
-                .map_err(|e| {
-                    Error::internal_error(&format!(
-                        "failed to delete nat entry via dpd: {e}"
-                    ))
-                })?;
-        }
+                   "switch" => switch.to_string());
+
+            let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| {
+                Error::internal_error(&format!(
+                    "unable to find dendrite client for {switch}"
+                ))
+            })?;
+
+            // Notify dendrite that there are changes for it to reconcile.
+            // In the event of a failure to notify dendrite, we'll log an error
+            // and rely on dendrite's RPW timer to catch it up.
+            if let Err(e) = dpd_client.ipv4_nat_trigger_update().await {
+                error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e);
+            };
         }
 
         Ok(())
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index ef8132451a..18c9dae841 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -349,6 +349,7 @@ impl Nexus {
             &background_ctx,
             Arc::clone(&db_datastore),
             &config.pkg.background_tasks,
+            &dpd_clients,
             config.deployment.id,
             resolver.clone(),
         );
diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs
index ebb21feb40..9a20911893 100644
--- a/nexus/src/internal_api/http_entrypoints.rs
+++ b/nexus/src/internal_api/http_entrypoints.rs
@@ -24,6 +24,7 @@ use dropshot::RequestContext;
 use dropshot::ResultsPage;
 use dropshot::TypedBody;
 use hyper::Body;
+use nexus_db_model::Ipv4NatEntryView;
 use nexus_types::internal_api::params::SwitchPutRequest;
 use nexus_types::internal_api::params::SwitchPutResponse;
 use nexus_types::internal_api::views::to_list;
@@ -68,6 +69,8 @@
     api.register(saga_list)?;
     api.register(saga_view)?;
 
+    api.register(ipv4_nat_changeset)?;
+
     api.register(bgtask_list)?;
     api.register(bgtask_view)?;
 
@@ -540,3 +543,51 @@
     };
     apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }
+
+// NAT RPW internal APIs
+
+/// Path parameters for NAT ChangeSet
+#[derive(Deserialize, JsonSchema)]
+struct RpwNatPathParam {
+    /// which change number to start generating
+    /// the change set from
+    from_gen: i64,
+}
+
+/// Query parameters for NAT ChangeSet
+#[derive(Deserialize, JsonSchema)]
+struct RpwNatQueryParam {
+    limit: u32,
+}
+
+/// Fetch NAT ChangeSet
+///
+/// Caller provides their generation as `from_gen`, along with a query
+/// parameter for the page size (`limit`). Endpoint will return changes
+/// that have occurred since the caller's generation number up to the latest
+/// change or until the `limit` is reached. If there are no changes, an
+/// empty vec is returned.
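+///
+/// Callers are expected to persist the highest `gen` value they have
+/// applied and supply it as `from_gen` on their next request.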
+#[endpoint {
+    method = GET,
+    path = "/nat/ipv4/changeset/{from_gen}"
+}]
+async fn ipv4_nat_changeset(
+    rqctx: RequestContext<Arc<ServerContext>>,
+    path_params: Path<RpwNatPathParam>,
+    query_params: Query<RpwNatQueryParam>,
+) -> Result<HttpResponseOk<Vec<Ipv4NatEntryView>>, HttpError> {
+    let apictx = rqctx.context();
+    let handler = async {
+        let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
+        let nexus = &apictx.nexus;
+        let path = path_params.into_inner();
+        let query = query_params.into_inner();
+        let mut changeset = nexus
+            .datastore()
+            .ipv4_nat_changeset(&opctx, path.from_gen, query.limit)
+            .await?;
+        changeset.sort_by_key(|e| e.gen);
+        Ok(HttpResponseOk(changeset))
+    };
+    apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml
index 54f7e03eef..fbed9aed8e 100644
--- a/nexus/tests/config.test.toml
+++ b/nexus/tests/config.test.toml
@@ -90,6 +90,7 @@ dns_external.max_concurrent_server_updates = 5
 # certificates it will take _other_ Nexus instances to notice and stop serving
 # them (on a sunny day).
 external_endpoints.period_secs = 60
+nat_cleanup.period_secs = 30
 # How frequently to collect hardware/software inventory from the whole system
 # (even if we don't have reason to believe anything has changed).
 inventory.period_secs = 600
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index f83cf68a8a..fcb285d9eb 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -323,6 +323,57 @@
         }
       }
     },
+    "/nat/ipv4/changeset/{from_gen}": {
+      "get": {
+        "summary": "Fetch NAT ChangeSet",
+        "description": "Caller provides their generation as `from_gen`, along with a query parameter for the page size (`limit`). Endpoint will return changes that have occurred since the caller's generation number up to the latest change or until the `limit` is reached. 
If there are no changes, an empty vec is returned.", + "operationId": "ipv4_nat_changeset", + "parameters": [ + { + "in": "path", + "name": "from_gen", + "description": "which change number to start generating the change set from", + "required": true, + "schema": { + "type": "integer", + "format": "int64" + } + }, + { + "in": "query", + "name": "limit", + "required": true, + "schema": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Ipv4NatEntryView", + "type": "array", + "items": { + "$ref": "#/components/schemas/Ipv4NatEntryView" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/physical-disk": { "put": { "summary": "Report that a physical disk for the specified sled has come online.", @@ -3763,6 +3814,53 @@ } ] }, + "Ipv4NatEntryView": { + "description": "NAT Record", + "type": "object", + "properties": { + "deleted": { + "type": "boolean" + }, + "external_address": { + "type": "string", + "format": "ipv4" + }, + "first_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "gen": { + "type": "integer", + "format": "int64" + }, + "last_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "sled_address": { + "type": "string", + "format": "ipv6" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "deleted", + "external_address", + "first_port", + "gen", + "last_port", + "mac", + "sled_address", + "vni" + ] + }, "Ipv4Network": { "type": "string", "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$" @@ -5335,6 +5433,12 @@ "time_updated" ] }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "ZpoolPutRequest": { "description": "Sent by a sled agent on startup to Nexus to request further instruction", "type": "object", diff --git a/package-manifest.toml b/package-manifest.toml index c65c5b6933..37069bff7d 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -476,8 +476,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "147b03901aa8305b5271e0133a09f628b8140949" -source.sha256 = "14fe7f904f963b50188d6e060106b63df6d061ca64238f7b21623c432b5944e3" +source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4" +source.sha256 = "c00e79f55e0bdf048069b2d18a4d009ddfef46e7e5d846887cf96e843a8884bd" output.type = "zone" output.intermediate_only = true @@ -501,8 +501,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "147b03901aa8305b5271e0133a09f628b8140949" -source.sha256 = "f3aa685e4096f8f6e2ea6c169f391dbb88707abcbf1d2bde29163d81736e8ec6" +source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4" +source.sha256 = "428cce1e9aa399b1b49c04e7fd0bc1cb0e3f3fae6fda96055892a42e010c9d6f" output.type = "zone" output.intermediate_only = true @@ -519,8 +519,8 @@ only_for_targets.image = "standard" # 2. 
Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "147b03901aa8305b5271e0133a09f628b8140949" -source.sha256 = "dece729ce4127216fba48e9cfed90ec2e5a57ee4ca6c4afc5fa770de6ea636bf" +source.commit = "8ff834e7d0a6adb263240edd40537f2c0768f1a4" +source.sha256 = "5dd3534bec5eb4f857d0bf3994b26650288f650d409eec6aaa29860a2f481c37" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/10.0.0/README.md b/schema/crdb/11.0.0/README.md similarity index 100% rename from schema/crdb/10.0.0/README.md rename to schema/crdb/11.0.0/README.md diff --git a/schema/crdb/10.0.0/up01.sql b/schema/crdb/11.0.0/up01.sql similarity index 100% rename from schema/crdb/10.0.0/up01.sql rename to schema/crdb/11.0.0/up01.sql diff --git a/schema/crdb/10.0.0/up02.sql b/schema/crdb/11.0.0/up02.sql similarity index 100% rename from schema/crdb/10.0.0/up02.sql rename to schema/crdb/11.0.0/up02.sql diff --git a/schema/crdb/10.0.0/up03.sql b/schema/crdb/11.0.0/up03.sql similarity index 100% rename from schema/crdb/10.0.0/up03.sql rename to schema/crdb/11.0.0/up03.sql diff --git a/schema/crdb/10.0.0/up04.sql b/schema/crdb/11.0.0/up04.sql similarity index 100% rename from schema/crdb/10.0.0/up04.sql rename to schema/crdb/11.0.0/up04.sql diff --git a/schema/crdb/10.0.0/up05.sql b/schema/crdb/11.0.0/up05.sql similarity index 100% rename from schema/crdb/10.0.0/up05.sql rename to schema/crdb/11.0.0/up05.sql diff --git a/schema/crdb/10.0.0/up06.sql b/schema/crdb/11.0.0/up06.sql similarity index 100% rename from schema/crdb/10.0.0/up06.sql rename to schema/crdb/11.0.0/up06.sql diff --git a/schema/crdb/10.0.0/up07.sql b/schema/crdb/11.0.0/up07.sql similarity index 100% rename from schema/crdb/10.0.0/up07.sql rename to schema/crdb/11.0.0/up07.sql diff --git a/schema/crdb/10.0.0/up08.sql b/schema/crdb/11.0.0/up08.sql similarity index 100% rename from schema/crdb/10.0.0/up08.sql rename to schema/crdb/11.0.0/up08.sql diff --git a/schema/crdb/10.0.0/up09.sql b/schema/crdb/11.0.0/up09.sql similarity index 100% rename from schema/crdb/10.0.0/up09.sql rename to schema/crdb/11.0.0/up09.sql diff --git a/schema/crdb/11.0.0/up1.sql b/schema/crdb/11.0.0/up1.sql new file mode 100644 index 0000000000..a4d31edd71 --- /dev/null +++ b/schema/crdb/11.0.0/up1.sql @@ -0,0 +1 @@ +CREATE SEQUENCE IF NOT EXISTS omicron.public.ipv4_nat_version START 1 INCREMENT 1; diff --git a/schema/crdb/10.0.0/up10.sql b/schema/crdb/11.0.0/up10.sql similarity index 100% rename from schema/crdb/10.0.0/up10.sql rename to schema/crdb/11.0.0/up10.sql diff --git a/schema/crdb/10.0.0/up11.sql b/schema/crdb/11.0.0/up11.sql similarity index 100% rename from schema/crdb/10.0.0/up11.sql rename to schema/crdb/11.0.0/up11.sql diff --git a/schema/crdb/10.0.0/up12.sql b/schema/crdb/11.0.0/up12.sql similarity index 100% rename from schema/crdb/10.0.0/up12.sql rename to schema/crdb/11.0.0/up12.sql diff --git a/schema/crdb/11.0.0/up2.sql b/schema/crdb/11.0.0/up2.sql new file mode 100644 index 0000000000..b92d4c73d3 --- /dev/null +++ b/schema/crdb/11.0.0/up2.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + external_address INET NOT NULL, + first_port INT4 NOT NULL, + last_port INT4 NOT NULL, + sled_address INET NOT NULL, + vni INT4 NOT NULL, + mac INT8 NOT NULL, + version_added INT8 NOT NULL DEFAULT nextval('omicron.public.ipv4_nat_version'), + version_removed INT8, + time_created TIMESTAMPTZ NOT NULL 
DEFAULT now(), + time_deleted TIMESTAMPTZ +); diff --git a/schema/crdb/11.0.0/up3.sql b/schema/crdb/11.0.0/up3.sql new file mode 100644 index 0000000000..1247aad693 --- /dev/null +++ b/schema/crdb/11.0.0/up3.sql @@ -0,0 +1,13 @@ +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_version_added ON omicron.public.ipv4_nat_entry ( + version_added +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + vni, + mac, + time_created, + time_deleted +); diff --git a/schema/crdb/11.0.0/up4.sql b/schema/crdb/11.0.0/up4.sql new file mode 100644 index 0000000000..b9cfe305d2 --- /dev/null +++ b/schema/crdb/11.0.0/up4.sql @@ -0,0 +1,5 @@ +CREATE UNIQUE INDEX IF NOT EXISTS overlapping_ipv4_nat_entry ON omicron.public.ipv4_nat_entry ( + external_address, + first_port, + last_port +) WHERE time_deleted IS NULL; diff --git a/schema/crdb/11.0.0/up5.sql b/schema/crdb/11.0.0/up5.sql new file mode 100644 index 0000000000..dce2211eae --- /dev/null +++ b/schema/crdb/11.0.0/up5.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup ON omicron.public.ipv4_nat_entry (external_address, first_port, last_port, sled_address, vni, mac); diff --git a/schema/crdb/11.0.0/up6.sql b/schema/crdb/11.0.0/up6.sql new file mode 100644 index 0000000000..e4958eb352 --- /dev/null +++ b/schema/crdb/11.0.0/up6.sql @@ -0,0 +1,13 @@ +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_version_removed ON omicron.public.ipv4_nat_entry ( + version_removed +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + vni, + mac, + time_created, + time_deleted +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 875877ee96..a74cabfe6e 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2738,12 +2738,24 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_caboose ( COMMIT; BEGIN; -/*******************************************************************/ +CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( + -- There should only be one row of this table for the whole DB. + -- It's a little goofy, but filter on "singleton = true" before querying + -- or applying updates, and you'll access the singleton row. + -- + -- We also add a constraint on this table to ensure it's not possible to + -- access the version of this table with "singleton = false". + singleton BOOL NOT NULL PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + time_modified TIMESTAMPTZ NOT NULL, + -- Semver representation of the DB version + version STRING(64) NOT NULL, -/* - * Metadata for the schema itself. This version number isn't great, as there's - * nothing to ensure it gets bumped when it should be, but it's a start. - */ + -- (Optional) Semver representation of the DB version to which we're upgrading + target_version STRING(64), + + CHECK (singleton = true) +); -- Per-VMM state. 
CREATE TABLE IF NOT EXISTS omicron.public.vmm ( @@ -2812,6 +2824,62 @@ CREATE TYPE IF NOT EXISTS omicron.public.switch_link_speed AS ENUM ( ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS fec omicron.public.switch_link_fec; ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS speed omicron.public.switch_link_speed; +CREATE SEQUENCE IF NOT EXISTS omicron.public.ipv4_nat_version START 1 INCREMENT 1; + +CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + external_address INET NOT NULL, + first_port INT4 NOT NULL, + last_port INT4 NOT NULL, + sled_address INET NOT NULL, + vni INT4 NOT NULL, + mac INT8 NOT NULL, + version_added INT8 NOT NULL DEFAULT nextval('omicron.public.ipv4_nat_version'), + version_removed INT8, + time_created TIMESTAMPTZ NOT NULL DEFAULT now(), + time_deleted TIMESTAMPTZ +); + +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_version_added ON omicron.public.ipv4_nat_entry ( + version_added +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + vni, + mac, + time_created, + time_deleted +); + +CREATE UNIQUE INDEX IF NOT EXISTS overlapping_ipv4_nat_entry ON omicron.public.ipv4_nat_entry ( + external_address, + first_port, + last_port +) WHERE time_deleted IS NULL; + +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup ON omicron.public.ipv4_nat_entry (external_address, first_port, last_port, sled_address, vni, mac); + +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_version_removed ON omicron.public.ipv4_nat_entry ( + version_removed +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + vni, + mac, + time_created, + time_deleted +); + +/* + * Metadata for the schema itself. This version number isn't great, as there's + * nothing to ensure it gets bumped when it should be, but it's a start. + */ CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( -- There should only be one row of this table for the whole DB. -- It's a little goofy, but filter on "singleton = true" before querying @@ -2838,7 +2906,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '10.0.0', NULL) + ( TRUE, NOW(), NOW(), '11.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index cae1f650c9..94c8f5572e 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -38,6 +38,7 @@ dns_external.max_concurrent_server_updates = 5 # certificates it will take _other_ Nexus instances to notice and stop serving # them (on a sunny day). external_endpoints.period_secs = 60 +nat_cleanup.period_secs = 30 # How frequently to collect hardware/software inventory from the whole system # (even if we don't have reason to believe anything has changed). inventory.period_secs = 600 diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index be8683be54..fcaa6176a8 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -38,6 +38,7 @@ dns_external.max_concurrent_server_updates = 5 # certificates it will take _other_ Nexus instances to notice and stop serving # them (on a sunny day). external_endpoints.period_secs = 60 +nat_cleanup.period_secs = 30 # How frequently to collect hardware/software inventory from the whole system # (even if we don't have reason to believe anything has changed). 
inventory.period_secs = 600 diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index aadf68da1b..ba4b5a5722 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="147b03901aa8305b5271e0133a09f628b8140949" -SHA2="82437c74afd4894aa5b9ea800d5777793e8777fe87471321dd22ad1a1c9c9ef3" +COMMIT="8ff834e7d0a6adb263240edd40537f2c0768f1a4" +SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 81a957323c..619a6bf287 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="14fe7f904f963b50188d6e060106b63df6d061ca64238f7b21623c432b5944e3" -CIDL_SHA256_LINUX_DPD="fff6c7484bbb06aa644e3fe41b200e4f7f8d7f65d067cbecd851c834c15fe2ec" -CIDL_SHA256_LINUX_SWADM="0449383a57468aec3b5a4ad26962cfc9e9a121bd13e777329e8a70767e6d9aae" +CIDL_SHA256_ILLUMOS="c00e79f55e0bdf048069b2d18a4d009ddfef46e7e5d846887cf96e843a8884bd" +CIDL_SHA256_LINUX_DPD="b5d829b4628759ac374106f3c56c29074b29577fd0ff72f61c3b8289fea430fe" +CIDL_SHA256_LINUX_SWADM="afc68828f54dc57b32dc1556fc588baeab12341c30e96cc0fadb49f401b4b48f"
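
For orientation, here is a minimal sketch of how a consumer such as dendrite's reconciliation loop might poll the changeset endpoint added in this change. Only the path shape, the `limit` query parameter, and the `gen`-based paging come from this diff; the `NatEntry` mirror struct, the base URL handling, and the use of `reqwest` are illustrative assumptions rather than dendrite's actual implementation.

use serde::Deserialize;

/// Illustrative mirror of `Ipv4NatEntryView`; field names follow the
/// OpenAPI schema added in this change.
#[derive(Debug, Deserialize)]
struct NatEntry {
    external_address: std::net::Ipv4Addr,
    first_port: u16,
    last_port: u16,
    sled_address: std::net::Ipv6Addr,
    vni: u32,
    mac: String, // assumption: MacAddr renders as a string over the wire
    gen: i64,
    deleted: bool,
}

/// Poll `/nat/ipv4/changeset/{from_gen}` until caught up, returning the
/// highest generation applied.
async fn catch_up(base_url: &str, mut gen: i64) -> reqwest::Result<i64> {
    loop {
        let url = format!("{base_url}/nat/ipv4/changeset/{gen}?limit=100");
        let changes: Vec<NatEntry> =
            reqwest::get(&url).await?.error_for_status()?.json().await?;
        if changes.is_empty() {
            // No changes since `gen`: the caller is up to date.
            return Ok(gen);
        }
        for change in &changes {
            // A real consumer would program or clear the NAT mapping in
            // hardware here, keyed on `change.deleted`.
            println!(
                "{} {}:{}-{} (gen {})",
                if change.deleted { "clear" } else { "apply" },
                change.external_address,
                change.first_port,
                change.last_port,
                change.gen
            );
        }
        // The endpoint sorts results by `gen`, so the last element is the
        // newest change now applied.
        gen = changes.last().unwrap().gen;
    }
}

Because soft-deleted rows are only pruned once every dpd instance reports a generation past them, a consumer that polls like this never misses a deletion it has not yet observed.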