From f23e2a28ab23de9a8db3c5c8b9ea40631a2481af Mon Sep 17 00:00:00 2001 From: Levon Tarver Date: Mon, 31 Jul 2023 22:57:55 +0000 Subject: [PATCH] WIP: Initial DB Model for NAT RPW WIP: add cleanup method, add query for nat_gen sequence WIP: internal API endpoints begin replacing direct nat config calls with rpw --- Cargo.lock | 2 +- common/src/api/external/mod.rs | 1 + nexus/db-model/src/ipv4_nat_entry.rs | 116 ++++++ nexus/db-model/src/lib.rs | 2 + nexus/db-model/src/schema.rs | 27 +- .../src/db/datastore/ipv4_nat_entry.rs | 385 ++++++++++++++++++ nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/src/app/instance.rs | 49 +-- nexus/src/app/sagas/instance_create.rs | 6 + nexus/src/app/sagas/instance_delete.rs | 4 + nexus/src/internal_api/http_entrypoints.rs | 79 ++++ openapi/nexus-internal.json | 144 +++++++ package-manifest.toml | 12 +- schema/crdb/4.0.1/up.sql | 92 +++++ schema/crdb/dbinit.sql | 36 +- tools/dendrite_openapi_version | 4 +- tools/dendrite_stub_checksums | 6 +- 17 files changed, 928 insertions(+), 38 deletions(-) create mode 100644 nexus/db-model/src/ipv4_nat_entry.rs create mode 100644 nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs create mode 100644 schema/crdb/4.0.1/up.sql diff --git a/Cargo.lock b/Cargo.lock index 2fdadbe0ea7..5545b69819a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6568,7 +6568,7 @@ dependencies = [ "indoc 2.0.3", "itertools 0.11.0", "paste", - "strum 0.25.0", + "strum", "unicode-segmentation", "unicode-width", ] diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 06a06ea52a1..4e279081216 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -732,6 +732,7 @@ pub enum ResourceType { UpdateableComponent, UserBuiltin, Zpool, + Ipv4NatEntry, } // IDENTITY METADATA diff --git a/nexus/db-model/src/ipv4_nat_entry.rs b/nexus/db-model/src/ipv4_nat_entry.rs new file mode 100644 index 00000000000..f08bda8a1dc --- /dev/null +++ b/nexus/db-model/src/ipv4_nat_entry.rs @@ -0,0 +1,116 @@ +use std::net::{Ipv4Addr, Ipv6Addr}; + +use super::MacAddr; +use crate::{ + schema::{ipv4_nat_entry, nat_gen}, + SqlU16, SqlU32, Vni, +}; +use chrono::{DateTime, Utc}; +use omicron_common::api::external; +use schemars::JsonSchema; +use serde::Serialize; +use uuid::Uuid; + +// TODO correctness +// If we're not going to store ipv4 and ipv6 +// NAT entries in the same table, and we don't +// need any of the special properties of the IpNetwork +// column type, does it make sense to use a different +// column type? +/// Database representation of an Ipv4 NAT Entry. +#[derive(Insertable, Debug, Clone)] +#[diesel(table_name = ipv4_nat_entry)] +pub struct NewIpv4NatEntry { + pub external_address: ipnetwork::IpNetwork, + pub first_port: SqlU16, + pub last_port: SqlU16, + pub sled_address: ipnetwork::IpNetwork, + pub vni: Vni, + pub mac: MacAddr, +} + +// TODO correctness +// If we're not going to store ipv4 and ipv6 +// NAT entries in the same table, we should probably +// make the types more restrictive to prevent an +// accidental ipv6 entry from being created. +#[derive(Queryable, Debug, Clone, Selectable)] +#[diesel(table_name = ipv4_nat_entry)] +pub struct Ipv4NatEntry { + pub id: Uuid, + pub external_address: ipnetwork::IpNetwork, + pub first_port: SqlU16, + pub last_port: SqlU16, + pub sled_address: ipnetwork::IpNetwork, + pub vni: Vni, + pub mac: MacAddr, + pub gen: SqlU32, + pub time_created: DateTime, + pub time_deleted: Option>, +} + +impl Ipv4NatEntry { + pub fn first_port(&self) -> u16 { + self.first_port.into() + } + + pub fn last_port(&self) -> u16 { + self.last_port.into() + } + + pub fn gen(&self) -> u32 { + self.gen.into() + } +} + +#[derive(Queryable, Debug, Clone, Selectable)] +#[diesel(table_name = nat_gen)] +pub struct Ipv4NatGen { + pub last_value: SqlU32, + pub log_cnt: SqlU32, + pub is_called: bool, +} + +/// NAT Record +#[derive(Clone, Debug, Serialize, JsonSchema)] +pub struct Ipv4NatEntryView { + pub external_address: Ipv4Addr, + pub first_port: u16, + pub last_port: u16, + pub sled_address: Ipv6Addr, + pub vni: external::Vni, + pub mac: external::MacAddr, + pub gen: u32, + pub deleted: bool, +} + +impl From for Ipv4NatEntryView { + fn from(value: Ipv4NatEntry) -> Self { + let external_address = match value.external_address.ip() { + std::net::IpAddr::V4(a) => a, + std::net::IpAddr::V6(_) => unreachable!(), + }; + + let sled_address = match value.sled_address.ip() { + std::net::IpAddr::V4(_) => unreachable!(), + std::net::IpAddr::V6(a) => a, + }; + + Self { + external_address, + first_port: value.first_port(), + last_port: value.last_port(), + sled_address, + vni: value.vni.0, + mac: *value.mac, + gen: value.gen(), + deleted: value.time_deleted.is_some(), + } + } +} + +/// NAT Generation +#[derive(Clone, Debug, Serialize, JsonSchema)] +pub struct Ipv4NatGenView { + pub gen: u32, +} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 334dedad9ff..ddf2e8841bc 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -51,6 +51,7 @@ mod system_update; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. +mod ipv4_nat_entry; pub mod queries; mod rack; mod region; @@ -119,6 +120,7 @@ pub use instance::*; pub use instance_cpu_count::*; pub use instance_state::*; pub use ip_pool::*; +pub use ipv4_nat_entry::*; pub use ipv4net::*; pub use ipv6::*; pub use ipv6net::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 3fde9ee715a..9ef5f3b96eb 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -471,6 +471,31 @@ table! { } } +table! { + ipv4_nat_entry (id) { + id -> Uuid, + external_address -> Inet, + first_port -> Int4, + last_port -> Int4, + sled_address -> Inet, + vni -> Int4, + mac -> Int8, + gen -> Int8, + time_created -> Timestamptz, + time_deleted -> Nullable, + } +} + +// This is the sequence used for the generation number +// in ipv4_nat_entry. +table! { + nat_gen (last_value) { + last_value -> Int8, + log_cnt -> Int8, + is_called -> Bool, + } +} + table! { external_ip (id) { id -> Uuid, @@ -1130,7 +1155,7 @@ table! { /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(4, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(4, 0, 1); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs new file mode 100644 index 00000000000..5c8c83576c6 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs @@ -0,0 +1,385 @@ +use super::DataStore; +use crate::context::OpContext; +use crate::db; +use crate::db::error::public_error_from_diesel_pool; +use crate::db::error::ErrorHandler; +use crate::db::model::{Ipv4NatEntry, NewIpv4NatEntry}; +use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::{DateTime, Utc}; +use diesel::prelude::*; +use nexus_db_model::Ipv4NatEntryView; +use nexus_db_model::SqlU32; +use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; +use omicron_common::api::external::LookupResult; +use uuid::Uuid; + +impl DataStore { + pub async fn ipv4_nat_create( + &self, + opctx: &OpContext, + nat_entry: NewIpv4NatEntry, + ) -> CreateResult { + use db::schema::ipv4_nat_entry::dsl; + + // TODO: What authorization do we need? + // TODO: What additional validation do we need? + // * protect against accidental overlap of port ranges? + + diesel::insert_into(dsl::ipv4_nat_entry) + .values(nat_entry.clone()) + // TODO: correctness + // Should we actually raise an error on conflict and let the + // caller decide whether or not it is expected? + .on_conflict_do_nothing() + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + let result = dsl::ipv4_nat_entry + .filter(dsl::external_address.eq(nat_entry.external_address)) + .filter(dsl::first_port.eq(nat_entry.first_port)) + .filter(dsl::last_port.eq(nat_entry.last_port)) + .select(Ipv4NatEntry::as_select()) + .limit(1) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + Ok(result.first().unwrap().clone()) + } + + pub async fn ipv4_nat_delete( + &self, + opctx: &OpContext, + nat_entry: &Ipv4NatEntry, + ) -> DeleteResult { + use db::schema::ipv4_nat_entry::dsl; + + // TODO: What authorization do we need? + + let now = Utc::now(); + let updated_rows = diesel::update(dsl::ipv4_nat_entry) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(nat_entry.id)) + .filter(dsl::gen.eq(nat_entry.gen)) + .set(dsl::time_deleted.eq(now)) + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + if updated_rows == 0 { + return Err(Error::InvalidRequest { + message: "no matching records".to_string(), + }); + } + Ok(()) + } + + pub async fn ipv4_nat_cancel_delete( + &self, + opctx: &OpContext, + nat_entry: &Ipv4NatEntry, + ) -> DeleteResult { + use db::schema::ipv4_nat_entry::dsl; + + // TODO: What authorization do we need? + + let updated_rows = diesel::update(dsl::ipv4_nat_entry) + .filter(dsl::time_deleted.is_not_null()) + .filter(dsl::id.eq(nat_entry.id)) + .filter(dsl::gen.eq(nat_entry.gen)) + .set(dsl::time_deleted.eq(Option::>::None)) + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + if updated_rows == 0 { + return Err(Error::InvalidRequest { + message: "no matching records".to_string(), + }); + } + Ok(()) + } + + pub async fn ipv4_nat_find_by_id( + &self, + opctx: &OpContext, + id: Uuid, + ) -> LookupResult { + use db::schema::ipv4_nat_entry::dsl; + + // TODO: What authorization do we need? + + let result = dsl::ipv4_nat_entry + .filter(dsl::id.eq(id)) + .select(Ipv4NatEntry::as_select()) + .limit(1) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + if result.is_empty() { + return Err(Error::InvalidRequest { + message: "no matching records".to_string(), + }); + } + + Ok(result.first().unwrap().clone()) + } + + pub async fn ipv4_nat_list_since_gen( + &self, + opctx: &OpContext, + gen: u32, + limit: u32, + ) -> ListResultVec { + use db::schema::ipv4_nat_entry::dsl; + let gen: SqlU32 = gen.into(); + + // TODO: What authorization do we need? + + let list = dsl::ipv4_nat_entry + .filter(dsl::gen.gt(gen)) + .order_by(dsl::gen) + .limit(limit as i64) + .select(Ipv4NatEntry::as_select()) + .load_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + Ok(list) + } + + pub async fn ipv4_nat_changeset( + &self, + opctx: &OpContext, + gen: u32, + limit: u32, + ) -> ListResultVec { + let nat_entries = + self.ipv4_nat_list_since_gen(opctx, gen, limit).await?; + let nat_entries: Vec = + nat_entries.iter().map(|e| e.clone().into()).collect(); + Ok(nat_entries) + } + + pub async fn ipv4_nat_current_gen( + &self, + opctx: &OpContext, + ) -> LookupResult { + use db::schema::nat_gen::dsl; + + // TODO: What authorization do we need? + + let latest: Option = dsl::nat_gen + .select(diesel::dsl::max(dsl::last_value)) + .first_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + match latest { + Some(value) => Ok(*value), + None => Err(Error::InvalidRequest { + message: "sequence table is empty!".to_string(), + }), + } + } + + pub async fn ipv4_nat_cleanup( + &self, + opctx: &OpContext, + before_gen: u32, + before_timestamp: DateTime, + ) -> DeleteResult { + use db::schema::ipv4_nat_entry::dsl; + let gen: SqlU32 = before_gen.into(); + // TODO: What authorization do we need? + + diesel::delete(dsl::ipv4_nat_entry) + .filter(dsl::gen.lt(gen)) + .filter(dsl::time_deleted.lt(before_timestamp)) + .execute_async(self.pool_authorized(opctx).await?) + .await + .map_err(|e| { + public_error_from_diesel_pool(e, ErrorHandler::Server) + })?; + + Ok(()) + } +} + +#[cfg(test)] +mod test { + use std::str::FromStr; + + use crate::db::datastore::datastore_test; + use chrono::Utc; + use nexus_db_model::{MacAddr, NewIpv4NatEntry, Vni}; + use nexus_test_utils::db::test_setup_database; + use omicron_common::api::external; + use omicron_test_utils::dev; + + // Test our ability to track additions and deletions since a given generation number + #[tokio::test] + async fn nat_generation_tracking() { + let logctx = dev::test_setup_log("test_nat_generation_tracking"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // We should not have any NAT entries at this moment + let initial_state = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + assert!(initial_state.is_empty()); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 0); + + // Each change (creation / deletion) to the NAT table should increment the + // generation number of the row in the NAT table + let external_address = + ipnetwork::IpNetwork::try_from("10.0.0.100").unwrap(); + + let sled_address = + ipnetwork::IpNetwork::try_from("fd00:1122:3344:104::1").unwrap(); + + // Add a nat entry. + let nat1 = NewIpv4NatEntry { + external_address, + first_port: 0.into(), + last_port: 999.into(), + sled_address, + vni: Vni(external::Vni::random()), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2A").unwrap(), + ), + }; + + let first_entry = + datastore.ipv4_nat_create(&opctx, nat1).await.unwrap(); + + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone one change. One entry has been added, + // none deleted, so we should be at generation 1. + assert_eq!(nat_entries.len(), 1); + assert_eq!(nat_entries.last().unwrap().gen(), 1); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 1); + + // Add another nat entry. + let nat2 = NewIpv4NatEntry { + external_address, + first_port: 1000.into(), + last_port: 1999.into(), + sled_address, + vni: Vni(external::Vni::random()), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2B").unwrap(), + ), + }; + + datastore.ipv4_nat_create(&opctx, nat2).await.unwrap(); + + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone two changes. Two entries have been + // added, none deleted, so we should be at generation 2. + assert_eq!(nat_entries.len(), 2); + assert_eq!(nat_entries.last().unwrap().gen(), 2); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 2); + + // Delete the first nat entry. It should show up as a later generation number. + datastore.ipv4_nat_delete(&opctx, &first_entry).await.unwrap(); + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone three changes. Two entries have been + // added, one deleted, so we should be at generation 3. Since the + // first entry was marked for deletion (and it was the third change), + // the first entry's generation number should now be 3. + let nat_entry = nat_entries.last().unwrap(); + assert_eq!(nat_entries.len(), 2); + assert_eq!(nat_entry.gen(), 3); + assert_eq!(nat_entry.id, first_entry.id); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 3); + + // Cancel deletion of NAT entry + datastore.ipv4_nat_cancel_delete(&opctx, nat_entry).await.unwrap(); + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + // The NAT table has undergone four changes. + let nat_entry = nat_entries.last().unwrap(); + assert_eq!(nat_entries.len(), 2); + assert_eq!(nat_entry.gen(), 4); + assert_eq!(nat_entry.id, first_entry.id); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 4); + + // Test Cleanup logic + // Cleanup should only perma-delete entries that are older than a + // specified generation number and whose `time_deleted` field is + // older than a specified age. + let time_cutoff = Utc::now(); + datastore.ipv4_nat_cleanup(&opctx, 4, time_cutoff).await.unwrap(); + + // Nothing should have changed (no records currently marked for deletion) + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + assert_eq!(nat_entries.len(), 2); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 4); + + // Soft delete a record + let nat_entry = nat_entries.last().unwrap(); + datastore.ipv4_nat_delete(&opctx, nat_entry).await.unwrap(); + + // Try cleaning up with the old gen and time cutoff values + datastore.ipv4_nat_cleanup(&opctx, 4, time_cutoff).await.unwrap(); + + // Try cleaning up with a greater gen and old time cutoff values + datastore.ipv4_nat_cleanup(&opctx, 6, time_cutoff).await.unwrap(); + + // Try cleaning up with a older gen and newer time cutoff values + datastore.ipv4_nat_cleanup(&opctx, 4, Utc::now()).await.unwrap(); + + // Both records should still exist (soft deleted record is newer than cutoff + // values ) + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + assert_eq!(nat_entries.len(), 2); + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 5); + + // Try cleaning up with a both cutoff values increased + datastore.ipv4_nat_cleanup(&opctx, 6, Utc::now()).await.unwrap(); + + // Soft deleted NAT entry should be removed from the table + let nat_entries = + datastore.ipv4_nat_list_since_gen(&opctx, 0, 10).await.unwrap(); + + assert_eq!(nat_entries.len(), 1); + + // Generation should be unchanged + assert_eq!(datastore.ipv4_nat_current_gen(&opctx).await.unwrap(), 5); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 13fb132abb6..d1a9d3ed26f 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -60,6 +60,7 @@ mod identity_provider; mod image; mod instance; mod ip_pool; +mod ipv4_nat_entry; mod network_interface; mod oximeter; mod physical_disk; diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 1ac63d19f67..65f5a6f59ea 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -11,13 +11,15 @@ use super::MAX_NICS_PER_INSTANCE; use super::MAX_VCPU_PER_INSTANCE; use super::MIN_MEMORY_BYTES_PER_INSTANCE; use crate::app::sagas; -use crate::app::sagas::retry_until_known_result; use crate::cidata::InstanceCiData; use crate::external_api::params; use cancel_safe_futures::prelude::*; use futures::future::Fuse; use futures::{FutureExt, SinkExt, StreamExt}; +use ipnetwork::IpNetwork; use nexus_db_model::IpKind; +use nexus_db_model::NewIpv4NatEntry; +use nexus_db_model::Vni as DbVni; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::authz::ApiResource; @@ -55,6 +57,7 @@ use sled_agent_client::types::InstanceStateRequested; use sled_agent_client::types::SourceNatConfig; use sled_agent_client::Client as SledAgentClient; use std::collections::HashSet; +use std::net::IpAddr; use std::net::SocketAddr; use std::str::FromStr; use std::sync::Arc; @@ -1315,8 +1318,6 @@ impl super::Nexus { )) })?; - let vni: u32 = network_interface.vni.into(); - info!(log, "looking up instance's external IPs"; "instance_id" => %instance_id); @@ -1346,29 +1347,29 @@ impl super::Nexus { }) .map(|(_, ip)| ip) { - retry_until_known_result(log, || async { - dpd_client - .ensure_nat_entry( - &log, - target_ip.ip, - dpd_client::types::MacAddr { - a: mac_address.into_array(), - }, - *target_ip.first_port, - *target_ip.last_port, - vni, - sled_ip_address.ip(), - ) - .await - }) - .await - .map_err(|e| { - Error::internal_error(&format!( - "failed to ensure dpd entry: {e}" - )) - })?; + // TODO - @internet-diglett update NAT TABLE + // TODO - should we create a join table for nat entries on instances? + let nat_entry = NewIpv4NatEntry { + external_address: target_ip.ip, + first_port: target_ip.first_port, + last_port: target_ip.last_port, + sled_address: IpNetwork::new( + IpAddr::V6(*sled_ip_address.ip()), + 128, + ) + .unwrap(), + vni: DbVni(network_interface.vni.clone().into()), + mac: nexus_db_model::MacAddr( + omicron_common::api::external::MacAddr(mac_address), + ), + }; + self.db_datastore.ipv4_nat_create(opctx, nat_entry).await?; } + // TODO - @internet-diglett call trigger on dpd_client + dpd_client.ipv4_nat_trigger_update().await.map_err(|e| { + Error::internal_error(&format!("failed to ensure dpd entry: {e}")) + })?; Ok(()) } diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index d5af080381a..c53715fd8d0 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -438,6 +438,11 @@ async fn sic_remove_network_config( ¶ms.serialized_authn, ); let osagactx = sagactx.user_data(); + // TODO - @internet-diglett update NAT TABLE (separate step) + + // TODO - @internet-diglett call trigger on dpd_client + + // TODO - @internet-diglett REMOVE let dpd_client = osagactx.nexus().dpd_clients.get(&switch).ok_or_else(|| { Error::internal_error(&format!( @@ -469,6 +474,7 @@ async fn sic_remove_network_config( .await }) .await; + // match result { Ok(_) => { diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 88a5823ad05..3922d71e96b 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -165,6 +165,10 @@ async fn sid_delete_network_config( let mut errors: Vec = vec![]; + // TODO - @internet-diglett update NAT TABLE (separate step) + + // TODO - @internet-diglett call trigger on dpd_client + // Here we are attempting to delete every existing NAT entry while deferring // any error handling. If we don't defer error handling, we might end up // bailing out before we've attempted deletion of all entries. diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 6efc0d46e34..e3d66cdac87 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -24,6 +24,8 @@ use dropshot::RequestContext; use dropshot::ResultsPage; use dropshot::TypedBody; use hyper::Body; +use nexus_db_model::Ipv4NatEntryView; +use nexus_db_model::Ipv4NatGenView; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; use nexus_types::internal_api::views::to_list; @@ -65,6 +67,8 @@ pub(crate) fn internal_api() -> NexusApiDescription { api.register(saga_list)?; api.register(saga_view)?; + api.register(ipv4_nat_changeset)?; + api.register(ipv4_nat_gen)?; Ok(()) } @@ -484,3 +488,78 @@ async fn saga_view( }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } + +// NAT RPW internal APIs + +/// Path parameters for NAT ChangeSet +#[derive(Deserialize, JsonSchema)] +struct RpwNatPathParam { + /// which change number to start generating + /// the change set from + from_gen: u32, +} + +/// Query parameters for NAT ChangeSet +#[derive(Deserialize, JsonSchema)] +struct RpwNatQueryParam { + limit: u32, +} + +/// Fetch NAT ChangeSet +/// +/// Caller provides their generation as `from_gen`, along with a query +/// parameter for the page size (`limit`). Endpoint will return changes +/// that have occured since the caller's generation number up to the latest +/// change or until the `limit` is reached. If there are no changes, an +/// empty vec is returned. +#[endpoint { + method = GET, + path = "/rpw/nat/ipv4/changeset/{from_gen}" +}] +async fn ipv4_nat_changeset( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let changeset = nexus + .datastore() + .ipv4_nat_changeset(&opctx, path.from_gen, query.limit) + .await?; + Ok(HttpResponseOk(changeset)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +// TODO: Remove (maybe) +// This endpoint may be more trouble than it's worth. The reason this endpoint +// may be a headache is because the underlying SEQUENCE in the DB will advance, +// even on failed transactions. This is not a big deal, but it will lead to +// callers trying to catch up to entries that don't exist if they check this +// endpoint first. As an alternative, we could just page through the +// changesets until we get nothing back, then revert to periodic polling +// of the changeset endpoint. + +/// Fetch NAT generation +#[endpoint { + method = GET, + path = "/rpw/nat/ipv4/gen" +}] +async fn ipv4_nat_gen( + rqctx: RequestContext>, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_internal_api(&rqctx).await; + let nexus = &apictx.nexus; + let gen = nexus.datastore().ipv4_nat_current_gen(&opctx).await?; + let view = Ipv4NatGenView { gen }; + Ok(HttpResponseOk(view)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 12043d80960..048c692ec59 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -357,6 +357,82 @@ } } }, + "/rpw/nat/ipv4/changeset/{from_gen}": { + "get": { + "summary": "Fetch NAT ChangeSet", + "description": "Caller provides their generation as `from_gen`, along with a query parameter for the page size (`limit`). Endpoint will return changes that have occured since the caller's generation number up to the latest change or until the `limit` is reached. If there are no changes, an empty vec is returned.", + "operationId": "ipv4_nat_changeset", + "parameters": [ + { + "in": "path", + "name": "from_gen", + "description": "which change number to start generating the change set from", + "required": true, + "schema": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "in": "query", + "name": "limit", + "required": true, + "schema": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Ipv4NatEntryView", + "type": "array", + "items": { + "$ref": "#/components/schemas/Ipv4NatEntryView" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/rpw/nat/ipv4/gen": { + "get": { + "summary": "Fetch NAT generation", + "operationId": "ipv4_nat_gen", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/Ipv4NatGenView" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/sagas": { "get": { "summary": "List sagas", @@ -2041,6 +2117,68 @@ } ] }, + "Ipv4NatEntryView": { + "description": "NAT Record", + "type": "object", + "properties": { + "deleted": { + "type": "boolean" + }, + "external_address": { + "type": "string", + "format": "ipv4" + }, + "first_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "gen": { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "last_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "sled_address": { + "type": "string", + "format": "ipv6" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "deleted", + "external_address", + "first_port", + "gen", + "last_port", + "mac", + "sled_address", + "vni" + ] + }, + "Ipv4NatGenView": { + "description": "NAT Generation", + "type": "object", + "properties": { + "gen": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + "required": [ + "gen" + ] + }, "Ipv4Network": { "type": "string", "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$" @@ -3326,6 +3464,12 @@ "minLength": 1, "maxLength": 63 }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0 + }, "ZpoolPutRequest": { "description": "Sent by a sled agent on startup to Nexus to request further instruction", "type": "object", diff --git a/package-manifest.toml b/package-manifest.toml index b2e1552f3cd..e1dca890316 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -451,8 +451,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "2dc34eaac7eb9d320594f3ac125df6a601fe020e0b3c7f16eb0a5ebddc8e18b9" +source.commit = "9529c76ab8cde2cd50a7afba0d7e6bb6071f5dd9" +source.sha256 = "80fe3a3d9bf406d0072e9f73997fc28ae973938cb19e335d64d9e5055659af99" output.type = "zone" output.intermediate_only = true @@ -476,8 +476,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "1616eb25ab3d3a8b678b6cf3675af7ba61d455c3e6c2ba2a2d35a663861bc8e8" +source.commit = "9529c76ab8cde2cd50a7afba0d7e6bb6071f5dd9" +source.sha256 = "e2979b01dba442ed3802b303bdd8e7a45cae807e81fff256ad39de852cec9c29" output.type = "zone" output.intermediate_only = true @@ -494,8 +494,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "363e365135cfa46d7f7558d8670f35aa8fe412e9" -source.sha256 = "a045e6dbb84dbceaf3a8a7dc33d283449fbeaf081442d0ae14ce8d8ffcdda4e9" +source.commit = "9529c76ab8cde2cd50a7afba0d7e6bb6071f5dd9" +source.sha256 = "a6dbf2a58bf560af6f1ca7b9cf6726ff445a091a2b5cee25d5e2742a97d40921" output.type = "zone" output.intermediate_only = true diff --git a/schema/crdb/4.0.1/up.sql b/schema/crdb/4.0.1/up.sql new file mode 100644 index 00000000000..e0b20f0f202 --- /dev/null +++ b/schema/crdb/4.0.1/up.sql @@ -0,0 +1,92 @@ +-- CRDB documentation recommends the following: +-- "Execute schema changes either as single statements (as an implicit transaction), +-- or in an explicit transaction consisting of the single schema change statement." +-- +-- For each schema change, we transactionally: +-- 1. Check the current version +-- 2. Apply the idempotent update + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +CREATE SEQUENCE IF NOT EXISTS omicron.public.nat_gen START 1 INCREMENT 1; +COMMIT; + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + external_address INET NOT NULL, + first_port INT4 NOT NULL, + last_port INT4 NOT NULL, + sled_address INET NOT NULL, + vni INT4 NOT NULL, + mac INT8 NOT NULL, + gen INT8 NOT NULL DEFAULT nextval('omicron.public.nat_gen') ON UPDATE nextval('omicron.public.nat_gen'), + time_created TIMESTAMPTZ NOT NULL DEFAULT now(), + time_deleted TIMESTAMPTZ +); +COMMIT; + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_address_and_ports ON omicron.public.ipv4_nat_entry ( + external_address, first_port, last_port +); +COMMIT; + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '4.0.1' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_gen ON omicron.public.ipv4_nat_entry ( + gen +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + vni, + mac, + time_created, + time_deleted +); +COMMIT; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 4b38b7dfe48..a1afcae7e20 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2562,7 +2562,41 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '4.0.0', NULL) + ( TRUE, NOW(), NOW(), '4.0.1', NULL) ON CONFLICT DO NOTHING; + +CREATE SEQUENCE IF NOT EXISTS omicron.public.nat_gen START 1 INCREMENT 1; + +CREATE TABLE IF NOT EXISTS omicron.public.ipv4_nat_entry ( + id UUID PRIMARY KEY DEFAULT gen_random_uuid(), + external_address INET NOT NULL, + first_port INT4 NOT NULL, + last_port INT4 NOT NULL, + sled_address INET NOT NULL, + vni INT4 NOT NULL, + mac INT8 NOT NULL, + gen INT8 NOT NULL DEFAULT nextval('omicron.public.nat_gen') ON UPDATE nextval('omicron.public.nat_gen'), + time_created TIMESTAMPTZ NOT NULL DEFAULT now(), + time_deleted TIMESTAMPTZ +); + +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_address_and_ports ON omicron.public.ipv4_nat_entry ( + external_address, first_port, last_port +); + +CREATE UNIQUE INDEX IF NOT EXISTS ipv4_nat_gen ON omicron.public.ipv4_nat_entry ( + gen +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + vni, + mac, + time_created, + time_deleted +); + COMMIT; diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index cbdbca76622..24e68ba05f7 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="363e365135cfa46d7f7558d8670f35aa8fe412e9" -SHA2="4da5edf1571a550a90aa8679a25c1535d2b02154dfb6034f170e421c2633bc31" +COMMIT="9529c76ab8cde2cd50a7afba0d7e6bb6071f5dd9" +SHA2="53458bec594a3b653d5c8a3876b84e39c1aa7ee38b972bdad1c33d98807ebd15" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index acff400104e..c0737185983 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="2dc34eaac7eb9d320594f3ac125df6a601fe020e0b3c7f16eb0a5ebddc8e18b9" -CIDL_SHA256_LINUX_DPD="5a976d1e43031f4790d1cd2f42d226b47c1be9c998917666f21cfaa3a7b13939" -CIDL_SHA256_LINUX_SWADM="38680e69364ffbfc43fea524786580d151ff45ce2f1802bd5179599f7c80e5f8" +CIDL_SHA256_ILLUMOS="80fe3a3d9bf406d0072e9f73997fc28ae973938cb19e335d64d9e5055659af99" +CIDL_SHA256_LINUX_DPD="8598e1169e4038cc44df3464c1022239ce94875199c1c8cb5d7a1fea4569d145" +CIDL_SHA256_LINUX_SWADM="5b8d6ebd8773ea4661419bd0300108bd5b6cd26aa0d15c328d1dcf4426aefa9d"