diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index a2957d35b4..e7990dafe5 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -231,12 +231,10 @@ first = \"$SERVICE_IP_POOL_START\" /^last/c\\ last = \"$SERVICE_IP_POOL_END\" } - /^\\[rack_network_config/,/^$/ { - /^infra_ip_first/c\\ + /^infra_ip_first/c\\ infra_ip_first = \"$UPLINK_IP\" - /^infra_ip_last/c\\ + /^infra_ip_last/c\\ infra_ip_last = \"$UPLINK_IP\" - } /^\\[\\[rack_network_config.ports/,/^\$/ { /^routes/c\\ routes = \\[{nexthop = \"$GATEWAY_IP\", destination = \"0.0.0.0/0\"}\\] @@ -335,6 +333,18 @@ while [[ $(pfexec svcs -z $(zoneadm list -n | grep oxz_ntp) \ done echo "Waited for chrony: ${retry}s" +# Wait for at least one nexus zone to become available +retry=0 +until zoneadm list | grep nexus; do + if [[ $retry -gt 300 ]]; then + echo "Failed to start at least one nexus zone after 300 seconds" + exit 1 + fi + sleep 1 + retry=$((retry + 1)) +done +echo "Waited for nexus: ${retry}s" + export RUST_BACKTRACE=1 export E2E_TLS_CERT IPPOOL_START IPPOOL_END eval "$(./tests/bootstrap)" diff --git a/clients/mg-admin-client/src/lib.rs b/clients/mg-admin-client/src/lib.rs index bb1d925c73..b246cddc26 100644 --- a/clients/mg-admin-client/src/lib.rs +++ b/clients/mg-admin-client/src/lib.rs @@ -17,11 +17,13 @@ mod inner { } pub use inner::types; +use inner::types::Prefix4; pub use inner::Error; use inner::Client as InnerClient; use omicron_common::api::external::BgpPeerState; use slog::Logger; +use std::hash::Hash; use std::net::Ipv6Addr; use std::net::SocketAddr; use thiserror::Error; @@ -81,3 +83,18 @@ impl Client { Ok(Self { inner, log }) } } + +impl Eq for Prefix4 {} + +impl PartialEq for Prefix4 { + fn eq(&self, other: &Self) -> bool { + self.value == other.value && self.length == other.length + } +} + +impl Hash for Prefix4 { + fn hash(&self, state: &mut H) { + self.value.hash(state); + self.length.hash(state); + } +} diff --git 
a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index ef8cf1631e..0f0aff1789 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -92,6 +92,10 @@ task: "service_zone_nat_tracker" ensures service zone nat records are recorded in NAT RPW table +task: "switch_port_config_manager" + manages switch port settings for rack switches + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -182,6 +186,10 @@ task: "service_zone_nat_tracker" ensures service zone nat records are recorded in NAT RPW table +task: "switch_port_config_manager" + manages switch port settings for rack switches + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. @@ -259,6 +267,10 @@ task: "service_zone_nat_tracker" ensures service zone nat records are recorded in NAT RPW table +task: "switch_port_config_manager" + manages switch port settings for rack switches + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. 
diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 2da6e4dceb..dcdd3b3e26 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -299,6 +299,10 @@ task: "service_zone_nat_tracker" ensures service zone nat records are recorded in NAT RPW table +task: "switch_port_config_manager" + manages switch port settings for rack switches + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -368,7 +372,7 @@ task: "nat_v4_garbage_collector" currently executing: no last completed activation: iter 2, triggered by an explicit signal started at (s ago) and ran for ms -warning: unknown background task: "nat_v4_garbage_collector" (don't know how to interpret details: Null) + last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } task: "blueprint_loader" configured period: every 1m 40s @@ -389,7 +393,7 @@ task: "bfd_manager" currently executing: no last completed activation: iter 2, triggered by an explicit signal started at (s ago) and ran for ms -warning: unknown background task: "bfd_manager" (don't know how to interpret details: Object {}) + last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } task: "external_endpoints" configured period: every 1m @@ -440,6 +444,13 @@ task: "service_zone_nat_tracker" started at (s ago) and ran for ms last completion reported error: inventory collection is None +task: "switch_port_config_manager" + configured period: every 30s + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms +warning: unknown background task: 
"switch_port_config_manager" (don't know how to interpret details: Object {}) + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 0c40326001..8b94d0154b 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -367,6 +367,8 @@ pub struct BackgroundTaskConfig { pub sync_service_zone_nat: SyncServiceZoneNatConfig, /// configuration for the bfd manager task pub bfd_manager: BfdManagerConfig, + /// configuration for the switch port settings manager task + pub switch_port_settings_manager: SwitchPortSettingsManagerConfig, /// configuration for region replacement task pub region_replacement: RegionReplacementConfig, } @@ -427,6 +429,15 @@ pub struct SyncServiceZoneNatConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct SwitchPortSettingsManagerConfig { + /// Interval (in seconds) for periodic activations of this background task. + /// This task is also activated on-demand when any of the switch port settings + /// api endpoints are called. 
+ #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct InventoryConfig { @@ -713,6 +724,7 @@ mod test { blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 + switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 [default_region_allocation_strategy] type = "random" @@ -828,6 +840,10 @@ mod test { sync_service_zone_nat: SyncServiceZoneNatConfig { period_secs: Duration::from_secs(30) }, + switch_port_settings_manager: + SwitchPortSettingsManagerConfig { + period_secs: Duration::from_secs(30), + }, region_replacement: RegionReplacementConfig { period_secs: Duration::from_secs(30), }, @@ -893,6 +909,7 @@ mod test { blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 + switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 [default_region_allocation_strategy] type = "random" diff --git a/nexus/db-model/src/address_lot.rs b/nexus/db-model/src/address_lot.rs index 4fef2466e6..9c6065ddc8 100644 --- a/nexus/db-model/src/address_lot.rs +++ b/nexus/db-model/src/address_lot.rs @@ -11,8 +11,10 @@ use omicron_common::api::external; use serde::{Deserialize, Serialize}; use uuid::Uuid; +pub const INFRA_LOT: &str = "initial-infra"; + impl_enum_type!( - #[derive(SqlType, Debug, Clone, Copy)] + #[derive(SqlType, Debug, Clone, Copy, QueryId)] #[diesel(postgres_type(name = "address_lot_kind", schema = "public"))] pub struct AddressLotKindEnum; @@ -24,7 +26,7 @@ impl_enum_type!( FromSqlRow, PartialEq, Serialize, - Deserialize + Deserialize, )] #[diesel(sql_type = AddressLotKindEnum)] pub enum AddressLotKind; diff --git a/nexus/db-model/src/bootstore.rs b/nexus/db-model/src/bootstore.rs index 38afd37f54..1d7ac82068 100644 --- a/nexus/db-model/src/bootstore.rs +++ b/nexus/db-model/src/bootstore.rs @@ -1,4 +1,5 @@ -use 
crate::schema::bootstore_keys; +use crate::schema::{bootstore_config, bootstore_keys}; +use chrono::{DateTime, Utc}; use serde::{Deserialize, Serialize}; pub const NETWORK_KEY: &str = "network_key"; @@ -11,3 +12,18 @@ pub struct BootstoreKeys { pub key: String, pub generation: i64, } + +/// BootstoreConfig is a key-value store for bootstrapping data. +/// We serialize the data as json because it is inherently polymorphic and it +/// is not intended to be queried directly. +#[derive( + Queryable, Insertable, Selectable, Clone, Debug, Serialize, Deserialize, +)] +#[diesel(table_name = bootstore_config)] +pub struct BootstoreConfig { + pub key: String, + pub generation: i64, + pub data: serde_json::Value, + pub time_created: DateTime, + pub time_deleted: Option>, +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 2c53dc26a7..771d6836dc 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(44, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(45, 0, 0); table! { disk (id) { @@ -1529,6 +1529,16 @@ table! { } } +table! { + bootstore_config (key, generation) { + key -> Text, + generation -> Int8, + data -> Jsonb, + time_created -> Timestamptz, + time_deleted -> Nullable, + } +} + table! 
{ bfd_session (remote, switch) { id -> Uuid, diff --git a/nexus/db-model/src/unsigned.rs b/nexus/db-model/src/unsigned.rs index 920cad1cff..5c76168ceb 100644 --- a/nexus/db-model/src/unsigned.rs +++ b/nexus/db-model/src/unsigned.rs @@ -130,6 +130,7 @@ where FromSqlRow, Serialize, Deserialize, + QueryId, )] #[diesel(sql_type = sql_types::BigInt)] #[repr(transparent)] diff --git a/nexus/db-queries/src/db/datastore/address_lot.rs b/nexus/db-queries/src/db/datastore/address_lot.rs index 5c2ffbf1d0..9c75c6fd1b 100644 --- a/nexus/db-queries/src/db/datastore/address_lot.rs +++ b/nexus/db-queries/src/db/datastore/address_lot.rs @@ -53,14 +53,35 @@ impl DataStore { .transaction(&conn, |conn| async move { let lot = AddressLot::new(¶ms.identity, params.kind.into()); - let db_lot: AddressLot = - diesel::insert_into(lot_dsl::address_lot) - .values(lot) - .returning(AddressLot::as_returning()) - .get_result_async(&conn) - .await?; + // @internet-diglett says: + // I hate this. I know how to replace this transaction with + // CTEs but for the life of me I can't get it to work in + // diesel. I gave up and just extended the logic inside + // of the transaction instead chasing diesel trait bound errors. + let found_lot: Option = lot_dsl::address_lot + .filter( + lot_dsl::name + .eq(Name::from(params.identity.name.clone())), + ) + .filter(lot_dsl::time_deleted.is_null()) + .select(AddressLot::as_select()) + .limit(1) + .first_async(&conn) + .await + .ok(); + + let db_lot = match found_lot { + Some(v) => v, + None => { + diesel::insert_into(lot_dsl::address_lot) + .values(lot) + .returning(AddressLot::as_returning()) + .get_result_async(&conn) + .await? 
+ } + }; - let blocks: Vec = params + let desired_blocks: Vec = params .blocks .iter() .map(|b| { @@ -72,14 +93,51 @@ impl DataStore { }) .collect(); - let db_blocks = - diesel::insert_into(block_dsl::address_lot_block) - .values(blocks) - .returning(AddressLotBlock::as_returning()) + let found_blocks: Vec = + block_dsl::address_lot_block + .filter(block_dsl::address_lot_id.eq(db_lot.id())) + .filter( + block_dsl::first_address.eq_any( + desired_blocks + .iter() + .map(|b| b.first_address) + .collect::>(), + ), + ) + .filter( + block_dsl::last_address.eq_any( + desired_blocks + .iter() + .map(|b| b.last_address) + .collect::>(), + ), + ) .get_results_async(&conn) .await?; - Ok(AddressLotCreateResult { lot: db_lot, blocks: db_blocks }) + let mut blocks = vec![]; + + // If the block is found in the database, use the found block. + // If the block is not found in the database, insert it. + for desired_block in desired_blocks { + let block = match found_blocks.iter().find(|db_b| { + db_b.first_address == desired_block.first_address + && db_b.last_address == desired_block.last_address + }) { + Some(block) => block.clone(), + None => { + diesel::insert_into(block_dsl::address_lot_block) + .values(desired_block) + .returning(AddressLotBlock::as_returning()) + .get_results_async(&conn) + .await?[0] + .clone() + } + }; + blocks.push(block); + } + + Ok(AddressLotCreateResult { lot: db_lot, blocks }) }) .await .map_err(|e| { @@ -87,7 +145,7 @@ impl DataStore { e, ErrorHandler::Conflict( ResourceType::AddressLot, - ¶ms.identity.name.as_str(), + params.identity.name.as_str(), ), ) }) @@ -225,6 +283,35 @@ impl DataStore { Ok(address_lot_id) } + + // Take the name of an address lot and look up its blocks + pub async fn address_lot_blocks_by_name( + &self, + opctx: &OpContext, + name: String, + ) -> LookupResult> { + let conn = self.pool_connection_authorized(opctx).await?; + + use db::schema::address_lot::dsl as lot_dsl; + use db::schema::address_lot_block::dsl as block_dsl; + + 
let address_lot_id = lot_dsl::address_lot + .filter(lot_dsl::name.eq(name)) + .select(lot_dsl::id) + .limit(1) + .first_async::(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + let blocks = block_dsl::address_lot_block + .filter(block_dsl::address_lot_id.eq(address_lot_id)) + .select(AddressLotBlock::as_select()) + .load_async::(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(blocks) + } } #[derive(Debug)] diff --git a/nexus/db-queries/src/db/datastore/bgp.rs b/nexus/db-queries/src/db/datastore/bgp.rs index e5ac35d19a..eb8285e4dc 100644 --- a/nexus/db-queries/src/db/datastore/bgp.rs +++ b/nexus/db-queries/src/db/datastore/bgp.rs @@ -14,6 +14,7 @@ use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; +use ipnetwork::IpNetwork; use nexus_types::external_api::params; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -34,10 +35,13 @@ impl DataStore { use db::schema::{ bgp_announce_set, bgp_announce_set::dsl as announce_set_dsl, }; + use diesel::sql_types; + use diesel::IntoSql; + let conn = self.pool_connection_authorized(opctx).await?; self.transaction_retry_wrapper("bgp_config_set") .transaction(&conn, |conn| async move { - let id: Uuid = match &config.bgp_announce_set_id { + let announce_set_id: Uuid = match &config.bgp_announce_set_id { NameOrId::Name(name) => { announce_set_dsl::bgp_announce_set .filter(bgp_announce_set::time_deleted.is_null()) @@ -50,14 +54,58 @@ impl DataStore { NameOrId::Id(id) => *id, }; - let config = BgpConfig::from_config_create(config, id); - - let result = diesel::insert_into(dsl::bgp_config) - .values(config.clone()) - .returning(BgpConfig::as_returning()) - .get_result_async(&conn) + let config = + BgpConfig::from_config_create(config, announce_set_id); + + let matching_entry_subquery = 
dsl::bgp_config + .filter(dsl::name.eq(Name::from(config.name().clone()))) + .filter(dsl::time_deleted.is_null()) + .select(dsl::name); + + // SELECT exactly the values we're trying to INSERT, but only + // if it does not already exist. + let new_entry_subquery = diesel::dsl::select(( + config.id().into_sql::(), + config.name().to_string().into_sql::(), + config + .description() + .to_string() + .into_sql::(), + config.asn.into_sql::(), + config.bgp_announce_set_id.into_sql::(), + config + .vrf + .clone() + .into_sql::>(), + Utc::now().into_sql::(), + Utc::now().into_sql::(), + )) + .filter(diesel::dsl::not(diesel::dsl::exists( + matching_entry_subquery, + ))); + + diesel::insert_into(dsl::bgp_config) + .values(new_entry_subquery) + .into_columns(( + dsl::id, + dsl::name, + dsl::description, + dsl::asn, + dsl::bgp_announce_set_id, + dsl::vrf, + dsl::time_created, + dsl::time_modified, + )) + .execute_async(&conn) .await?; - Ok(result) + + dsl::bgp_config + .filter(dsl::name.eq(Name::from(config.name().clone()))) + .filter(dsl::time_deleted.is_null()) + .select(BgpConfig::as_select()) + .limit(1) + .first_async(&conn) + .await }) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) @@ -277,12 +325,29 @@ impl DataStore { .transaction(&conn, |conn| async move { let bas: BgpAnnounceSet = announce.clone().into(); - let db_as: BgpAnnounceSet = - diesel::insert_into(announce_set_dsl::bgp_announce_set) - .values(bas.clone()) - .returning(BgpAnnounceSet::as_returning()) - .get_result_async::(&conn) - .await?; + let found_as: Option = + announce_set_dsl::bgp_announce_set + .filter( + announce_set_dsl::name + .eq(Name::from(bas.name().clone())), + ) + .filter(announce_set_dsl::time_deleted.is_null()) + .select(BgpAnnounceSet::as_select()) + .limit(1) + .first_async(&conn) + .await + .ok(); + + let db_as = match found_as { + Some(v) => v, + None => { + diesel::insert_into(announce_set_dsl::bgp_announce_set) + .values(bas.clone()) + 
.returning(BgpAnnounceSet::as_returning()) + .get_result_async::(&conn) + .await? + } + }; let mut db_annoucements = Vec::new(); for a in &announce.announcement { @@ -291,13 +356,36 @@ impl DataStore { address_lot_block_id: bas.identity.id, network: a.network.into(), }; - let an = diesel::insert_into( - bgp_announcement_dsl::bgp_announcement, - ) - .values(an.clone()) - .returning(BgpAnnouncement::as_returning()) - .get_result_async::(&conn) - .await?; + + let found_an: Option = + bgp_announcement_dsl::bgp_announcement + .filter( + bgp_announcement_dsl::announce_set_id + .eq(db_as.id()), + ) + .filter( + bgp_announcement_dsl::network + .eq(IpNetwork::from(a.network)), + ) + .select(BgpAnnouncement::as_select()) + .limit(1) + .first_async(&conn) + .await + .ok(); + + let an = match found_an { + Some(v) => v, + None => { + diesel::insert_into( + bgp_announcement_dsl::bgp_announcement, + ) + .values(an.clone()) + .returning(BgpAnnouncement::as_returning()) + .get_result_async::(&conn) + .await? 
+ } + }; + db_annoucements.push(an); } diff --git a/nexus/db-queries/src/db/datastore/bootstore.rs b/nexus/db-queries/src/db/datastore/bootstore.rs index 44f7a2036e..9384a1e13f 100644 --- a/nexus/db-queries/src/db/datastore/bootstore.rs +++ b/nexus/db-queries/src/db/datastore/bootstore.rs @@ -3,10 +3,11 @@ use crate::context::OpContext; use crate::db; use crate::db::error::{public_error_from_diesel, ErrorHandler}; use async_bb8_diesel::AsyncRunQueryDsl; +use diesel::prelude::*; use diesel::ExpressionMethods; use diesel::SelectableHelper; -use nexus_db_model::BootstoreKeys; -use omicron_common::api::external::LookupResult; +use nexus_db_model::{BootstoreConfig, BootstoreKeys}; +use omicron_common::api::external::{CreateResult, LookupResult}; impl DataStore { pub async fn bump_bootstore_generation( @@ -34,4 +35,49 @@ impl DataStore { Ok(bks.generation) } + + pub async fn ensure_bootstore_config( + &self, + opctx: &OpContext, + config: BootstoreConfig, + ) -> CreateResult<()> { + use db::schema::bootstore_config::dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + + diesel::insert_into(dsl::bootstore_config) + .values(config) + .on_conflict((dsl::key, dsl::generation)) + .do_nothing() + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + Ok(()) + } + + pub async fn get_latest_bootstore_config( + &self, + opctx: &OpContext, + key: String, + ) -> LookupResult> { + use db::schema::bootstore_config::dsl; + + let conn = self.pool_connection_authorized(opctx).await?; + + let result = dsl::bootstore_config + .filter(dsl::key.eq(key)) + .select(BootstoreConfig::as_select()) + .order(dsl::generation.desc()) + .limit(1) + .load_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + if let Some(nat_entry) = result.first() { + Ok(Some(nat_entry.clone())) + } else { + Ok(None) + } + } } diff --git a/nexus/db-queries/src/db/datastore/rack.rs 
b/nexus/db-queries/src/db/datastore/rack.rs index e290343978..c86c002c33 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -185,6 +185,21 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + pub async fn rack_list_initialized( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::rack::dsl; + paginated(dsl::rack, dsl::id, pagparams) + .select(Rack::as_select()) + .filter(dsl::initialized.eq(true)) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + /// Stores a new rack in the database. /// /// This function is a no-op if the rack already exists. diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index ac9d894050..2e946a9c38 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -5,7 +5,7 @@ [console] # Directory for static assets. Absolute path or relative to CWD. static_dir = "out/console-assets" -session_idle_timeout_minutes = 480 # 6 hours +session_idle_timeout_minutes = 480 # 6 hours session_absolute_timeout_minutes = 1440 # 24 hours # List of authentication schemes to support. @@ -35,7 +35,7 @@ rack_id = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc" # Nexus may need to resolve external hosts (e.g. to grab IdP metadata). # These are the DNS servers it should use. 
-external_dns_servers = [ "1.1.1.1", "9.9.9.9" ] +external_dns_servers = ["1.1.1.1", "9.9.9.9"] [deployment.dropshot_external] # IP Address and TCP port on which to listen for the external API @@ -109,6 +109,7 @@ phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 +switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 [default_region_allocation_strategy] diff --git a/nexus/src/app/background/bfd.rs b/nexus/src/app/background/bfd.rs index a586db54e4..ca23fb7f65 100644 --- a/nexus/src/app/background/bfd.rs +++ b/nexus/src/app/background/bfd.rs @@ -5,16 +5,21 @@ //! Background task for managing switch bidirectional forwarding detection //! (BFD) sessions. +use crate::app::{ + background::networking::build_mgd_clients, map_switch_zone_addrs, +}; + use super::common::BackgroundTask; use futures::future::BoxFuture; use futures::FutureExt; +use internal_dns::{resolver::Resolver, ServiceName}; use mg_admin_client::types::{BfdPeerConfig, SessionMode}; use nexus_db_model::{BfdMode, BfdSession}; use nexus_db_queries::{context::OpContext, db::DataStore}; use omicron_common::api::external::{DataPageParams, SwitchLocation}; use serde_json::json; use std::{ - collections::{HashMap, HashSet}, + collections::HashSet, hash::Hash, net::{IpAddr, Ipv4Addr}, sync::Arc, @@ -22,15 +27,12 @@ use std::{ pub struct BfdManager { datastore: Arc, - mgd_clients: HashMap>, + resolver: Resolver, } impl BfdManager { - pub fn new( - datastore: Arc, - mgd_clients: HashMap>, - ) -> Self { - Self { datastore, mgd_clients } + pub fn new(datastore: Arc, resolver: Resolver) -> Self { + Self { datastore, resolver } } } @@ -114,7 +116,30 @@ impl BackgroundTask for BfdManager { let mut current: HashSet = HashSet::new(); - for (location, c) in &self.mgd_clients { + let switch_zone_addresses = match self + .resolver + .lookup_all_ipv6(ServiceName::Dendrite) + .await + { + Ok(addrs) => addrs, + 
Err(e) => { + error!(log, "failed to resolve addresses for Dendrite services"; "error" => %e); + return json!({ + "error": + format!( + "failed to resolve addresses for Dendrite services: {:#}", + e + ) + }); + }, + }; + + let mappings = + map_switch_zone_addrs(log, switch_zone_addresses).await; + + let mgd_clients = build_mgd_clients(mappings, log); + + for (location, c) in &mgd_clients { let client_current = match c.inner.get_bfd_peers().await { Ok(x) => x.into_inner(), Err(e) => { @@ -159,7 +184,7 @@ impl BackgroundTask for BfdManager { } for x in &to_add { - let mg = match self.mgd_clients.get(&x.switch) { + let mg = match mgd_clients.get(&x.switch) { Some(mg) => mg, None => { error!(&log, "failed to get mg client"; @@ -190,7 +215,7 @@ impl BackgroundTask for BfdManager { } for x in &to_del { - let mg = match self.mgd_clients.get(&x.switch) { + let mg = match mgd_clients.get(&x.switch) { Some(mg) => mg, None => { error!(&log, "failed to get mg client"; diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index a213f7da72..e3f2154046 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -17,15 +17,14 @@ use super::nat_cleanup; use super::phantom_disks; use super::region_replacement; use super::sync_service_zone_nat::ServiceZoneNatTracker; +use super::sync_switch_configuration::SwitchPortSettingsManager; use crate::app::sagas::SagaRequest; use nexus_config::BackgroundTaskConfig; use nexus_config::DnsTasksConfig; use nexus_db_model::DnsGroup; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; -use omicron_common::api::internal::shared::SwitchLocation; use std::collections::BTreeMap; -use std::collections::HashMap; use std::sync::Arc; use tokio::sync::mpsc::Sender; use uuid::Uuid; @@ -76,6 +75,9 @@ pub struct BackgroundTasks { /// task handle for the service zone nat tracker pub task_service_zone_nat_tracker: common::TaskHandle, + /// task handle for the switch port settings 
manager + pub task_switch_port_settings_manager: common::TaskHandle, + /// task handle for the task that detects if regions need replacement and /// begins the process pub task_region_replacement: common::TaskHandle, @@ -88,8 +90,6 @@ impl BackgroundTasks { opctx: &OpContext, datastore: Arc, config: &BackgroundTaskConfig, - dpd_clients: &HashMap>, - mgd_clients: &HashMap>, nexus_id: Uuid, resolver: internal_dns::resolver::Resolver, saga_request: Sender, @@ -134,8 +134,6 @@ impl BackgroundTasks { (task, watcher_channel) }; - let dpd_clients: Vec<_> = dpd_clients.values().cloned().collect(); - let nat_cleanup = { driver.register( "nat_v4_garbage_collector".to_string(), @@ -146,7 +144,7 @@ impl BackgroundTasks { config.nat_cleanup.period_secs, Box::new(nat_cleanup::Ipv4NatGarbageCollector::new( datastore.clone(), - dpd_clients.clone(), + resolver.clone() )), opctx.child(BTreeMap::new()), vec![], @@ -163,7 +161,7 @@ impl BackgroundTasks { config.bfd_manager.period_secs, Box::new(bfd::BfdManager::new( datastore.clone(), - mgd_clients.clone(), + resolver.clone(), )), opctx.child(BTreeMap::new()), vec![], @@ -227,7 +225,7 @@ impl BackgroundTasks { let task_inventory_collection = { let collector = inventory_collection::InventoryCollector::new( datastore.clone(), - resolver, + resolver.clone(), &nexus_id.to_string(), config.inventory.nkeep, config.inventory.disable, @@ -256,7 +254,21 @@ impl BackgroundTasks { config.sync_service_zone_nat.period_secs, Box::new(ServiceZoneNatTracker::new( datastore.clone(), - dpd_clients.clone(), + resolver.clone(), + )), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + + let task_switch_port_settings_manager = { + driver.register( + "switch_port_config_manager".to_string(), + String::from("manages switch port settings for rack switches"), + config.switch_port_settings_manager.period_secs, + Box::new(SwitchPortSettingsManager::new( + datastore.clone(), + resolver.clone(), )), opctx.child(BTreeMap::new()), vec![], @@ -298,6 +310,7 @@ impl 
BackgroundTasks { task_blueprint_loader, task_blueprint_executor, task_service_zone_nat_tracker, + task_switch_port_settings_manager, task_region_replacement, } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 0065a41a9e..9867f1dc6d 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -15,9 +15,11 @@ mod external_endpoints; mod init; mod inventory_collection; mod nat_cleanup; +mod networking; mod phantom_disks; mod region_replacement; mod status; mod sync_service_zone_nat; +mod sync_switch_configuration; pub use init::BackgroundTasks; diff --git a/nexus/src/app/background/nat_cleanup.rs b/nexus/src/app/background/nat_cleanup.rs index 5014dc0553..16b1b7e357 100644 --- a/nexus/src/app/background/nat_cleanup.rs +++ b/nexus/src/app/background/nat_cleanup.rs @@ -6,10 +6,15 @@ //! Responsible for cleaning up soft deleted entries once they //! have been propagated to running dpd instances. +use crate::app::map_switch_zone_addrs; + use super::common::BackgroundTask; +use super::networking::build_dpd_clients; use chrono::{Duration, Utc}; use futures::future::BoxFuture; use futures::FutureExt; +use internal_dns::resolver::Resolver; +use internal_dns::ServiceName; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use serde_json::json; @@ -19,15 +24,15 @@ use std::sync::Arc; /// from ipv4_nat_entry table pub struct Ipv4NatGarbageCollector { datastore: Arc, - dpd_clients: Vec>, + resolver: Resolver, } impl Ipv4NatGarbageCollector { pub fn new( datastore: Arc, - dpd_clients: Vec>, + resolver: Resolver, ) -> Ipv4NatGarbageCollector { - Ipv4NatGarbageCollector { datastore, dpd_clients } + Ipv4NatGarbageCollector { datastore, resolver } } } @@ -60,7 +65,30 @@ impl BackgroundTask for Ipv4NatGarbageCollector { } }; - for client in &self.dpd_clients { + let switch_zone_addresses = match self + .resolver + .lookup_all_ipv6(ServiceName::Dendrite) + .await + { + Ok(addrs) => addrs, + 
Err(e) => { + error!(log, "failed to resolve addresses for Dendrite services"; "error" => %e); + return json!({ + "error": + format!( + "failed to resolve addresses for Dendrite services: {:#}", + e + ) + }); + }, + }; + + let mappings = + map_switch_zone_addrs(log, switch_zone_addresses).await; + + let dpd_clients = build_dpd_clients(&mappings, log); + + for (_location, client) in dpd_clients { let response = client.ipv4_nat_generation().await; match response { Ok(gen) => min_gen = std::cmp::min(min_gen, *gen), diff --git a/nexus/src/app/background/networking.rs b/nexus/src/app/background/networking.rs new file mode 100644 index 0000000000..95e0cd32b3 --- /dev/null +++ b/nexus/src/app/background/networking.rs @@ -0,0 +1,113 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use db::datastore::SwitchPortSettingsCombinedResult; +use dpd_client::types::{ + LinkCreate, LinkId, LinkSettings, PortFec, PortSettings, PortSpeed, +}; +use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed}; +use nexus_db_queries::db; +use omicron_common::address::DENDRITE_PORT; +use omicron_common::{address::MGD_PORT, api::external::SwitchLocation}; +use std::{collections::HashMap, net::SocketAddrV6}; + +pub(crate) fn build_mgd_clients( + mappings: HashMap, + log: &slog::Logger, +) -> HashMap { + let mut clients: Vec<(SwitchLocation, mg_admin_client::Client)> = vec![]; + for (location, addr) in &mappings { + let port = MGD_PORT; + let socketaddr = + std::net::SocketAddr::V6(SocketAddrV6::new(*addr, port, 0, 0)); + let client = + match mg_admin_client::Client::new(&log.clone(), socketaddr) { + Ok(client) => client, + Err(e) => { + error!( + log, + "error building mgd client"; + "location" => %location, + "addr" => %addr, + "error" => %e, + ); + continue; + } + }; + clients.push((*location, client)); + } + 
clients.into_iter().collect::>() +} + +pub(crate) fn build_dpd_clients( + mappings: &HashMap, + log: &slog::Logger, +) -> HashMap { + let dpd_clients: HashMap = mappings + .iter() + .map(|(location, addr)| { + let port = DENDRITE_PORT; + + let client_state = dpd_client::ClientState { + tag: String::from("nexus"), + log: log.new(o!( + "component" => "DpdClient" + )), + }; + + let dpd_client = dpd_client::Client::new( + &format!("http://[{addr}]:{port}"), + client_state, + ); + (*location, dpd_client) + }) + .collect(); + dpd_clients +} + +pub(crate) fn api_to_dpd_port_settings( + settings: &SwitchPortSettingsCombinedResult, +) -> Result { + let mut dpd_port_settings = PortSettings { links: HashMap::new() }; + + //TODO breakouts + let link_id = LinkId(0); + + for l in settings.links.iter() { + dpd_port_settings.links.insert( + link_id.to_string(), + LinkSettings { + params: LinkCreate { + autoneg: l.autoneg, + lane: Some(LinkId(0)), + kr: false, + fec: match l.fec { + SwitchLinkFec::Firecode => PortFec::Firecode, + SwitchLinkFec::Rs => PortFec::Rs, + SwitchLinkFec::None => PortFec::None, + }, + speed: match l.speed { + SwitchLinkSpeed::Speed0G => PortSpeed::Speed0G, + SwitchLinkSpeed::Speed1G => PortSpeed::Speed1G, + SwitchLinkSpeed::Speed10G => PortSpeed::Speed10G, + SwitchLinkSpeed::Speed25G => PortSpeed::Speed25G, + SwitchLinkSpeed::Speed40G => PortSpeed::Speed40G, + SwitchLinkSpeed::Speed50G => PortSpeed::Speed50G, + SwitchLinkSpeed::Speed100G => PortSpeed::Speed100G, + SwitchLinkSpeed::Speed200G => PortSpeed::Speed200G, + SwitchLinkSpeed::Speed400G => PortSpeed::Speed400G, + }, + }, + //TODO won't work for breakouts + addrs: settings + .addresses + .iter() + .map(|a| a.address.ip()) + .collect(), + }, + ); + } + + Ok(dpd_port_settings) +} diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/sync_service_zone_nat.rs index a3d52f4469..e23621ed23 100644 --- a/nexus/src/app/background/sync_service_zone_nat.rs +++ 
b/nexus/src/app/background/sync_service_zone_nat.rs @@ -5,10 +5,15 @@ //! Background task for detecting changes to service zone locations and //! updating the NAT rpw table accordingly +use crate::app::map_switch_zone_addrs; + use super::common::BackgroundTask; +use super::networking::build_dpd_clients; use anyhow::Context; use futures::future::BoxFuture; use futures::FutureExt; +use internal_dns::resolver::Resolver; +use internal_dns::ServiceName; use nexus_db_model::Ipv4NatValues; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::lookup::LookupPath; @@ -36,15 +41,12 @@ const MIN_EXTERNAL_DNS_COUNT: usize = 1; /// persisted in the NAT RPW table pub struct ServiceZoneNatTracker { datastore: Arc, - dpd_clients: Vec>, + resolver: Resolver, } impl ServiceZoneNatTracker { - pub fn new( - datastore: Arc, - dpd_clients: Vec>, - ) -> Self { - Self { datastore, dpd_clients } + pub fn new(datastore: Arc, resolver: Resolver) -> Self { + Self { datastore, resolver } } } @@ -332,7 +334,31 @@ impl BackgroundTask for ServiceZoneNatTracker { // notify dpd if we've added any new records if result > 0 { - for client in &self.dpd_clients { + + let switch_zone_addresses = match self + .resolver + .lookup_all_ipv6(ServiceName::Dendrite) + .await + { + Ok(addrs) => addrs, + Err(e) => { + error!(log, "failed to resolve addresses for Dendrite services"; "error" => %e); + return json!({ + "error": + format!( + "failed to resolve addresses for Dendrite services: {:#}", + e + ) + }); + }, + }; + + let mappings = + map_switch_zone_addrs(log, switch_zone_addresses).await; + + let dpd_clients = build_dpd_clients(&mappings, log); + + for (_location, client) in dpd_clients { if let Err(e) = client.ipv4_nat_trigger_update().await { error!( &log, diff --git a/nexus/src/app/background/sync_switch_configuration.rs b/nexus/src/app/background/sync_switch_configuration.rs new file mode 100644 index 0000000000..4bc75bad10 --- /dev/null +++ 
b/nexus/src/app/background/sync_switch_configuration.rs @@ -0,0 +1,1418 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for propagating user provided switch configurations +//! to relevant management daemons (dendrite, mgd, sled-agent, etc.) + +use crate::app::{ + background::networking::{ + api_to_dpd_port_settings, build_dpd_clients, build_mgd_clients, + }, + map_switch_zone_addrs, +}; + +use internal_dns::resolver::Resolver; +use internal_dns::ServiceName; +use ipnetwork::IpNetwork; +use nexus_db_model::{ + AddressLotBlock, BgpConfig, BootstoreConfig, LoopbackAddress, + SwitchLinkFec, SwitchLinkSpeed, SwitchPortBgpPeerConfig, INFRA_LOT, + NETWORK_KEY, +}; +use uuid::Uuid; + +use super::common::BackgroundTask; +use dpd_client::types::PortId; +use futures::future::BoxFuture; +use futures::FutureExt; +use mg_admin_client::types::{ + AddStaticRoute4Request, ApplyRequest, BgpPeerConfig, + DeleteStaticRoute4Request, Prefix4, StaticRoute4, StaticRoute4List, +}; +use nexus_db_queries::{ + context::OpContext, + db::{datastore::SwitchPortSettingsCombinedResult, DataStore}, +}; +use nexus_types::{external_api::params, identity::Resource}; +use omicron_common::OMICRON_DPD_TAG; +use omicron_common::{ + address::{get_sled_address, Ipv6Subnet}, + api::external::{DataPageParams, SwitchLocation}, +}; +use serde_json::json; +use sled_agent_client::types::{ + BgpConfig as SledBgpConfig, BgpPeerConfig as SledBgpPeerConfig, + EarlyNetworkConfig, EarlyNetworkConfigBody, HostPortConfig, Ipv4Network, + PortConfigV1, RackNetworkConfigV1, RouteConfig as SledRouteConfig, +}; +use std::{ + collections::{hash_map::Entry, HashMap, HashSet}, + net::{IpAddr, Ipv4Addr}, + str::FromStr, + sync::Arc, +}; + +const DPD_TAG: Option<&'static str> = Some(OMICRON_DPD_TAG); + +// This is more of an implementation detail of 
the BGP implementation. It +// defines the maximum time the peering engine will wait for external messages +// before breaking to check for shutdown conditions. +const BGP_SESSION_RESOLUTION: u64 = 100; + +pub struct SwitchPortSettingsManager { + datastore: Arc, + resolver: Resolver, +} + +impl SwitchPortSettingsManager { + pub fn new(datastore: Arc, resolver: Resolver) -> Self { + Self { datastore, resolver } + } + + async fn switch_ports<'a>( + &'a mut self, + opctx: &OpContext, + log: &slog::Logger, + ) -> Result, serde_json::Value> { + let port_list = match self + .datastore + .switch_port_list(opctx, &DataPageParams::max_page()) + .await + { + Ok(port_list) => port_list, + Err(e) => { + error!( + &log, + "failed to enumerate switch ports"; + "error" => format!("{:#}", e) + ); + return Err(json!({ + "error": + format!( + "failed enumerate switch ports: \ + {:#}", + e + ) + })); + } + }; + Ok(port_list) + } + + async fn changes<'a>( + &'a mut self, + port_list: Vec, + opctx: &OpContext, + log: &slog::Logger, + ) -> Result< + Vec<(SwitchLocation, nexus_db_model::SwitchPort, PortSettingsChange)>, + serde_json::Value, + > { + let mut changes = Vec::new(); + for port in port_list { + let location: SwitchLocation = + match port.switch_location.clone().parse() { + Ok(location) => location, + Err(e) => { + error!( + &log, + "failed to parse switch location"; + "switch_location" => ?port.switch_location, + "error" => ?e + ); + continue; + } + }; + + let id = match port.port_settings_id { + Some(id) => id, + _ => { + changes.push((location, port, PortSettingsChange::Clear)); + continue; + } + }; + + info!( + log, + "fetching switch port settings"; + "switch_location" => ?location, + "port" => ?port, + ); + + let settings = match self + .datastore + .switch_port_settings_get(opctx, &id.into()) + .await + { + Ok(settings) => settings, + Err(e) => { + error!( + &log, + "failed to get switch port settings"; + "switch_port_settings_id" => ?id, + "error" => format!("{:#}", e) 
+ ); + return Err(json!({ + "error": + format!( + "failed to get switch port settings: \ + {:#}", + e + ) + })); + } + }; + + changes.push(( + location, + port, + PortSettingsChange::Apply(Box::new(settings)), + )); + } + Ok(changes) + } + + async fn db_loopback_addresses<'a>( + &'a mut self, + opctx: &OpContext, + log: &slog::Logger, + ) -> Result< + HashSet<(SwitchLocation, IpAddr)>, + omicron_common::api::external::Error, + > { + let values = self + .datastore + .loopback_address_list(opctx, &DataPageParams::max_page()) + .await?; + + let mut set: HashSet<(SwitchLocation, IpAddr)> = HashSet::new(); + + // TODO: are we doing anything special with anycast addresses at the moment? + for LoopbackAddress { switch_location, address, .. } in values.iter() { + let location: SwitchLocation = match switch_location.parse() { + Ok(v) => v, + Err(e) => { + error!( + log, + "failed to parse switch location for loopback address"; + "address" => %address, + "location" => switch_location, + "error" => ?e, + ); + continue; + } + }; + set.insert((location, address.ip())); + } + + Ok(set) + } +} + +enum PortSettingsChange { + Apply(Box), + Clear, +} + +impl BackgroundTask for SwitchPortSettingsManager { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async move { + let log = &opctx.log; + + let racks = match self.datastore.rack_list_initialized(opctx, &DataPageParams::max_page()).await { + Ok(racks) => racks, + Err(e) => { + error!(log, "failed to retrieve racks from database"; "error" => ?e); + return json!({ + "error": + format!( + "failed to retrieve racks from database : \ + {:#}", + e + ) + }); + }, + }; + + // TODO: https://github.com/oxidecomputer/omicron/issues/3090 + // Here we're iterating over racks because that's technically the correct thing to do, + // but our logic for pulling switch ports and their related configurations + // *isn't* per-rack, so that's something we'll need to revisit in the future. 
+ for rack in &racks { + + // lookup switch zones via DNS + // TODO https://github.com/oxidecomputer/omicron/issues/5201 + let switch_zone_addresses = match self + .resolver + .lookup_all_ipv6(ServiceName::Dendrite) + .await + { + Ok(addrs) => addrs, + Err(e) => { + error!(log, "failed to resolve addresses for Dendrite services"; "error" => %e); + continue; + }, + }; + + // TODO https://github.com/oxidecomputer/omicron/issues/5201 + let mappings = + map_switch_zone_addrs(log, switch_zone_addresses).await; + + // TODO https://github.com/oxidecomputer/omicron/issues/5201 + // build sled agent clients + let sled_agent_clients = build_sled_agent_clients(&mappings, log); + + // TODO https://github.com/oxidecomputer/omicron/issues/5201 + // build dpd clients + let dpd_clients = build_dpd_clients(&mappings, log); + + // TODO https://github.com/oxidecomputer/omicron/issues/5201 + // build mgd clients + let mgd_clients = build_mgd_clients(mappings, log); + + let port_list = match self.switch_ports(opctx, log).await { + Ok(value) => value, + Err(e) => { + error!(log, "failed to generate switchports for rack"; "error" => %e); + continue; + }, + }; + + // + // calculate and apply switch port changes + // + + let changes = match self.changes(port_list, opctx, log).await { + Ok(value) => value, + Err(e) => { + error!(log, "failed to generate changeset for switchport settings"; "error" => %e); + continue; + }, + }; + + apply_switch_port_changes(&dpd_clients, &changes, log).await; + + // + // calculate and apply routing changes + // + + // get the static routes on each switch + let current_static_routes = + static_routes_on_switch(&mgd_clients, log).await; + info!(&log, "retrieved existing routes"; "routes" => ?current_static_routes); + + // generate the complete set of static routes that should be on a given switch + let desired_static_routes = static_routes_in_db(&changes); + info!(&log, "retrieved desired routes"; "routes" => ?desired_static_routes); + + // diff the current and 
desired routes. + // Add what is missing from current, remove what is not present in desired. + let routes_to_add = static_routes_to_add( + &desired_static_routes, + ¤t_static_routes, + log, + ); + info!(&log, "calculated static routes to add"; "routes" => ?routes_to_add); + + let routes_to_del = static_routes_to_del( + current_static_routes, + desired_static_routes, + ); + info!(&log, "calculated static routes to delete"; "routes" => ?routes_to_del); + + // delete the unneeded routes first, just in case there is a conflicting route for + // one we need to add + info!(&log, "deleting static routes"; "routes" => ?routes_to_del); + delete_static_routes(&mgd_clients, routes_to_del, log).await; + + // add the new routes + info!(&log, "adding static routes"; "routes" => ?routes_to_add); + add_static_routes(&mgd_clients, routes_to_add, log).await; + + + // + // calculate and apply loopback address changes + // + + match self.db_loopback_addresses(opctx, log).await { + Ok(desired_loopback_addresses) => { + let current_loopback_addresses = switch_loopback_addresses(&dpd_clients, log).await; + + let loopbacks_to_add: Vec<(SwitchLocation, IpAddr)> = desired_loopback_addresses + .difference(¤t_loopback_addresses) + .map(|i| (i.0, i.1)) + .collect(); + let loopbacks_to_del: Vec<(SwitchLocation, IpAddr)> = current_loopback_addresses + .difference(&desired_loopback_addresses) + .map(|i| (i.0, i.1)) + .collect(); + + delete_loopback_addresses_from_switch(&loopbacks_to_del, &dpd_clients, log).await; + add_loopback_addresses_to_switch(&loopbacks_to_add, dpd_clients, log).await; + }, + Err(e) => { + error!( + log, + "error fetching loopback addresses from db, skipping loopback config"; + "error" => %e + ); + }, + }; + + // + // calculate and apply switch zone SMF changes + // + let uplinks = uplinks(&changes); + + // yeet the messages + for (location, config) in &uplinks { + let client: &sled_agent_client::Client = + match sled_agent_clients.get(location) { + Some(client) => client, 
+ None => { + error!(log, "sled-agent client is missing, cannot send updates"; "location" => %location); + continue; + }, + }; + + info!( + &log, + "applying SMF config uplink updates to switch zone"; + "switch_location" => ?location, + "config" => ?config, + ); + if let Err(e) = client + .uplink_ensure(&sled_agent_client::types::SwitchPorts { + uplinks: config.clone(), + }) + .await + { + error!( + log, + "error while applying smf updates to switch zone"; + "location" => %location, + "error" => %e, + ); + } + } + + // + // calculate and apply BGP changes + // + + // build a list of desired settings for each switch + let mut desired_bgp_configs: HashMap< + SwitchLocation, + Vec, + > = HashMap::new(); + + // we currently only support one bgp config per switch + let mut switch_bgp_config: HashMap = HashMap::new(); + + // Prefixes are associated to BgpConfig via the config id + let mut bgp_announce_prefixes: HashMap> = HashMap::new(); + + let mut bootstore_bgp_peer_info: Vec<(SwitchPortBgpPeerConfig, u32, Ipv4Addr)> = vec![]; + + for (location, port, change) in &changes { + let PortSettingsChange::Apply(settings) = change else { + continue; + }; + + // desired peer configurations for a given switch port + let mut peers: HashMap> = HashMap::new(); + + for peer in &settings.bgp_peers { + let bgp_config_id = peer.bgp_config_id; + + // since we only have one bgp config per switch, we only need to fetch it once + let bgp_config = match switch_bgp_config.entry(*location) { + Entry::Occupied(occupied_entry) => { + let (existing_id, existing_config) = occupied_entry.get().clone(); + // verify peers don't have differing configs + if existing_id != bgp_config_id { + // should we flag the switch and not do *any* updates to it? 
+ // with the logic as-is, it will skip the config for this port and move on + error!( + log, + "peers do not have matching asn (only one asn allowed per switch)"; + "switch" => ?location, + "first_config_id" => ?existing_id, + "second_config_id" => ?bgp_config_id, + ); + break; + } + existing_config + }, + Entry::Vacant(vacant_entry) => { + // get the bgp config for this peer + let config = match self + .datastore + .bgp_config_get(opctx, &bgp_config_id.into()) + .await + { + Ok(config) => config, + Err(e) => { + error!( + log, + "error while fetching bgp peer config from db"; + "location" => %location, + "port_name" => %port.port_name, + "error" => %e, + ); + continue; + }, + }; + vacant_entry.insert((bgp_config_id, config.clone())); + config + }, + }; + + // + // build a list of prefixes from the announcements in the bgp config + // + + // Same thing as above, check to see if we've already built the announce set, + // if so we'll skip this step + if bgp_announce_prefixes.get(&bgp_config.bgp_announce_set_id).is_none() { + let announcements = match self + .datastore + .bgp_announce_list( + opctx, + ¶ms::BgpAnnounceSetSelector { + name_or_id: bgp_config + .bgp_announce_set_id + .into(), + }, + ) + .await + { + Ok(a) => a, + Err(e) => { + error!( + log, + "error while fetching bgp announcements from db"; + "location" => %location, + "bgp_announce_set_id" => %bgp_config.bgp_announce_set_id, + "error" => %e, + ); + continue; + }, + }; + + let mut prefixes: Vec = vec![]; + + for announcement in &announcements { + let value = match announcement.network.ip() { + IpAddr::V4(value) => value, + IpAddr::V6(a) => { + error!(log, "bad request, only ipv4 supported at this time"; "requested_address" => ?a); + continue; + }, + }; + prefixes.push(Prefix4 { value, length: announcement.network.prefix() }); + } + bgp_announce_prefixes.insert(bgp_config.bgp_announce_set_id, prefixes); + } + + // now that the peer passes the above validations, add it to the list for configuration + let 
peer_config = BgpPeerConfig { + name: format!("{}", peer.addr.ip()), + host: format!("{}:179", peer.addr.ip()), + hold_time: peer.hold_time.0.into(), + idle_hold_time: peer.idle_hold_time.0.into(), + delay_open: peer.delay_open.0.into(), + connect_retry: peer.connect_retry.0.into(), + keepalive: peer.keepalive.0.into(), + resolution: BGP_SESSION_RESOLUTION, + passive: false, + }; + + // add it to data for the bootstore + // only ipv4 is supported now + match peer.addr { + ipnetwork::IpNetwork::V4(addr) => { + bootstore_bgp_peer_info.push((peer.clone(), bgp_config.asn.0, addr.ip())); + }, + ipnetwork::IpNetwork::V6(_) => continue, //TODO v6 + }; + + // update the stored vec if it exists, create a new on if it doesn't exist + match peers.entry(port.port_name.clone()) { + Entry::Occupied(mut occupied_entry) => { + occupied_entry.get_mut().push(peer_config); + }, + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(vec![peer_config]); + }, + } + } + + let (config_id, request_bgp_config) = match switch_bgp_config.get(location) { + Some(config) => config, + None => { + info!(log, "no bgp config found for switch, skipping."; "switch" => ?location); + continue; + }, + }; + + let request_prefixes = match bgp_announce_prefixes.get(&request_bgp_config.bgp_announce_set_id) { + Some(prefixes) => prefixes, + None => { + error!( + log, + "no prefixes to announce found for bgp config"; + "switch" => ?location, + "announce_set_id" => ?request_bgp_config.bgp_announce_set_id, + "bgp_config_id" => ?config_id, + ); + continue; + }, + }; + + let request = ApplyRequest { + asn: *request_bgp_config.asn, + peers, + originate: request_prefixes.clone(), + }; + + match desired_bgp_configs.entry(*location) { + Entry::Occupied(mut occupied_entry) => { + occupied_entry.get_mut().push(request); + } + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(vec![request]); + } + } + } + + for (location, configs) in &desired_bgp_configs { + let client = match mgd_clients.get(location) { + 
Some(client) => client, + None => { + error!(log, "no mgd client found for switch"; "switch_location" => ?location); + continue; + }, + }; + for config in configs { + info!( + &log, + "applying bgp config"; + "switch_location" => ?location, + "config" => ?config, + ); + if let Err(e) = client.inner.bgp_apply(config).await { + error!(log, "error while applying bgp configuration"; "error" => ?e); + } + } + } + + // + // calculate and apply bootstore changes + // + + // TODO: #5232 Make ntp servers w/ generation tracking first-class citizens in the db + // We're using the latest bootstore config from the sled agents to get the ntp + // servers. We should instead be pulling this information from the db. However, it + // seems that we're currently not storing the ntp servers in the db as a first-class + // citizen, so we'll need to add that first. + + // find the active sled-agent bootstore config with the highest generation + let mut latest_sled_agent_bootstore_config: Option = None; + + // Since we update the first scrimlet we can reach (we failover to the second one + // if updating the first one fails) we need to check them both. + for (_location, client) in &sled_agent_clients { + let scrimlet_cfg = match client.read_network_bootstore_config_cache().await { + Ok(config) => config, + Err(e) => { + error!(log, "unable to read bootstore config from scrimlet"; "error" => ?e); + continue; + } + }; + if let Some(other_config) = latest_sled_agent_bootstore_config.as_mut() { + if other_config.generation < scrimlet_cfg.generation { + *other_config = scrimlet_cfg.clone(); + } + } else { + latest_sled_agent_bootstore_config = Some(scrimlet_cfg.clone()); + } + } + + // TODO: this will also be removed once the above is resolved + // Move on to the next rack if neither scrimlet is reachable. 
+ // if both scrimlets are unreachable we probably have bigger problems on this rack + let ntp_servers = match latest_sled_agent_bootstore_config { + Some(config) => { + config.body.ntp_servers.clone() + }, + None => { + error!(log, "both scrimlets are unreachable, cannot update bootstore"); + continue; + } + }; + + // build the desired bootstore config from the records we've fetched + let subnet = match rack.rack_subnet { + Some(IpNetwork::V6(subnet)) => subnet, + Some(IpNetwork::V4(_)) => { + error!(log, "rack subnet must be ipv6"; "rack" => ?rack); + continue; + }, + None => { + error!(log, "rack subnet not set"; "rack" => ?rack); + continue; + } + }; + + // TODO: @rcgoodfellow is this correct? Do we place the BgpConfig for both switches in a single Vec to send to the bootstore? + let bgp: Vec = switch_bgp_config.iter().map(|(_location, (_id, config))| { + let announcements: Vec = bgp_announce_prefixes + .get(&config.bgp_announce_set_id) + .expect("bgp config is present but announce set is not populated") + .iter() + .map(|prefix| { + ipnetwork::Ipv4Network::new(prefix.value, prefix.length) + .expect("Prefix4 and Ipv4Network's value types have diverged") + .into() + }).collect(); + + SledBgpConfig { + asn: config.asn.0, + originate: announcements, + } + }).collect(); + + let mut ports: Vec = vec![]; + + for (location, port, change) in &changes { + let PortSettingsChange::Apply(info) = change else { + continue; + }; + + let port_config = PortConfigV1 { + addresses: info.addresses.iter().map(|a| a.address).collect(), + autoneg: info + .links + .get(0) //TODO breakout support + .map(|l| l.autoneg) + .unwrap_or(false), + bgp_peers: bootstore_bgp_peer_info + .iter() + .map(|(p, asn, addr)| SledBgpPeerConfig { + addr: *addr, + asn: *asn, + port: port.port_name.clone(), + hold_time: Some(p.hold_time.0.into()), + connect_retry: Some(p.connect_retry.0.into()), + delay_open: Some(p.delay_open.0.into()), + idle_hold_time: Some(p.idle_hold_time.0.into()), + keepalive: 
Some(p.keepalive.0.into()), + }) + .collect(), + port: port.port_name.clone(), + routes: info + .routes + .iter() + .map(|r| SledRouteConfig { + destination: r.dst, + nexthop: r.gw.ip(), + }) + .collect(), + switch: *location, + uplink_port_fec: info + .links + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 + .map(|l| l.fec) + .unwrap_or(SwitchLinkFec::None) + .into(), + uplink_port_speed: info + .links + .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 + .map(|l| l.speed) + .unwrap_or(SwitchLinkSpeed::Speed100G) + .into(), + }; + ports.push(port_config); + } + + let blocks = match self.datastore.address_lot_blocks_by_name(opctx, INFRA_LOT.into()).await { + Ok(blocks) => blocks, + Err(e) => { + error!(log, "error while fetching address lot blocks from db"; "error" => %e); + continue; + }, + }; + + // currently there should only be one block assigned. If there is more than one + // block, grab the first one and emit a warning. + if blocks.len() > 1 { + warn!(log, "more than one block assigned to infra lot"; "blocks" => ?blocks); + } + + let (infra_ip_first, infra_ip_last)= match blocks.get(0) { + Some(AddressLotBlock{ first_address, last_address, ..}) => { + match (first_address, last_address) { + (IpNetwork::V4(first), IpNetwork::V4(last)) => (first.ip(), last.ip()), + _ => { + error!(log, "infra lot block must be ipv4"; "block" => ?blocks.get(0)); + continue; + }, + } + }, + None => { + error!(log, "no blocks assigned to infra lot"); + continue; + }, + }; + + let mut desired_config = EarlyNetworkConfig { + generation: 0, + schema_version: 1, + body: EarlyNetworkConfigBody { + ntp_servers, + rack_network_config: Some(RackNetworkConfigV1 { + rack_subnet: subnet, + infra_ip_first, + infra_ip_last, + ports, + bgp, + }), + }, + }; + + // should_update is a boolean value that determines whether or not we need to + // increment the bootstore version and push a new config to the sled agents. 
+ // + // * If the config we've built from the switchport configuration information is + // different from the last config we've cached in the db, we update the config, + // cache it in the db, and apply it. + // * If the last cached config cannot be succesfully deserialized into our current + // bootstore format, we assume that it is an older format and update the config, + // cache it in the db, and apply it. + // * If there is no last cached config, we assume that this is the first time this + // rpw has run for the given rack, so we update the config, cache it in the db, + // and apply it. + // * If we cannot fetch the latest version due to a db error, something is broken + // so we don't do anything. + let bootstore_needs_update = match self.datastore.get_latest_bootstore_config(opctx, NETWORK_KEY.into()).await { + Ok(Some(BootstoreConfig { data, .. })) => { + match serde_json::from_value::(data.clone()) { + Ok(config) => { + if config.body.ntp_servers != desired_config.body.ntp_servers { + info!( + log, + "ntp servers have changed"; + "old" => ?config.body.ntp_servers, + "new" => ?desired_config.body.ntp_servers, + ); + true + } else if config.body.rack_network_config != desired_config.body.rack_network_config { + info!( + log, + "rack network config has changed"; + "old" => ?config.body.rack_network_config, + "new" => ?desired_config.body.rack_network_config, + ); + true + } else { + false + } + }, + Err(e) => { + error!( + log, + "bootstore config does not deserialized to current EarlyNetworkConfig format"; + "key" => %NETWORK_KEY, + "value" => %data, + "error" => %e, + ); + true + }, + } + }, + Ok(None) => { + warn!( + log, + "no bootstore config found in db"; + "key" => %NETWORK_KEY, + ); + true + }, + Err(e) => { + error!( + log, + "error while fetching last applied bootstore config"; + "key" => %NETWORK_KEY, + "error" => %e, + ); + continue; + }, + }; + + if bootstore_needs_update { + let generation = match self.datastore + 
.bump_bootstore_generation(opctx, NETWORK_KEY.into()) + .await { + Ok(value) => value, + Err(e) => { + error!( + log, + "error while fetching next bootstore generation from db"; + "key" => %NETWORK_KEY, + "error" => %e, + ); + continue; + }, + }; + + desired_config.generation = generation as u64; + info!( + &log, + "updating bootstore config"; + "config" => ?desired_config, + ); + + // spush the updates to both scrimlets + // if both scrimlets are down, bootstore updates aren't happening anyway + let mut one_succeeded = false; + for (location, client) in &sled_agent_clients { + if let Err(e) = client.write_network_bootstore_config(&desired_config).await { + error!( + log, + "error updating bootstore"; + "location" => %location, + "config" => ?desired_config, + "error" => %e, + ) + } else { + one_succeeded = true; + } + } + + // if at least one succeeded, record this update in the db + if one_succeeded { + let config = BootstoreConfig { + key: NETWORK_KEY.into(), + generation: desired_config.generation as i64, + data: serde_json::to_value(&desired_config).unwrap(), + time_created: chrono::Utc::now(), + time_deleted: None, + }; + if let Err(e) = self.datastore.ensure_bootstore_config(opctx, config.clone()).await { + // if this fails, worst case scenario is that we will send the bootstore + // information it already has on the next run + error!( + log, + "error while caching bootstore config in db"; + "config" => ?config, + "error" => %e, + ); + } + } + } + } + json!({}) + } + .boxed() + } +} + +async fn add_loopback_addresses_to_switch( + loopbacks_to_add: &[(SwitchLocation, IpAddr)], + dpd_clients: HashMap, + log: &slog::Logger, +) { + for (location, address) in loopbacks_to_add { + let client = match dpd_clients.get(location) { + Some(v) => v, + None => { + error!(log, "dpd_client is missing, cannot create loopback addresses"; "location" => %location); + continue; + } + }; + + if let Err(e) = + client.ensure_loopback_created(log, *address, OMICRON_DPD_TAG).await + 
{ + error!(log, "error while creating loopback address"; "error" => %e); + }; + } +} + +async fn delete_loopback_addresses_from_switch( + loopbacks_to_del: &[(SwitchLocation, IpAddr)], + dpd_clients: &HashMap, + log: &slog::Logger, +) { + for (location, address) in loopbacks_to_del { + let client = match dpd_clients.get(location) { + Some(v) => v, + None => { + error!(log, "dpd_client is missing, cannot delete loopback addresses"; "location" => %location); + continue; + } + }; + + if let Err(e) = client.ensure_loopback_deleted(log, *address).await { + error!(log, "error while deleting loopback address"; "error" => %e); + }; + } +} + +async fn switch_loopback_addresses( + dpd_clients: &HashMap, + log: &slog::Logger, +) -> HashSet<(SwitchLocation, IpAddr)> { + let mut current_loopback_addresses: HashSet<(SwitchLocation, IpAddr)> = + HashSet::new(); + + for (location, client) in dpd_clients { + let ipv4_loopbacks = match client.loopback_ipv4_list().await { + Ok(v) => v, + Err(e) => { + error!( + log, + "error fetching ipv4 loopback addresses from switch"; + "location" => %location, + "error" => %e, + ); + continue; + } + }; + + let ipv6_loopbacks = match client.loopback_ipv6_list().await { + Ok(v) => v, + Err(e) => { + error!( + log, + "error fetching ipv6 loopback addresses from switch"; + "location" => %location, + "error" => %e, + ); + continue; + } + }; + + for entry in ipv4_loopbacks.iter() { + current_loopback_addresses + .insert((*location, IpAddr::V4(entry.addr))); + } + + for entry in ipv6_loopbacks.iter() { + current_loopback_addresses + .insert((*location, IpAddr::V6(entry.addr))); + } + } + current_loopback_addresses +} + +fn uplinks( + changes: &[( + SwitchLocation, + nexus_db_model::SwitchPort, + PortSettingsChange, + )], +) -> HashMap> { + let mut uplinks: HashMap> = + HashMap::new(); + for (location, port, change) in changes { + let PortSettingsChange::Apply(config) = change else { + continue; + }; + let config = HostPortConfig { + port: 
port.port_name.clone(), + addrs: config.addresses.iter().map(|a| a.address).collect(), + }; + + match uplinks.entry(*location) { + Entry::Occupied(mut occupied_entry) => { + occupied_entry.get_mut().push(config); + } + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(vec![config]); + } + } + } + uplinks +} + +fn build_sled_agent_clients( + mappings: &HashMap, + log: &slog::Logger, +) -> HashMap { + let sled_agent_clients: HashMap = + mappings + .iter() + .map(|(location, addr)| { + // build sled agent address from switch zone address + let addr = get_sled_address(Ipv6Subnet::new(*addr)); + let client = sled_agent_client::Client::new( + &format!("http://{}", addr), + log.clone(), + ); + (*location, client) + }) + .collect(); + sled_agent_clients +} + +fn static_routes_to_del( + current_static_routes: HashMap< + SwitchLocation, + HashSet<(Ipv4Addr, Prefix4)>, + >, + desired_static_routes: HashMap< + SwitchLocation, + HashSet<(Ipv4Addr, Prefix4)>, + >, +) -> HashMap { + let mut routes_to_del: HashMap = + HashMap::new(); + + // find routes to remove + for (switch_location, routes_on_switch) in ¤t_static_routes { + if let Some(routes_wanted) = desired_static_routes.get(switch_location) + { + // if it's on the switch but not desired (in our db), it should be removed + let stale_routes = routes_on_switch + .difference(routes_wanted) + .map(|(nexthop, prefix)| StaticRoute4 { + nexthop: *nexthop, + prefix: prefix.clone(), + }) + .collect::>(); + + routes_to_del.insert( + *switch_location, + DeleteStaticRoute4Request { + routes: StaticRoute4List { list: stale_routes }, + }, + ); + } else { + // if no desired routes are present, all routes on this switch should be deleted + let stale_routes = routes_on_switch + .iter() + .map(|(nexthop, prefix)| StaticRoute4 { + nexthop: *nexthop, + prefix: prefix.clone(), + }) + .collect::>(); + + let req = DeleteStaticRoute4Request { + routes: StaticRoute4List { list: stale_routes }, + }; + + routes_to_del.insert(*switch_location, 
req); + continue; + }; + } + routes_to_del +} + +fn static_routes_to_add( + desired_static_routes: &HashMap< + SwitchLocation, + HashSet<(Ipv4Addr, Prefix4)>, + >, + current_static_routes: &HashMap< + SwitchLocation, + HashSet<(Ipv4Addr, Prefix4)>, + >, + log: &slog::Logger, +) -> HashMap { + let mut routes_to_add: HashMap = + HashMap::new(); + + // find routes to add + for (switch_location, routes_wanted) in desired_static_routes { + let routes_on_switch = match current_static_routes.get(&switch_location) + { + Some(routes) => routes, + None => { + warn!( + &log, + "no discovered routes from switch. it is possible that an earlier api call failed."; + "switch_location" => ?switch_location, + ); + continue; + } + }; + let missing_routes = routes_wanted + .difference(routes_on_switch) + .map(|(nexthop, prefix)| StaticRoute4 { + nexthop: *nexthop, + prefix: prefix.clone(), + }) + .collect::>(); + + routes_to_add.insert( + *switch_location, + AddStaticRoute4Request { + routes: StaticRoute4List { list: missing_routes }, + }, + ); + } + routes_to_add +} + +fn static_routes_in_db( + changes: &[( + SwitchLocation, + nexus_db_model::SwitchPort, + PortSettingsChange, + )], +) -> HashMap> { + let mut routes_from_db: HashMap< + SwitchLocation, + HashSet<(Ipv4Addr, Prefix4)>, + > = HashMap::new(); + + for (location, _port, change) in changes { + // we only need to check for ports that have a configuration present. No config == no routes. 
+ let PortSettingsChange::Apply(settings) = change else { + continue; + }; + let mut routes = HashSet::new(); + for route in &settings.routes { + // convert to appropriate types for comparison and insertion + let nexthop = match route.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match route.dst.ip() { + IpAddr::V4(v4) => { + Prefix4 { value: v4, length: route.dst.prefix() } + } + IpAddr::V6(_) => continue, + }; + routes.insert((nexthop, prefix)); + } + + match routes_from_db.entry(*location) { + Entry::Occupied(mut occupied_entry) => { + occupied_entry.get_mut().extend(routes); + } + Entry::Vacant(vacant_entry) => { + vacant_entry.insert(routes); + } + } + } + routes_from_db +} + +// apply changes for each port +// if we encounter an error, we log it and keep going instead of bailing +async fn apply_switch_port_changes( + dpd_clients: &HashMap, + changes: &[( + SwitchLocation, + nexus_db_model::SwitchPort, + PortSettingsChange, + )], + log: &slog::Logger, +) { + for (location, switch_port, change) in changes { + let client = match dpd_clients.get(&location) { + Some(client) => client, + None => { + error!( + &log, + "no DPD client for switch location"; + "switch_location" => ?location + ); + continue; + } + }; + + let port_name = switch_port.port_name.clone(); + + let dpd_port_id = match PortId::from_str(port_name.as_str()) { + Ok(port_id) => port_id, + Err(e) => { + error!( + &log, + "failed to parse switch port id"; + "db_switch_port_name" => ?switch_port.port_name, + "switch_location" => ?location, + "error" => format!("{:#}", e) + ); + continue; + } + }; + + match change { + PortSettingsChange::Apply(settings) => { + let dpd_port_settings = match api_to_dpd_port_settings( + &settings, + ) { + Ok(settings) => settings, + Err(e) => { + error!( + &log, + "failed to convert switch port settings"; + "switch_port_id" => ?port_name, + "switch_location" => ?location, + "switch_port_settings_id" => ?settings.settings.id(), + "error" 
=> format!("{:#}", e) + ); + continue; + } + }; + + // apply settings via dpd client + info!( + &log, + "applying settings to switch port"; + "switch_location" => ?location, + "port_id" => ?dpd_port_id, + "settings" => ?dpd_port_settings, + ); + match client + .port_settings_apply( + &dpd_port_id, + DPD_TAG, + &dpd_port_settings, + ) + .await + { + Ok(_) => {} + Err(e) => { + error!( + &log, + "failed to apply switch port settings"; + "switch_port_id" => ?port_name, + "switch_location" => ?location, + "error" => format!("{:#}", e) + ); + } + } + } + PortSettingsChange::Clear => { + // clear settings via dpd client + info!( + &log, + "clearing switch port settings"; + "switch_location" => ?location, + "port_id" => ?dpd_port_id, + ); + match client.port_settings_clear(&dpd_port_id, DPD_TAG).await { + Ok(_) => {} + Err(e) => { + error!( + &log, + "failed to clear switch port settings"; + "switch_port_id" => ?port_name, + "switch_location" => ?location, + "error" => format!("{:#}", e) + ); + } + } + } + } + } +} + +async fn static_routes_on_switch<'a>( + mgd_clients: &HashMap, + log: &slog::Logger, +) -> HashMap> { + let mut routes_on_switch = HashMap::new(); + + for (location, client) in mgd_clients { + let static_routes: HashSet<(Ipv4Addr, Prefix4)> = + match client.inner.static_list_v4_routes().await { + Ok(routes) => routes + .list + .iter() + .map(|r| (r.nexthop, r.prefix.clone())) + .collect(), + Err(_) => { + error!( + &log, + "unable to retrieve routes from switch"; + "switch_location" => ?location, + ); + continue; + } + }; + routes_on_switch.insert(*location, static_routes); + } + routes_on_switch +} + +async fn delete_static_routes( + mgd_clients: &HashMap, + routes_to_del: HashMap, + log: &slog::Logger, +) { + for (switch_location, request) in routes_to_del { + let client = match mgd_clients.get(&switch_location) { + Some(client) => client, + None => { + error!( + &log, + "mgd client not found for switch location"; + "switch_location" => ?switch_location, + 
); + continue; + } + }; + + info!( + &log, + "removing static v4 routes"; + "switch_location" => ?switch_location, + "request" => ?request, + ); + if let Err(e) = client.inner.static_remove_v4_route(&request).await { + error!( + &log, + "failed to delete routes from mgd"; + "switch_location" => ?switch_location, + "request" => ?request, + "error" => format!("{:#}", e) + ); + }; + } +} + +async fn add_static_routes<'a>( + mgd_clients: &HashMap, + routes_to_add: HashMap, + log: &slog::Logger, +) { + for (switch_location, request) in routes_to_add { + let client = match mgd_clients.get(&switch_location) { + Some(client) => client, + None => { + error!( + &log, + "mgd client not found for switch location"; + "switch_location" => ?switch_location, + ); + continue; + } + }; + + info!( + &log, + "adding static v4 routes"; + "switch_location" => ?switch_location, + "request" => ?request, + ); + if let Err(e) = client.inner.static_add_v4_route(&request).await { + error!( + &log, + "failed to add routes to mgd"; + "switch_location" => ?switch_location, + "request" => ?request, + "error" => format!("{:#}", e) + ); + }; + } +} diff --git a/nexus/src/app/bfd.rs b/nexus/src/app/bfd.rs index 2d95ad9a58..22b9fc82ef 100644 --- a/nexus/src/app/bfd.rs +++ b/nexus/src/app/bfd.rs @@ -7,15 +7,18 @@ use mg_admin_client::types::BfdPeerState; use nexus_db_queries::context::OpContext; use nexus_types::external_api::shared::{BfdState, BfdStatus}; use omicron_common::api::{external::Error, internal::shared::SwitchLocation}; -use std::sync::Arc; impl super::Nexus { - fn mg_client_for_switch_location( + async fn mg_client_for_switch_location( &self, switch: SwitchLocation, - ) -> Result, Error> { - let mg_client: Arc = self - .mg_clients + ) -> Result { + let mg_client: mg_admin_client::Client = self + .mg_clients() + .await + .map_err(|e| { + Error::internal_error(&format!("failed to get mg clients: {e}")) + })? 
.get(&switch) .ok_or_else(|| { Error::not_found_by_name( @@ -64,7 +67,7 @@ impl super::Nexus { // be updated for multirack. let mut result = Vec::new(); for s in &[SwitchLocation::Switch0, SwitchLocation::Switch1] { - let mg_client = self.mg_client_for_switch_location(*s)?; + let mg_client = self.mg_client_for_switch_location(*s).await?; let status = mg_client .inner .get_bfd_peers() diff --git a/nexus/src/app/bgp.rs b/nexus/src/app/bgp.rs index 51d22dbced..9e609712e2 100644 --- a/nexus/src/app/bgp.rs +++ b/nexus/src/app/bgp.rs @@ -8,8 +8,8 @@ use nexus_db_model::{BgpAnnounceSet, BgpAnnouncement, BgpConfig}; use nexus_db_queries::context::OpContext; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::{ - BgpImportedRouteIpv4, BgpPeerStatus, CreateResult, DeleteResult, Ipv4Net, - ListResultVec, LookupResult, NameOrId, + self, BgpImportedRouteIpv4, BgpPeerStatus, CreateResult, DeleteResult, + Ipv4Net, ListResultVec, LookupResult, NameOrId, }; impl super::Nexus { @@ -88,7 +88,11 @@ impl super::Nexus { ) -> ListResultVec { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; let mut result = Vec::new(); - for (switch, client) in &self.mg_clients { + for (switch, client) in &self.mg_clients().await.map_err(|e| { + external::Error::internal_error(&format!( + "failed to get mg clients: {e}" + )) + })? { let router_info = match client.inner.get_routers().await { Ok(result) => result.into_inner(), Err(e) => { @@ -126,7 +130,11 @@ impl super::Nexus { ) -> ListResultVec { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; let mut result = Vec::new(); - for (switch, client) in &self.mg_clients { + for (switch, client) in &self.mg_clients().await.map_err(|e| { + external::Error::internal_error(&format!( + "failed to get mg clients: {e}" + )) + })? 
{ let imported: Vec = match client .inner .get_imported4(&mg_admin_client::types::GetImported4Request { diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 741b5b8b6d..c345809f4d 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -28,7 +28,6 @@ use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost; use sled_agent_client::types::SetVirtualNetworkInterfaceHost; use std::collections::HashSet; use std::str::FromStr; -use std::sync::Arc; use uuid::Uuid; impl super::Nexus { @@ -464,7 +463,7 @@ impl super::Nexus { probe_id: Uuid, sled_ip_address: std::net::Ipv6Addr, ip_index_filter: Option, - dpd_client: &Arc, + dpd_client: &dpd_client::Client, ) -> Result<(), Error> { let log = &self.log; @@ -738,7 +737,12 @@ impl super::Nexus { "instance_id" => ?instance_id, "switch" => switch.to_string()); - let client_result = self.dpd_clients.get(switch).ok_or_else(|| { + let clients = self.dpd_clients().await.map_err(|e| { + Error::internal_error(&format!( + "failed to get dpd clients: {e}" + )) + })?; + let client_result = clients.get(switch).ok_or_else(|| { Error::internal_error(&format!( "unable to find dendrite client for {switch}" )) @@ -823,7 +827,13 @@ impl super::Nexus { "probe_id" => %probe_id, "switch" => switch.to_string()); - let client_result = self.dpd_clients.get(switch).ok_or_else(|| { + let dpd_clients = self.dpd_clients().await.map_err(|e| { + Error::internal_error(&format!( + "unable to get dpd_clients: {e}" + )) + })?; + + let client_result = dpd_clients.get(switch).ok_or_else(|| { Error::internal_error(&format!( "unable to find dendrite client for {switch}" )) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index d387998f6a..9d94e08a5d 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -30,6 +30,7 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::shared::SwitchLocation; use slog::Logger; use std::collections::HashMap; +use 
std::net::SocketAddrV6; use std::net::{IpAddr, Ipv6Addr}; use std::sync::Arc; use uuid::Uuid; @@ -181,12 +182,6 @@ pub struct Nexus { // https://github.com/oxidecomputer/omicron/issues/3732 external_dns_servers: Vec, - /// Mapping of SwitchLocations to their respective Dendrite Clients - dpd_clients: HashMap>, - - /// Map switch location to maghemite admin clients. - mg_clients: HashMap>, - /// Background tasks background_tasks: background::BackgroundTasks, @@ -372,8 +367,6 @@ impl Nexus { &background_ctx, Arc::clone(&db_datastore), &config.pkg.background_tasks, - &dpd_clients, - &mg_clients, config.deployment.id, resolver.clone(), saga_request, @@ -422,8 +415,6 @@ impl Nexus { .deployment .external_dns_servers .clone(), - dpd_clients, - mg_clients, background_tasks, default_region_allocation_strategy: config .pkg @@ -875,6 +866,81 @@ impl Nexus { } } } + + pub(crate) async fn dpd_clients( + &self, + ) -> Result, String> { + let mappings = self.switch_zone_address_mappings().await?; + let clients: HashMap = mappings + .iter() + .map(|(location, addr)| { + let port = DENDRITE_PORT; + + let client_state = dpd_client::ClientState { + tag: String::from("nexus"), + log: self.log.new(o!( + "component" => "DpdClient" + )), + }; + + let dpd_client = dpd_client::Client::new( + &format!("http://[{addr}]:{port}"), + client_state, + ); + (*location, dpd_client) + }) + .collect(); + Ok(clients) + } + + pub(crate) async fn mg_clients( + &self, + ) -> Result, String> { + let mappings = self.switch_zone_address_mappings().await?; + let mut clients: Vec<(SwitchLocation, mg_admin_client::Client)> = + vec![]; + for (location, addr) in &mappings { + let port = MGD_PORT; + let socketaddr = + std::net::SocketAddr::V6(SocketAddrV6::new(*addr, port, 0, 0)); + let client = match mg_admin_client::Client::new( + &self.log.clone(), + socketaddr, + ) { + Ok(client) => client, + Err(e) => { + error!( + self.log, + "error building mgd client"; + "location" => %location, + "addr" => %addr, + 
"error" => %e, + ); + continue; + } + }; + clients.push((*location, client)); + } + Ok(clients.into_iter().collect::>()) + } + + async fn switch_zone_address_mappings( + &self, + ) -> Result, String> { + let switch_zone_addresses = match self + .resolver() + .await + .lookup_all_ipv6(ServiceName::Dendrite) + .await + { + Ok(addrs) => addrs, + Err(e) => { + error!(self.log, "failed to resolve addresses for Dendrite services"; "error" => %e); + return Err(e.to_string()); + } + }; + Ok(map_switch_zone_addrs(&self.log, switch_zone_addresses).await) + } } /// For unimplemented endpoints, indicates whether the resource identified diff --git a/nexus/src/app/probe.rs b/nexus/src/app/probe.rs index 0fce9d3431..e85c040a28 100644 --- a/nexus/src/app/probe.rs +++ b/nexus/src/app/probe.rs @@ -72,11 +72,18 @@ impl super::Nexus { self.boundary_switches(&self.opctx_alloc).await?; for switch in &boundary_switches { - let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { + let dpd_clients = self.dpd_clients().await.map_err(|e| { + Error::internal_error(&format!( + "failed to get dpd_clients: {e}" + )) + })?; + + let dpd_client = dpd_clients.get(switch).ok_or_else(|| { Error::internal_error(&format!( "could not find dpd client for {switch}" )) })?; + self.probe_ensure_dpd_config( opctx, probe.id(), diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 35f3337625..4a4a61142e 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -12,7 +12,7 @@ use gateway_client::types::SpType; use ipnetwork::{IpNetwork, Ipv6Network}; use nexus_db_model::DnsGroup; use nexus_db_model::InitialDnsGroup; -use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed}; +use nexus_db_model::INFRA_LOT; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; @@ -54,19 +54,14 @@ use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::internal::shared::ExternalPortDiscovery; use 
sled_agent_client::types::AddSledRequest; -use sled_agent_client::types::EarlyNetworkConfigBody; use sled_agent_client::types::StartSledAgentRequest; use sled_agent_client::types::StartSledAgentRequestBody; -use sled_agent_client::types::{ - BgpConfig, BgpPeerConfig as SledBgpPeerConfig, EarlyNetworkConfig, - PortConfigV1, RackNetworkConfigV1, RouteConfig as SledRouteConfig, -}; + use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; use std::net::IpAddr; -use std::net::Ipv4Addr; use std::num::NonZeroU32; use std::str::FromStr; use uuid::Uuid; @@ -222,47 +217,6 @@ impl super::Nexus { let rack_network_config = &request.rack_network_config; - self.db_datastore - .rack_set_initialized( - opctx, - RackInit { - rack_subnet: rack_network_config.rack_subnet.into(), - rack_id, - blueprint, - services: request.services, - datasets, - service_ip_pool_ranges, - internal_dns, - external_dns, - recovery_silo, - recovery_silo_fq_dns_name, - recovery_user_id: request.recovery_silo.user_name, - recovery_user_password_hash: request - .recovery_silo - .user_password_hash - .into(), - dns_update, - }, - ) - .await?; - - // Plumb the firewall rules for the built-in services - self.plumb_service_firewall_rules(opctx, &[]).await?; - - // We've potentially updated the list of DNS servers and the DNS - // configuration for both internal and external DNS, plus the Silo - // certificates. Activate the relevant background tasks. 
- for task in &[ - &self.background_tasks.task_internal_dns_config, - &self.background_tasks.task_internal_dns_servers, - &self.background_tasks.task_external_dns_config, - &self.background_tasks.task_external_dns_servers, - &self.background_tasks.task_external_endpoints, - &self.background_tasks.task_inventory_collection, - ] { - self.background_tasks.activate(task); - } - // TODO - https://github.com/oxidecomputer/omicron/pull/3359 // register all switches found during rack initialization // identify requested switch from config and associate @@ -342,12 +296,11 @@ impl super::Nexus { // going forward via self.run_saga()? Note that self.create_runnable_saga and // self.execute_saga are currently not available within this scope. info!(self.log, "Recording Rack Network Configuration"); - let address_lot_name = - Name::from_str("initial-infra").map_err(|e| { - Error::internal_error(&format!( - "unable to use `initial-infra` as `Name`: {e}" - )) - })?; + let address_lot_name = Name::from_str(INFRA_LOT).map_err(|e| { + Error::internal_error(&format!( + "unable to use `initial-infra` as `Name`: {e}" + )) + })?; let identity = IdentityMetadataCreateParams { name: address_lot_name.clone(), description: "initial infrastructure ip address lot".to_string(), @@ -391,7 +344,8 @@ impl super::Nexus { let address_lot_name: Name = format!("as{}-lot", bgp_config.asn).parse().unwrap(); - self.db_datastore + match self + .db_datastore .address_lot_create( &opctx, &AddressLotCreate { @@ -414,14 +368,19 @@ impl super::Nexus { }, ) .await - .map_err(|e| { - Error::internal_error(&format!( - "unable to create address lot for BGP as {}: {}", - bgp_config.asn, e - )) - })?; + { + Ok(_) => Ok(()), + Err(e) => match e { + Error::ObjectAlreadyExists { .. 
} => Ok(()), + _ => Err(Error::internal_error(&format!( + "unable to create address lot for BGP as {}: {e}", + bgp_config.asn + ))), + }, + }?; - self.db_datastore + match self + .db_datastore .bgp_create_announce_set( &opctx, &BgpAnnounceSetCreate { @@ -447,14 +406,19 @@ impl super::Nexus { }, ) .await - .map_err(|e| { - Error::internal_error(&format!( - "unable to create bgp announce set for as {}: {}", - bgp_config.asn, e - )) - })?; + { + Ok(_) => Ok(()), + Err(e) => match e { + Error::ObjectAlreadyExists { .. } => Ok(()), + _ => Err(Error::internal_error(&format!( + "unable to create bgp announce set for as {}: {e}", + bgp_config.asn + ))), + }, + }?; - self.db_datastore + match self + .db_datastore .bgp_config_set( &opctx, &BgpConfigCreate { @@ -471,12 +435,16 @@ impl super::Nexus { }, ) .await - .map_err(|e| { - Error::internal_error(&format!( - "unable to set bgp config for as {}: {}", - bgp_config.asn, e - )) - })?; + { + Ok(_) => Ok(()), + Err(e) => match e { + Error::ObjectAlreadyExists { .. 
} => Ok(()), + _ => Err(Error::internal_error(&format!( + "unable to set bgp config for as {}: {e}", + bgp_config.asn + ))), + }, + }?; } for (idx, uplink_config) in rack_network_config.ports.iter().enumerate() @@ -615,6 +583,47 @@ impl super::Nexus { self.initial_bootstore_sync(&opctx).await?; + self.db_datastore + .rack_set_initialized( + opctx, + RackInit { + rack_subnet: rack_network_config.rack_subnet.into(), + rack_id, + blueprint, + services: request.services, + datasets, + service_ip_pool_ranges, + internal_dns, + external_dns, + recovery_silo, + recovery_silo_fq_dns_name, + recovery_user_id: request.recovery_silo.user_name, + recovery_user_password_hash: request + .recovery_silo + .user_password_hash + .into(), + dns_update, + }, + ) + .await?; + + // Plumb the firewall rules for the built-in services + self.plumb_service_firewall_rules(opctx, &[]).await?; + + // We've potentially updated the list of DNS servers and the DNS + // configuration for both internal and external DNS, plus the Silo + // certificates. Activate the relevant background tasks. 
+ for task in &[ + &self.background_tasks.task_internal_dns_config, + &self.background_tasks.task_internal_dns_servers, + &self.background_tasks.task_external_dns_config, + &self.background_tasks.task_external_dns_servers, + &self.background_tasks.task_external_endpoints, + &self.background_tasks.task_inventory_collection, + ] { + self.background_tasks.activate(task); + } + Ok(()) } @@ -673,115 +682,6 @@ impl super::Nexus { Ok(()) } - pub(crate) async fn bootstore_network_config( - &self, - opctx: &OpContext, - ) -> Result { - let rack = self.rack_lookup(opctx, &self.rack_id).await?; - let subnet = rack_subnet(rack.rack_subnet)?; - - let db_ports = self.active_port_settings(opctx).await?; - let mut ports = Vec::new(); - let mut bgp = Vec::new(); - for (port, info) in &db_ports { - let mut peer_info = Vec::new(); - for p in &info.bgp_peers { - let bgp_config = - self.bgp_config_get(&opctx, p.bgp_config_id.into()).await?; - let announcements = self - .bgp_announce_list( - &opctx, - &params::BgpAnnounceSetSelector { - name_or_id: bgp_config.bgp_announce_set_id.into(), - }, - ) - .await?; - let addr = match p.addr { - ipnetwork::IpNetwork::V4(addr) => addr, - ipnetwork::IpNetwork::V6(_) => continue, //TODO v6 - }; - peer_info.push((p, bgp_config.asn.0, addr.ip())); - bgp.push(BgpConfig { - asn: bgp_config.asn.0, - originate: announcements - .iter() - .filter_map(|a| match a.network { - IpNetwork::V4(net) => Some(net.into()), - //TODO v6 - _ => None, - }) - .collect(), - }); - } - - let p = PortConfigV1 { - routes: info - .routes - .iter() - .map(|r| SledRouteConfig { - destination: r.dst, - nexthop: r.gw.ip(), - }) - .collect(), - addresses: info.addresses.iter().map(|a| a.address).collect(), - bgp_peers: peer_info - .iter() - .map(|(p, asn, addr)| SledBgpPeerConfig { - addr: *addr, - asn: *asn, - port: port.port_name.clone(), - hold_time: Some(p.hold_time.0.into()), - connect_retry: Some(p.connect_retry.0.into()), - delay_open: Some(p.delay_open.0.into()), - 
idle_hold_time: Some(p.idle_hold_time.0.into()), - keepalive: Some(p.keepalive.0.into()), - }) - .collect(), - switch: port.switch_location.parse().unwrap(), - port: port.port_name.clone(), - uplink_port_fec: info - .links - .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 - .map(|l| l.fec) - .unwrap_or(SwitchLinkFec::None) - .into(), - uplink_port_speed: info - .links - .get(0) //TODO https://github.com/oxidecomputer/omicron/issues/3062 - .map(|l| l.speed) - .unwrap_or(SwitchLinkSpeed::Speed100G) - .into(), - autoneg: info - .links - .get(0) //TODO breakout support - .map(|l| l.autoneg) - .unwrap_or(false), - }; - - ports.push(p); - } - - let result = EarlyNetworkConfig { - generation: 0, - schema_version: 1, - body: EarlyNetworkConfigBody { - ntp_servers: Vec::new(), //TODO - rack_network_config: Some(RackNetworkConfigV1 { - rack_subnet: subnet, - //TODO: We need to remove these. They are inconsistent with - // a generic set of addresses on ports that may not be - // contiguous. - infra_ip_first: Ipv4Addr::UNSPECIFIED, - infra_ip_last: Ipv4Addr::UNSPECIFIED, - ports, - bgp, - }), - }, - }; - - Ok(result) - } - /// Return the list of sleds that are inserted into an initialized rack /// but not yet initialized as part of a rack. // diff --git a/nexus/src/app/sagas/loopback_address_create.rs b/nexus/src/app/sagas/loopback_address_create.rs deleted file mode 100644 index c32a5f387d..0000000000 --- a/nexus/src/app/sagas/loopback_address_create.rs +++ /dev/null @@ -1,178 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use super::{NexusActionContext, NEXUS_DPD_TAG}; -use crate::app::sagas::{ - declare_saga_actions, ActionRegistry, NexusSaga, SagaInitError, -}; -use crate::external_api::params; -use anyhow::Error; -use nexus_db_queries::authn; -use nexus_db_queries::authz; -use nexus_db_queries::db::model::LoopbackAddress; -use omicron_common::api::internal::shared::SwitchLocation; -use omicron_common::retry_until_known_result; -use serde::{Deserialize, Serialize}; -use std::sync::Arc; -use steno::ActionError; - -#[derive(Debug, Deserialize, Serialize)] -pub(crate) struct Params { - pub serialized_authn: authn::saga::Serialized, - pub loopback_address: params::LoopbackAddressCreate, -} - -declare_saga_actions! { - loopback_address_create; - CREATE_LOOPBACK_ADDRESS_RECORD -> "created_loopback_address_record" { - + slc_loopback_address_create_record - - slc_loopback_address_delete_record - } - CREATE_LOOPBACK_ADDRESS -> "create_loopback_address" { - + slc_loopback_address_create - } -} - -#[derive(Debug)] -pub(crate) struct SagaLoopbackAddressCreate; -impl NexusSaga for SagaLoopbackAddressCreate { - const NAME: &'static str = "loopback-address-create"; - type Params = Params; - - fn register_actions(registry: &mut ActionRegistry) { - loopback_address_create_register_actions(registry); - } - - fn make_saga_dag( - _params: &Self::Params, - mut builder: steno::DagBuilder, - ) -> Result { - builder.append(create_loopback_address_record_action()); - builder.append(create_loopback_address_action()); - - Ok(builder.build()?) 
- } -} - -async fn slc_loopback_address_create_record( - sagactx: NexusActionContext, -) -> Result { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - - let address_lot_lookup = nexus - .address_lot_lookup(&opctx, params.loopback_address.address_lot.clone()) - .map_err(ActionError::action_failed)?; - let (.., authz_address_lot) = address_lot_lookup - .lookup_for(authz::Action::CreateChild) - .await - .map_err(|e| ActionError::action_failed(e.to_string()))?; - - // Just a check to make sure a valid rack id was passed in. - nexus - .rack_lookup(&opctx, &params.loopback_address.rack_id) - .await - .map_err(ActionError::action_failed)?; - - // If there is a failure down the road, this record will get cleaned up by - // the unwind action slc_loopback_address_delete_record. In the case that - // the saga is retried, we will just retry with a new id, returning that to - // the caller if the saga retry is successful. Having intermediate ids here - // is ok. 
- let value = nexus - .db_datastore - .loopback_address_create( - &opctx, - &params.loopback_address, - None, - &authz_address_lot, - ) - .await - .map_err(ActionError::action_failed)?; - - Ok(value) -} - -async fn slc_loopback_address_delete_record( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - - let loopback_address_lookup = nexus.loopback_address_lookup( - &opctx, - params.loopback_address.rack_id, - params.loopback_address.switch_location.clone().into(), - params.loopback_address.address.into(), - )?; - - let (.., authz_loopback_address) = - loopback_address_lookup.lookup_for(authz::Action::Delete).await?; - - nexus - .db_datastore - .loopback_address_delete(&opctx, &authz_loopback_address) - .await?; - - Ok(()) -} - -async fn slc_loopback_address_create( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let log = sagactx.user_data().log(); - - let dpd_client: Arc = - select_dendrite_client(&sagactx).await?; - - retry_until_known_result(log, || async { - dpd_client - .ensure_loopback_created( - log, - params.loopback_address.address, - NEXUS_DPD_TAG, - ) - .await - }) - .await - .map_err(|e| ActionError::action_failed(e.to_string())) -} - -pub(crate) async fn select_dendrite_client( - sagactx: &NexusActionContext, -) -> Result, ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - - let switch_location: SwitchLocation = params - .loopback_address - .switch_location - .as_str() - .parse() - .map_err(ActionError::action_failed)?; - let dpd_client: Arc = osagactx - .nexus() - .dpd_clients - .get(&switch_location) - .ok_or_else(|| { - ActionError::action_failed(format!( - "requested switch not available: {switch_location}" - )) - })? 
- .clone(); - Ok(dpd_client) -} diff --git a/nexus/src/app/sagas/loopback_address_delete.rs b/nexus/src/app/sagas/loopback_address_delete.rs deleted file mode 100644 index 822a360acf..0000000000 --- a/nexus/src/app/sagas/loopback_address_delete.rs +++ /dev/null @@ -1,190 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use super::NexusActionContext; -use crate::app::sagas::{ - declare_saga_actions, ActionRegistry, NexusSaga, SagaInitError, -}; -use crate::external_api::params; -use anyhow::{anyhow, Error}; -use nexus_db_queries::authn; -use nexus_db_queries::authz; -use nexus_db_queries::db::model::{LoopbackAddress, Name}; -use nexus_types::identity::Asset; -use omicron_common::api::external::{IpNet, NameOrId}; -use omicron_common::retry_until_known_result; -use serde::{Deserialize, Serialize}; -use std::sync::Arc; -use steno::ActionError; -use uuid::Uuid; - -#[derive(Debug, Deserialize, Serialize)] -pub(crate) struct Params { - pub serialized_authn: authn::saga::Serialized, - pub rack_id: Uuid, - pub switch_location: Name, - pub address: IpNet, -} - -declare_saga_actions! 
{ - loopback_address_delete; - DELETE_LOOPBACK_ADDRESS_RECORD -> "deleted_loopback_address_record" { - + slc_loopback_address_delete_record - - slc_loopback_address_undelete_record - } - DELETE_LOOPBACK_ADDRESS -> "delete_loopback_address" { - + slc_loopback_address_delete - } -} - -#[derive(Debug)] -pub(crate) struct SagaLoopbackAddressDelete; -impl NexusSaga for SagaLoopbackAddressDelete { - const NAME: &'static str = "loopback-address-delete"; - type Params = Params; - - fn register_actions(registry: &mut ActionRegistry) { - loopback_address_delete_register_actions(registry); - } - - fn make_saga_dag( - _params: &Self::Params, - mut builder: steno::DagBuilder, - ) -> Result { - builder.append(delete_loopback_address_record_action()); - builder.append(delete_loopback_address_action()); - - Ok(builder.build()?) - } -} - -async fn slc_loopback_address_delete_record( - sagactx: NexusActionContext, -) -> Result { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - - let loopback_address_lookup = nexus - .loopback_address_lookup( - &opctx, - params.rack_id, - params.switch_location, - params.address, - ) - .map_err(ActionError::action_failed)?; - - let (.., authz_loopback_address) = loopback_address_lookup - .lookup_for(authz::Action::Delete) - .await - .map_err(ActionError::action_failed)?; - - let value = nexus - .db_datastore - .loopback_address_get(&opctx, &authz_loopback_address) - .await - .map_err(ActionError::action_failed)?; - - nexus - .db_datastore - .loopback_address_delete(&opctx, &authz_loopback_address) - .await - .map_err(ActionError::action_failed)?; - - Ok(value) -} - -async fn slc_loopback_address_undelete_record( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - - let 
opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - - let value = - sagactx.lookup::("deleted_loopback_address_record")?; - - let address_lot_id = nexus - .db_datastore - .address_lot_id_for_block_id(&opctx, value.address_lot_block_id) - .await?; - - let arg = params::LoopbackAddressCreate { - address_lot: NameOrId::Id(address_lot_id), - rack_id: value.rack_id, - switch_location: value - .switch_location - .parse() - .map_err(|e| anyhow!("bad switch location name: {}", e))?, - address: value.address.ip(), - mask: value.address.prefix(), - anycast: value.anycast, - }; - - let address_lot_lookup = nexus - .address_lot_lookup(&opctx, arg.address_lot.clone()) - .map_err(ActionError::action_failed)?; - let (.., authz_address_lot) = address_lot_lookup - .lookup_for(authz::Action::Modify) - .await - .map_err(|e| ActionError::action_failed(e.to_string()))?; - - // Just a check to make sure a valid rack id was passed in. - nexus - .rack_lookup(&opctx, &arg.rack_id) - .await - .map_err(ActionError::action_failed)?; - - nexus - .db_datastore - .loopback_address_create( - &opctx, - &arg, - Some(value.id()), - &authz_address_lot, - ) - .await?; - - Ok(()) -} - -async fn slc_loopback_address_delete( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let log = sagactx.user_data().log(); - let switch = &params - .switch_location - .as_str() - .parse() - .map_err(|e| ActionError::action_failed(format!("{e:#?}")))?; - - let dpd_client: Arc = osagactx - .nexus() - .dpd_clients - .get(&switch) - .ok_or_else(|| { - ActionError::action_failed(format!( - "unable to retrieve dendrite client for {switch}" - )) - })? 
- .clone(); - - retry_until_known_result(log, || async { - dpd_client.ensure_loopback_deleted(log, params.address.ip()).await - }) - .await - .map_err(|e| ActionError::action_failed(e.to_string())) -} diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index 01b01c4571..e725c1f093 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -31,14 +31,9 @@ pub mod instance_ip_attach; pub mod instance_ip_detach; pub mod instance_migrate; pub mod instance_start; -pub mod loopback_address_create; -pub mod loopback_address_delete; pub mod project_create; pub mod snapshot_create; pub mod snapshot_delete; -pub mod switch_port_settings_apply; -pub mod switch_port_settings_clear; -pub mod switch_port_settings_common; pub mod test_saga; pub mod volume_delete; pub mod volume_remove_rop; @@ -145,20 +140,6 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); - ::register_actions( - &mut registry, - ); - ::register_actions( - &mut registry, - ); - ::register_actions( - &mut registry, - ); - ::register_actions( - &mut registry, - ); ::register_actions( &mut registry, ); @@ -321,8 +302,6 @@ macro_rules! declare_saga_actions { }; } -use omicron_common::OMICRON_DPD_TAG as NEXUS_DPD_TAG; - pub(crate) use __action_name; pub(crate) use __emit_action; pub(crate) use __stringify_ident; diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs deleted file mode 100644 index 44f2f77ea1..0000000000 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ /dev/null @@ -1,586 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use super::{NexusActionContext, NEXUS_DPD_TAG}; -use crate::app::sagas::switch_port_settings_common::{ - api_to_dpd_port_settings, ensure_switch_port_bgp_settings, - ensure_switch_port_uplink, select_dendrite_client, select_mg_client, - switch_sled_agent, write_bootstore_config, -}; -use crate::app::sagas::{ - declare_saga_actions, ActionRegistry, NexusSaga, SagaInitError, -}; -use anyhow::Error; -use db::datastore::SwitchPortSettingsCombinedResult; -use dpd_client::types::PortId; -use mg_admin_client::types::{ - AddStaticRoute4Request, DeleteStaticRoute4Request, Prefix4, StaticRoute4, - StaticRoute4List, -}; -use nexus_db_model::NETWORK_KEY; -use nexus_db_queries::db::datastore::UpdatePrecondition; -use nexus_db_queries::{authn, db}; -use omicron_common::api::external::{self, NameOrId}; -use omicron_common::api::internal::shared::SwitchLocation; -use omicron_common::retry_until_known_result; -use serde::{Deserialize, Serialize}; -use std::net::IpAddr; -use std::str::FromStr; -use std::sync::Arc; -use steno::ActionError; -use uuid::Uuid; - -// switch port settings apply saga: input parameters - -#[derive(Debug, Deserialize, Serialize)] -pub(crate) struct Params { - pub serialized_authn: authn::saga::Serialized, - pub switch_port_id: Uuid, - pub switch_port_settings_id: Uuid, - pub switch_port_name: String, -} - -// switch port settings apply: actions - -declare_saga_actions! 
{ - switch_port_settings_apply; - ASSOCIATE_SWITCH_PORT -> "original_switch_port_settings_id" { - + spa_associate_switch_port - - spa_disassociate_switch_port - } - GET_SWITCH_PORT_SETTINGS -> "switch_port_settings" { - + spa_get_switch_port_settings - } - ENSURE_SWITCH_PORT_SETTINGS -> "ensure_switch_port_settings" { - + spa_ensure_switch_port_settings - - spa_undo_ensure_switch_port_settings - } - ENSURE_SWITCH_PORT_UPLINK -> "ensure_switch_port_uplink" { - + spa_ensure_switch_port_uplink - - spa_undo_ensure_switch_port_uplink - } - ENSURE_SWITCH_ROUTES -> "ensure_switch_routes" { - + spa_ensure_switch_routes - - spa_undo_ensure_switch_routes - } - ENSURE_SWITCH_PORT_BGP_SETTINGS -> "ensure_switch_port_bgp_settings" { - + spa_ensure_switch_port_bgp_settings - - spa_undo_ensure_switch_port_bgp_settings - } - ENSURE_SWITCH_PORT_BOOTSTORE_NETWORK_SETTINGS -> "ensure_switch_port_bootstore_network_settings" { - + spa_ensure_switch_port_bootstore_network_settings - - spa_undo_ensure_switch_port_bootstore_network_settings - } -} - -// switch port settings apply saga: definition - -#[derive(Debug)] -pub(crate) struct SagaSwitchPortSettingsApply; - -impl NexusSaga for SagaSwitchPortSettingsApply { - const NAME: &'static str = "switch-port-settings-apply"; - type Params = Params; - - fn register_actions(registry: &mut ActionRegistry) { - switch_port_settings_apply_register_actions(registry); - } - - fn make_saga_dag( - _params: &Self::Params, - mut builder: steno::DagBuilder, - ) -> Result { - builder.append(associate_switch_port_action()); - builder.append(get_switch_port_settings_action()); - builder.append(ensure_switch_port_settings_action()); - builder.append(ensure_switch_port_uplink_action()); - builder.append(ensure_switch_routes_action()); - builder.append(ensure_switch_port_bgp_settings_action()); - builder.append(ensure_switch_port_bootstore_network_settings_action()); - Ok(builder.build()?) 
- } -} - -async fn spa_associate_switch_port( - sagactx: NexusActionContext, -) -> Result, ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - // first get the current association so we fall back to this on failure - let port = - nexus.get_switch_port(&opctx, params.switch_port_id).await.map_err( - |e| ActionError::action_failed(format!("get switch port: {e}")), - )?; - - // update the switch port settings association - nexus - .set_switch_port_settings_id( - &opctx, - params.switch_port_id, - Some(params.switch_port_settings_id), - UpdatePrecondition::DontCare, - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "set switch port settings id {e}" - )) - })?; - - Ok(port.port_settings_id) -} - -async fn spa_get_switch_port_settings( - sagactx: NexusActionContext, -) -> Result { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let port_settings = nexus - .switch_port_settings_get( - &opctx, - &NameOrId::Id(params.switch_port_settings_id), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!("get switch port settings: {e}")) - })?; - - Ok(port_settings) -} - -async fn spa_ensure_switch_port_settings( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let log = sagactx.user_data().log(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let settings = sagactx - .lookup::("switch_port_settings")?; - - let port_id: PortId = - PortId::from_str(¶ms.switch_port_name).map_err(|e| { - ActionError::action_failed(format!("parse port id: {e}")) - })?; - - let dpd_client: Arc = - 
select_dendrite_client(&sagactx, &opctx, params.switch_port_id).await?; - - let dpd_port_settings = - api_to_dpd_port_settings(&settings).map_err(|e| { - ActionError::action_failed(format!( - "translate api port settings to dpd port settings: {e}", - )) - })?; - - retry_until_known_result(log, || async { - dpd_client - .port_settings_apply( - &port_id, - Some(NEXUS_DPD_TAG), - &dpd_port_settings, - ) - .await - }) - .await - .map_err(|e| match e { - progenitor_client::Error::ErrorResponse(ref er) => { - if er.status().is_client_error() { - ActionError::action_failed(format!( - "bad request: dpd port settings apply {}", - er.message, - )) - } else { - ActionError::action_failed(format!( - "dpd port settings apply {e}" - )) - } - } - _ => ActionError::action_failed(format!("dpd port settings apply {e}")), - })?; - - Ok(()) -} - -async fn spa_ensure_switch_routes( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let settings = sagactx - .lookup::("switch_port_settings")?; - - let mut rq = AddStaticRoute4Request { - routes: StaticRoute4List { list: Vec::new() }, - }; - for r in settings.routes { - let nexthop = match r.gw.ip() { - IpAddr::V4(v4) => v4, - IpAddr::V6(_) => continue, - }; - let prefix = match r.dst.ip() { - IpAddr::V4(v4) => Prefix4 { value: v4, length: r.dst.prefix() }, - IpAddr::V6(_) => continue, - }; - let sr = StaticRoute4 { nexthop, prefix }; - rq.routes.list.push(sr); - } - - let mg_client: Arc = - select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; - - mg_client.inner.static_add_v4_route(&rq).await.map_err(|e| { - ActionError::action_failed(format!("mgd static route add {e}")) - })?; - - Ok(()) -} - -async fn spa_undo_ensure_switch_routes( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let params = sagactx.saga_params::()?; - let opctx = 
crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let settings = sagactx - .lookup::("switch_port_settings")?; - - let mut rq = DeleteStaticRoute4Request { - routes: StaticRoute4List { list: Vec::new() }, - }; - - for r in settings.routes { - let nexthop = match r.gw.ip() { - IpAddr::V4(v4) => v4, - IpAddr::V6(_) => continue, - }; - let prefix = match r.gw.ip() { - IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, - IpAddr::V6(_) => continue, - }; - let sr = StaticRoute4 { nexthop, prefix }; - rq.routes.list.push(sr); - } - - let mg_client: Arc = - select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; - - mg_client.inner.static_remove_v4_route(&rq).await.map_err(|e| { - ActionError::action_failed(format!("mgd static route remove {e}")) - })?; - - Ok(()) -} - -async fn spa_undo_ensure_switch_port_settings( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let log = sagactx.user_data().log(); - - let port_id: PortId = PortId::from_str(¶ms.switch_port_name) - .map_err(|e| external::Error::internal_error(e.to_string().as_str()))?; - - let orig_port_settings_id = sagactx - .lookup::>("original_switch_port_settings_id") - .map_err(|e| external::Error::internal_error(&e.to_string()))?; - - let dpd_client = - select_dendrite_client(&sagactx, &opctx, params.switch_port_id).await?; - - let id = match orig_port_settings_id { - Some(id) => id, - None => { - retry_until_known_result(log, || async { - dpd_client - .port_settings_clear(&port_id, Some(NEXUS_DPD_TAG)) - .await - }) - .await - .map_err(|e| external::Error::internal_error(&e.to_string()))?; - - return Ok(()); - } - }; - - let settings = nexus - .switch_port_settings_get(&opctx, &NameOrId::Id(id)) - .await - .map_err(|e| { - 
ActionError::action_failed(format!("switch port settings get: {e}")) - })?; - - let dpd_port_settings = - api_to_dpd_port_settings(&settings).map_err(|e| { - ActionError::action_failed(format!( - "translate api to dpd port settings {e}" - )) - })?; - - retry_until_known_result(log, || async { - dpd_client - .port_settings_apply( - &port_id, - Some(NEXUS_DPD_TAG), - &dpd_port_settings, - ) - .await - }) - .await - .map_err(|e| external::Error::internal_error(&e.to_string()))?; - - Ok(()) -} - -async fn spa_undo_ensure_switch_port_bgp_settings( - sagactx: NexusActionContext, -) -> Result<(), Error> { - use mg_admin_client::types::DeleteNeighborRequest; - - let osagactx = sagactx.user_data(); - let nexus = osagactx.nexus(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let settings = sagactx - .lookup::("switch_port_settings") - .map_err(|e| { - ActionError::action_failed(format!( - "lookup switch port settings (bgp undo): {e}" - )) - })?; - - let mg_client: Arc = - select_mg_client(&sagactx, &opctx, params.switch_port_id) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "select mg client (undo): {e}" - )) - })?; - - for peer in settings.bgp_peers { - let config = nexus - .bgp_config_get(&opctx, peer.bgp_config_id.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!("delete bgp config: {e}")) - })?; - - mg_client - .inner - .delete_neighbor(&DeleteNeighborRequest { - asn: *config.asn, - addr: peer.addr.ip(), - }) - .await - .map_err(|e| { - ActionError::action_failed(format!("delete neighbor: {e}")) - })?; - } - - Ok(()) -} - -async fn spa_ensure_switch_port_bootstore_network_settings( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let nexus = osagactx.nexus(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - 
¶ms.serialized_authn, - ); - - // Just choosing the sled agent associated with switch0 for no reason. - let sa = switch_sled_agent(SwitchLocation::Switch0, &sagactx).await?; - - let mut config = - nexus.bootstore_network_config(&opctx).await.map_err(|e| { - ActionError::action_failed(format!( - "read nexus bootstore network config: {e}" - )) - })?; - - let generation = nexus - .datastore() - .bump_bootstore_generation(&opctx, NETWORK_KEY.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "bump bootstore network generation number: {e}" - )) - })?; - - config.generation = generation as u64; - write_bootstore_config(&sa, &config).await?; - - Ok(()) -} - -async fn spa_undo_ensure_switch_port_bootstore_network_settings( - sagactx: NexusActionContext, -) -> Result<(), Error> { - // The overall saga update failed but the bootstore udpate succeeded. - // Between now and then other updates may have happened which prevent us - // from simply undoing the changes we did before, as we may inadvertently - // roll back changes at the intersection of this failed update and other - // succesful updates. The only thing we can really do here is attempt a - // complete update of the bootstore network settings based on the current - // state in the Nexus databse which, we assume to be consistent at any point - // in time. - - let nexus = sagactx.user_data().nexus(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - // Just choosing the sled agent associated with switch0 for no reason. 
- let sa = switch_sled_agent(SwitchLocation::Switch0, &sagactx).await?; - - let config = nexus.bootstore_network_config(&opctx).await?; - write_bootstore_config(&sa, &config).await?; - - Ok(()) -} - -async fn spa_ensure_switch_port_uplink( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - ensure_switch_port_uplink( - sagactx, - &opctx, - false, - None, - params.switch_port_id, - params.switch_port_name, - ) - .await -} - -async fn spa_undo_ensure_switch_port_uplink( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - Ok(ensure_switch_port_uplink( - sagactx, - &opctx, - true, - None, - params.switch_port_id, - params.switch_port_name, - ) - .await?) -} - -// a common route representation for dendrite and port settings -#[derive(Debug, Clone, Eq, Hash, PartialEq, Serialize, Deserialize)] -pub(crate) struct Route { - pub dst: IpAddr, - pub masklen: u8, - pub nexthop: Option, -} - -async fn spa_disassociate_switch_port( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - // set the port settings id back to what it was before the saga started - let orig_port_settings_id = - sagactx.lookup::>("original_switch_port_settings_id")?; - - nexus - .set_switch_port_settings_id( - &opctx, - params.switch_port_id, - orig_port_settings_id, - UpdatePrecondition::Value(params.switch_port_settings_id), - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "set switch port settings id for disassociate: {e}" - )) - })?; - - Ok(()) -} - -async fn 
spa_ensure_switch_port_bgp_settings( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let settings = sagactx - .lookup::("switch_port_settings") - .map_err(|e| { - ActionError::action_failed(format!( - "lookup switch port settings: {e}" - )) - })?; - - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - ensure_switch_port_bgp_settings( - sagactx, - &opctx, - settings, - params.switch_port_name.clone(), - params.switch_port_id, - ) - .await -} diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs deleted file mode 100644 index 2e35530ef1..0000000000 --- a/nexus/src/app/sagas/switch_port_settings_clear.rs +++ /dev/null @@ -1,546 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use super::{NexusActionContext, NEXUS_DPD_TAG}; -use crate::app::sagas::switch_port_settings_common::{ - api_to_dpd_port_settings, apply_bootstore_update, bootstore_update, - ensure_switch_port_bgp_settings, ensure_switch_port_uplink, - read_bootstore_config, select_dendrite_client, select_mg_client, - switch_sled_agent, write_bootstore_config, -}; -use crate::app::sagas::{ - declare_saga_actions, ActionRegistry, NexusSaga, SagaInitError, -}; -use anyhow::Error; -use dpd_client::types::PortId; -use mg_admin_client::types::{ - AddStaticRoute4Request, DeleteNeighborRequest, DeleteStaticRoute4Request, - Prefix4, StaticRoute4, StaticRoute4List, -}; -use nexus_db_model::NETWORK_KEY; -use nexus_db_queries::authn; -use nexus_db_queries::db::datastore::UpdatePrecondition; -use omicron_common::api::external::{self, NameOrId, SwitchLocation}; -use omicron_common::retry_until_known_result; -use serde::{Deserialize, Serialize}; -use std::net::IpAddr; -use std::str::FromStr; -use std::sync::Arc; -use steno::ActionError; -use uuid::Uuid; - -#[derive(Debug, Deserialize, Serialize)] -pub(crate) struct Params { - pub serialized_authn: authn::saga::Serialized, - pub switch_port_id: Uuid, - pub port_name: String, -} - -declare_saga_actions! 
{ - switch_port_settings_clear; - DISASSOCIATE_SWITCH_PORT -> "original_switch_port_settings" { - + spa_disassociate_switch_port - - spa_reassociate_switch_port - } - CLEAR_SWITCH_PORT_SETTINGS -> "switch_port_settings" { - + spa_clear_switch_port_settings - - spa_undo_clear_switch_port_settings - } - CLEAR_SWITCH_PORT_ROUTES -> "clear_switch_port_routes" { - + spa_clear_switch_port_routes - - spa_undo_clear_switch_port_routes - } - CLEAR_SWITCH_PORT_UPLINK -> "clear_switch_port_uplink" { - + spa_clear_switch_port_uplink - - spa_undo_clear_switch_port_uplink - } - CLEAR_SWITCH_PORT_BGP_SETTINGS -> "clear_switch_port_bgp_settings" { - + spa_clear_switch_port_bgp_settings - - spa_undo_clear_switch_port_bgp_settings - } - CLEAR_SWITCH_PORT_BOOTSTORE_NETWORK_SETTINGS -> "clear_switch_port_bootstore_network_settings" { - + spa_clear_switch_port_bootstore_network_settings - - spa_undo_clear_switch_port_bootstore_network_settings - } -} - -#[derive(Debug)] -pub(crate) struct SagaSwitchPortSettingsClear; -impl NexusSaga for SagaSwitchPortSettingsClear { - const NAME: &'static str = "switch-port-settings-clear"; - type Params = Params; - - fn register_actions(registry: &mut ActionRegistry) { - switch_port_settings_clear_register_actions(registry); - } - - fn make_saga_dag( - _params: &Self::Params, - mut builder: steno::DagBuilder, - ) -> Result { - builder.append(disassociate_switch_port_action()); - builder.append(clear_switch_port_settings_action()); - builder.append(clear_switch_port_uplink_action()); - builder.append(clear_switch_port_bgp_settings_action()); - builder.append(clear_switch_port_bootstore_network_settings_action()); - Ok(builder.build()?) 
- } -} - -async fn spa_disassociate_switch_port( - sagactx: NexusActionContext, -) -> Result, ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - // first get the current association so we fall back to this on failure - let port = nexus - .get_switch_port(&opctx, params.switch_port_id) - .await - .map_err(ActionError::action_failed)?; - - // update the switch port settings association - nexus - .set_switch_port_settings_id( - &opctx, - params.switch_port_id, - None, - UpdatePrecondition::DontCare, - ) - .await - .map_err(ActionError::action_failed)?; - - Ok(port.port_settings_id) -} - -async fn spa_reassociate_switch_port( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - // set the port settings id back to what it was before the saga started - let orig_port_settings_id = - sagactx.lookup::>("original_switch_port_settings")?; - - nexus - .set_switch_port_settings_id( - &opctx, - params.switch_port_id, - orig_port_settings_id, - UpdatePrecondition::Null, - ) - .await - .map_err(ActionError::action_failed)?; - - Ok(()) -} - -async fn spa_clear_switch_port_settings( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let log = sagactx.user_data().log(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let port_id: PortId = PortId::from_str(¶ms.port_name) - .map_err(|e| ActionError::action_failed(e.to_string()))?; - - let dpd_client = - select_dendrite_client(&sagactx, &opctx, params.switch_port_id).await?; - - retry_until_known_result(log, || async { - 
dpd_client.port_settings_clear(&port_id, Some(NEXUS_DPD_TAG)).await - }) - .await - .map_err(|e| ActionError::action_failed(e.to_string()))?; - - Ok(()) -} - -async fn spa_undo_clear_switch_port_settings( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - let log = sagactx.user_data().log(); - - let port_id: PortId = PortId::from_str(¶ms.port_name) - .map_err(|e| external::Error::internal_error(e.to_string().as_str()))?; - - let orig_port_settings_id = sagactx - .lookup::>("original_switch_port_settings_id") - .map_err(|e| external::Error::internal_error(&e.to_string()))?; - - let id = match orig_port_settings_id { - Some(id) => id, - None => return Ok(()), - }; - - let settings = nexus - .switch_port_settings_get(&opctx, &NameOrId::Id(id)) - .await - .map_err(ActionError::action_failed)?; - - let dpd_client = - select_dendrite_client(&sagactx, &opctx, params.switch_port_id).await?; - - let dpd_port_settings = api_to_dpd_port_settings(&settings) - .map_err(ActionError::action_failed)?; - - retry_until_known_result(log, || async { - dpd_client - .port_settings_apply( - &port_id, - Some(NEXUS_DPD_TAG), - &dpd_port_settings, - ) - .await - }) - .await - .map_err(|e| external::Error::internal_error(&e.to_string()))?; - - Ok(()) -} - -async fn spa_clear_switch_port_uplink( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - ensure_switch_port_uplink( - sagactx, - &opctx, - true, - None, - params.switch_port_id, - params.port_name.clone(), - ) - .await -} - -async fn spa_undo_clear_switch_port_uplink( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let id = sagactx - 
.lookup::>("original_switch_port_settings_id") - .map_err(|e| external::Error::internal_error(&e.to_string()))?; - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - Ok(ensure_switch_port_uplink( - sagactx, - &opctx, - false, - id, - params.switch_port_id, - params.port_name.clone(), - ) - .await?) -} - -async fn spa_clear_switch_port_bgp_settings( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let orig_port_settings_id = sagactx - .lookup::>("original_switch_port_settings_id") - .map_err(|e| { - ActionError::action_failed(format!( - "original port settings id lookup: {e}" - )) - })?; - - let id = match orig_port_settings_id { - Some(id) => id, - None => return Ok(()), - }; - - let settings = nexus - .switch_port_settings_get(&opctx, &NameOrId::Id(id)) - .await - .map_err(ActionError::action_failed)?; - - let mg_client: Arc = - select_mg_client(&sagactx, &opctx, params.switch_port_id) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "select mg client (undo): {e}" - )) - })?; - - for peer in settings.bgp_peers { - let config = nexus - .bgp_config_get(&opctx, peer.bgp_config_id.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!("delete bgp config: {e}")) - })?; - - mg_client - .inner - .delete_neighbor(&DeleteNeighborRequest { - asn: *config.asn, - addr: peer.addr.ip(), - }) - .await - .map_err(|e| { - ActionError::action_failed(format!("delete neighbor: {e}")) - })?; - } - - Ok(()) -} - -async fn spa_undo_clear_switch_port_bgp_settings( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - let opctx = 
crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let orig_port_settings_id = - sagactx.lookup::>("original_switch_port_settings_id")?; - - let id = match orig_port_settings_id { - Some(id) => id, - None => return Ok(()), - }; - - let settings = - nexus.switch_port_settings_get(&opctx, &NameOrId::Id(id)).await?; - - Ok(ensure_switch_port_bgp_settings( - sagactx, - &opctx, - settings, - params.port_name.clone(), - params.switch_port_id, - ) - .await?) -} - -async fn spa_clear_switch_port_routes( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let orig_port_settings_id = - sagactx.lookup::>("original_switch_port_settings_id")?; - - let id = match orig_port_settings_id { - Some(id) => id, - None => return Ok(()), - }; - - let settings = nexus - .switch_port_settings_get(&opctx, &NameOrId::Id(id)) - .await - .map_err(ActionError::action_failed)?; - - let mut rq = DeleteStaticRoute4Request { - routes: StaticRoute4List { list: Vec::new() }, - }; - - for r in settings.routes { - let nexthop = match r.gw.ip() { - IpAddr::V4(v4) => v4, - IpAddr::V6(_) => continue, - }; - let prefix = match r.gw.ip() { - IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, - IpAddr::V6(_) => continue, - }; - let sr = StaticRoute4 { nexthop, prefix }; - rq.routes.list.push(sr); - } - - let mg_client: Arc = - select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; - - mg_client.inner.static_remove_v4_route(&rq).await.map_err(|e| { - ActionError::action_failed(format!("mgd static route remove {e}")) - })?; - - Ok(()) -} - -async fn spa_undo_clear_switch_port_routes( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus 
= osagactx.nexus(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let orig_port_settings_id = - sagactx.lookup::>("original_switch_port_settings_id")?; - - let id = match orig_port_settings_id { - Some(id) => id, - None => return Ok(()), - }; - - let settings = nexus - .switch_port_settings_get(&opctx, &NameOrId::Id(id)) - .await - .map_err(ActionError::action_failed)?; - - let mut rq = AddStaticRoute4Request { - routes: StaticRoute4List { list: Vec::new() }, - }; - - for r in settings.routes { - let nexthop = match r.gw.ip() { - IpAddr::V4(v4) => v4, - IpAddr::V6(_) => continue, - }; - let prefix = match r.gw.ip() { - IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, - IpAddr::V6(_) => continue, - }; - let sr = StaticRoute4 { nexthop, prefix }; - rq.routes.list.push(sr); - } - - let mg_client: Arc = - select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; - - mg_client.inner.static_add_v4_route(&rq).await.map_err(|e| { - ActionError::action_failed(format!("mgd static route remove {e}")) - })?; - - Ok(()) -} - -async fn spa_clear_switch_port_bootstore_network_settings( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let nexus = sagactx.user_data().nexus(); - let params = sagactx.saga_params::()?; - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - // Just choosing the sled agent associated with switch0 for no reason. 
- let sa = switch_sled_agent(SwitchLocation::Switch0, &sagactx).await?; - - let mut config = - nexus.bootstore_network_config(&opctx).await.map_err(|e| { - ActionError::action_failed(format!( - "read nexus bootstore network config: {e}" - )) - })?; - - let generation = nexus - .datastore() - .bump_bootstore_generation(&opctx, NETWORK_KEY.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "bump bootstore network generation number: {e}" - )) - })?; - - config.generation = generation as u64; - write_bootstore_config(&sa, &config).await?; - - Ok(()) -} - -async fn spa_undo_clear_switch_port_bootstore_network_settings( - sagactx: NexusActionContext, -) -> Result<(), Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::()?; - let nexus = osagactx.nexus(); - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let orig_port_settings_id = sagactx - .lookup::>("original_switch_port_settings_id") - .map_err(|e| { - ActionError::action_failed(format!( - "original port settings id lookup: {e}" - )) - })?; - - let id = match orig_port_settings_id { - Some(id) => id, - None => return Ok(()), - }; - - let settings = nexus - .switch_port_settings_get(&opctx, &NameOrId::Id(id)) - .await - .map_err(ActionError::action_failed)?; - - // Just choosing the sled agent associated with switch0 for no reason. - let sa = switch_sled_agent(SwitchLocation::Switch0, &sagactx).await?; - - // Read the current bootstore config, perform the update and write it back. 
- let mut config = read_bootstore_config(&sa).await?; - let update = bootstore_update( - &nexus, - &opctx, - params.switch_port_id, - ¶ms.port_name, - &settings, - ) - .await?; - apply_bootstore_update(&mut config, &update)?; - write_bootstore_config(&sa, &config).await?; - - Ok(()) -} diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs deleted file mode 100644 index 9c710d837d..0000000000 --- a/nexus/src/app/sagas/switch_port_settings_common.rs +++ /dev/null @@ -1,603 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use super::NexusActionContext; -use crate::app::map_switch_zone_addrs; -use crate::Nexus; -use db::datastore::SwitchPortSettingsCombinedResult; -use dpd_client::types::{ - LinkCreate, LinkId, LinkSettings, PortFec, PortSettings, PortSpeed, -}; -use internal_dns::ServiceName; -use ipnetwork::IpNetwork; -use mg_admin_client::types::Prefix4; -use mg_admin_client::types::{ApplyRequest, BgpPeerConfig}; -use nexus_db_model::{SwitchLinkFec, SwitchLinkSpeed}; -use nexus_db_queries::context::OpContext; -use nexus_db_queries::db; -use nexus_types::external_api::params; -use omicron_common::address::SLED_AGENT_PORT; -use omicron_common::api::external::NameOrId; -use omicron_common::api::internal::shared::{ - ParseSwitchLocationError, SwitchLocation, -}; -use sled_agent_client::types::PortConfigV1; -use sled_agent_client::types::RouteConfig; -use sled_agent_client::types::{BgpConfig, EarlyNetworkConfig}; -use sled_agent_client::types::{ - BgpPeerConfig as OmicronBgpPeerConfig, HostPortConfig, -}; -use std::collections::HashMap; -use std::net::SocketAddrV6; -use std::net::{IpAddr, Ipv6Addr}; -use std::sync::Arc; -use steno::ActionError; -use uuid::Uuid; - -// This is more of an implementation detail of the BGP implementation. 
It -// defines the maximum time the peering engine will wait for external messages -// before breaking to check for shutdown conditions. -const BGP_SESSION_RESOLUTION: u64 = 100; - -pub(crate) fn api_to_dpd_port_settings( - settings: &SwitchPortSettingsCombinedResult, -) -> Result { - let mut dpd_port_settings = PortSettings { - links: HashMap::new(), - v4_routes: HashMap::new(), - v6_routes: HashMap::new(), - }; - - //TODO breakouts - let link_id = LinkId(0); - - for l in settings.links.iter() { - dpd_port_settings.links.insert( - link_id.to_string(), - LinkSettings { - params: LinkCreate { - autoneg: l.autoneg, - lane: Some(LinkId(0)), - kr: false, - fec: match l.fec { - SwitchLinkFec::Firecode => PortFec::Firecode, - SwitchLinkFec::Rs => PortFec::Rs, - SwitchLinkFec::None => PortFec::None, - }, - speed: match l.speed { - SwitchLinkSpeed::Speed0G => PortSpeed::Speed0G, - SwitchLinkSpeed::Speed1G => PortSpeed::Speed1G, - SwitchLinkSpeed::Speed10G => PortSpeed::Speed10G, - SwitchLinkSpeed::Speed25G => PortSpeed::Speed25G, - SwitchLinkSpeed::Speed40G => PortSpeed::Speed40G, - SwitchLinkSpeed::Speed50G => PortSpeed::Speed50G, - SwitchLinkSpeed::Speed100G => PortSpeed::Speed100G, - SwitchLinkSpeed::Speed200G => PortSpeed::Speed200G, - SwitchLinkSpeed::Speed400G => PortSpeed::Speed400G, - }, - }, - //TODO won't work for breakouts - addrs: settings - .addresses - .iter() - .map(|a| a.address.ip()) - .collect(), - }, - ); - } - - Ok(dpd_port_settings) -} - -pub(crate) fn apply_bootstore_update( - config: &mut EarlyNetworkConfig, - update: &EarlyNetworkPortUpdate, -) -> Result { - let mut change = BootstoreNetworkPortChange::default(); - - let rack_net_config = match &mut config.body.rack_network_config { - Some(cfg) => cfg, - None => { - return Err(ActionError::action_failed( - "rack network config not yet initialized".to_string(), - )) - } - }; - - for port in &mut rack_net_config.ports { - if port.port == update.port.port { - change.previous_port_config = 
Some(port.clone()); - *port = update.port.clone(); - break; - } - } - if change.previous_port_config.is_none() { - rack_net_config.ports.push(update.port.clone()); - } - - for updated_bgp in &update.bgp_configs { - let mut exists = false; - for resident_bgp in &mut rack_net_config.bgp { - if resident_bgp.asn == updated_bgp.asn { - change.changed_bgp_configs.push(resident_bgp.clone()); - *resident_bgp = updated_bgp.clone(); - exists = true; - break; - } - } - if !exists { - change.added_bgp_configs.push(updated_bgp.clone()); - } - } - rack_net_config.bgp.extend_from_slice(&change.added_bgp_configs); - - Ok(change) -} - -pub(crate) async fn bootstore_update( - nexus: &Arc, - opctx: &OpContext, - switch_port_id: Uuid, - switch_port_name: &str, - settings: &SwitchPortSettingsCombinedResult, -) -> Result { - let switch_port = - nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| { - ActionError::action_failed(format!( - "get switch port for uplink: {e}" - )) - })?; - - let switch_location: SwitchLocation = - switch_port.switch_location.parse().map_err( - |e: ParseSwitchLocationError| { - ActionError::action_failed(format!( - "get switch location for uplink: {e:?}", - )) - }, - )?; - - let mut peer_info = Vec::new(); - let mut bgp_configs = Vec::new(); - for p in &settings.bgp_peers { - let bgp_config = nexus - .bgp_config_get(&opctx, p.bgp_config_id.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!("get bgp config: {e}")) - })?; - - let announcements = nexus - .bgp_announce_list( - &opctx, - ¶ms::BgpAnnounceSetSelector { - name_or_id: NameOrId::Id(bgp_config.bgp_announce_set_id), - }, - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "get bgp announcements: {e}" - )) - })?; - - peer_info.push((p, bgp_config.asn.0)); - bgp_configs.push(BgpConfig { - asn: bgp_config.asn.0, - originate: announcements - .iter() - .filter_map(|a| match a.network { - IpNetwork::V4(net) => Some(net.into()), - //TODO v6 - _ => None, - }) - 
.collect(), - }); - } - - let update = EarlyNetworkPortUpdate { - port: PortConfigV1 { - routes: settings - .routes - .iter() - .map(|r| RouteConfig { destination: r.dst, nexthop: r.gw.ip() }) - .collect(), - addresses: settings.addresses.iter().map(|a| a.address).collect(), - switch: switch_location, - port: switch_port_name.into(), - uplink_port_fec: settings - .links - .get(0) - .map(|l| l.fec) - .unwrap_or(SwitchLinkFec::None) - .into(), - uplink_port_speed: settings - .links - .get(0) - .map(|l| l.speed) - .unwrap_or(SwitchLinkSpeed::Speed100G) - .into(), - autoneg: settings.links.get(0).map(|l| l.autoneg).unwrap_or(false), - bgp_peers: peer_info - .iter() - .filter_map(|(p, asn)| { - //TODO v6 - match p.addr.ip() { - IpAddr::V4(addr) => Some(OmicronBgpPeerConfig { - asn: *asn, - port: switch_port_name.into(), - addr, - hold_time: Some(p.hold_time.0.into()), - connect_retry: Some(p.connect_retry.0.into()), - delay_open: Some(p.delay_open.0.into()), - idle_hold_time: Some(p.idle_hold_time.0.into()), - keepalive: Some(p.keepalive.0.into()), - }), - IpAddr::V6(_) => { - warn!(opctx.log, "IPv6 peers not yet supported"); - None - } - } - }) - .collect(), - }, - bgp_configs, - }; - - Ok(update) -} - -pub(crate) async fn ensure_switch_port_uplink( - sagactx: NexusActionContext, - opctx: &OpContext, - skip_self: bool, - inject: Option, - switch_port_id: Uuid, - switch_port_name: String, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let nexus = osagactx.nexus(); - - let switch_port = - nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| { - ActionError::action_failed(format!( - "get switch port for uplink: {e}" - )) - })?; - - let switch_location: SwitchLocation = - switch_port.switch_location.parse().map_err(|e| { - ActionError::action_failed(format!( - "get switch location for uplink: {e:?}", - )) - })?; - - let mut uplinks: Vec = Vec::new(); - - // The sled agent uplinks interface is an all or nothing interface, so we - // need 
to get all the uplink configs for all the ports. - let active_ports = - nexus.active_port_settings(&opctx).await.map_err(|e| { - ActionError::action_failed(format!( - "get active switch port settings: {e}" - )) - })?; - - for (port, info) in &active_ports { - // Since we are undoing establishing uplinks for the settings - // associated with this port we skip adding this ports uplinks - // to the list - effectively removing them. - if skip_self && port.id == switch_port.id { - continue; - } - uplinks.push(HostPortConfig { - port: port.port_name.clone(), - addrs: info.addresses.iter().map(|a| a.address).collect(), - }) - } - - if let Some(id) = inject { - let settings = nexus - .switch_port_settings_get(&opctx, &id.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "get switch port settings for injection: {e}" - )) - })?; - uplinks.push(HostPortConfig { - port: switch_port_name.clone(), - addrs: settings.addresses.iter().map(|a| a.address).collect(), - }) - } - - let sc = switch_sled_agent(switch_location, &sagactx).await?; - sc.uplink_ensure(&sled_agent_client::types::SwitchPorts { uplinks }) - .await - .map_err(|e| { - ActionError::action_failed(format!("ensure uplink: {e}")) - })?; - - Ok(()) -} - -pub(crate) async fn read_bootstore_config( - sa: &sled_agent_client::Client, -) -> Result { - Ok(sa - .read_network_bootstore_config_cache() - .await - .map_err(|e| { - ActionError::action_failed(format!( - "read bootstore network config: {e}" - )) - })? 
- .into_inner()) -} - -pub(crate) async fn write_bootstore_config( - sa: &sled_agent_client::Client, - config: &EarlyNetworkConfig, -) -> Result<(), ActionError> { - sa.write_network_bootstore_config(config).await.map_err(|e| { - ActionError::action_failed(format!( - "write bootstore network config: {e}" - )) - })?; - Ok(()) -} - -pub(crate) async fn select_mg_client( - sagactx: &NexusActionContext, - opctx: &OpContext, - switch_port_id: Uuid, -) -> Result, ActionError> { - let osagactx = sagactx.user_data(); - let nexus = osagactx.nexus(); - - let switch_port = - nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| { - ActionError::action_failed(format!( - "get switch port for mg client selection: {e}" - )) - })?; - - let switch_location: SwitchLocation = - switch_port.switch_location.parse().map_err( - |e: ParseSwitchLocationError| { - ActionError::action_failed(format!( - "get switch location for uplink: {e:?}", - )) - }, - )?; - - let mg_client: Arc = osagactx - .nexus() - .mg_clients - .get(&switch_location) - .ok_or_else(|| { - ActionError::action_failed(format!( - "requested switch not available: {switch_location}" - )) - })? 
- .clone(); - Ok(mg_client) -} - -pub(crate) async fn switch_sled_agent( - location: SwitchLocation, - sagactx: &NexusActionContext, -) -> Result { - let nexus = sagactx.user_data().nexus(); - let sled_agent_addr = get_scrimlet_address(location, nexus).await?; - Ok(sled_agent_client::Client::new( - &format!("http://{}", sled_agent_addr), - sagactx.user_data().log().clone(), - )) -} - -pub(crate) async fn ensure_switch_port_bgp_settings( - sagactx: NexusActionContext, - opctx: &OpContext, - settings: SwitchPortSettingsCombinedResult, - switch_port_name: String, - switch_port_id: Uuid, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let nexus = osagactx.nexus(); - let mg_client: Arc = - select_mg_client(&sagactx, opctx, switch_port_id).await.map_err( - |e| ActionError::action_failed(format!("select mg client: {e}")), - )?; - - let mut bgp_peer_configs = HashMap::>::new(); - - let mut cfg: Option = None; - - for peer in settings.bgp_peers { - let config = nexus - .bgp_config_get(&opctx, peer.bgp_config_id.into()) - .await - .map_err(|e| { - ActionError::action_failed(format!("get bgp config: {e}")) - })?; - - if let Some(cfg) = &cfg { - if config.asn != cfg.asn { - return Err(ActionError::action_failed( - "bad request: only one AS allowed per switch".to_string(), - )); - } - } else { - cfg = Some(config); - } - - let bpc = BgpPeerConfig { - name: format!("{}", peer.addr.ip()), //TODO user defined name? 
- host: format!("{}:179", peer.addr.ip()), - hold_time: peer.hold_time.0.into(), - idle_hold_time: peer.idle_hold_time.0.into(), - delay_open: peer.delay_open.0.into(), - connect_retry: peer.connect_retry.0.into(), - keepalive: peer.keepalive.0.into(), - resolution: BGP_SESSION_RESOLUTION, - passive: false, - }; - - match bgp_peer_configs.get_mut(&switch_port_name) { - Some(peers) => { - peers.push(bpc); - } - None => { - bgp_peer_configs.insert(switch_port_name.clone(), vec![bpc]); - } - } - } - - if let Some(cfg) = &cfg { - let announcements = nexus - .bgp_announce_list( - &opctx, - ¶ms::BgpAnnounceSetSelector { - name_or_id: NameOrId::Id(cfg.bgp_announce_set_id), - }, - ) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "get bgp announcements: {e}" - )) - })?; - - let mut prefixes = Vec::new(); - for a in &announcements { - let value = match a.network.ip() { - IpAddr::V4(value) => Ok(value), - IpAddr::V6(_) => Err(ActionError::action_failed( - "bad request: IPv6 announcement not yet supported" - .to_string(), - )), - }?; - prefixes.push(Prefix4 { value, length: a.network.prefix() }); - } - mg_client - .inner - .bgp_apply(&ApplyRequest { - asn: cfg.asn.0, - peers: bgp_peer_configs, - originate: prefixes, - }) - .await - .map_err(|e| { - ActionError::action_failed(format!("apply bgp settings: {e}")) - })?; - } - - Ok(()) -} - -pub(crate) async fn get_scrimlet_address( - location: SwitchLocation, - nexus: &Arc, -) -> Result { - /* TODO this depends on DNS entries only coming from RSS, it's broken - on the upgrade path - nexus - .resolver() - .await - .lookup_socket_v6(ServiceName::Scrimlet(location)) - .await - .map_err(|e| e.to_string()) - .map_err(|e| { - ActionError::action_failed(format!( - "scrimlet dns lookup failed {e}", - )) - }) - */ - let result = nexus - .resolver() - .await - .lookup_all_ipv6(ServiceName::Dendrite) - .await - .map_err(|e| { - ActionError::action_failed(format!( - "scrimlet dns lookup failed {e}", - )) - }); - - let 
mappings = match result { - Ok(addrs) => map_switch_zone_addrs(&nexus.log, addrs).await, - Err(e) => { - warn!(nexus.log, "Failed to lookup Dendrite address: {e}"); - return Err(ActionError::action_failed(format!( - "switch mapping failed {e}", - ))); - } - }; - - let addr = match mappings.get(&location) { - Some(addr) => addr, - None => { - return Err(ActionError::action_failed(format!( - "address for switch at location: {location} not found", - ))); - } - }; - - let mut segments = addr.segments(); - segments[7] = 1; - let addr = Ipv6Addr::from(segments); - - Ok(SocketAddrV6::new(addr, SLED_AGENT_PORT, 0, 0)) -} - -#[derive(Clone, Debug, Default)] -pub(crate) struct BootstoreNetworkPortChange { - previous_port_config: Option, - changed_bgp_configs: Vec, - added_bgp_configs: Vec, -} - -#[derive(Clone, Debug)] -pub struct EarlyNetworkPortUpdate { - port: PortConfigV1, - bgp_configs: Vec, -} - -pub(crate) async fn select_dendrite_client( - sagactx: &NexusActionContext, - opctx: &OpContext, - switch_port_id: Uuid, -) -> Result, ActionError> { - let osagactx = sagactx.user_data(); - let nexus = osagactx.nexus(); - - let switch_port = - nexus.get_switch_port(&opctx, switch_port_id).await.map_err(|e| { - ActionError::action_failed(format!( - "get switch port for dendrite client selection {e}" - )) - })?; - - let switch_location: SwitchLocation = - switch_port.switch_location.parse().map_err( - |e: ParseSwitchLocationError| { - ActionError::action_failed(format!( - "get switch location for uplink: {e:?}", - )) - }, - )?; - - let dpd_client: Arc = osagactx - .nexus() - .dpd_clients - .get(&switch_location) - .ok_or_else(|| { - ActionError::action_failed(format!( - "requested switch not available: {switch_location}" - )) - })? 
- .clone(); - Ok(dpd_client) -} diff --git a/nexus/src/app/switch_interface.rs b/nexus/src/app/switch_interface.rs index 0acb2b7fe7..c3ce0f553c 100644 --- a/nexus/src/app/switch_interface.rs +++ b/nexus/src/app/switch_interface.rs @@ -2,10 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::app::sagas; use crate::external_api::params; use db::model::{LoopbackAddress, Name}; -use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; @@ -13,8 +11,7 @@ use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use omicron_common::api::external::LookupResult; use omicron_common::api::external::{ - CreateResult, DataPageParams, DeleteResult, Error, InternalContext, IpNet, - ListResultVec, + CreateResult, DataPageParams, DeleteResult, Error, IpNet, ListResultVec, }; use std::sync::Arc; use uuid::Uuid; @@ -43,21 +40,24 @@ impl super::Nexus { validate_switch_location(params.switch_location.as_str())?; - let saga_params = sagas::loopback_address_create::Params { - serialized_authn: authn::saga::Serialized::for_opctx(opctx), - loopback_address: params.clone(), - }; + // Just a check to make sure a valid rack id was passed in. 
+ self.rack_lookup(&opctx, ¶ms.rack_id).await?; - let saga_output = self.execute_saga::< - sagas::loopback_address_create::SagaLoopbackAddressCreate>( - saga_params).await?; + let address_lot_lookup = + self.address_lot_lookup(&opctx, params.address_lot.clone())?; - let value = saga_output - .lookup_node_output::( - "created_loopback_address_record", - ) - .map_err(|e| Error::internal_error(&format!("{:#}", &e))) - .internal_context("looking up output from loopback create saga")?; + let (.., authz_address_lot) = + address_lot_lookup.lookup_for(authz::Action::CreateChild).await?; + + let value = self + .db_datastore + .loopback_address_create(&opctx, ¶ms, None, &authz_address_lot) + .await?; + + // eagerly propagate changes via rpw + self.background_tasks + .driver + .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(value) } @@ -69,16 +69,24 @@ impl super::Nexus { switch_location: Name, address: IpNet, ) -> DeleteResult { - let saga_params = sagas::loopback_address_delete::Params { - serialized_authn: authn::saga::Serialized::for_opctx(opctx), - address, + let loopback_address_lookup = self.loopback_address_lookup( + &opctx, rack_id, switch_location, - }; + address, + )?; + + let (.., authz_loopback_address) = + loopback_address_lookup.lookup_for(authz::Action::Delete).await?; + + self.db_datastore + .loopback_address_delete(&opctx, &authz_loopback_address) + .await?; - self.execute_saga::< - sagas::loopback_address_delete::SagaLoopbackAddressDelete>( - saga_params).await?; + // eagerly propagate changes via rpw + self.background_tasks + .driver + .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(()) } diff --git a/nexus/src/app/switch_port.rs b/nexus/src/app/switch_port.rs index fc9ad2866a..c7d5272ae1 100644 --- a/nexus/src/app/switch_port.rs +++ b/nexus/src/app/switch_port.rs @@ -92,38 +92,26 @@ impl super::Nexus { ) .await?; - // run the port settings apply saga for each port referencing the - // updated settings - let 
ports = self .db_datastore .switch_ports_using_settings(opctx, switch_port_settings_id) .await?; - for (switch_port_id, switch_port_name) in ports.into_iter() { - let saga_params = sagas::switch_port_settings_apply::Params { - serialized_authn: authn::saga::Serialized::for_opctx(opctx), + for (switch_port_id, _switch_port_name) in ports.into_iter() { + self.set_switch_port_settings_id( + &opctx, switch_port_id, - switch_port_settings_id: result.settings.id(), - switch_port_name: switch_port_name.to_string(), - }; - - self.execute_saga::< - sagas::switch_port_settings_apply::SagaSwitchPortSettingsApply - >( - saga_params, - ) - .await - .map_err(|e| { - let msg = e.to_string(); - if msg.contains("bad request") { - external::Error::invalid_request(&msg.to_string()) - } else { - e - } - })?; + Some(switch_port_settings_id), + UpdatePrecondition::DontCare, + ) + .await?; } + // eagerly propagate changes via rpw + self.background_tasks + .driver + .activate(&self.background_tasks.task_switch_port_settings_manager); + Ok(result) } @@ -180,15 +168,6 @@ impl super::Nexus { self.db_datastore.switch_port_list(opctx, pagparams).await } - pub(crate) async fn get_switch_port( - &self, - opctx: &OpContext, - params: uuid::Uuid, - ) -> LookupResult { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - self.db_datastore.switch_port_get(opctx, params).await - } - pub(crate) async fn list_switch_ports_with_uplinks( &self, opctx: &OpContext, @@ -242,27 +221,18 @@ impl super::Nexus { } }; - let saga_params = sagas::switch_port_settings_apply::Params { - serialized_authn: authn::saga::Serialized::for_opctx(opctx), + self.set_switch_port_settings_id( + &opctx, switch_port_id, - switch_port_settings_id, - switch_port_name: port.to_string(), - }; - - self.execute_saga::< - sagas::switch_port_settings_apply::SagaSwitchPortSettingsApply - >( - saga_params, + Some(switch_port_settings_id), + UpdatePrecondition::DontCare, ) - .await - .map_err(|e| { - let msg = e.to_string(); - if 
msg.contains("bad request") { - external::Error::invalid_request(&msg.to_string()) - } else { - e - } - })?; + .await?; + + // eagerly propagate changes via rpw + self.background_tasks + .driver + .activate(&self.background_tasks.task_switch_port_settings_manager); Ok(()) } @@ -284,17 +254,20 @@ impl super::Nexus { ) .await?; - let saga_params = sagas::switch_port_settings_clear::Params { - serialized_authn: authn::saga::Serialized::for_opctx(opctx), + // update the switch port settings association + self.set_switch_port_settings_id( + &opctx, switch_port_id, - port_name: port.to_string(), - }; - - self.execute_saga::( - saga_params, + None, + UpdatePrecondition::DontCare, ) .await?; + // eagerly propagate changes via rpw + self.background_tasks + .driver + .activate(&self.background_tasks.task_switch_port_settings_manager); + Ok(()) } @@ -323,25 +296,4 @@ impl super::Nexus { Ok(()) } - - // TODO it would likely be better to do this as a one shot db query. - pub(crate) async fn active_port_settings( - &self, - opctx: &OpContext, - ) -> LookupResult> { - let mut ports = Vec::new(); - let port_list = - self.switch_port_list(opctx, &DataPageParams::max_page()).await?; - - for p in port_list { - if let Some(id) = p.port_settings_id { - ports.push(( - p.clone(), - self.switch_port_settings_get(opctx, &id.into()).await?, - )); - } - } - - LookupResult::Ok(ports) - } } diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 8d37f9e3ef..de3aa4c1f4 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -10,7 +10,7 @@ session_absolute_timeout_minutes = 480 # List of authentication schemes to support. [authn] -schemes_external = [ "spoof", "session_cookie" ] +schemes_external = ["spoof", "session_cookie"] # # NOTE: for the test suite, if mode = "file", the file path MUST be the sentinel @@ -43,7 +43,7 @@ techport_external_server_port = 0 # Nexus may need to resolve external hosts (e.g. to grab IdP metadata). 
# These are the DNS servers it should use. -external_dns_servers = [ "1.1.1.1", "9.9.9.9" ] +external_dns_servers = ["1.1.1.1", "9.9.9.9"] [deployment.dropshot_external] # NOTE: for the test suite, the port MUST be 0 (in order to bind to any @@ -103,6 +103,7 @@ phantom_disks.period_secs = 30 blueprints.period_secs_load = 100 blueprints.period_secs_execute = 600 sync_service_zone_nat.period_secs = 30 +switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 [default_region_allocation_strategy] diff --git a/nexus/tests/integration_tests/address_lots.rs b/nexus/tests/integration_tests/address_lots.rs index 40c8865929..7860dd463c 100644 --- a/nexus/tests/integration_tests/address_lots.rs +++ b/nexus/tests/integration_tests/address_lots.rs @@ -4,7 +4,6 @@ //! Integration tests for operating on Address Lots -use dropshot::HttpErrorResponseBody; use http::method::Method; use http::StatusCode; use nexus_test_utils::http_testing::AuthnMode; @@ -79,27 +78,6 @@ async fn test_address_lot_basic_crud(ctx: &ControlPlaneTestContext) { "203.0.113.20".parse::().unwrap() ); - // Verify conflict error on recreate - let error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new( - client, - Method::POST, - "/v1/system/networking/address-lot", - ) - .body(Some(¶ms)) - .expect_status(Some(StatusCode::BAD_REQUEST)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - assert_eq!( - error.message, - "already exists: address-lot \"parkinglot\"".to_string() - ); - // Verify there are lots let lots = NexusRequest::iter_collection_authn::( client, diff --git a/nexus/tests/integration_tests/loopback_address.rs b/nexus/tests/integration_tests/loopback_address.rs deleted file mode 100644 index b92e82b216..0000000000 --- a/nexus/tests/integration_tests/loopback_address.rs +++ /dev/null @@ -1,389 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. 
If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Integration tests for operating on Loopback Addresses - -use dropshot::HttpErrorResponseBody; -use http::method::Method; -use http::StatusCode; -use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; -use nexus_test_utils_macros::nexus_test; -use nexus_types::external_api::params::{ - AddressLotBlockCreate, AddressLotCreate, LoopbackAddressCreate, -}; -use nexus_types::external_api::views::Rack; -use omicron_common::api::external::{ - AddressLotKind, IdentityMetadataCreateParams, LoopbackAddress, NameOrId, -}; - -type ControlPlaneTestContext = - nexus_test_utils::ControlPlaneTestContext; - -#[nexus_test] -async fn test_loopback_address_basic_crud(ctx: &ControlPlaneTestContext) { - let client = &ctx.external_client; - - // Create a lot - let lot_params = AddressLotCreate { - identity: IdentityMetadataCreateParams { - name: "parkinglot".parse().unwrap(), - description: "an address parking lot".into(), - }, - kind: AddressLotKind::Infra, - blocks: vec![AddressLotBlockCreate { - first_address: "203.0.113.10".parse().unwrap(), - last_address: "203.0.113.100".parse().unwrap(), - }], - }; - - NexusRequest::objects_post( - client, - "/v1/system/networking/address-lot", - &lot_params, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); - - // Verify there are no loopback addresses - let addrs = NexusRequest::iter_collection_authn::( - client, - "/v1/system/networking/loopback-address", - "", - None, - ) - .await - .expect("Failed to list loopback addresses") - .all_items; - - assert_eq!(addrs.len(), 0, "Expected no loopback addresses"); - - let racks_url = "/v1/system/hardware/racks"; - let racks: Vec = - NexusRequest::iter_collection_authn(client, racks_url, "", None) - .await - .expect("failed to list racks") - .all_items; - - let rack_id = racks[0].identity.id; - - // Create a loopback address - let 
params = LoopbackAddressCreate { - address_lot: NameOrId::Name("parkinglot".parse().unwrap()), - rack_id: rack_id, - switch_location: "switch0".parse().unwrap(), - address: "203.0.113.99".parse().unwrap(), - mask: 24, - anycast: false, - }; - let addr: LoopbackAddress = NexusRequest::objects_post( - client, - "/v1/system/networking/loopback-address", - ¶ms, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - - assert_eq!(addr.address.ip(), params.address); - assert_eq!(addr.address.prefix(), params.mask); - assert_eq!(addr.rack_id, params.rack_id); - assert_eq!(addr.switch_location, params.switch_location.to_string()); - - // Verify conflict error on recreate - let error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new( - client, - Method::POST, - "/v1/system/networking/loopback-address", - ) - .body(Some(¶ms)) - .expect_status(Some(StatusCode::BAD_REQUEST)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - assert_eq!(error.message, "address unavailable".to_string()); - - // Verify there loopback addresses - let addrs = NexusRequest::iter_collection_authn::( - client, - "/v1/system/networking/loopback-address", - "", - None, - ) - .await - .expect("Failed to list loopback addresses") - .all_items; - - assert_eq!(addrs.len(), 1, "Expected 1 loopback address"); - assert_eq!(addrs[0].address.ip(), params.address); - assert_eq!(addrs[0].address.prefix(), params.mask); - - // Verify error when deleting lot while in use - let _error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new( - client, - Method::DELETE, - "/v1/system/networking/address-lot/parkinglot", - ) - .expect_status(Some(StatusCode::BAD_REQUEST)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - - // Delete loopback address - NexusRequest::object_delete( - client, - &format!( - "{}/{}/{}/{}/{}", - 
"/v1/system/networking/loopback-address", - rack_id, - "switch0", - "203.0.113.99", - 24, - ), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); - - // Verify there are no addresses - let addrs = NexusRequest::iter_collection_authn::( - client, - "/v1/system/networking/loopback-address", - "", - None, - ) - .await - .expect("Failed to list loopback addresses") - .all_items; - - assert_eq!(addrs.len(), 0, "Expected no loopback addresses after delete"); - - // Verify we can now delete the address lot. - NexusRequest::object_delete( - client, - "/v1/system/networking/address-lot/parkinglot", - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); - - // Create again after delete should work without conflict. - NexusRequest::objects_post( - client, - "/v1/system/networking/address-lot", - &lot_params, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); - - let _addr: LoopbackAddress = NexusRequest::objects_post( - client, - "/v1/system/networking/loopback-address", - ¶ms, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); -} - -#[nexus_test] -async fn test_anycast_loopback_address_basic_crud( - ctx: &ControlPlaneTestContext, -) { - let client = &ctx.external_client; - - // Create a lot - let lot_params = AddressLotCreate { - identity: IdentityMetadataCreateParams { - name: "parkinglot".parse().unwrap(), - description: "an address parking lot".into(), - }, - kind: AddressLotKind::Infra, - blocks: vec![AddressLotBlockCreate { - first_address: "203.0.113.10".parse().unwrap(), - last_address: "203.0.113.100".parse().unwrap(), - }], - }; - - NexusRequest::objects_post( - client, - "/v1/system/networking/address-lot", - &lot_params, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); - - // Verify there are no loopback addresses - let addrs = NexusRequest::iter_collection_authn::( - client, - 
"/v1/system/networking/loopback-address", - "", - None, - ) - .await - .expect("Failed to list loopback addresses") - .all_items; - - assert_eq!(addrs.len(), 0, "Expected no loopback addresses"); - - let racks_url = "/v1/system/hardware/racks"; - let racks: Vec = - NexusRequest::iter_collection_authn(client, racks_url, "", None) - .await - .expect("failed to list racks") - .all_items; - - let rack_id = racks[0].identity.id; - - // Create an anycast loopback address - let params = LoopbackAddressCreate { - address_lot: NameOrId::Name("parkinglot".parse().unwrap()), - rack_id, - switch_location: "switch0".parse().unwrap(), - address: "203.0.113.99".parse().unwrap(), - mask: 24, - anycast: true, - }; - let addr: LoopbackAddress = NexusRequest::objects_post( - client, - "/v1/system/networking/loopback-address", - ¶ms, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - - assert_eq!(addr.address.ip(), params.address); - assert_eq!(addr.address.prefix(), params.mask); - assert_eq!(addr.rack_id, params.rack_id); - assert_eq!(addr.switch_location, params.switch_location.to_string()); - - // Create a second anycast record for another switch - let params = LoopbackAddressCreate { - address_lot: NameOrId::Name("parkinglot".parse().unwrap()), - rack_id, - switch_location: "switch1".parse().unwrap(), - address: "203.0.113.99".parse().unwrap(), - mask: 24, - anycast: true, - }; - let addr: LoopbackAddress = NexusRequest::objects_post( - client, - "/v1/system/networking/loopback-address", - ¶ms, - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); - - assert_eq!(addr.address.ip(), params.address); - assert_eq!(addr.address.prefix(), params.mask); - assert_eq!(addr.rack_id, params.rack_id); - assert_eq!(addr.switch_location, params.switch_location.to_string()); - - // Verify there are two anycast loopback addresses - let addrs = NexusRequest::iter_collection_authn::( - 
client, - "/v1/system/networking/loopback-address", - "", - None, - ) - .await - .expect("Failed to list loopback addresses") - .all_items; - - assert_eq!(addrs.len(), 2, "Expected 2 loopback addresses"); - assert_eq!(addrs[0].address.ip(), params.address); - assert_eq!(addrs[0].address.prefix(), params.mask); - - // Delete anycast loopback addresses - NexusRequest::object_delete( - client, - &format!( - "{}/{}/{}/{}/{}", - "/v1/system/networking/loopback-address", - rack_id, - "switch0", - "203.0.113.99", - 24, - ), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); - - // Verify there is only one anycast loopback address - let addrs = NexusRequest::iter_collection_authn::( - client, - "/v1/system/networking/loopback-address", - "", - None, - ) - .await - .expect("Failed to list loopback addresses") - .all_items; - - assert_eq!(addrs.len(), 1, "Expected 1 loopback address"); - assert_eq!(addrs[0].address.ip(), params.address); - assert_eq!(addrs[0].address.prefix(), params.mask); - - NexusRequest::object_delete( - client, - &format!( - "{}/{}/{}/{}/{}", - "/v1/system/networking/loopback-address", - rack_id, - "switch1", - "203.0.113.99", - 24, - ), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); - - // Verify there are no addresses - let addrs = NexusRequest::iter_collection_authn::( - client, - "/v1/system/networking/loopback-address", - "", - None, - ) - .await - .expect("Failed to list loopback addresses") - .all_items; - - assert_eq!(addrs.len(), 0, "Expected no loopback addresses after delete"); -} diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 84b867252f..804694c0b2 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -18,7 +18,6 @@ mod images; mod initialization; mod instances; mod ip_pools; -mod loopback_address; mod metrics; mod oximeter; mod pantry; diff --git 
a/oximeter/collector/tests/output/self-stat-schema.json b/oximeter/collector/tests/output/self-stat-schema.json index 0caf2d27e9..8017d61880 100644 --- a/oximeter/collector/tests/output/self-stat-schema.json +++ b/oximeter/collector/tests/output/self-stat-schema.json @@ -39,7 +39,7 @@ } ], "datum_type": "cumulative_u64", - "created": "2023-12-04T17:49:47.797495948Z" + "created": "2024-02-05T23:03:00.842290108Z" }, "oximeter_collector:failed_collections": { "timeseries_name": "oximeter_collector:failed_collections", @@ -86,6 +86,6 @@ } ], "datum_type": "cumulative_u64", - "created": "2023-12-04T17:49:47.799970009Z" + "created": "2024-02-05T23:03:00.842943988Z" } } \ No newline at end of file diff --git a/package-manifest.toml b/package-manifest.toml index ef4e6dfb06..0b54e35d4f 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -46,7 +46,7 @@ source.paths = [ # - gimlet: Assumes control of real hardware, does not emulate hardware # - gimlet-standalone: A real gimlet but running in isolation from the rack as a whole # - non-gimlet: Used for testing on development illumos machines - { from = "smf/sled-agent/{{machine}}", to = "pkg" } + { from = "smf/sled-agent/{{machine}}", to = "pkg" }, ] output.type = "tarball" @@ -60,17 +60,14 @@ only_for_targets.image = "trampoline" source.type = "local" source.rust.binary_names = ["installinator"] source.rust.release = true -source.paths = [ { from = "smf/installinator", to = "pkg" } ] +source.paths = [{ from = "smf/installinator", to = "pkg" }] output.type = "tarball" # overlay is a set of packages that are overlaid onto all non-global zones. 
[package.overlay] service_name = "overlay" source.type = "composite" -source.packages = [ - "logadm.tar.gz", - "profile.tar.gz", -] +source.packages = ["logadm.tar.gz", "profile.tar.gz"] output.type = "zone" # The logadm package is an overlay for all non-global zones to reconfigure log @@ -105,7 +102,7 @@ source.packages = [ "omicron-nexus.tar.gz", "zone-network-setup.tar.gz", "zone-network-install.tar.gz", - "opte-interface-setup.tar.gz" + "opte-interface-setup.tar.gz", ] output.type = "zone" @@ -133,7 +130,11 @@ output.intermediate_only = true service_name = "oximeter" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "oximeter-collector.tar.gz", "zone-network-setup.tar.gz", "zone-network-install.tar.gz" ] +source.packages = [ + "oximeter-collector.tar.gz", + "zone-network-setup.tar.gz", + "zone-network-install.tar.gz", +] output.type = "zone" [package.oximeter-collector] @@ -143,8 +144,8 @@ source.type = "local" source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true source.paths = [ - { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" }, - { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, + { from = "smf/oximeter", to = "/var/svc/manifest/site/oximeter" }, + { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, ] output.type = "zone" output.intermediate_only = true @@ -157,7 +158,7 @@ source.packages = [ "clickhouse_svc.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", ] output.type = "zone" @@ -169,7 +170,7 @@ source.paths = [ { from = "out/clickhouse", to = "/opt/oxide/clickhouse" }, { from = "smf/clickhouse/manifest.xml", to = "/var/svc/manifest/site/clickhouse/manifest.xml" }, { from = "smf/clickhouse/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse.sh" }, - { from = "smf/clickhouse/config_replica.xml", to = 
"/opt/oxide/clickhouse/config.d/config_replica.xml" } + { from = "smf/clickhouse/config_replica.xml", to = "/opt/oxide/clickhouse/config.d/config_replica.xml" }, ] output.type = "zone" output.intermediate_only = true @@ -183,7 +184,7 @@ source.packages = [ "clickhouse_keeper_svc.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", ] output.type = "zone" @@ -195,7 +196,7 @@ source.paths = [ { from = "out/clickhouse", to = "/opt/oxide/clickhouse_keeper" }, { from = "smf/clickhouse_keeper/manifest.xml", to = "/var/svc/manifest/site/clickhouse_keeper/manifest.xml" }, { from = "smf/clickhouse_keeper/method_script.sh", to = "/opt/oxide/lib/svc/manifest/clickhouse_keeper.sh" }, - { from = "smf/clickhouse_keeper/keeper_config.xml", to = "/opt/oxide/clickhouse_keeper/keeper_config.xml" } + { from = "smf/clickhouse_keeper/keeper_config.xml", to = "/opt/oxide/clickhouse_keeper/keeper_config.xml" }, ] output.type = "zone" output.intermediate_only = true @@ -205,11 +206,11 @@ setup_hint = "Run `./tools/ci_download_clickhouse` to download the necessary bin service_name = "cockroachdb" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ +source.packages = [ "cockroachdb-service.tar.gz", "internal-dns-cli.tar.gz", "zone-network-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", ] output.type = "zone" @@ -245,7 +246,7 @@ source.packages = [ "dns-server.tar.gz", "internal-dns-customizations.tar.gz", "zone-network-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", ] output.type = "zone" @@ -253,12 +254,12 @@ output.type = "zone" service_name = "external_dns" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ +source.packages = [ "dns-server.tar.gz", "external-dns-customizations.tar.gz", "zone-network-setup.tar.gz", "zone-network-install.tar.gz", - "opte-interface-setup.tar.gz" + 
"opte-interface-setup.tar.gz", ] output.type = "zone" @@ -276,7 +277,9 @@ output.intermediate_only = true service_name = "internal-dns-customizations" only_for_targets.image = "standard" source.type = "local" -source.paths = [ { from = "smf/internal-dns", to = "/var/svc/manifest/site/internal_dns" } ] +source.paths = [ + { from = "smf/internal-dns", to = "/var/svc/manifest/site/internal_dns" }, +] output.intermediate_only = true output.type = "zone" @@ -284,7 +287,9 @@ output.type = "zone" service_name = "external-dns-customizations" only_for_targets.image = "standard" source.type = "local" -source.paths = [ { from = "smf/external-dns", to = "/var/svc/manifest/site/external_dns" } ] +source.paths = [ + { from = "smf/external-dns", to = "/var/svc/manifest/site/external_dns" }, +] output.intermediate_only = true output.type = "zone" @@ -296,7 +301,7 @@ source.packages = [ "ntp-svc.tar.gz", "opte-interface-setup.tar.gz", "zone-network-setup.tar.gz", - "zone-network-install.tar.gz" + "zone-network-install.tar.gz", ] output.type = "zone" @@ -305,9 +310,9 @@ service_name = "ntp-svc" only_for_targets.image = "standard" source.type = "local" source.paths = [ - { from = "smf/ntp/manifest", to = "/var/svc/manifest/site/ntp" }, - { from = "smf/ntp/method", to = "/var/svc/method" }, - { from = "smf/ntp/etc", to = "/etc" }, + { from = "smf/ntp/manifest", to = "/var/svc/manifest/site/ntp" }, + { from = "smf/ntp/method", to = "/var/svc/method" }, + { from = "smf/ntp/etc", to = "/etc" }, ] output.intermediate_only = true output.type = "zone" @@ -318,7 +323,7 @@ only_for_targets.image = "standard" source.type = "local" source.rust.binary_names = ["mgs"] source.rust.release = true -source.paths = [ ] +source.paths = [] output.type = "zone" output.intermediate_only = true @@ -327,7 +332,7 @@ service_name = "mgs" only_for_targets.image = "standard" only_for_targets.switch = "asic" source.type = "local" -source.paths = [ { from = "smf/mgs", to = "/var/svc/manifest/site/mgs" } ] 
+source.paths = [{ from = "smf/mgs", to = "/var/svc/manifest/site/mgs" }] output.intermediate_only = true output.type = "zone" @@ -336,9 +341,9 @@ service_name = "mgs" only_for_targets.image = "standard" only_for_targets.switch = "stub" source.type = "local" -source.paths = [ - { from = "smf/mgs/manifest.xml", to = "/var/svc/manifest/site/mgs/manifest.xml" }, - { from = "smf/mgs-sim/config.toml", to = "/var/svc/manifest/site/mgs/config.toml" } +source.paths = [ + { from = "smf/mgs/manifest.xml", to = "/var/svc/manifest/site/mgs/manifest.xml" }, + { from = "smf/mgs-sim/config.toml", to = "/var/svc/manifest/site/mgs/config.toml" }, ] output.intermediate_only = true output.type = "zone" @@ -348,9 +353,9 @@ service_name = "mgs" only_for_targets.image = "standard" only_for_targets.switch = "softnpu" source.type = "local" -source.paths = [ - { from = "smf/mgs/manifest.xml", to = "/var/svc/manifest/site/mgs/manifest.xml" }, - { from = "smf/mgs-sim/config.toml", to = "/var/svc/manifest/site/mgs/config.toml" } +source.paths = [ + { from = "smf/mgs/manifest.xml", to = "/var/svc/manifest/site/mgs/manifest.xml" }, + { from = "smf/mgs-sim/config.toml", to = "/var/svc/manifest/site/mgs/config.toml" }, ] output.intermediate_only = true output.type = "zone" @@ -360,7 +365,10 @@ service_name = "mgs" only_for_targets.image = "standard" only_for_targets.switch = "asic" source.type = "composite" -source.packages = [ "omicron-gateway.tar.gz", "omicron-gateway-asic-customizations.tar.gz" ] +source.packages = [ + "omicron-gateway.tar.gz", + "omicron-gateway-asic-customizations.tar.gz", +] output.type = "zone" [package.omicron-gateway-stub] @@ -368,7 +376,10 @@ service_name = "mgs" only_for_targets.image = "standard" only_for_targets.switch = "stub" source.type = "composite" -source.packages = [ "omicron-gateway.tar.gz", "omicron-gateway-stub-customizations.tar.gz" ] +source.packages = [ + "omicron-gateway.tar.gz", + "omicron-gateway-stub-customizations.tar.gz", +] output.type = "zone" 
[package.omicron-gateway-softnpu] @@ -376,7 +387,10 @@ service_name = "mgs" only_for_targets.image = "standard" only_for_targets.switch = "softnpu" source.type = "composite" -source.packages = [ "omicron-gateway.tar.gz", "omicron-gateway-softnpu-customizations.tar.gz" ] +source.packages = [ + "omicron-gateway.tar.gz", + "omicron-gateway-softnpu-customizations.tar.gz", +] output.type = "zone" [package.wicketd] @@ -385,7 +399,7 @@ only_for_targets.image = "standard" source.type = "local" source.rust.binary_names = ["wicketd"] source.rust.release = true -source.paths = [ { from = "smf/wicketd", to = "/var/svc/manifest/site/wicketd" } ] +source.paths = [{ from = "smf/wicketd", to = "/var/svc/manifest/site/wicketd" }] output.type = "zone" output.intermediate_only = true @@ -395,9 +409,7 @@ only_for_targets.image = "standard" source.type = "local" source.rust.binary_names = ["wicket"] source.rust.release = true -source.paths = [ - { from = "wicket/zone-etc", to = "/etc" }, -] +source.paths = [{ from = "wicket/zone-etc", to = "/etc" }] output.type = "zone" output.intermediate_only = true @@ -419,7 +431,7 @@ only_for_targets.image = "standard" source.type = "local" source.rust.binary_names = ["sp-sim"] source.rust.release = true -source.paths = [ { from = "smf/sp-sim", to = "/var/svc/manifest/site/sp-sim" } ] +source.paths = [{ from = "smf/sp-sim", to = "/var/svc/manifest/site/sp-sim" }] output.type = "zone" output.intermediate_only = true @@ -428,7 +440,7 @@ service_name = "sp-sim-customizations" only_for_targets.image = "standard" only_for_targets.switch = "stub" source.type = "composite" -source.packages = [ "sp-sim.tar.gz" ] +source.packages = ["sp-sim.tar.gz"] output.type = "zone" output.intermediate_only = true @@ -437,7 +449,7 @@ service_name = "sp-sim-customizations" only_for_targets.image = "standard" only_for_targets.switch = "softnpu" source.type = "composite" -source.packages = [ "sp-sim.tar.gz" ] +source.packages = ["sp-sim.tar.gz"] output.type = "zone" 
output.intermediate_only = true @@ -445,7 +457,11 @@ output.intermediate_only = true service_name = "crucible" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "crucible.tar.gz", "zone-network-setup.tar.gz", "zone-network-install.tar.gz" ] +source.packages = [ + "crucible.tar.gz", + "zone-network-setup.tar.gz", + "zone-network-install.tar.gz", +] output.type = "zone" @@ -453,7 +469,11 @@ output.type = "zone" service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ "crucible-pantry.tar.gz", "zone-network-setup.tar.gz", "zone-network-install.tar.gz" ] +source.packages = [ + "crucible-pantry.tar.gz", + "zone-network-setup.tar.gz", + "zone-network-install.tar.gz", +] output.type = "zone" # Packages not built within Omicron, but which must be imported. @@ -575,8 +595,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "3618dd6017b363c5d34399273453cf50b9c9a43e" -source.sha256 = "eb98985871f321411f7875ef7751dba85ae0dd3034877b63ccb78cedcb96e6e7" +source.commit = "41ddeab9d43d90a51e6fc1c236dc9982fc76f922" +source.sha256 = "8ebb889a555ce59cb0373a1ec9595536e015c951f6fc4d89308b4e3f09c83b20" output.type = "zone" output.intermediate_only = true @@ -600,8 +620,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "3618dd6017b363c5d34399273453cf50b9c9a43e" -source.sha256 = "cc0429f0d9ce6df94e834cea89cabbdf4d1fbfe623369dd3eb84c5b2677414be" +source.commit = "41ddeab9d43d90a51e6fc1c236dc9982fc76f922" +source.sha256 = "3e8aa5483d22316e1fd629c77277190dafa875938a9ab3900e92a210c5e91e91" output.type = "zone" output.intermediate_only = true @@ -618,8 +638,8 @@ only_for_targets.image = "standard" # 2. 
Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "3618dd6017b363c5d34399273453cf50b9c9a43e" -source.sha256 = "fa25585fb3aa1a888b76133af3060b859cbea8e53287bb1cc64e70889db37679" +source.commit = "41ddeab9d43d90a51e6fc1c236dc9982fc76f922" +source.sha256 = "5e5f2831f3c46253828ea237f701f1fa174061ab0bf73c200d31d09e94890ae7" output.type = "zone" output.intermediate_only = true @@ -629,7 +649,7 @@ only_for_targets.switch = "asic" only_for_targets.image = "standard" source.type = "local" source.paths = [ - { from = "out/transceiver-control/root/opt/oxide/bin/xcvradm", to = "/opt/oxide/bin/xcvradm" } + { from = "out/transceiver-control/root/opt/oxide/bin/xcvradm", to = "/opt/oxide/bin/xcvradm" }, ] output.type = "zone" output.intermediate_only = true @@ -664,7 +684,7 @@ source.packages = [ "mgd.tar.gz", "switch_zone_setup.tar.gz", "xcvradm.tar.gz", - "omicron-omdb.tar.gz" + "omicron-omdb.tar.gz", ] output.type = "zone" @@ -688,7 +708,7 @@ source.packages = [ "mgd.tar.gz", "switch_zone_setup.tar.gz", "sp-sim-stub.tar.gz", - "omicron-omdb.tar.gz" + "omicron-omdb.tar.gz", ] output.type = "zone" @@ -712,7 +732,7 @@ source.packages = [ "mgd.tar.gz", "switch_zone_setup.tar.gz", "sp-sim-softnpu.tar.gz", - "omicron-omdb.tar.gz" + "omicron-omdb.tar.gz", ] output.type = "zone" @@ -721,7 +741,7 @@ service_name = "zone-network-setup" only_for_targets.image = "standard" source.type = "local" source.paths = [ - { from = "smf/zone-network-setup/manifest.xml", to = "/var/svc/manifest/site/zone-network-setup/manifest.xml" }, + { from = "smf/zone-network-setup/manifest.xml", to = "/var/svc/manifest/site/zone-network-setup/manifest.xml" }, ] output.type = "zone" output.intermediate_only = true @@ -740,7 +760,7 @@ service_name = "opte-interface-setup" only_for_targets.image = "standard" source.type = "local" source.paths = [ - { from = "smf/opte-interface-setup/manifest.xml", to = 
"/var/svc/manifest/site/opte-interface-setup/manifest.xml" }, + { from = "smf/opte-interface-setup/manifest.xml", to = "/var/svc/manifest/site/opte-interface-setup/manifest.xml" }, ] output.type = "zone" output.intermediate_only = true @@ -765,7 +785,5 @@ output.type = "tarball" [package.probe] service_name = "probe" source.type = "composite" -source.packages = [ - "thundermuffin.tar.gz", -] +source.packages = ["thundermuffin.tar.gz"] output.type = "zone" diff --git a/schema/crdb/45.0.0/up01.sql b/schema/crdb/45.0.0/up01.sql new file mode 100644 index 0000000000..429350dd29 --- /dev/null +++ b/schema/crdb/45.0.0/up01.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS rack_initialized ON omicron.public.rack (initialized); diff --git a/schema/crdb/45.0.0/up02.sql b/schema/crdb/45.0.0/up02.sql new file mode 100644 index 0000000000..b1f4bdadff --- /dev/null +++ b/schema/crdb/45.0.0/up02.sql @@ -0,0 +1,8 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bootstore_config ( + key TEXT NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (key, generation), + data JSONB NOT NULL, + time_created TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ +); diff --git a/schema/crdb/45.0.0/up03.sql b/schema/crdb/45.0.0/up03.sql new file mode 100644 index 0000000000..aa20ae144d --- /dev/null +++ b/schema/crdb/45.0.0/up03.sql @@ -0,0 +1 @@ +CREATE INDEX IF NOT EXISTS address_lot_names ON omicron.public.address_lot(name); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index c7a4db09f1..b56bec1ba9 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -243,7 +243,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled_underlay_subnet_allocation ( subnet_octet INT2 NOT NULL UNIQUE CHECK (subnet_octet BETWEEN 33 AND 255) ); --- Add an index which allows pagination by {rack_id, sled_id} pairs. +-- Add an index which allows pagination by {rack_id, sled_id} pairs. 
CREATE UNIQUE INDEX IF NOT EXISTS lookup_subnet_allocation_by_rack_and_sled ON omicron.public.sled_underlay_subnet_allocation ( rack_id, sled_id @@ -889,7 +889,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.silo_quotas ( * A view of the amount of provisioned and allocated (set by quotas) resources * on a given silo. */ -CREATE VIEW IF NOT EXISTS omicron.public.silo_utilization +CREATE VIEW IF NOT EXISTS omicron.public.silo_utilization AS SELECT c.id AS silo_id, s.name AS silo_name, @@ -902,7 +902,7 @@ AS SELECT s.discoverable as silo_discoverable FROM omicron.public.virtual_provisioning_collection AS c - RIGHT JOIN omicron.public.silo_quotas AS q + RIGHT JOIN omicron.public.silo_quotas AS q ON c.id = q.silo_id INNER JOIN omicron.public.silo AS s ON c.id = s.id @@ -1006,7 +1006,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_instance_by_project ON omicron.public.i time_deleted IS NULL; /* - * A special view of an instance provided to operators for insights into what's running + * A special view of an instance provided to operators for insights into what's running * on a sled. 
* * This view requires the VMM table, which doesn't exist yet, so create a @@ -1155,9 +1155,9 @@ SELECT digest, block_size, size_bytes -FROM +FROM omicron.public.image -WHERE +WHERE project_id IS NOT NULL; CREATE VIEW IF NOT EXISTS omicron.public.silo_image AS @@ -1176,9 +1176,9 @@ SELECT digest, block_size, size_bytes -FROM +FROM omicron.public.image -WHERE +WHERE project_id IS NULL; /* Index for silo images */ @@ -3658,6 +3658,21 @@ CREATE TABLE IF NOT EXISTS omicron.public.downstairs_client_stopped_notification PRIMARY KEY (time, upstairs_id, downstairs_id, reason) ); +CREATE INDEX IF NOT EXISTS rack_initialized ON omicron.public.rack (initialized); + +-- table for tracking bootstore configuration changes over time +-- this makes reconciliation easier and also gives us a visible history of changes +CREATE TABLE IF NOT EXISTS omicron.public.bootstore_config ( + key TEXT NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (key, generation), + data JSONB NOT NULL, + time_created TIMESTAMPTZ NOT NULL, + time_deleted TIMESTAMPTZ +); + +CREATE INDEX IF NOT EXISTS address_lot_names ON omicron.public.address_lot(name); + /* * Metadata for the schema itself. This version number isn't great, as there's * nothing to ensure it gets bumped when it should be, but it's a start. 
@@ -3692,7 +3707,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '44.0.0', NULL) + ( TRUE, NOW(), NOW(), '45.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index acad2b8d3c..c70758f775 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -565,11 +565,7 @@ impl<'a> EarlyNetworkSetup<'a> { port_config: &PortConfigV1, ) -> Result<(PortSettings, PortId), EarlyNetworkSetupError> { info!(self.log, "Building Port Configuration"); - let mut dpd_port_settings = PortSettings { - links: HashMap::new(), - v4_routes: HashMap::new(), - v6_routes: HashMap::new(), - }; + let mut dpd_port_settings = PortSettings { links: HashMap::new() }; let link_id = LinkId(0); let mut addrs = Vec::new(); diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 40ed41bfda..553cdb0aef 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -51,6 +51,7 @@ phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 +switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 [default_region_allocation_strategy] diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 2e259aa42f..9f7cb959d3 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -51,6 +51,7 @@ phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 +switch_port_settings_manager.period_secs = 30 region_replacement.period_secs = 30 [default_region_allocation_strategy] diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 
6895170e02..bbc2110a0a 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="3618dd6017b363c5d34399273453cf50b9c9a43e" -SHA2="aa670165e5b459fab4caba36ae4d382a09264ff5cf6a2dac0dae0a0db39a378e" +COMMIT="41ddeab9d43d90a51e6fc1c236dc9982fc76f922" +SHA2="50eff6d9f986b7b1af5970d11d8d01b812de37269731c6c691a244b3fdae82ae" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 74b379f359..86cf1a56ec 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="eb98985871f321411f7875ef7751dba85ae0dd3034877b63ccb78cedcb96e6e7" -CIDL_SHA256_LINUX_DPD="cb9a1978d1fe3a3f2391757f80436d8cc87c0041161652ad2234e7cf83e9ae36" -CIDL_SHA256_LINUX_SWADM="b7e737be56a8a815a95624f0b5c42ce1e339b07feeae7b3d7b9b4bc17c204245" +CIDL_SHA256_ILLUMOS="8ebb889a555ce59cb0373a1ec9595536e015c951f6fc4d89308b4e3f09c83b20" +CIDL_SHA256_LINUX_DPD="f753444cae478cdedcde743a20a9df5965ed28cddab0f9632f3c263c66cd6397" +CIDL_SHA256_LINUX_SWADM="66eab497b955751d0704c3cd97ac5c1ed373aa656fc37ccba86ae9900b5ae96d" diff --git a/tools/virtual_hardware.sh b/tools/virtual_hardware.sh index 119b64ac70..ade7ac58b3 100755 --- a/tools/virtual_hardware.sh +++ b/tools/virtual_hardware.sh @@ -41,7 +41,7 @@ function ensure_zpools { fi success "ZFS vdev $VDEV_PATH exists" if [[ -z "$(zpool list -o name | grep $ZPOOL)" ]]; then - zpool create -f "$ZPOOL" "$VDEV_PATH" + zpool create -o ashift=12 -f "$ZPOOL" "$VDEV_PATH" fi success "ZFS zpool $ZPOOL exists" done diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 31d479a5ed..984b14ff3f 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -11,7 +11,6 @@ use dpd_client::types::PortFec as DpdPortFec; use dpd_client::types::PortId; use dpd_client::types::PortSettings; use dpd_client::types::PortSpeed as DpdPortSpeed; -use dpd_client::types::RouteSettingsV4; use 
dpd_client::Client as DpdClient; use dpd_client::ClientState as DpdClientState; use either::Either; @@ -723,11 +722,7 @@ fn add_steps_for_single_local_uplink_preflight_check<'a>( .port_settings_apply( &port_id, Some(OMICRON_DPD_TAG), - &PortSettings { - links: HashMap::new(), - v4_routes: HashMap::new(), - v6_routes: HashMap::new(), - }, + &PortSettings { links: HashMap::new() }, ) .await .map_err(|err| { @@ -765,11 +760,7 @@ fn build_port_settings( OmicronPortSpeed::Speed400G => DpdPortSpeed::Speed400G, }; - let mut port_settings = PortSettings { - links: HashMap::new(), - v4_routes: HashMap::new(), - v6_routes: HashMap::new(), - }; + let mut port_settings = PortSettings { links: HashMap::new() }; let addrs = uplink.addresses.iter().map(|a| a.ip()).collect(); @@ -788,13 +779,10 @@ fn build_port_settings( ); for r in &uplink.routes { - if let (IpNetwork::V4(dst), IpAddr::V4(nexthop)) = + if let (IpNetwork::V4(_dst), IpAddr::V4(_nexthop)) = (r.destination, r.nexthop) { - port_settings.v4_routes.insert( - dst.to_string(), - vec![RouteSettingsV4 { link_id: link_id.0, nexthop }], - ); + // TODO: do we need to create config for mgd? } }