diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 79d8cb478e1..e84e7961cd3 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -13,11 +13,9 @@ use nexus_db_model::Ipv4NatValues; use nexus_db_model::Vni as DbVni; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; -use nexus_db_queries::db; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::DataStore; use omicron_common::api::external::Error; -use omicron_common::api::internal::nexus; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::SwitchLocation; use oxnet::Ipv4Net; @@ -227,180 +225,179 @@ pub(crate) async fn boundary_switches( Ok(boundary_switches) } -/// Given old and new instance runtime states, determines the desired -/// networking configuration for a given instance and ensures it has been -/// propagated to all relevant sleds. -/// -/// # Arguments -/// -/// - `datastore`: the datastore to use for lookups and updates. -/// - `log`: the [`slog::Logger`] to log to. -/// - `resolver`: an internal DNS resolver to look up DPD service addresses. -/// - `opctx`: An operation context for this operation. -/// - `opctx_alloc`: An operational context list permissions for all sleds. When -/// called by methods on the [`Nexus`] type, this is the `OpContext` used for -/// instance allocation. In a background task, this may be the background -/// task's operational context; nothing stops you from passing the same -/// `OpContext` as both `opctx` and `opctx_alloc`. -/// - `authz_instance``: A resolved authorization context for the instance of -/// interest. -/// - `prev_instance_state``: The most-recently-recorded instance runtime -/// state for this instance. -/// - `new_instance_state`: The instance state that the caller of this routine -/// has observed and that should be used to set up this instance's -/// networking state. -/// -/// # Return value -/// -/// `Ok(())` if this routine completed all the operations it wanted to -/// complete, or an appropriate `Err` otherwise. -#[allow(clippy::too_many_arguments)] // Yeah, I know, I know, Clippy... -#[allow(dead_code)] // TODO(eliza): this probably needs to be deleted eventually -pub(crate) async fn ensure_updated_instance_network_config( - datastore: &DataStore, - log: &slog::Logger, - resolver: &internal_dns::resolver::Resolver, - opctx: &OpContext, - opctx_alloc: &OpContext, - authz_instance: &authz::Instance, - prev_instance_state: &db::model::InstanceRuntimeState, - new_instance_state: &nexus::InstanceRuntimeState, - v2p_notification_tx: tokio::sync::watch::Sender<()>, -) -> Result<(), Error> { - let instance_id = authz_instance.id(); - - // If this instance update is stale, do nothing, since the superseding - // update may have allowed the instance's location to change further. - if prev_instance_state.gen >= new_instance_state.gen.into() { - debug!(log, - "instance state generation already advanced, \ - won't touch network config"; - "instance_id" => %instance_id); - - return Ok(()); - } - - // If this update will retire the instance's active VMM, delete its - // networking state. It will be re-established the next time the - // instance starts. 
- if new_instance_state.propolis_id.is_none() { - info!(log, - "instance cleared its Propolis ID, cleaning network config"; - "instance_id" => %instance_id, - "propolis_id" => ?prev_instance_state.propolis_id); - - clear_instance_networking_state( - datastore, - log, - resolver, - opctx, - opctx_alloc, - authz_instance, - v2p_notification_tx, - ) - .await?; - return Ok(()); - } - - // If the instance still has a migration in progress, don't change - // any networking state until an update arrives that retires that - // migration. - // - // This is needed to avoid the following race: - // - // 1. Migration from S to T completes. - // 2. Migration source sends an update that changes the instance's - // active VMM but leaves the migration ID in place. - // 3. Meanwhile, migration target sends an update that changes the - // instance's active VMM and clears the migration ID. - // 4. The migration target's call updates networking state and commits - // the new instance record. - // 5. The instance migrates from T to T' and Nexus applies networking - // configuration reflecting that the instance is on T'. - // 6. The update in step 2 applies configuration saying the instance - // is on sled T. - if new_instance_state.migration_id.is_some() { - debug!(log, - "instance still has a migration in progress, won't touch \ - network config"; - "instance_id" => %instance_id, - "migration_id" => ?new_instance_state.migration_id); - - return Ok(()); - } - - let new_propolis_id = new_instance_state.propolis_id.unwrap(); - - // Updates that end live migration need to push OPTE V2P state even if - // the instance's active sled did not change (see below). - let migration_retired = prev_instance_state.migration_id.is_some() - && new_instance_state.migration_id.is_none(); - - if (prev_instance_state.propolis_id == new_instance_state.propolis_id) - && !migration_retired - { - debug!(log, "instance didn't move, won't touch network config"; - "instance_id" => %instance_id); - - return Ok(()); - } - - // Either the instance moved from one sled to another, or it attempted - // to migrate and failed. Ensure the correct networking configuration - // exists for its current home. - // - // TODO(#3107) This is necessary even if the instance didn't move, - // because registering a migration target on a sled creates OPTE ports - // for its VNICs, and that creates new V2P mappings on that sled that - // place the relevant virtual IPs on the local sled. Once OPTE stops - // creating these mappings, this path only needs to be taken if an - // instance has changed sleds. - let new_sled_id = match datastore - .vmm_fetch(&opctx, authz_instance, &new_propolis_id) - .await - { - Ok(vmm) => vmm.sled_id, - - // A VMM in the active position should never be destroyed. If the - // sled sending this message is the owner of the instance's last - // active VMM and is destroying it, it should also have retired that - // VMM. - Err(Error::ObjectNotFound { .. 
}) => { - error!(log, "instance's active vmm unexpectedly not found"; - "instance_id" => %instance_id, - "propolis_id" => %new_propolis_id); - - return Ok(()); - } - - Err(e) => return Err(e), - }; - - if let Err(e) = v2p_notification_tx.send(()) { - error!( - log, - "error notifying background task of v2p change"; - "error" => ?e - ) - }; - - let (.., sled) = - LookupPath::new(opctx, datastore).sled_id(new_sled_id).fetch().await?; - - instance_ensure_dpd_config( - datastore, - log, - resolver, - opctx, - opctx_alloc, - instance_id, - &sled.address(), - None, - ) - .await?; - - Ok(()) -} +// /// Given old and new instance runtime states, determines the desired +// /// networking configuration for a given instance and ensures it has been +// /// propagated to all relevant sleds. +// /// +// /// # Arguments +// /// +// /// - `datastore`: the datastore to use for lookups and updates. +// /// - `log`: the [`slog::Logger`] to log to. +// /// - `resolver`: an internal DNS resolver to look up DPD service addresses. +// /// - `opctx`: An operation context for this operation. +// /// - `opctx_alloc`: An operational context list permissions for all sleds. When +// /// called by methods on the [`Nexus`] type, this is the `OpContext` used for +// /// instance allocation. In a background task, this may be the background +// /// task's operational context; nothing stops you from passing the same +// /// `OpContext` as both `opctx` and `opctx_alloc`. +// /// - `authz_instance``: A resolved authorization context for the instance of +// /// interest. +// /// - `prev_instance_state``: The most-recently-recorded instance runtime +// /// state for this instance. +// /// - `new_instance_state`: The instance state that the caller of this routine +// /// has observed and that should be used to set up this instance's +// /// networking state. +// /// +// /// # Return value +// /// +// /// `Ok(())` if this routine completed all the operations it wanted to +// /// complete, or an appropriate `Err` otherwise. +// #[allow(clippy::too_many_arguments)] // Yeah, I know, I know, Clippy... +// pub(crate) async fn ensure_updated_instance_network_config( +// datastore: &DataStore, +// log: &slog::Logger, +// resolver: &internal_dns::resolver::Resolver, +// opctx: &OpContext, +// opctx_alloc: &OpContext, +// authz_instance: &authz::Instance, +// prev_instance_state: &db::model::InstanceRuntimeState, +// new_instance_state: &nexus::InstanceRuntimeState, +// v2p_notification_tx: tokio::sync::watch::Sender<()>, +// ) -> Result<(), Error> { +// let instance_id = authz_instance.id(); + +// // If this instance update is stale, do nothing, since the superseding +// // update may have allowed the instance's location to change further. +// if prev_instance_state.gen >= new_instance_state.gen.into() { +// debug!(log, +// "instance state generation already advanced, \ +// won't touch network config"; +// "instance_id" => %instance_id); + +// return Ok(()); +// } + +// // If this update will retire the instance's active VMM, delete its +// // networking state. It will be re-established the next time the +// // instance starts. 
+// if new_instance_state.propolis_id.is_none() { +// info!(log, +// "instance cleared its Propolis ID, cleaning network config"; +// "instance_id" => %instance_id, +// "propolis_id" => ?prev_instance_state.propolis_id); + +// clear_instance_networking_state( +// datastore, +// log, +// resolver, +// opctx, +// opctx_alloc, +// authz_instance, +// v2p_notification_tx, +// ) +// .await?; +// return Ok(()); +// } + +// // If the instance still has a migration in progress, don't change +// // any networking state until an update arrives that retires that +// // migration. +// // +// // This is needed to avoid the following race: +// // +// // 1. Migration from S to T completes. +// // 2. Migration source sends an update that changes the instance's +// // active VMM but leaves the migration ID in place. +// // 3. Meanwhile, migration target sends an update that changes the +// // instance's active VMM and clears the migration ID. +// // 4. The migration target's call updates networking state and commits +// // the new instance record. +// // 5. The instance migrates from T to T' and Nexus applies networking +// // configuration reflecting that the instance is on T'. +// // 6. The update in step 2 applies configuration saying the instance +// // is on sled T. +// if new_instance_state.migration_id.is_some() { +// debug!(log, +// "instance still has a migration in progress, won't touch \ +// network config"; +// "instance_id" => %instance_id, +// "migration_id" => ?new_instance_state.migration_id); + +// return Ok(()); +// } + +// let new_propolis_id = new_instance_state.propolis_id.unwrap(); + +// // Updates that end live migration need to push OPTE V2P state even if +// // the instance's active sled did not change (see below). +// let migration_retired = prev_instance_state.migration_id.is_some() +// && new_instance_state.migration_id.is_none(); + +// if (prev_instance_state.propolis_id == new_instance_state.propolis_id) +// && !migration_retired +// { +// debug!(log, "instance didn't move, won't touch network config"; +// "instance_id" => %instance_id); + +// return Ok(()); +// } + +// // Either the instance moved from one sled to another, or it attempted +// // to migrate and failed. Ensure the correct networking configuration +// // exists for its current home. +// // +// // TODO(#3107) This is necessary even if the instance didn't move, +// // because registering a migration target on a sled creates OPTE ports +// // for its VNICs, and that creates new V2P mappings on that sled that +// // place the relevant virtual IPs on the local sled. Once OPTE stops +// // creating these mappings, this path only needs to be taken if an +// // instance has changed sleds. +// let new_sled_id = match datastore +// .vmm_fetch(&opctx, authz_instance, &new_propolis_id) +// .await +// { +// Ok(vmm) => vmm.sled_id, + +// // A VMM in the active position should never be destroyed. If the +// // sled sending this message is the owner of the instance's last +// // active VMM and is destroying it, it should also have retired that +// // VMM. +// Err(Error::ObjectNotFound { .. 
}) => {
+//             error!(log, "instance's active vmm unexpectedly not found";
+//                 "instance_id" => %instance_id,
+//                 "propolis_id" => %new_propolis_id);
+
+//             return Ok(());
+//         }
+
+//         Err(e) => return Err(e),
+//     };
+
+//     if let Err(e) = v2p_notification_tx.send(()) {
+//         error!(
+//             log,
+//             "error notifying background task of v2p change";
+//             "error" => ?e
+//         )
+//     };
+
+//     let (.., sled) =
+//         LookupPath::new(opctx, datastore).sled_id(new_sled_id).fetch().await?;
+
+//     instance_ensure_dpd_config(
+//         datastore,
+//         log,
+//         resolver,
+//         opctx,
+//         opctx_alloc,
+//         instance_id,
+//         &sled.address(),
+//         None,
+//     )
+//     .await?;
+
+//     Ok(())
+// }
 
 /// Ensures that the Dendrite configuration for the supplied instance is
 /// up-to-date.
diff --git a/nexus/src/app/sagas/instance_update/destroyed.rs b/nexus/src/app/sagas/instance_update/destroyed.rs
index d673be00409..7aadb1cd1a2 100644
--- a/nexus/src/app/sagas/instance_update/destroyed.rs
+++ b/nexus/src/app/sagas/instance_update/destroyed.rs
@@ -15,7 +15,6 @@ use nexus_db_model::Vmm;
 use nexus_db_queries::authn;
 use nexus_db_queries::authz;
 use nexus_db_queries::db::datastore::InstanceAndVmms;
-use nexus_db_queries::db::identity::Resource;
 use omicron_common::api::external;
 use omicron_common::api::external::Error;
 use omicron_common::api::external::InstanceState;
@@ -154,15 +153,14 @@ async fn siud_release_sled_resources(
 async fn siud_release_virtual_provisioning(
     sagactx: NexusActionContext,
 ) -> Result<(), ActionError> {
-    let Some((instance, vmm)) = get_destroyed_vmm(&sagactx)? else {
-        // if the update we are handling is not an active VMM destroyed update,
-        // bail --- there's nothing to do here.
-        return Ok(());
-    };
     let osagactx = sagactx.user_data();
-    let Params { ref serialized_authn, ref authz_instance, vmm_id, .. } =
-        sagactx.saga_params::<Params>()?;
+    let Params {
+        ref serialized_authn,
+        ref authz_instance,
+        vmm_id,
+        instance,
+        ..
+    } = sagactx.saga_params::<Params>()?;
 
     let opctx =
         crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs
index 18047fae3c0..3e1163545d7 100644
--- a/sled-agent/src/common/instance.rs
+++ b/sled-agent/src/common/instance.rs
@@ -251,20 +251,12 @@ mod test {
     use chrono::Utc;
 
     use omicron_common::api::external::Generation;
-    use omicron_common::api::internal::nexus::InstanceRuntimeState;
     use propolis_client::types::InstanceState as Observed;
     use uuid::Uuid;
 
     fn make_instance() -> InstanceStates {
         let propolis_id = Uuid::new_v4();
         let now = Utc::now();
-        let instance = InstanceRuntimeState {
-            propolis_id: Some(propolis_id),
-            dst_propolis_id: None,
-            migration_id: None,
-            gen: Generation::new(),
-            time_updated: now,
-        };
 
         let vmm = VmmRuntimeState {
             state: ApiInstanceState::Starting,
@@ -313,7 +305,6 @@ mod test {
     fn propolis_terminal_states_request_destroy_action() {
         for state in [Observed::Destroyed, Observed::Failed] {
             let mut instance_state = make_instance();
-            let original_instance_state = instance_state.clone();
 
             let requested_action = instance_state
                 .apply_propolis_observation(&make_observed_state(state.into()));
diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs
index 17a0ceac069..b1cbfd87ef1 100644
--- a/sled-agent/src/instance.rs
+++ b/sled-agent/src/instance.rs
@@ -906,7 +906,6 @@ pub struct Instance {
 #[derive(Debug)]
 pub(crate) struct InstanceInitialState {
     pub hardware: InstanceHardware,
-    pub instance_runtime: InstanceRuntimeState,
     pub vmm_runtime: VmmRuntimeState,
     pub propolis_addr: SocketAddr,
 }
@@ -939,10 +938,7 @@ impl Instance {
             "state" => ?state);
 
         let InstanceInitialState {
-            hardware,
-            instance_runtime,
-            vmm_runtime,
-            propolis_addr,
+            hardware, vmm_runtime, propolis_addr, ..
        } = state;
 
         let InstanceManagerServices {
diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs
index ee1425f0d7f..080f08ad7db 100644
--- a/sled-agent/src/instance_manager.rs
+++ b/sled-agent/src/instance_manager.rs
@@ -595,7 +595,6 @@ impl InstanceManagerRunner {
 
         let state = crate::instance::InstanceInitialState {
             hardware,
-            instance_runtime,
             vmm_runtime,
             propolis_addr,
         };
diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs
index 39700870049..6262d78f86e 100644
--- a/sled-agent/src/sim/collection.rs
+++ b/sled-agent/src/sim/collection.rs
@@ -423,7 +423,6 @@ mod test {
     use omicron_common::api::external::Generation;
     use omicron_common::api::external::InstanceState;
     use omicron_common::api::internal::nexus::DiskRuntimeState;
-    use omicron_common::api::internal::nexus::InstanceRuntimeState;
     use omicron_common::api::internal::nexus::SledInstanceState;
     use omicron_common::api::internal::nexus::VmmRuntimeState;
     use omicron_test_utils::dev::test_setup_log;
@@ -433,14 +432,6 @@ mod test {
         logctx: &LogContext,
     ) -> (SimObject<SimInstance>, Receiver<()>) {
         let propolis_id = Uuid::new_v4();
-        let instance_vmm = InstanceRuntimeState {
-            propolis_id: Some(propolis_id),
-            dst_propolis_id: None,
-            migration_id: None,
-            gen: Generation::new(),
-            time_updated: Utc::now(),
-        };
-
         let vmm_state = VmmRuntimeState {
             state: InstanceState::Starting,
             gen: Generation::new(),
diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs
index 87d9366f5b3..bf3c09cdeb6 100644
--- a/sled-agent/src/sim/instance.rs
+++ b/sled-agent/src/sim/instance.rs
@@ -16,9 +16,7 @@ use omicron_common::api::external::Error;
 use omicron_common::api::external::Generation;
 use omicron_common::api::external::InstanceState as ApiInstanceState;
 use omicron_common::api::external::ResourceType;
-use omicron_common::api::internal::nexus::{
-    InstanceRuntimeState, SledInstanceState,
-};
+use omicron_common::api::internal::nexus::SledInstanceState;
 use propolis_client::types::{
     InstanceMigrateStatusResponse as PropolisMigrateStatus,
     InstanceState as PropolisInstanceState, InstanceStateMonitorResponse,
diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs
index 48c81a2897b..a4684d48064 100644
--- a/sled-agent/src/sim/sled_agent.rs
+++ b/sled-agent/src/sim/sled_agent.rs
@@ -263,7 +263,7 @@ impl SledAgent {
         instance_id: Uuid,
         propolis_id: Uuid,
         hardware: InstanceHardware,
-        instance_runtime: InstanceRuntimeState,
+        _instance_runtime: InstanceRuntimeState,
         vmm_runtime: VmmRuntimeState,
         metadata: InstanceMetadata,
     ) -> Result<SledInstanceState, Error> {
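
Reviewer note, not part of the patch: the guard in the function being commented
out in nexus/src/app/instance_network.rs reduces to a strict generation
comparison plus a "no migration still in flight" check, which is what defeats
the S -> T -> T' race spelled out in steps 1-6 of the removed comment. A
minimal, self-contained sketch of that predicate, with simplified stand-in
types (`Generation` and `RuntimeState` here are hypothetical, not the
omicron_common definitions):

    // Stand-in for omicron's monotonically increasing generation number.
    #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    struct Generation(u64);

    struct RuntimeState {
        gen: Generation,
        migration_id: Option<u64>, // stand-in for the migration UUID
    }

    /// True only for updates that strictly supersede `prev` and are not part
    /// of a still-live migration; stale or mid-migration updates must not
    /// touch network config.
    fn may_touch_network_config(prev: &RuntimeState, new: &RuntimeState) -> bool {
        new.gen > prev.gen && new.migration_id.is_none()
    }

    fn main() {
        let prev = RuntimeState { gen: Generation(4), migration_id: None };
        let stale = RuntimeState { gen: Generation(4), migration_id: None };
        let fresh = RuntimeState { gen: Generation(5), migration_id: None };
        assert!(!may_touch_network_config(&prev, &stale)); // same gen: skip
        assert!(may_touch_network_config(&prev, &fresh));
    }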