From d2ba9ad5597e49f835d7937e6e4e09d68025771a Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Wed, 19 Jun 2024 16:35:29 -0700 Subject: [PATCH] tear up way more of sled-agent --- clients/nexus-client/src/lib.rs | 16 +- clients/sled-agent-client/src/lib.rs | 16 +- common/src/api/internal/nexus.rs | 56 +- nexus/db-queries/src/db/datastore/instance.rs | 27 +- nexus/db-queries/src/db/datastore/vmm.rs | 26 +- nexus/db-queries/src/db/queries/instance.rs | 362 ++++++++++--- nexus/src/app/instance.rs | 184 +------ openapi/nexus-internal.json | 35 +- openapi/sled-agent.json | 35 +- sled-agent/src/common/instance.rs | 507 +++++++++--------- sled-agent/src/instance.rs | 13 +- sled-agent/src/sim/collection.rs | 10 +- sled-agent/src/sim/instance.rs | 115 ++-- sled-agent/src/sim/sled_agent.rs | 3 +- 14 files changed, 683 insertions(+), 722 deletions(-) diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index ce29749c0ef..1f736522950 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -133,7 +133,8 @@ impl From Self { propolis_id: s.propolis_id, vmm_state: s.vmm_state.into(), - migration_state: s.migration_state.map(Into::into), + migration_in: s.migration_in.map(Into::into), + migration_out: s.migration_out.map(Into::into), } } } @@ -146,7 +147,6 @@ impl From ) -> Self { Self { migration_id: s.migration_id, - role: s.role.into(), state: s.state.into(), gen: s.gen, time_updated: s.time_updated, @@ -154,18 +154,6 @@ impl From } } -impl From - for types::MigrationRole -{ - fn from(s: omicron_common::api::internal::nexus::MigrationRole) -> Self { - use omicron_common::api::internal::nexus::MigrationRole as Input; - match s { - Input::Source => Self::Source, - Input::Target => Self::Target, - } - } -} - impl From for types::MigrationState { diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index f12d2800285..29c94936005 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -329,7 +329,8 @@ impl From Self { propolis_id: s.propolis_id, vmm_state: s.vmm_state.into(), - migration_state: s.migration_state.map(Into::into), + migration_in: s.migration_in.map(Into::into), + migration_out: s.migration_out.map(Into::into), } } } @@ -341,25 +342,12 @@ impl From Self { migration_id: s.migration_id, state: s.state.into(), - role: s.role.into(), gen: s.gen, time_updated: s.time_updated, } } } -impl From - for omicron_common::api::internal::nexus::MigrationRole -{ - fn from(r: types::MigrationRole) -> Self { - use omicron_common::api::internal::nexus::MigrationRole as Output; - match r { - types::MigrationRole::Source => Output::Source, - types::MigrationRole::Target => Output::Target, - } - } -} - impl From for omicron_common::api::internal::nexus::MigrationState { diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index 74c14fe0bcc..bc235eb0e40 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -108,9 +108,32 @@ pub struct SledInstanceState { /// The most recent state of the sled's VMM process. pub vmm_state: VmmRuntimeState, - /// The current state of any in-progress migration for this instance, as - /// understood by this sled. - pub migration_state: Option, + /// The current state of any inbound migration to this VMM. + pub migration_in: Option, + + /// The state of any outbound migration to this VMM. + pub migration_out: Option, +} + +#[derive(Copy, Clone, Debug, Default)] +pub struct Migrations<'state> { + pub migration_in: Option<&'state MigrationRuntimeState>, + pub migration_out: Option<&'state MigrationRuntimeState>, +} + +impl Migrations<'_> { + pub fn empty() -> Self { + Self { migration_in: None, migration_out: None } + } +} + +impl SledInstanceState { + pub fn migrations(&self) -> Migrations<'_> { + Migrations { + migration_in: self.migration_in.as_ref(), + migration_out: self.migration_out.as_ref(), + } + } } /// An update from a sled regarding the state of a migration, indicating the @@ -119,7 +142,6 @@ pub struct SledInstanceState { pub struct MigrationRuntimeState { pub migration_id: Uuid, pub state: MigrationState, - pub role: MigrationRole, pub gen: Generation, /// Timestamp for the migration state update. @@ -174,32 +196,6 @@ impl fmt::Display for MigrationState { } } -#[derive( - Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema, -)] -#[serde(rename_all = "snake_case")] -pub enum MigrationRole { - /// This update concerns the source VMM of a migration. - Source, - /// This update concerns the target VMM of a migration. - Target, -} - -impl MigrationRole { - pub fn label(&self) -> &'static str { - match self { - Self::Source => "source", - Self::Target => "target", - } - } -} - -impl fmt::Display for MigrationRole { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - f.write_str(self.label()) - } -} - // Oximeter producer/collector objects. /// The kind of metric producer this is. diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index d6a2348ec4e..f008b04819b 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -47,7 +47,7 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; -use omicron_common::api::internal::nexus::MigrationRuntimeState; +use omicron_common::api::internal::nexus::Migrations; use omicron_common::bail_unless; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::InstanceUuid; @@ -549,13 +549,13 @@ impl DataStore { new_instance: &InstanceRuntimeState, vmm_id: &PropolisUuid, new_vmm: &VmmRuntimeState, - migration: &Option, + migrations: Migrations<'_>, ) -> Result { let query = crate::db::queries::instance::InstanceAndVmmUpdate::new( *vmm_id, new_vmm.clone(), Some((*instance_id, new_instance.clone())), - migration.clone(), + migrations, ); // The InstanceAndVmmUpdate query handles and indicates failure to find @@ -566,26 +566,21 @@ impl DataStore { .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - let instance_updated = match result.instance_status { - Some(UpdateStatus::Updated) => true, - Some(UpdateStatus::NotUpdatedButExists) => false, - None => false, - }; - + let instance_updated = result.instance_status.was_updated(); let vmm_updated = match result.vmm_status { Some(UpdateStatus::Updated) => true, Some(UpdateStatus::NotUpdatedButExists) => false, None => false, }; - let migration_updated = if migration.is_some() { - Some(match result.migration_status { - Some(UpdateStatus::Updated) => true, - Some(UpdateStatus::NotUpdatedButExists) => false, - None => false, - }) + let migration_updated = if migrations.migration_in.is_some() + || migrations.migration_out.is_some() + { + Some( + result.migration_in_status.was_updated() + || result.migration_out_status.was_updated(), + ) } else { - debug_assert_eq!(result.migration_status, None); None }; diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs index d1570819219..798bdf2b4f5 100644 --- a/nexus/db-queries/src/db/datastore/vmm.rs +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -27,7 +27,7 @@ use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; -use omicron_common::api::internal::nexus::MigrationRuntimeState; +use omicron_common::api::internal::nexus::Migrations; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PropolisUuid; use std::net::SocketAddr; @@ -144,15 +144,15 @@ impl DataStore { pub async fn vmm_and_migration_update_runtime( &self, - vmm_id: Uuid, + vmm_id: PropolisUuid, new_runtime: &VmmRuntimeState, - migration: Option<&MigrationRuntimeState>, + migrations: Migrations<'_>, ) -> Result<(bool, Option), Error> { let query = crate::db::queries::instance::InstanceAndVmmUpdate::new( vmm_id, new_runtime.clone(), None, - migration.cloned(), + migrations, ); // The InstanceAndVmmUpdate query handles and indicates failure to find @@ -163,25 +163,23 @@ impl DataStore { .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - debug_assert_eq!(result.instance_status, None); + // debug_assert_eq!(result.instance_status, ); let vmm_updated = match result.vmm_status { Some(UpdateStatus::Updated) => true, Some(UpdateStatus::NotUpdatedButExists) => false, None => false, }; - - let migration_updated = if migration.is_some() { - Some(match result.migration_status { - Some(UpdateStatus::Updated) => true, - Some(UpdateStatus::NotUpdatedButExists) => false, - None => false, - }) + let migration_updated = if migrations.migration_in.is_some() + || migrations.migration_out.is_some() + { + Some( + result.migration_in_status.was_updated() + || result.migration_out_status.was_updated(), + ) } else { - debug_assert_eq!(result.migration_status, None); None }; - Ok((vmm_updated, migration_updated)) } diff --git a/nexus/db-queries/src/db/queries/instance.rs b/nexus/db-queries/src/db/queries/instance.rs index 011020c10fd..c8bb6a7e091 100644 --- a/nexus/db-queries/src/db/queries/instance.rs +++ b/nexus/db-queries/src/db/queries/instance.rs @@ -18,9 +18,7 @@ use nexus_db_model::{ }, Generation, InstanceRuntimeState, MigrationState, VmmRuntimeState, }; -use omicron_common::api::internal::nexus::{ - MigrationRole, MigrationRuntimeState, -}; +use omicron_common::api::internal::nexus::{MigrationRuntimeState, Migrations}; use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid}; use uuid::Uuid; @@ -88,7 +86,8 @@ pub struct InstanceAndVmmUpdate { vmm_find: Box + Send>, vmm_update: Box + Send>, instance: Option, - migration: Option, + migration_in: Option, + migration_out: Option, } struct Update { @@ -104,16 +103,38 @@ pub struct InstanceAndVmmUpdateResult { /// `Some(status)` if the target instance was found; the wrapped /// `UpdateStatus` indicates whether the row was updated. `None` if the /// instance was not found. - pub instance_status: Option, + pub instance_status: RecordUpdateStatus, /// `Some(status)` if the target VMM was found; the wrapped `UpdateStatus` /// indicates whether the row was updated. `None` if the VMM was not found. pub vmm_status: Option, - /// `Some(status)` if the target migration was found; the wrapped `UpdateStatus` - /// indicates whether the row was updated. `None` if the migration was not - /// found, or no migration update was performed. - pub migration_status: Option, + /// `Some(status)` if the inbound migration was found; the wrapped `UpdateStatus` + /// indicates whether the row was updated. `None` if the inbound migration + /// was not found, or no migration update was performed. + pub migration_in_status: RecordUpdateStatus, + + /// `Some(status)` if the outbound migration was found; the wrapped `UpdateStatus` + /// indicates whether the row was updated. `None` if the inbound migration + /// was not found, or no migration update was performed. + pub migration_out_status: RecordUpdateStatus, +} + +#[derive(Copy, Clone, PartialEq, Debug)] +pub enum RecordUpdateStatus { + /// No record was found for the provided ID. + NotFound, + /// No record for this table was provided as part of the update. + NotProvided, + /// An update for this record was provided, and a a record matching the + /// provided ID exists. + Found(UpdateStatus), +} + +impl RecordUpdateStatus { + pub fn was_updated(self) -> bool { + matches!(self, Self::Found(UpdateStatus::Updated)) + } } /// Computes the update status to return from the results of queries that find @@ -159,7 +180,7 @@ impl InstanceAndVmmUpdate { vmm_id: PropolisUuid, new_vmm_runtime_state: VmmRuntimeState, instance: Option<(InstanceUuid, InstanceRuntimeState)>, - migration: Option, + Migrations { migration_in, migration_out }: Migrations<'_>, ) -> Self { let vmm_find = Box::new( vmm_dsl::vmm @@ -201,75 +222,98 @@ impl InstanceAndVmmUpdate { } }); - let migration = migration.map( + fn migration_find( + migration_id: Uuid, + ) -> Box + Send> { + Box::new( + migration_dsl::migration + .filter(migration_dsl::id.eq(migration_id)) + .filter(migration_dsl::time_deleted.is_null()) + .select(migration_dsl::id), + ) + } + + let migration_in = migration_in.cloned().map( |MigrationRuntimeState { - role, migration_id, state, gen, time_updated, }| { let state = MigrationState::from(state); - let find = Box::new( - migration_dsl::migration + let gen = Generation::from(gen); + let update = Box::new( + diesel::update(migration_dsl::migration) .filter(migration_dsl::id.eq(migration_id)) - .filter(migration_dsl::time_deleted.is_null()) - .select(migration_dsl::id), + .filter( + migration_dsl::target_propolis_id + .eq(vmm_id.into_untyped_uuid()), + ) + .filter(migration_dsl::target_gen.lt(gen)) + .set(( + migration_dsl::target_state.eq(state), + migration_dsl::time_target_updated.eq(time_updated), + )), ); + Update { + find: migration_find(migration_id), + update, + name: "migration_in", + id: migration_dsl::id::NAME, + } + }, + ); + + let migration_out = migration_out.cloned().map( + |MigrationRuntimeState { + migration_id, + state, + gen, + time_updated, + }| { + let state = MigrationState::from(state); let gen = Generation::from(gen); - let update: Box + Send> = match role { - MigrationRole::Target => Box::new( - diesel::update(migration_dsl::migration) - .filter(migration_dsl::id.eq(migration_id)) - .filter( - migration_dsl::target_propolis_id - .eq(vmm_id.into_untyped_uuid()), - ) - .filter(migration_dsl::target_gen.lt(gen)) - .set(( - migration_dsl::target_state.eq(state), - migration_dsl::time_target_updated - .eq(time_updated), - )), - ), - MigrationRole::Source => Box::new( - diesel::update(migration_dsl::migration) - .filter(migration_dsl::id.eq(migration_id)) - .filter( - migration_dsl::source_propolis_id - .eq(vmm_id.into_untyped_uuid()), - ) - .filter(migration_dsl::source_gen.lt(gen)) - .set(( - migration_dsl::source_state.eq(state), - migration_dsl::time_source_updated - .eq(time_updated), - )), - ), - }; + let update = Box::new( + diesel::update(migration_dsl::migration) + .filter(migration_dsl::id.eq(migration_id)) + .filter( + migration_dsl::source_propolis_id + .eq(vmm_id.into_untyped_uuid()), + ) + .filter(migration_dsl::source_gen.lt(gen)) + .set(( + migration_dsl::source_state.eq(state), + migration_dsl::time_source_updated.eq(time_updated), + )), + ); Update { - find, + find: migration_find(migration_id), update, - name: "migration", + name: "migration_out", id: migration_dsl::id::NAME, } }, ); - Self { vmm_find, vmm_update, instance, migration } + Self { vmm_find, vmm_update, instance, migration_in, migration_out } } pub async fn execute_and_check( self, conn: &(impl async_bb8_diesel::AsyncConnection + Sync), ) -> Result { + let has_migration_in = self.migration_in.is_some(); + let has_migration_out = self.migration_out.is_some(); + let has_instance = self.instance.is_some(); let ( vmm_found, vmm_updated, instance_found, instance_updated, - migration_found, - migration_updated, + migration_in_found, + migration_in_updated, + migration_out_found, + migration_out_updated, ) = self .get_result_async::<( Option, @@ -278,19 +322,43 @@ impl InstanceAndVmmUpdate { Option, Option, Option, + Option, + Option, + // WHEW! )>(conn) .await?; - let instance_status = - compute_update_status(instance_found, instance_updated); let vmm_status = compute_update_status(vmm_found, vmm_updated); - let migration_status = - compute_update_status(migration_found, migration_updated); + + let instance_status = if has_instance { + compute_update_status(instance_found, instance_updated) + .map(RecordUpdateStatus::Found) + .unwrap_or(RecordUpdateStatus::NotFound) + } else { + RecordUpdateStatus::NotProvided + }; + + let migration_in_status = if has_migration_in { + compute_update_status(migration_in_found, migration_in_updated) + .map(RecordUpdateStatus::Found) + .unwrap_or(RecordUpdateStatus::NotFound) + } else { + RecordUpdateStatus::NotProvided + }; + + let migration_out_status = if has_migration_out { + compute_update_status(migration_out_found, migration_out_updated) + .map(RecordUpdateStatus::Found) + .unwrap_or(RecordUpdateStatus::NotFound) + } else { + RecordUpdateStatus::NotProvided + }; Ok(InstanceAndVmmUpdateResult { instance_status, vmm_status, - migration_status, + migration_in_status, + migration_out_status, }) } } @@ -308,6 +376,8 @@ impl Query for InstanceAndVmmUpdate { Nullable, Nullable, Nullable, + Nullable, + Nullable, ); } @@ -362,8 +432,13 @@ impl QueryFragment for InstanceAndVmmUpdate { out.push_sql(", "); } - if let Some(ref migration) = self.migration { - migration.push_subqueries(&mut out)?; + if let Some(ref m) = self.migration_in { + m.push_subqueries(&mut out)?; + out.push_sql(", "); + } + + if let Some(ref m) = self.migration_out { + m.push_subqueries(&mut out)?; out.push_sql(", "); } @@ -386,24 +461,38 @@ impl QueryFragment for InstanceAndVmmUpdate { out.push_identifier(vmm_dsl::id::NAME)?; out.push_sql(") "); - out.push_sql("SELECT vmm_result.found, vmm_result.updated, "); - if self.instance.is_some() { - out.push_sql("instance_result.found, instance_result.updated, "); - } else { - out.push_sql("NULL, NULL, "); + fn push_select_from_result( + update: Option<&Update>, + out: &mut AstPass<'_, '_, Pg>, + ) { + if let Some(update) = update { + out.push_sql(update.name); + out.push_sql("_result.found, "); + out.push_sql(update.name); + out.push_sql("_result.updated"); + } else { + out.push_sql("NULL, NULL") + } } - if self.migration.is_some() { - out.push_sql("migration_result.found, migration_result.updated "); - } else { - out.push_sql("NULL, NULL "); - } + out.push_sql("SELECT vmm_result.found, vmm_result.updated, "); + push_select_from_result(self.instance.as_ref(), &mut out); + out.push_sql(", "); + push_select_from_result(self.migration_in.as_ref(), &mut out); + out.push_sql(", "); + push_select_from_result(self.migration_out.as_ref(), &mut out); + out.push_sql(" "); + out.push_sql("FROM vmm_result"); if self.instance.is_some() { out.push_sql(", instance_result"); } - if self.migration.is_some() { - out.push_sql(", migration_result"); + if self.migration_in.is_some() { + out.push_sql(", migration_in_result"); + } + + if self.migration_out.is_some() { + out.push_sql(", migration_out_result"); } Ok(()) @@ -417,7 +506,6 @@ mod test { use crate::db::model::VmmState; use crate::db::raw_query_builder::expectorate_query_contents; use chrono::Utc; - use omicron_common::api::internal::nexus::MigrationRole; use omicron_common::api::internal::nexus::MigrationRuntimeState; use omicron_common::api::internal::nexus::MigrationState; use uuid::Uuid; @@ -439,14 +527,13 @@ mod test { MigrationRuntimeState { migration_id, state: MigrationState::Pending, - role: MigrationRole::Source, gen: Generation::new().into(), time_updated: Utc::now(), } } - fn mk_instance_state() -> (Uuid, InstanceRuntimeState) { - let id = Uuid::nil(); + fn mk_instance_state() -> (InstanceUuid, InstanceRuntimeState) { + let id = InstanceUuid::nil(); let state = InstanceRuntimeState { time_updated: Utc::now(), gen: Generation::new(), @@ -460,10 +547,15 @@ mod test { #[tokio::test] async fn expectorate_query_only_vmm() { - let vmm_id = Uuid::nil(); + let vmm_id = PropolisUuid::nil(); let vmm_state = mk_vmm_state(); - let query = InstanceAndVmmUpdate::new(vmm_id, vmm_state, None, None); + let query = InstanceAndVmmUpdate::new( + vmm_id, + vmm_state, + None, + Migrations::default(), + ); expectorate_query_contents( &query, "tests/output/instance_and_vmm_update_vmm_only.sql", @@ -473,12 +565,16 @@ mod test { #[tokio::test] async fn expectorate_query_vmm_and_instance() { - let vmm_id = Uuid::nil(); + let vmm_id = PropolisUuid::nil(); let vmm_state = mk_vmm_state(); let instance = mk_instance_state(); - let query = - InstanceAndVmmUpdate::new(vmm_id, vmm_state, Some(instance), None); + let query = InstanceAndVmmUpdate::new( + vmm_id, + vmm_state, + Some(instance), + Migrations::default(), + ); expectorate_query_contents( &query, "tests/output/instance_and_vmm_update_vmm_and_instance.sql", @@ -487,23 +583,66 @@ mod test { } #[tokio::test] - async fn expectorate_query_vmm_and_migration() { - let vmm_id = Uuid::nil(); + async fn expectorate_query_vmm_and_migration_in() { + let vmm_id = PropolisUuid::nil(); + let vmm_state = mk_vmm_state(); + let migration = mk_migration_state(); + + let query = InstanceAndVmmUpdate::new( + vmm_id, + vmm_state, + None, + Migrations { migration_in: Some(&migration), migration_out: None }, + ); + expectorate_query_contents( + &query, + "tests/output/instance_and_vmm_update_vmm_and_migration_in.sql", + ) + .await; + } + + #[tokio::test] + async fn expectorate_query_vmm_instance_and_migration_in() { + let vmm_id = PropolisUuid::nil(); let vmm_state = mk_vmm_state(); + let instance = mk_instance_state(); let migration = mk_migration_state(); - let query = - InstanceAndVmmUpdate::new(vmm_id, vmm_state, None, Some(migration)); + let query = InstanceAndVmmUpdate::new( + vmm_id, + vmm_state, + Some(instance), + Migrations { migration_in: Some(&migration), migration_out: None }, + ); expectorate_query_contents( &query, - "tests/output/instance_and_vmm_update_vmm_and_imigration.sql", + "tests/output/instance_and_vmm_update_vmm_instance_and_migration_in.sql", ) .await; } #[tokio::test] - async fn expectorate_query_vmm_instance_and_migration() { - let vmm_id = Uuid::nil(); + async fn expectorate_query_vmm_and_migration_out() { + let vmm_id = PropolisUuid::nil(); + let vmm_state = mk_vmm_state(); + let migration = mk_migration_state(); + + let query = InstanceAndVmmUpdate::new( + vmm_id, + vmm_state, + None, + Migrations { migration_out: Some(&migration), migration_in: None }, + ); + expectorate_query_contents( + &query, + "tests/output/instance_and_vmm_update_vmm_and_migration_out.sql", + ) + .await; + } + + #[tokio::test] + async fn expectorate_query_vmm_instance_and_migration_out() { + let vmm_id = PropolisUuid::nil(); let vmm_state = mk_vmm_state(); let instance = mk_instance_state(); let migration = mk_migration_state(); @@ -512,11 +651,58 @@ mod test { vmm_id, vmm_state, Some(instance), - Some(migration), + Migrations { migration_out: Some(&migration), migration_in: None }, + ); + expectorate_query_contents( + &query, + "tests/output/instance_and_vmm_update_vmm_instance_and_migration_out.sql", + ) + .await; + } + + #[tokio::test] + async fn expectorate_query_vmm_and_both_migrations() { + let vmm_id = PropolisUuid::nil(); + let vmm_state = mk_vmm_state(); + let migration_in = mk_migration_state(); + let migration_out = mk_migration_state(); + + let query = InstanceAndVmmUpdate::new( + vmm_id, + vmm_state, + None, + Migrations { + migration_in: Some(&migration_in), + migration_out: Some(&migration_out), + }, + ); + expectorate_query_contents( + &query, + "tests/output/instance_and_vmm_update_vmm_and_both_migrations.sql", + ) + .await; + } + + #[tokio::test] + async fn expectorate_query_vmm_instance_and_both_migrations() { + let vmm_id = PropolisUuid::nil(); + let vmm_state = mk_vmm_state(); + let instance = mk_instance_state(); + let migration_in = mk_migration_state(); + let migration_out = mk_migration_state(); + + let query = InstanceAndVmmUpdate::new( + vmm_id, + vmm_state, + Some(instance), + Migrations { + migration_in: Some(&migration_in), + migration_out: Some(&migration_out), + }, ); expectorate_query_contents( &query, - "tests/output/instance_and_vmm_update_vmm_instance_and_migration.sql", + "tests/output/instance_and_vmm_update_vmm_instance_and_both_migrations.sql", ) .await; } diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 6259a531469..ce6a7a94795 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1347,8 +1347,8 @@ impl super::Nexus { .db_datastore .vmm_and_migration_update_runtime( state.propolis_id, - &state.vmm_state.into(), - state.migration_state.as_ref(), + &state.vmm_state.clone().into(), + state.migrations(), ) .await; @@ -1540,13 +1540,14 @@ impl super::Nexus { instance_id: &InstanceUuid, new_runtime_state: &nexus::SledInstanceState, ) -> Result<(), Error> { - let migration = new_runtime_state.migration_state.as_ref(); + let migrations = new_runtime_state.migrations(); let propolis_id = new_runtime_state.propolis_id; info!(opctx.log, "received new VMM runtime state from sled agent"; "instance_id" => %instance_id, "propolis_id" => %propolis_id, "vmm_state" => ?new_runtime_state.vmm_state, - "migration_state" => ?migration); + "migration_state" => ?migrations, + ); let (vmm_updated, migration_updated) = self .db_datastore @@ -1554,14 +1555,20 @@ impl super::Nexus { propolis_id, // TODO(eliza): probably should take this by value... &new_runtime_state.vmm_state.clone().into(), - migration, + migrations, ) .await?; let updated = vmm_updated || migration_updated.unwrap_or(false); if updated { + info!(opctx.log, "starting update saga for {instance_id}"; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id, + "vmm_state" => ?new_runtime_state.vmm_state, + "migration_state" => ?migrations, + ); let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore) - .instance_id(*instance_id) + .instance_id(instance_id.into_untyped_uuid()) .lookup_for(authz::Action::Modify) .await?; let saga_params = sagas::instance_update::Params { @@ -2008,20 +2015,24 @@ pub(crate) async fn notify_instance_updated_background( instance_id: InstanceUuid, new_runtime_state: nexus::SledInstanceState, ) -> Result { + let migrations = new_runtime_state.migrations(); let propolis_id = new_runtime_state.propolis_id; info!(opctx.log, "received new VMM runtime state from sled agent"; "instance_id" => %instance_id, "propolis_id" => %propolis_id, "vmm_state" => ?new_runtime_state.vmm_state, - "migration_state" => ?new_runtime_state.migration_state); + "migration_state" => ?migrations, + ); - let updated = datastore - .vmm_update_runtime( - &propolis_id, + let (vmm_updated, migration_updated) = datastore + .vmm_and_migration_update_runtime( + propolis_id, // TODO(eliza): probably should take this by value... &new_runtime_state.vmm_state.clone().into(), + migrations, ) .await?; + let updated = vmm_updated || migration_updated.unwrap_or(false); if updated { let (.., authz_instance) = LookupPath::new(&opctx, datastore) @@ -2032,6 +2043,12 @@ pub(crate) async fn notify_instance_updated_background( serialized_authn: authn::saga::Serialized::for_opctx(opctx), authz_instance, }; + info!(opctx.log, "queueing update saga for {instance_id}"; + "instance_id" => %instance_id, + "propolis_id" => %propolis_id, + "vmm_state" => ?new_runtime_state.vmm_state, + "migration_state" => ?migrations, + ); saga_request .send(sagas::SagaRequest::InstanceUpdate { params }) .await @@ -2042,153 +2059,6 @@ pub(crate) async fn notify_instance_updated_background( })?; } - // // If the supplied instance state indicates that the instance no longer - // // has an active VMM, attempt to delete the virtual provisioning record, - // // and the assignment of the Propolis metric producer to an oximeter - // // collector. - // // - // // As with updating networking state, this must be done before - // // committing the new runtime state to the database: once the DB is - // // written, a new start saga can arrive and start the instance, which - // // will try to create its own virtual provisioning charges, which will - // // race with this operation. - // if new_runtime_state.instance_state.propolis_id.is_none() { - // datastore - // .virtual_provisioning_collection_delete_instance( - // opctx, - // *instance_id, - // db_instance.project_id, - // i64::from(db_instance.ncpus.0 .0), - // db_instance.memory, - // (&new_runtime_state.instance_state.gen).into(), - // ) - // .await?; - - // Write the new instance and VMM states back to CRDB. This needs to be - // done before trying to clean up the VMM, since the datastore will only - // allow a VMM to be marked as deleted if it is already in a terminal - // state. - // let result = datastore - // .instance_and_vmm_update_runtime( - // instance_id, - // &db::model::InstanceRuntimeState::from( - // new_runtime_state.instance_state.clone(), - // ), - // &propolis_id, - // &db::model::VmmRuntimeState::from( - // new_runtime_state.vmm_state.clone(), - // ), - // &new_runtime_state.migration_state, - // ) - // .await; - - // // Has a migration terminated? If so,mark the migration record as deleted if - // // and only if both sides of the migration are in a terminal state. - // if let Some(nexus::MigrationRuntimeState { - // migration_id, - // state, - // role, - // .. - // }) = new_runtime_state.migration_state - // { - // if state.is_terminal() { - // info!( - // log, - // "migration has terminated, trying to delete it..."; - // "instance_id" => %instance_id, - // "propolis_id" => %propolis_id, - // "migration_id" => %propolis_id, - // "migration_state" => %state, - // "migration_role" => %role, - // ); - // if !datastore.migration_terminate(opctx, migration_id).await? { - // info!( - // log, - // "did not mark migration record as deleted (the other half \ - // may not yet have reported termination)"; - // "instance_id" => %instance_id, - // "propolis_id" => %propolis_id, - // "migration_id" => %propolis_id, - // "migration_state" => %state, - // "migration_role" => %role, - // ); - // } - // } - // } - - // // If the VMM is now in a terminal state, make sure its resources get - // // cleaned up. - // // - // // For idempotency, only check to see if the update was successfully - // // processed and ignore whether the VMM record was actually updated. - // // This is required to handle the case where this routine is called - // // once, writes the terminal VMM state, fails before all per-VMM - // // resources are released, returns a retriable error, and is retried: - // // the per-VMM resources still need to be cleaned up, but the DB update - // // will return Ok(_, false) because the database was already updated. - // // - // // Unlike the pre-update cases, it is legal to do this cleanup *after* - // // committing state to the database, because a terminated VMM cannot be - // // reused (restarting or migrating its former instance will use new VMM - // // IDs). - // if result.is_ok() { - // let propolis_terminated = matches!( - // new_runtime_state.vmm_state.state, - // VmmState::Destroyed | VmmState::Failed - // ); - - // if propolis_terminated { - // info!(log, "vmm is terminated, cleaning up resources"; - // "instance_id" => %instance_id, - // "propolis_id" => %propolis_id); - - // datastore - // .sled_reservation_delete(opctx, propolis_id.into_untyped_uuid()) - // .await?; - - // if !datastore.vmm_mark_deleted(opctx, &propolis_id).await? { - // warn!(log, "failed to mark vmm record as deleted"; - // "instance_id" => %instance_id, - // "propolis_id" => %propolis_id, - // "vmm_state" => ?new_runtime_state.vmm_state); - // } - // } - // } - - // match result { - // Ok((instance_updated, vmm_updated)) => { - // info!(log, "instance and vmm updated by sled agent"; - // "instance_id" => %instance_id, - // "propolis_id" => %propolis_id, - // "instance_updated" => instance_updated, - // "vmm_updated" => vmm_updated); - // Ok(Some(InstanceUpdated { instance_updated, vmm_updated })) - // } - - // // The update command should swallow object-not-found errors and - // // return them back as failures to update, so this error case is - // // unexpected. There's no work to do if this occurs, however. - // Err(Error::ObjectNotFound { .. }) => { - // error!(log, "instance/vmm update unexpectedly returned \ - // an object not found error"; - // "instance_id" => %instance_id, - // "propolis_id" => %propolis_id); - // Ok(None) - // } - - // // If the datastore is unavailable, propagate that to the caller. - // // TODO-robustness Really this should be any _transient_ error. How - // // can we distinguish? Maybe datastore should emit something - // // different from Error with an Into. - // Err(error) => { - // warn!(log, "failed to update instance from sled agent"; - // "instance_id" => %instance_id, - // "propolis_id" => %propolis_id, - // "error" => ?error); - // Err(error) - // } - - // } Ok(updated) } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 98153f40226..7ed6ba5be80 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3387,24 +3387,6 @@ "minLength": 5, "maxLength": 17 }, - "MigrationRole": { - "oneOf": [ - { - "description": "This update concerns the source VMM of a migration.", - "type": "string", - "enum": [ - "source" - ] - }, - { - "description": "This update concerns the target VMM of a migration.", - "type": "string", - "enum": [ - "target" - ] - } - ] - }, "MigrationRuntimeState": { "description": "An update from a sled regarding the state of a migration, indicating the role of the VMM whose migration state was updated.", "type": "object", @@ -3416,9 +3398,6 @@ "type": "string", "format": "uuid" }, - "role": { - "$ref": "#/components/schemas/MigrationRole" - }, "state": { "$ref": "#/components/schemas/MigrationState" }, @@ -3431,7 +3410,6 @@ "required": [ "gen", "migration_id", - "role", "state", "time_updated" ] @@ -4605,9 +4583,18 @@ "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", "type": "object", "properties": { - "migration_state": { + "migration_in": { + "nullable": true, + "description": "The current state of any inbound migration to this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, + "migration_out": { "nullable": true, - "description": "The current state of any in-progress migration for this instance, as understood by this sled.", + "description": "The state of any outbound migration to this VMM.", "allOf": [ { "$ref": "#/components/schemas/MigrationRuntimeState" diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index b5c7ed45100..286176063f0 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3407,24 +3407,6 @@ "minLength": 5, "maxLength": 17 }, - "MigrationRole": { - "oneOf": [ - { - "description": "This update concerns the source VMM of a migration.", - "type": "string", - "enum": [ - "source" - ] - }, - { - "description": "This update concerns the target VMM of a migration.", - "type": "string", - "enum": [ - "target" - ] - } - ] - }, "MigrationRuntimeState": { "description": "An update from a sled regarding the state of a migration, indicating the role of the VMM whose migration state was updated.", "type": "object", @@ -3436,9 +3418,6 @@ "type": "string", "format": "uuid" }, - "role": { - "$ref": "#/components/schemas/MigrationRole" - }, "state": { "$ref": "#/components/schemas/MigrationState" }, @@ -3451,7 +3430,6 @@ "required": [ "gen", "migration_id", - "role", "state", "time_updated" ] @@ -4262,9 +4240,18 @@ "description": "A wrapper type containing a sled's total knowledge of the state of a specific VMM and the instance it incarnates.", "type": "object", "properties": { - "migration_state": { + "migration_in": { + "nullable": true, + "description": "The current state of any inbound migration to this VMM.", + "allOf": [ + { + "$ref": "#/components/schemas/MigrationRuntimeState" + } + ] + }, + "migration_out": { "nullable": true, - "description": "The current state of any in-progress migration for this instance, as understood by this sled.", + "description": "The state of any outbound migration to this VMM.", "allOf": [ { "$ref": "#/components/schemas/MigrationRuntimeState" diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs index 0ab2355a3f2..e5651a4048d 100644 --- a/sled-agent/src/common/instance.rs +++ b/sled-agent/src/common/instance.rs @@ -8,21 +8,23 @@ use crate::params::InstanceMigrationSourceParams; use chrono::{DateTime, Utc}; use omicron_common::api::external::Generation; use omicron_common::api::internal::nexus::{ - MigrationRole, MigrationRuntimeState, MigrationState, SledInstanceState, - VmmRuntimeState, VmmState, + MigrationRuntimeState, MigrationState, SledInstanceState, VmmRuntimeState, + VmmState, }; use omicron_uuid_kinds::PropolisUuid; use propolis_client::types::{ - InstanceState as PropolisApiState, InstanceStateMonitorResponse, - MigrationState as PropolisMigrationState, + InstanceMigrationStatus, InstanceState as PropolisApiState, + InstanceStateMonitorResponse, MigrationState as PropolisMigrationState, }; +use uuid::Uuid; /// The instance and VMM state that sled agent maintains on a per-VMM basis. #[derive(Clone, Debug)] pub struct InstanceStates { vmm: VmmRuntimeState, propolis_id: PropolisUuid, - migration: Option, + migration_in: Option, + migration_out: Option, } /// Newtype to allow conversion from Propolis API states (returned by the @@ -100,9 +102,8 @@ pub(crate) struct ObservedPropolisState { /// The state reported by Propolis's instance state monitor API. pub vmm_state: PropolisInstanceState, - /// Information about whether the state observer queried migration status at - /// all and, if so, what response it got from Propolis. - pub migration_status: ObservedMigrationStatus, + pub migration_in: Option, + pub migration_out: Option, /// The approximate time at which this observation was made. pub time: DateTime, @@ -112,66 +113,41 @@ impl ObservedPropolisState { /// Constructs a Propolis state observation from an instance's current /// state and an instance state monitor response received from /// Propolis. - pub fn new( - state: &InstanceStates, - propolis_state: &InstanceStateMonitorResponse, - ) -> Self { - // If there's no migration currently registered with this sled, report - // the current state and that no migration is currently in progress, - // even if Propolis has some migration data to share. (This case arises - // when Propolis returns state from a previous migration that sled agent - // has already retired.) - // - // N.B. This needs to be read from the instance runtime state and not - // the migration runtime state to ensure that, once a migration in - // completes, the "completed" observation is reported to - // `InstanceStates::apply_propolis_observation` exactly once. - // Otherwise that routine will try to apply the "inbound migration - // complete" instance state transition twice. - let Some(migration_id) = instance_runtime.migration_id else { - return Self { - vmm_state: PropolisInstanceState(propolis_state.state), - migration_status: ObservedMigrationStatus::NoMigration, - time: Utc::now(), - }; - }; - - // Sled agent believes a live migration may be in progress. See if - // either of the Propolis migrations corresponds to it. - let propolis_migration = match ( - &propolis_state.migration.migration_in, - &propolis_state.migration.migration_out, - ) { - (Some(inbound), _) if inbound.id == migration_id => inbound, - (_, Some(outbound)) if outbound.id == migration_id => outbound, - _ => { - // Sled agent believes this instance should be migrating, but - // Propolis isn't reporting a matching migration yet, so assume - // the migration is still pending. - return Self { - vmm_state: PropolisInstanceState(propolis_state.state), - migration_status: ObservedMigrationStatus::Pending, - time: Utc::now(), - }; - } - }; - + pub fn new(propolis_state: &InstanceStateMonitorResponse) -> Self { Self { vmm_state: PropolisInstanceState(propolis_state.state), - migration_status: match propolis_migration.state { - PropolisMigrationState::Finish => { - ObservedMigrationStatus::Succeeded - } - PropolisMigrationState::Error => { - ObservedMigrationStatus::Failed - } - _ => ObservedMigrationStatus::InProgress, - }, + migration_in: propolis_state + .migration + .migration_in + .as_ref() + .map(ObservedMigrationState::from), + migration_out: propolis_state + .migration + .migration_out + .as_ref() + .map(ObservedMigrationState::from), time: Utc::now(), } } } +#[derive(Copy, Clone, Debug)] +pub struct ObservedMigrationState { + state: MigrationState, + id: Uuid, +} + +impl From<&'_ InstanceMigrationStatus> for ObservedMigrationState { + fn from(observed: &InstanceMigrationStatus) -> Self { + let state = match observed.state { + PropolisMigrationState::Error => MigrationState::Failed, + PropolisMigrationState::Finish => MigrationState::Completed, + _ => MigrationState::InProgress, + }; + Self { state, id: observed.id } + } +} + /// The set of instance states that sled agent can publish to Nexus. This is /// a subset of the instance states Nexus knows about: the Creating and /// Destroyed states are reserved for Nexus to use for instances that are being @@ -198,8 +174,13 @@ pub enum Action { } impl InstanceStates { - pub fn new(vmm: VmmRuntimeState, propolis_id: Uuid) -> Self { - InstanceStates { vmm, propolis_id, migration: None } + pub fn new(vmm: VmmRuntimeState, propolis_id: PropolisUuid) -> Self { + InstanceStates { + vmm, + propolis_id, + migration_in: None, + migration_out: None, + } } pub fn vmm(&self) -> &VmmRuntimeState { @@ -210,10 +191,6 @@ impl InstanceStates { self.propolis_id } - pub(crate) fn migration(&self) -> Option<&MigrationRuntimeState> { - self.migration.as_ref() - } - /// Creates a `SledInstanceState` structure containing the entirety of this /// structure's runtime state. This requires cloning; for simple read access /// use the `instance` or `vmm` accessors instead. @@ -221,25 +198,9 @@ impl InstanceStates { SledInstanceState { vmm_state: self.vmm.clone(), propolis_id: self.propolis_id, - migration_state: self.migration.clone(), - } - } - - fn transition_migration( - &mut self, - state: MigrationState, - time_updated: DateTime, - ) { - let migration = self.migration.as_mut().expect( - "an ObservedMigrationState should only be constructed when the \ - VMM has an active migration", - ); - // Don't generate spurious state updates if the migration is already in - // the state we're transitioning to. - if migration.state != state { - migration.state = state; - migration.time_updated = time_updated; - migration.gen = migration.gen.next(); + // migration_state: self.migration.clone(), + migration_in: self.migration_in.clone(), + migration_out: self.migration_out.clone(), } } @@ -249,6 +210,49 @@ impl InstanceStates { &mut self, observed: &ObservedPropolisState, ) -> Option { + fn transition_migration( + current: &mut Option, + ObservedMigrationState { id, state }: ObservedMigrationState, + now: DateTime, + ) { + if let Some(ref mut m) = current { + // Don't generate spurious state updates if the migration is already in + // the state we're transitioning to. + if m.migration_id == id && m.state == state { + return; + } + m.state = state; + if m.migration_id == id { + m.gen = m.gen.next(); + } else { + m.migration_id = id; + m.gen = Generation::new(); + } + m.time_updated = now; + } else { + *current = Some(MigrationRuntimeState { + migration_id: id, + gen: Generation::new(), + state, + time_updated: now, + }); + } + } + + fn destroy_migration( + migration: &mut MigrationRuntimeState, + now: DateTime, + ) { + if matches!( + migration.state, + MigrationState::InProgress | MigrationState::Pending + ) { + migration.gen = migration.gen.next(); + migration.time_updated = now; + migration.state = MigrationState::Failed; + } + } + let vmm_gone = matches!( observed.vmm_state.0, PropolisApiState::Destroyed | PropolisApiState::Failed @@ -264,27 +268,11 @@ impl InstanceStates { // Update the instance record to reflect the result of any completed // migration. - match observed.migration_status { - ObservedMigrationStatus::Succeeded => { - self.transition_migration( - MigrationState::Completed, - observed.time, - ); - } - ObservedMigrationStatus::Failed => { - self.transition_migration( - MigrationState::Failed, - observed.time, - ); - } - ObservedMigrationStatus::InProgress => { - self.transition_migration( - MigrationState::InProgress, - observed.time, - ); - } - ObservedMigrationStatus::NoMigration - | ObservedMigrationStatus::Pending => {} + if let Some(m) = observed.migration_in { + transition_migration(&mut self.migration_in, m, observed.time); + } + if let Some(m) = observed.migration_out { + transition_migration(&mut self.migration_out, m, observed.time); } // If this Propolis has exited, tear down its zone. If it was in the @@ -301,13 +289,11 @@ impl InstanceStates { if vmm_gone { // If there's an active migration and the VMM is suddenly gone, // that should constitute a migration failure! - if let Some(MigrationState::Pending | MigrationState::InProgress) = - self.migration.as_ref().map(|m| m.state) - { - self.transition_migration( - MigrationState::Failed, - observed.time, - ); + if let Some(ref mut m) = self.migration_in { + destroy_migration(m, observed.time); + } + if let Some(ref mut m) = self.migration_out { + destroy_migration(m, observed.time); } Some(Action::Destroy) } else { @@ -346,11 +332,10 @@ impl InstanceStates { /// migrating. pub(crate) fn terminate_rudely(&mut self) { let fake_observed = ObservedPropolisState { - migration_status: if self.migration.is_some() { - ObservedMigrationStatus::Failed - } else { - ObservedMigrationStatus::NoMigration - }, + // We don't actually need to populate these, because observing a + // `Destroyed` instance state will fail any in progress migrations anyway. + migration_in: None, + migration_out: None, vmm_state: PropolisInstanceState(PropolisApiState::Destroyed), time: Utc::now(), }; @@ -360,51 +345,23 @@ impl InstanceStates { /// Sets or clears this instance's migration IDs and advances its Propolis /// generation number. + #[deprecated(note = "eliza get rid of this")] pub(crate) fn set_migration_ids( &mut self, ids: &Option, now: DateTime, ) { - if let Some(InstanceMigrationSourceParams { - migration_id, - dst_propolis_id, - }) = *ids - { - let role = if dst_propolis_id == self.propolis_id { - MigrationRole::Target - } else { - MigrationRole::Source - }; - self.migration = Some(MigrationRuntimeState { - migration_id, - state: MigrationState::Pending, - role, - gen: Generation::new(), - time_updated: now, - }) - } else { - self.migration = None; - } } /// Returns true if the migration IDs in this instance are already set as they /// would be on a successful transition from the migration IDs in /// `old_runtime` to the ones in `migration_ids`. + #[deprecated(note = "eliza get rid of this")] pub(crate) fn migration_ids_already_set( &self, migration_ids: &Option, ) -> bool { - match (self.migration.as_ref(), migration_ids) { - // If the migration ID is already set, and this is a request to set - // IDs, the records match if the relevant IDs match. - (Some(migration), Some(ids)) => { - migration.migration_id == ids.migration_id - } - // If the migration ID is already cleared, and this is a request to - // clear IDs, the records match. - (None, None) => true, - _ => false, - } + false } } @@ -434,10 +391,9 @@ mod test { let mut state = make_instance(); state.vmm.state = VmmState::Migrating; let migration_id = Uuid::new_v4(); - state.migration = Some(MigrationRuntimeState { + state.migration_out = Some(MigrationRuntimeState { migration_id, state: MigrationState::InProgress, - role: MigrationRole::Source, // advance the generation once, since we are starting out in the // `InProgress` state. gen: Generation::new().next(), @@ -451,11 +407,9 @@ mod test { let mut state = make_instance(); state.vmm.state = VmmState::Migrating; let migration_id = Uuid::new_v4(); - state.propolis_id = Uuid::new_v4(); - state.migration = Some(MigrationRuntimeState { + state.migration_in = Some(MigrationRuntimeState { migration_id, state: MigrationState::InProgress, - role: MigrationRole::Target, // advance the generation once, since we are starting out in the // `InProgress` state. gen: Generation::new().next(), @@ -469,7 +423,8 @@ mod test { ) -> ObservedPropolisState { ObservedPropolisState { vmm_state: propolis_state, - migration_status: ObservedMigrationStatus::NoMigration, + migration_in: None, + migration_out: None, time: Utc::now(), } } @@ -503,43 +458,58 @@ mod test { fn test_termination_fails_in_progress_migration( mk_instance: impl Fn() -> InstanceStates, ) { + } + + #[test] + fn source_termination_fails_in_progress_migration() { for state in [Observed::Destroyed, Observed::Failed] { - let mut instance_state = mk_instance(); - let original_migration = instance_state.clone().migration.unwrap(); + let mut instance_state = make_migration_source_instance(); + let original_migration = + instance_state.clone().migration_out.unwrap(); let requested_action = instance_state .apply_propolis_observation(&make_observed_state(state.into())); - let migration = - instance_state.migration.expect("state must have a migration"); + let migration = instance_state + .migration_out + .expect("state must have a migration"); assert_eq!(migration.state, MigrationState::Failed); assert!(migration.gen > original_migration.gen); assert!(matches!(requested_action, Some(Action::Destroy))); } } - #[test] - fn source_termination_fails_in_progress_migration() { - test_termination_fails_in_progress_migration( - make_migration_source_instance, - ) - } - #[test] fn target_termination_fails_in_progress_migration() { - test_termination_fails_in_progress_migration( - make_migration_target_instance, - ) + for state in [Observed::Destroyed, Observed::Failed] { + let mut instance_state = make_migration_target_instance(); + let original_migration = + instance_state.clone().migration_in.unwrap(); + let requested_action = instance_state + .apply_propolis_observation(&make_observed_state(state.into())); + + let migration = instance_state + .migration_in + .expect("state must have a migration"); + assert_eq!(migration.state, MigrationState::Failed); + assert!(migration.gen > original_migration.gen); + assert!(matches!(requested_action, Some(Action::Destroy))); + } } #[test] fn destruction_after_migration_out_does_not_transition() { let mut state = make_migration_source_instance(); + let migration_id = state.migration_out.unwrap().migration_id; // After a migration succeeds, the source VM appears to stop but reports // that the migration has succeeded. let mut observed = ObservedPropolisState { vmm_state: PropolisInstanceState(Observed::Stopping), - migration_status: ObservedMigrationStatus::Succeeded, + migration_out: Some(ObservedMigrationState { + state: MigrationState::Completed, + id: migration_id, + }), + migration_in: None, time: Utc::now(), }; @@ -552,11 +522,11 @@ mod test { // The migration state should transition to "completed" let migration = state - .migration + .migration_out .clone() .expect("instance must have a migration state"); let prev_migration = - prev.migration.expect("previous state must have a migration"); + prev.migration_out.expect("previous state must have a migration"); assert_eq!(migration.state, MigrationState::Completed); assert!(migration.gen > prev_migration.gen); let prev_migration = migration; @@ -577,7 +547,7 @@ mod test { // Now that the migration has completed, it should not transition again. let migration = state - .migration + .migration_out .clone() .expect("instance must have a migration state"); assert_eq!(migration.state, MigrationState::Completed); @@ -595,7 +565,7 @@ mod test { assert!(state.vmm.gen > prev.vmm.gen); let migration = state - .migration + .migration_out .clone() .expect("instance must have a migration state"); assert_eq!(migration.state, MigrationState::Completed); @@ -605,12 +575,17 @@ mod test { #[test] fn failure_after_migration_in_does_not_transition() { let mut state = make_migration_target_instance(); + let migration_id = state.migration_in.unwrap().migration_id; // Failure to migrate into an instance should mark the VMM as destroyed // but should not change the instance's migration IDs. let observed = ObservedPropolisState { vmm_state: PropolisInstanceState(Observed::Failed), - migration_status: ObservedMigrationStatus::Failed, + migration_in: Some(ObservedMigrationState { + state: MigrationState::Failed, + id: migration_id, + }), + migration_out: None, time: Utc::now(), }; @@ -625,9 +600,9 @@ mod test { // The migration state should transition. let migration = - state.migration.expect("instance must have a migration state"); + state.migration_in.expect("instance must have a migration state"); let prev_migration = - prev.migration.expect("previous state must have a migration"); + prev.migration_in.expect("previous state must have a migration"); assert_eq!(migration.state, MigrationState::Failed); assert!(migration.gen > prev_migration.gen); } @@ -649,99 +624,101 @@ mod test { // The migration state should transition. let migration = - state.migration.expect("instance must have a migration state"); + state.migration_in.expect("instance must have a migration state"); let prev_migration = - prev.migration.expect("previous state must have a migration"); + prev.migration_in.expect("previous state must have a migration"); assert_eq!(migration.state, MigrationState::Failed); assert!(migration.gen > prev_migration.gen); } #[test] + #[ignore = "this logic is basically trivial now, maybe just get rid of the test?"] fn migration_out_after_migration_in() { - let mut state = make_migration_target_instance(); - let mut observed = ObservedPropolisState { - vmm_state: PropolisInstanceState(Observed::Running), - migration_status: ObservedMigrationStatus::Succeeded, - time: Utc::now(), - }; - - // The transition into the Running state on the migration target should - // take over for the source, updating the Propolis generation. - let prev = state.clone(); - assert!(state.apply_propolis_observation(&observed).is_none()); - assert_state_change_has_gen_change(&prev, &state); - assert_eq!(state.vmm.state, VmmState::Running); - assert!(state.vmm.gen > prev.vmm.gen); - - // The migration state should transition to completed. - let migration = state - .migration - .clone() - .expect("instance must have a migration state"); - let prev_migration = - prev.migration.expect("previous state must have a migration"); - assert_eq!(migration.state, MigrationState::Completed); - assert!(migration.gen > prev_migration.gen); - - // Pretend Nexus set some new migration IDs. - let migration_id = Uuid::new_v4(); - let prev = state.clone(); - state.set_migration_ids( - &Some(InstanceMigrationSourceParams { - migration_id, - dst_propolis_id: PropolisUuid::new_v4(), - }), - Utc::now(), - ); - assert_state_change_has_gen_change(&prev, &state); - assert_eq!(state.vmm.gen, prev.vmm.gen); - - // There should be a new, pending migration state. - let migration = state - .migration - .clone() - .expect("instance must have a migration state"); - assert_eq!(migration.state, MigrationState::Pending); - assert_eq!(migration.migration_id, migration_id); - let prev_migration = migration; - - // Mark that the new migration out is in progress. This doesn't change - // anything in the instance runtime state, but does update the VMM state - // generation. - let prev = state.clone(); - observed.vmm_state = PropolisInstanceState(Observed::Migrating); - assert!(state.apply_propolis_observation(&observed).is_none()); - assert_state_change_has_gen_change(&prev, &state); - assert_eq!(state.vmm.state, VmmState::Migrating); - assert!(state.vmm.gen > prev.vmm.gen); - - // The migration state should transition to in progress. - let migration = state - .migration - .clone() - .expect("instance must have a migration state"); - assert_eq!(migration.state, MigrationState::InProgress); - assert!(migration.gen > prev_migration.gen); - let prev_migration = migration; - - // Propolis will publish that the migration succeeds before changing any - // state. This should transfer control to the target but should not - // touch the migration ID (that is the new target's job). - let prev = state.clone(); - observed.vmm_state = PropolisInstanceState(Observed::Migrating); - assert!(state.apply_propolis_observation(&observed).is_none()); - assert_state_change_has_gen_change(&prev, &state); - assert_eq!(state.vmm.state, VmmState::Migrating); - assert!(state.vmm.gen > prev.vmm.gen); - - // The migration state should transition to completed. - let migration = state - .migration - .clone() - .expect("instance must have a migration state"); - assert_eq!(migration.state, MigrationState::Completed); - assert!(migration.gen > prev_migration.gen); - - // The rest of the destruction sequence is covered by other tests. + todo!("eliza") + // let mut state = make_migration_target_instance(); + // let mut observed = ObservedPropolisState { + // vmm_state: PropolisInstanceState(Observed::Running), + // migration_in: ObservedMigrationStatus::Succeeded, + // time: Utc::now(), + // }; + + // // The transition into the Running state on the migration target should + // // take over for the source, updating the Propolis generation. + // let prev = state.clone(); + // assert!(state.apply_propolis_observation(&observed).is_none()); + // assert_state_change_has_gen_change(&prev, &state); + // assert_eq!(state.vmm.state, VmmState::Running); + // assert!(state.vmm.gen > prev.vmm.gen); + + // // The migration state should transition to completed. + // let migration = state + // .migration + // .clone() + // .expect("instance must have a migration state"); + // let prev_migration = + // prev.migration.expect("previous state must have a migration"); + // assert_eq!(migration.state, MigrationState::Completed); + // assert!(migration.gen > prev_migration.gen); + + // // Pretend Nexus set some new migration IDs. + // let migration_id = Uuid::new_v4(); + // let prev = state.clone(); + // state.set_migration_ids( + // &Some(InstanceMigrationSourceParams { + // migration_id, + // dst_propolis_id: PropolisUuid::new_v4(), + // }), + // Utc::now(), + // ); + // assert_state_change_has_gen_change(&prev, &state); + // assert_eq!(state.vmm.gen, prev.vmm.gen); + + // // There should be a new, pending migration state. + // let migration = state + // .migration + // .clone() + // .expect("instance must have a migration state"); + // assert_eq!(migration.state, MigrationState::Pending); + // assert_eq!(migration.migration_id, migration_id); + // let prev_migration = migration; + + // // Mark that the new migration out is in progress. This doesn't change + // // anything in the instance runtime state, but does update the VMM state + // // generation. + // let prev = state.clone(); + // observed.vmm_state = PropolisInstanceState(Observed::Migrating); + // assert!(state.apply_propolis_observation(&observed).is_none()); + // assert_state_change_has_gen_change(&prev, &state); + // assert_eq!(state.vmm.state, VmmState::Migrating); + // assert!(state.vmm.gen > prev.vmm.gen); + + // // The migration state should transition to in progress. + // let migration = state + // .migration + // .clone() + // .expect("instance must have a migration state"); + // assert_eq!(migration.state, MigrationState::InProgress); + // assert!(migration.gen > prev_migration.gen); + // let prev_migration = migration; + + // // Propolis will publish that the migration succeeds before changing any + // // state. This should transfer control to the target but should not + // // touch the migration ID (that is the new target's job). + // let prev = state.clone(); + // observed.vmm_state = PropolisInstanceState(Observed::Migrating); + // assert!(state.apply_propolis_observation(&observed).is_none()); + // assert_state_change_has_gen_change(&prev, &state); + // assert_eq!(state.vmm.state, VmmState::Migrating); + // assert!(state.vmm.gen > prev.vmm.gen); + + // // The migration state should transition to completed. + // let migration = state + // .migration + // .clone() + // .expect("instance must have a migration state"); + // assert_eq!(migration.state, MigrationState::Completed); + // assert!(migration.gen > prev_migration.gen); + + // // The rest of the destruction sequence is covered by other tests. } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 9ee59e6f8d4..d0601df19b0 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -372,10 +372,7 @@ impl InstanceRunner { use InstanceMonitorRequest::*; match request { Some(Update { state, tx }) => { - let observed = ObservedPropolisState::new( - &self.state, - &state, - ); + let observed = ObservedPropolisState::new(&state); let reaction = self.observe_state(&observed).await; self.publish_state_to_nexus().await; @@ -656,13 +653,7 @@ impl InstanceRunner { let migrate = match migrate { Some(params) => { - let migration_id = self - .state - .migration() - .ok_or_else(|| { - Error::Migration(anyhow!("Missing Migration UUID")) - })? - .migration_id; + let migration_id = todo!("eliza: this probably needs to be sent by Nexus directly now?"); Some(propolis_client::types::InstanceMigrateInitiateRequest { src_addr: params.src_propolis_addr.to_string(), src_uuid: params.src_propolis_id, diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index c9197fc3b86..ffb7327ce77 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -431,15 +431,19 @@ mod test { fn make_instance( logctx: &LogContext, ) -> (SimObject, Receiver<()>) { - let propolis_id = Uuid::new_v4(); + let propolis_id = PropolisUuid::new_v4(); let vmm_state = VmmRuntimeState { state: VmmState::Starting, gen: Generation::new(), time_updated: Utc::now(), }; - let state = - SledInstanceState { vmm_state, propolis_id, migration_state: None }; + let state = SledInstanceState { + vmm_state, + propolis_id, + migration_in: None, + migration_out: None, + }; SimObject::new_simulated_auto(&state, logctx.log.new(o!())) } diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 1c1cc6bcda6..b99b50807c1 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -16,7 +16,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::external::ResourceType; use omicron_common::api::internal::nexus::{ - InstanceRuntimeState, MigrationRole, SledInstanceState, VmmState, + InstanceRuntimeState, SledInstanceState, VmmState, }; use propolis_client::types::{ InstanceMigrateStatusResponse as PropolisMigrateResponse, @@ -81,54 +81,47 @@ impl SimInstanceInner { /// Queue a successful simulated migration. /// - fn queue_successful_migration(&mut self, role: MigrationRole) { + fn queue_successful_migration(&mut self) { // Propolis transitions to the Migrating state once before // actually starting migration. self.queue_propolis_state(PropolisInstanceState::Migrating); - let migration_id = self.state.migration().unwrap_or_else(|| { - panic!( - "should have migration ID set before getting request to - migrate in (current state: {:?})", - self - ) - }); - - match role { - MigrationRole::Source => { - self.queue_migration_update(PropolisMigrateResponse { - migration_in: None, - migration_out: Some(PropolisMigrationStatus { - id: migration_id, - state: propolis_client::types::MigrationState::Sync, - }), - }); - self.queue_migration_update(PropolisMigrateResponse { - migration_in: None, - migration_out: Some(PropolisMigrationStatus { - id: migration_id, - state: propolis_client::types::MigrationState::Finish, - }), - }); - self.queue_graceful_stop(); - } - MigrationRole::Target => { - self.queue_migration_update(PropolisMigrateResponse { - migration_in: Some(PropolisMigrationStatus { - id: migration_id, - state: propolis_client::types::MigrationState::Sync, - }), - migration_out: None, - }); - self.queue_migration_update(PropolisMigrateResponse { - migration_in: Some(PropolisMigrationStatus { - id: migration_id, - state: propolis_client::types::MigrationState::Finish, - }), - migration_out: None, - }); - self.queue_propolis_state(PropolisInstanceState::Running) - } - } + todo!("eliza: fix this bit") + // match role { + // MigrationRole::Source => { + // self.queue_migration_update(PropolisMigrateResponse { + // migration_in: None, + // migration_out: Some(PropolisMigrationStatus { + // id: todo! + // state: propolis_client::types::MigrationState::Sync, + // }), + // }); + // self.queue_migration_update(PropolisMigrateResponse { + // migration_in: None, + // migration_out: Some(PropolisMigrationStatus { + // id: migration_id, + // state: propolis_client::types::MigrationState::Finish, + // }), + // }); + // self.queue_graceful_stop(); + // } + // MigrationRole::Target => { + // self.queue_migration_update(PropolisMigrateResponse { + // migration_in: Some(PropolisMigrationStatus { + // id: migration_id, + // state: propolis_client::types::MigrationState::Sync, + // }), + // migration_out: None, + // }); + // self.queue_migration_update(PropolisMigrateResponse { + // migration_in: Some(PropolisMigrationStatus { + // id: migration_id, + // state: propolis_client::types::MigrationState::Finish, + // }), + // migration_out: None, + // }); + // self.queue_propolis_state(PropolisInstanceState::Running) + // } + // } } fn queue_graceful_stop(&mut self) { @@ -178,7 +171,7 @@ impl SimInstanceInner { ))); } - self.queue_successful_migration(MigrationRole::Target) + // self.queue_successful_migration(MigrationRole::Target) } InstanceStateRequested::Running => { match self.next_resting_state() { @@ -277,7 +270,6 @@ impl SimInstanceInner { } self.state.apply_propolis_observation(&ObservedPropolisState::new( - &self.state, &self.last_response, )) } else { @@ -382,19 +374,20 @@ impl SimInstanceInner { // If we set migration IDs and are the migration source, ensure that we // will perform the correct state transitions to simulate a successful // migration. - if ids.is_some() { - let role = self - .state - .migration() - .expect( - "we just got a `put_migration_ids` request with `Some` IDs, \ - so we should have a migration" - ) - .role; - if role == MigrationRole::Source { - self.queue_successful_migration(MigrationRole::Source) - } - } + // if ids.is_some() { + // let role = self + // .state + // .migration() + // .expect( + // "we just got a `put_migration_ids` request with `Some` IDs, \ + // so we should have a migration" + // ) + // .role; + // if role == MigrationRole::Source { + // self.queue_successful_migration(MigrationRole::Source) + // } + // } + todo!(); Ok(self.state.sled_instance_state()) } diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 26f1ee2364a..8a12e8c5d79 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -348,7 +348,8 @@ impl SledAgent { SledInstanceState { vmm_state: vmm_runtime, propolis_id, - migration_state: None, + migration_in: None, + migration_out: None, }, None, )