diff --git a/nexus/src/app/sagas/instance_update/mod.rs b/nexus/src/app/sagas/instance_update/mod.rs index c152369d37..22859d0f48 100644 --- a/nexus/src/app/sagas/instance_update/mod.rs +++ b/nexus/src/app/sagas/instance_update/mod.rs @@ -1127,25 +1127,12 @@ mod test { ) { let _project_id = setup_test_project(&cptestctx.external_client).await; let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; - let test = MigrationTest::setup(cptestctx, &other_sleds).await; - - // Pretend the migration source has completed. - test.update_src_state( - cptestctx, - VmmState::Stopping, - MigrationState::Completed, - ) - .await; - - // Run the instance-update saga. - let nexus = &cptestctx.server.server_context().nexus; - nexus - .sagas - .saga_execute::(test.saga_params()) + MigrationOutcome::default() + .source(MigrationState::Completed, VmmState::Stopping) + .setup_test(cptestctx, &other_sleds) .await - .expect("update saga should succeed"); - - test.verify_src_succeeded(cptestctx).await; + .run_saga_basic_usage_succeeds_test(cptestctx) + .await; } #[nexus_test(server = crate::Server)] @@ -1154,27 +1141,13 @@ mod test { ) { let _project_id = setup_test_project(&cptestctx.external_client).await; let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; - let test = MigrationTest::setup(cptestctx, &other_sleds).await; - - // Pretend the migration source has completed. - test.update_src_state( - cptestctx, - VmmState::Stopping, - MigrationState::Completed, - ) - .await; - - // Build the saga DAG with the provided test parameters - let dag = - create_saga_dag::(test.saga_params()).unwrap(); - - crate::app::sagas::test_helpers::actions_succeed_idempotently( - &cptestctx.server.server_context().nexus, - dag, - ) - .await; - test.verify_src_succeeded(cptestctx).await; + MigrationOutcome::default() + .source(MigrationState::Completed, VmmState::Stopping) + .setup_test(cptestctx, &other_sleds) + .await + .run_actions_succeed_idempotently_test(cptestctx) + .await; } #[nexus_test(server = crate::Server)] @@ -1185,20 +1158,17 @@ mod test { let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; let _project_id = setup_test_project(&cptestctx.external_client).await; + let outcome = MigrationOutcome::default() + .source(MigrationState::Completed, VmmState::Stopping); + test_helpers::action_failure_can_unwind::( nexus, || { Box::pin(async { - let test = - MigrationTest::setup(cptestctx, &other_sleds).await; - // Pretend the migration source has completed. - test.update_src_state( - cptestctx, - VmmState::Stopping, - MigrationState::Completed, - ) - .await; - test.saga_params() + outcome + .setup_test(cptestctx, &other_sleds) + .await + .saga_params() }) }, || Box::pin(after_unwinding(cptestctx)), @@ -1215,25 +1185,13 @@ mod test { ) { let _project_id = setup_test_project(&cptestctx.external_client).await; let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; - let test = MigrationTest::setup(cptestctx, &other_sleds).await; - - // Pretend the migration target has completed. - test.update_target_state( - cptestctx, - VmmState::Running, - MigrationState::Completed, - ) - .await; - // Run the instance-update saga. - let nexus = &cptestctx.server.server_context().nexus; - nexus - .sagas - .saga_execute::(test.saga_params()) + MigrationOutcome::default() + .target(MigrationState::Completed, VmmState::Running) + .setup_test(cptestctx, &other_sleds) .await - .expect("update saga should succeed"); - - test.verify_target_succeeded(cptestctx).await; + .run_saga_basic_usage_succeeds_test(cptestctx) + .await; } #[nexus_test(server = crate::Server)] @@ -1242,51 +1200,95 @@ mod test { ) { let _project_id = setup_test_project(&cptestctx.external_client).await; let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; - let test = MigrationTest::setup(cptestctx, &other_sleds).await; - // Pretend the migration target has completed. - test.update_target_state( - cptestctx, - VmmState::Running, - MigrationState::Completed, - ) - .await; + MigrationOutcome::default() + .target(MigrationState::Completed, VmmState::Running) + .setup_test(cptestctx, &other_sleds) + .await + .run_actions_succeed_idempotently_test(cptestctx) + .await; + } - // Build the saga DAG with the provided test parameters - let dag = - create_saga_dag::(test.saga_params()).unwrap(); + #[nexus_test(server = crate::Server)] + async fn test_migration_target_completed_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; + let _project_id = setup_test_project(&cptestctx.external_client).await; + let outcome = MigrationOutcome::default() + .target(MigrationState::Completed, VmmState::Running); - crate::app::sagas::test_helpers::actions_succeed_idempotently( - &cptestctx.server.server_context().nexus, - dag, + test_helpers::action_failure_can_unwind::( + nexus, + || { + Box::pin(async { + outcome + .setup_test(cptestctx, &other_sleds) + .await + .saga_params() + }) + }, + || Box::pin(after_unwinding(cptestctx)), + &cptestctx.logctx.log, ) .await; + } + + // === migration completed and source destroyed tests === + + #[nexus_test(server = crate::Server)] + async fn test_migration_completed_source_destroyed_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let _project_id = setup_test_project(&cptestctx.external_client).await; + let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; - test.verify_target_succeeded(cptestctx).await; + MigrationOutcome::default() + .target(MigrationState::Completed, VmmState::Running) + .source(MigrationState::Completed, VmmState::Destroyed) + .setup_test(cptestctx, &other_sleds) + .await + .run_saga_basic_usage_succeeds_test(cptestctx) + .await; } #[nexus_test(server = crate::Server)] - async fn test_migration_target_completed_can_unwind( + async fn test_migration_completed_source_destroyed_actions_succeed_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let _project_id = setup_test_project(&cptestctx.external_client).await; + let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; + + MigrationOutcome::default() + .target(MigrationState::Completed, VmmState::Running) + .source(MigrationState::Completed, VmmState::Destroyed) + .setup_test(cptestctx, &other_sleds) + .await + .run_actions_succeed_idempotently_test(cptestctx) + .await; + } + + #[nexus_test(server = crate::Server)] + async fn test_migration_completed_source_destroyed_can_unwind( cptestctx: &ControlPlaneTestContext, ) { let nexus = &cptestctx.server.server_context().nexus; let other_sleds = test_helpers::add_sleds(cptestctx, 1).await; let _project_id = setup_test_project(&cptestctx.external_client).await; + let outcome = MigrationOutcome::default() + .target(MigrationState::Completed, VmmState::Running) + .source(MigrationState::Completed, VmmState::Destroyed); + test_helpers::action_failure_can_unwind::( nexus, || { Box::pin(async { - let test = - MigrationTest::setup(cptestctx, &other_sleds).await; - // Pretend the migration target has completed. - test.update_target_state( - cptestctx, - VmmState::Running, - MigrationState::Completed, - ) - .await; - test.saga_params() + outcome + .setup_test(cptestctx, &other_sleds) + .await + .saga_params() }) }, || Box::pin(after_unwinding(cptestctx)), @@ -1295,23 +1297,64 @@ mod test { .await; } + #[derive(Clone, Copy, Default)] + struct MigrationOutcome { + source: Option<(MigrationState, VmmState)>, + target: Option<(MigrationState, VmmState)>, + failed: bool, + } + + impl MigrationOutcome { + fn source(self, migration: MigrationState, vmm: VmmState) -> Self { + let failed = self.failed + || migration == MigrationState::Failed + || vmm == VmmState::Failed; + Self { source: Some((migration, vmm)), failed, ..self } + } + + fn target(self, migration: MigrationState, vmm: VmmState) -> Self { + let failed = self.failed + || migration == MigrationState::Failed + || vmm == VmmState::Failed; + Self { target: Some((migration, vmm)), failed, ..self } + } + + async fn setup_test( + self, + cptestctx: &ControlPlaneTestContext, + other_sleds: &[(SledUuid, omicron_sled_agent::sim::Server)], + ) -> MigrationTest { + MigrationTest::setup(self, cptestctx, other_sleds).await + } + } + struct MigrationTest { + outcome: MigrationOutcome, instance_id: InstanceUuid, - state: InstanceSnapshot, + initial_state: InstanceSnapshot, authz_instance: authz::Instance, opctx: OpContext, } impl MigrationTest { fn target_vmm_id(&self) -> Uuid { - self.state + self.initial_state .target_vmm .as_ref() .expect("migrating instance must have a target VMM") .id } + fn src_vmm_id(&self) -> Uuid { + self.initial_state + .active_vmm + .as_ref() + .expect("migrating instance must have a source VMM") + .id + } + async fn setup( + outcome: MigrationOutcome, cptestctx: &ControlPlaneTestContext, other_sleds: &[(SledUuid, omicron_sled_agent::sim::Server)], ) -> Self { @@ -1358,12 +1401,64 @@ mod test { .fetch() .await .expect("test instance should be present in datastore"); - let state = datastore + let initial_state = datastore .instance_fetch_all(&opctx, &authz_instance) .await .expect("test instance should be present in datastore"); - Self { authz_instance, state, opctx, instance_id } + let this = Self { + authz_instance, + initial_state, + outcome, + opctx, + instance_id, + }; + if let Some((migration_state, vmm_state)) = this.outcome.source { + this.update_src_state(cptestctx, vmm_state, migration_state) + .await; + } + + if let Some((migration_state, vmm_state)) = this.outcome.target { + this.update_target_state(cptestctx, vmm_state, migration_state) + .await; + } + + this + } + + async fn run_saga_basic_usage_succeeds_test( + &self, + cptestctx: &ControlPlaneTestContext, + ) { + // Run the instance-update saga. + let nexus = &cptestctx.server.server_context().nexus; + nexus + .sagas + .saga_execute::(self.saga_params()) + .await + .expect("update saga should succeed"); + + // Check the results + self.verify(cptestctx).await; + } + + async fn run_actions_succeed_idempotently_test( + &self, + cptestctx: &ControlPlaneTestContext, + ) { + // Build the saga DAG with the provided test parameters + let dag = create_saga_dag::(self.saga_params()) + .unwrap(); + + // Run the actions-succeed-idempotently test + crate::app::sagas::test_helpers::actions_succeed_idempotently( + &cptestctx.server.server_context().nexus, + dag, + ) + .await; + + // Check the results + self.verify(cptestctx).await; } async fn update_src_state( @@ -1373,7 +1468,7 @@ mod test { migration_state: MigrationState, ) { let src_vmm = self - .state + .initial_state .active_vmm .as_ref() .expect("must have an active VMM"); @@ -1385,7 +1480,7 @@ mod test { }; let migration = self - .state + .initial_state .migration .as_ref() .expect("must have an active migration"); @@ -1428,8 +1523,11 @@ mod test { vmm_state: VmmState, migration_state: MigrationState, ) { - let target_vmm = - self.state.target_vmm.as_ref().expect("must have a target VMM"); + let target_vmm = self + .initial_state + .target_vmm + .as_ref() + .expect("must have a target VMM"); let vmm_id = PropolisUuid::from_untyped_uuid(target_vmm.id); let new_runtime = nexus_db_model::VmmRuntimeState { time_state_updated: Utc::now(), @@ -1438,7 +1536,7 @@ mod test { }; let migration = self - .state + .initial_state .migration .as_ref() .expect("must have an active migration"); @@ -1484,80 +1582,137 @@ mod test { } } - async fn verify_src_succeeded( - &self, - cptestctx: &ControlPlaneTestContext, - ) { - let state = self.verify_migration_succeeded(cptestctx).await; - let instance = state.instance(); - let instance_runtime = instance.runtime(); - assert_eq!( - instance_runtime.dst_propolis_id, - Some(self.target_vmm_id()), - "target VMM ID must remain set until target VMM reports success", - ); - assert_eq!( - instance_runtime.migration_id, - self.state.instance.runtime().migration_id, - "migration ID must remain set until target VMM reports success", - ); - } - - async fn verify_target_succeeded( - &self, - cptestctx: &ControlPlaneTestContext, - ) { - let state = self.verify_migration_succeeded(cptestctx).await; - let instance = state.instance(); - let instance_runtime = instance.runtime(); - assert_eq!( - instance_runtime.dst_propolis_id, None, - "target VMM ID must be unset once VMM reports success", - ); - assert_eq!( - instance_runtime.migration_id, None, - "migration ID must be unset once target VMM reports success", + async fn verify(&self, cptestctx: &ControlPlaneTestContext) { + info!( + cptestctx.logctx.log, + "checking update saga results after migration"; + "source_outcome" => ?self.outcome.source.as_ref(), + "target_outcome" => ?self.outcome.target.as_ref(), + "migration_failed" => self.outcome.failed, ); - } - async fn verify_migration_succeeded( - &self, - cptestctx: &ControlPlaneTestContext, - ) -> InstanceAndActiveVmm { + use test_helpers::*; let state = test_helpers::instance_fetch(cptestctx, self.instance_id).await; let instance = state.instance(); let instance_runtime = instance.runtime(); let active_vmm_id = instance_runtime.propolis_id; + + assert_instance_unlocked(instance); + + if self.outcome.failed { + todo!("eliza: verify migration-failed postconditions"); + } else { + assert_eq!( + active_vmm_id, + Some(self.target_vmm_id()), + "target VMM must be in the active VMM position after migration success", + ); + assert_eq!(instance_runtime.nexus_state, InstanceState::Vmm); + if self + .outcome + .target + .as_ref() + .map(|(state, _)| state == &MigrationState::Completed) + .unwrap_or(false) + { + assert_eq!( + instance_runtime.dst_propolis_id, None, + "target VMM ID must be unset once target VMM reports success", + ); + assert_eq!( + instance_runtime.migration_id, None, + "migration ID must be unset once target VMM reports success", + ); + } else { + assert_eq!( + instance_runtime.dst_propolis_id, + Some(self.target_vmm_id()), + "target VMM ID must remain set until the target VMM reports success", + ); + assert_eq!( + instance_runtime.migration_id, + self.initial_state.instance.runtime().migration_id, + "migration ID must remain set until target VMM reports success", + ); + } + } + + let src_destroyed = self + .outcome + .source + .as_ref() + .map(|(_, state)| state == &VmmState::Destroyed) + .unwrap_or(false); assert_eq!( - active_vmm_id, - Some(self.target_vmm_id()), - "target VMM must be in the active VMM position after migration success", + self.src_resource_records_exist(cptestctx).await, + !src_destroyed, + "source VMM should exist if and only if the source hasn't been destroyed", ); - assert_eq!(instance_runtime.nexus_state, InstanceState::Vmm); - assert_instance_unlocked(instance); - assert!( - !test_helpers::no_virtual_provisioning_resource_records_exist( - cptestctx - ) - .await, - "virtual provisioning records must exist after successful migration", + + let target_destroyed = self + .outcome + .source + .as_ref() + .map(|(_, state)| state == &VmmState::Destroyed) + .unwrap_or(false); + + // TODO(eliza): this doesn't actually work because we don't actually + // poke the target simulated sled agent enough to get it to have + // resource records... + // assert_eq!( + // self.target_resource_records_exist(cptestctx).await, + // !target_destroyed, + // "target VMM should exist if and only if the target hasn't been destroyed", + // ); + + let all_vmms_destroyed = src_destroyed && target_destroyed; + + assert_eq!( + no_virtual_provisioning_resource_records_exist(cptestctx).await, + all_vmms_destroyed, + "virtual provisioning resource records must exist as long as \ + the instance has a VMM", ); - assert!( - !test_helpers::no_virtual_provisioning_collection_records_using_instances(cptestctx) - .await, - "virtual provisioning records must exist after successful migration", - ); - assert!( - !test_helpers::no_sled_resource_instance_records_exist( + assert_eq!( + no_virtual_provisioning_collection_records_using_instances( cptestctx ) .await, - "sled resource records must exist after successful migration", + all_vmms_destroyed, + "virtual provisioning collection records must exist as long \ + as the instance has a VMM", ); - state + let instance_state = if all_vmms_destroyed { + InstanceState::NoVmm + } else { + InstanceState::Vmm + }; + assert_eq!(instance_runtime.nexus_state, instance_state); + } + + async fn src_resource_records_exist( + &self, + cptestctx: &ControlPlaneTestContext, + ) -> bool { + test_helpers::sled_resources_exist_for_vmm( + cptestctx, + PropolisUuid::from_untyped_uuid(self.src_vmm_id()), + ) + .await + } + + async fn target_resource_records_exist( + &self, + cptestctx: &ControlPlaneTestContext, + ) -> bool { + test_helpers::sled_resources_exist_for_vmm( + cptestctx, + PropolisUuid::from_untyped_uuid(self.target_vmm_id()), + ) + .await } } } diff --git a/nexus/src/app/sagas/test_helpers.rs b/nexus/src/app/sagas/test_helpers.rs index 74041c4f52..7a8cdf7f9d 100644 --- a/nexus/src/app/sagas/test_helpers.rs +++ b/nexus/src/app/sagas/test_helpers.rs @@ -28,7 +28,7 @@ use nexus_types::identity::Resource; use omicron_common::api::external::Error; use omicron_common::api::external::NameOrId; use omicron_test_utils::dev::poll; -use omicron_uuid_kinds::{GenericUuid, InstanceUuid, SledUuid}; +use omicron_uuid_kinds::{GenericUuid, InstanceUuid, PropolisUuid, SledUuid}; use sled_agent_client::TestInterfaces as _; use slog::{info, warn, Logger}; use std::{num::NonZeroU32, sync::Arc, time::Duration}; @@ -324,6 +324,12 @@ async fn instance_poll_state( pub async fn no_virtual_provisioning_resource_records_exist( cptestctx: &ControlPlaneTestContext, ) -> bool { + count_virtual_provisioning_resource_records(cptestctx).await == 0 +} + +pub async fn count_virtual_provisioning_resource_records( + cptestctx: &ControlPlaneTestContext, +) -> usize { use nexus_db_queries::db::model::VirtualProvisioningResource; use nexus_db_queries::db::schema::virtual_provisioning_resource::dsl; @@ -331,7 +337,7 @@ pub async fn no_virtual_provisioning_resource_records_exist( let conn = datastore.pool_connection_for_tests().await.unwrap(); datastore - .transaction_retry_wrapper("no_virtual_provisioning_resource_records_exist") + .transaction_retry_wrapper("count_virtual_provisioning_resource_records") .transaction(&conn, |conn| async move { conn .batch_execute_async(nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL) @@ -345,7 +351,7 @@ pub async fn no_virtual_provisioning_resource_records_exist( .get_results_async::(&conn) .await .unwrap() - .is_empty() + .len() ) }).await.unwrap() } @@ -353,6 +359,14 @@ pub async fn no_virtual_provisioning_resource_records_exist( pub async fn no_virtual_provisioning_collection_records_using_instances( cptestctx: &ControlPlaneTestContext, ) -> bool { + count_virtual_provisioning_collection_records_using_instances(cptestctx) + .await + == 0 +} + +pub async fn count_virtual_provisioning_collection_records_using_instances( + cptestctx: &ControlPlaneTestContext, +) -> usize { use nexus_db_queries::db::model::VirtualProvisioningCollection; use nexus_db_queries::db::schema::virtual_provisioning_collection::dsl; @@ -361,7 +375,7 @@ pub async fn no_virtual_provisioning_collection_records_using_instances( datastore .transaction_retry_wrapper( - "no_virtual_provisioning_collection_records_using_instances", + "count_virtual_provisioning_collection_records_using_instances", ) .transaction(&conn, |conn| async move { conn.batch_execute_async( @@ -377,7 +391,7 @@ pub async fn no_virtual_provisioning_collection_records_using_instances( .get_results_async::(&conn) .await .unwrap() - .is_empty()) + .len()) }) .await .unwrap() @@ -414,6 +428,33 @@ pub async fn no_sled_resource_instance_records_exist( .unwrap() } +pub async fn sled_resources_exist_for_vmm( + cptestctx: &ControlPlaneTestContext, + vmm_id: PropolisUuid, +) -> bool { + use nexus_db_queries::db::model::SledResource; + use nexus_db_queries::db::model::SledResourceKind; + use nexus_db_queries::db::schema::sled_resource::dsl; + + let datastore = cptestctx.server.server_context().nexus.datastore(); + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + let results = dsl::sled_resource + .filter(dsl::kind.eq(SledResourceKind::Instance)) + .filter(dsl::id.eq(vmm_id.into_untyped_uuid())) + .select(SledResource::as_select()) + .load_async(&*conn) + .await + .unwrap(); + info!( + cptestctx.logctx.log, + "queried sled reservation records for VMM"; + "vmm_id" => %vmm_id, + "results" => ?results, + ); + !results.is_empty() +} + /// Tests that the saga described by `dag` succeeds if each of its nodes is /// repeated. ///