From 1788920810fa13bc61ca7d5612eb58a496382e43 Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Tue, 9 Jul 2024 11:43:51 -0700 Subject: [PATCH] WHEW OKAY ACTUALLY DO THE MIGRATION --- clients/sled-agent-client/src/lib.rs | 12 +++++ nexus/tests/integration_tests/instances.rs | 57 ++++++++++++++++++++++ sled-agent/src/sim/http_entrypoints.rs | 18 ++++++- sled-agent/src/sim/sled_agent.rs | 4 +- 4 files changed, 88 insertions(+), 3 deletions(-) diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index e4f8099f7ec..3af631ff105 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -587,6 +587,7 @@ impl From /// are bonus endpoints, not generated in the real client. #[async_trait] pub trait TestInterfaces { + async fn instance_single_step(&self, id: Uuid); async fn instance_finish_transition(&self, id: Uuid); async fn instance_simulate_migration_source( &self, @@ -598,6 +599,17 @@ pub trait TestInterfaces { #[async_trait] impl TestInterfaces for Client { + async fn instance_single_step(&self, id: Uuid) { + let baseurl = self.baseurl(); + let client = self.client(); + let url = format!("{}/instances/{}/poke-single-step", baseurl, id); + client + .post(url) + .send() + .await + .expect("instance_single_step() failed unexpectedly"); + } + async fn instance_finish_transition(&self, id: Uuid) { let baseurl = self.baseurl(); let client = self.client(); diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 90bd200a8ba..c46bafa5088 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -1367,11 +1367,52 @@ async fn test_instance_metrics_with_migration( .parsed_body::() .unwrap(); + let migration_id = { + let datastore = apictx.nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.new(o!()), + datastore.clone(), + ); + let (.., authz_instance) = LookupPath::new(&opctx, 
&datastore) + .instance_id(instance.identity.id) + .lookup_for(nexus_db_queries::authz::Action::Read) + .await + .unwrap(); + datastore + .instance_refetch(&opctx, &authz_instance) + .await + .unwrap() + .runtime_state + .migration_id + .expect("since we've started a migration, the instance record must have a migration id!") + }; + + // Wait for the instance to be in the `Migrating` state. Otherwise, the + // subsequent `instance_wait_for_state(..., Running)` may see the `Running` + // state from the *old* VMM, rather than waiting for the migration to + // complete. + instance_simulate_migration_source( + cptestctx, + nexus, + original_sled, + instance_id, + migration_id, + ) + .await; + instance_single_step_on_sled(cptestctx, nexus, original_sled, instance_id) + .await; + instance_single_step_on_sled(cptestctx, nexus, dst_sled_id, instance_id) + .await; + instance_wait_for_state(&client, instance_id, InstanceState::Migrating) + .await; + check_provisioning_state(4, 1).await; // Complete migration on the target. Simulated migrations always succeed. // After this the instance should be running and should continue to appear // to be provisioned. + instance_simulate_on_sled(cptestctx, nexus, original_sled, instance_id) + .await; instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await; instance_wait_for_state(&client, instance_id, InstanceState::Running).await; @@ -5014,6 +5055,22 @@ pub async fn instance_simulate(nexus: &Arc, id: &InstanceUuid) { sa.instance_finish_transition(id.into_untyped_uuid()).await; } +/// Simulate one step of an ongoing instance state transition on the given +/// sled. To do this, we get the sled agent client for `sled_id` and tell it to +/// advance the instance's simulated state machine by a single step, rather +/// than finishing whatever async transition is going on. 
+async fn instance_single_step_on_sled( + cptestctx: &ControlPlaneTestContext, + nexus: &Arc, + sled_id: SledUuid, + instance_id: InstanceUuid, +) { + info!(&cptestctx.logctx.log, "Single-stepping simulated instance on sled"; + "instance_id" => %instance_id, "sled_id" => %sled_id); + let sa = nexus.sled_client(&sled_id).await.unwrap(); + sa.instance_single_step(instance_id.into_untyped_uuid()).await; +} + pub async fn instance_simulate_with_opctx( nexus: &Arc, id: &InstanceUuid, diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 361e0207dc3..338f25c7229 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -4,6 +4,7 @@ //! HTTP entrypoint functions for the sled agent's exposed API +use super::collection::PokeMode; use crate::bootstrap::early_networking::EarlyNetworkConfig; use crate::bootstrap::params::AddSledRequest; use crate::params::{ @@ -49,6 +50,7 @@ pub fn api() -> SledApiDescription { api.register(instance_put_external_ip)?; api.register(instance_delete_external_ip)?; api.register(instance_poke_post)?; + api.register(instance_poke_single_step_post)?; api.register(instance_post_sim_migration_source)?; api.register(disk_put)?; api.register(disk_poke_post)?; @@ -211,7 +213,21 @@ async fn instance_poke_post( ) -> Result { let sa = rqctx.context(); let instance_id = path_params.into_inner().instance_id; - sa.instance_poke(instance_id).await; + sa.instance_poke(instance_id, PokeMode::Drain).await; + Ok(HttpResponseUpdatedNoContent()) +} + +#[endpoint { + method = POST, + path = "/instances/{instance_id}/poke-single-step", +}] +async fn instance_poke_single_step_post( + rqctx: RequestContext>, + path_params: Path, +) -> Result { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + sa.instance_poke(instance_id, PokeMode::SingleStep).await; Ok(HttpResponseUpdatedNoContent()) } diff --git a/sled-agent/src/sim/sled_agent.rs 
b/sled-agent/src/sim/sled_agent.rs index b9bc0f79ff7..eda410a0eae 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -592,8 +592,8 @@ impl SledAgent { self.disks.size().await } - pub async fn instance_poke(&self, id: InstanceUuid) { - self.instances.sim_poke(id.into_untyped_uuid(), PokeMode::Drain).await; + pub async fn instance_poke(&self, id: InstanceUuid, mode: PokeMode) { + self.instances.sim_poke(id.into_untyped_uuid(), mode).await; } pub async fn disk_poke(&self, id: Uuid) {