Skip to content

Commit

Permalink
WHEW OKAY ACTUALLY DO THE MIGRATION
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkw committed Jul 9, 2024
1 parent 04572ed commit 1788920
Show file tree
Hide file tree
Showing 4 changed files with 88 additions and 3 deletions.
12 changes: 12 additions & 0 deletions clients/sled-agent-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -587,6 +587,7 @@ impl From<omicron_common::api::internal::shared::NetworkInterfaceKind>
/// are bonus endpoints, not generated in the real client.
#[async_trait]
pub trait TestInterfaces {
/// Asks the sled agent to single-step the simulated instance `id`.
///
/// The `Client` implementation issues a POST to the
/// `/instances/{id}/poke-single-step` endpoint.
async fn instance_single_step(&self, id: Uuid);
async fn instance_finish_transition(&self, id: Uuid);
async fn instance_simulate_migration_source(
&self,
Expand All @@ -598,6 +599,17 @@ pub trait TestInterfaces {

#[async_trait]
impl TestInterfaces for Client {
/// Single-steps the simulated instance `id` by POSTing to the sled
/// agent's `/instances/{id}/poke-single-step` endpoint.
///
/// # Panics
///
/// Panics if sending the request fails.
// NOTE(review): only the result of `send()` is checked here; presumably a
// non-2xx response is not treated as a failure -- confirm.
async fn instance_single_step(&self, id: Uuid) {
    let url =
        format!("{}/instances/{}/poke-single-step", self.baseurl(), id);
    self.client()
        .post(url)
        .send()
        .await
        .expect("instance_single_step() failed unexpectedly");
}

async fn instance_finish_transition(&self, id: Uuid) {
let baseurl = self.baseurl();
let client = self.client();
Expand Down
57 changes: 57 additions & 0 deletions nexus/tests/integration_tests/instances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1367,11 +1367,52 @@ async fn test_instance_metrics_with_migration(
.parsed_body::<Instance>()
.unwrap();

let migration_id = {
let datastore = apictx.nexus.datastore();
let opctx = OpContext::for_tests(
cptestctx.logctx.log.new(o!()),
datastore.clone(),
);
let (.., authz_instance) = LookupPath::new(&opctx, &datastore)
.instance_id(instance.identity.id)
.lookup_for(nexus_db_queries::authz::Action::Read)
.await
.unwrap();
datastore
.instance_refetch(&opctx, &authz_instance)
.await
.unwrap()
.runtime_state
.migration_id
.expect("since we've started a migration, the instance record must have a migration id!")
};

// Wait for the instance to be in the `Migrating` state. Otherwise, the
// subsequent `instance_wait_for_state(..., Running)` may see the `Running`
// state from the *old* VMM, rather than waiting for the migration to
// complete.
instance_simulate_migration_source(
cptestctx,
nexus,
original_sled,
instance_id,
migration_id,
)
.await;
instance_single_step_on_sled(cptestctx, nexus, original_sled, instance_id)
.await;
instance_single_step_on_sled(cptestctx, nexus, dst_sled_id, instance_id)
.await;
instance_wait_for_state(&client, instance_id, InstanceState::Migrating)
.await;

check_provisioning_state(4, 1).await;

// Complete migration on the target. Simulated migrations always succeed.
// After this the instance should be running and should continue to appear
// to be provisioned.
instance_simulate_on_sled(cptestctx, nexus, original_sled, instance_id)
.await;
instance_simulate_on_sled(cptestctx, nexus, dst_sled_id, instance_id).await;
instance_wait_for_state(&client, instance_id, InstanceState::Running).await;

Expand Down Expand Up @@ -5014,6 +5055,22 @@ pub async fn instance_simulate(nexus: &Arc<Nexus>, id: &InstanceUuid) {
sa.instance_finish_transition(id.into_untyped_uuid()).await;
}

/// Simulate a single step of an ongoing instance state transition on one
/// specific sled.
///
/// The caller names the sled explicitly via `sled_id`; this helper logs the
/// request, asks Nexus for that sled's client, and tells it to single-step
/// the simulated instance `instance_id`.
///
/// # Panics
///
/// Panics if Nexus cannot provide a sled client for `sled_id`.
async fn instance_single_step_on_sled(
    cptestctx: &ControlPlaneTestContext,
    nexus: &Arc<Nexus>,
    sled_id: SledUuid,
    instance_id: InstanceUuid,
) {
    let log = &cptestctx.logctx.log;
    info!(
        log,
        "Single-stepping simulated instance on sled";
        "instance_id" => %instance_id,
        "sled_id" => %sled_id
    );

    let sled_agent = nexus.sled_client(&sled_id).await.unwrap();
    let untyped_id = instance_id.into_untyped_uuid();
    sled_agent.instance_single_step(untyped_id).await;
}

pub async fn instance_simulate_with_opctx(
nexus: &Arc<Nexus>,
id: &InstanceUuid,
Expand Down
18 changes: 17 additions & 1 deletion sled-agent/src/sim/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

//! HTTP entrypoint functions for the sled agent's exposed API
use super::collection::PokeMode;
use crate::bootstrap::early_networking::EarlyNetworkConfig;
use crate::bootstrap::params::AddSledRequest;
use crate::params::{
Expand Down Expand Up @@ -49,6 +50,7 @@ pub fn api() -> SledApiDescription {
api.register(instance_put_external_ip)?;
api.register(instance_delete_external_ip)?;
api.register(instance_poke_post)?;
api.register(instance_poke_single_step_post)?;
api.register(instance_post_sim_migration_source)?;
api.register(disk_put)?;
api.register(disk_poke_post)?;
Expand Down Expand Up @@ -211,7 +213,21 @@ async fn instance_poke_post(
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
let sa = rqctx.context();
let instance_id = path_params.into_inner().instance_id;
sa.instance_poke(instance_id).await;
sa.instance_poke(instance_id, PokeMode::Drain).await;
Ok(HttpResponseUpdatedNoContent())
}

#[endpoint {
method = POST,
path = "/instances/{instance_id}/poke-single-step",
}]
async fn instance_poke_single_step_post(
rqctx: RequestContext<Arc<SledAgent>>,
path_params: Path<InstancePathParam>,
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
let sa = rqctx.context();
let instance_id = path_params.into_inner().instance_id;
sa.instance_poke(instance_id, PokeMode::SingleStep).await;
Ok(HttpResponseUpdatedNoContent())
}

Expand Down
4 changes: 2 additions & 2 deletions sled-agent/src/sim/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -592,8 +592,8 @@ impl SledAgent {
self.disks.size().await
}

pub async fn instance_poke(&self, id: InstanceUuid) {
self.instances.sim_poke(id.into_untyped_uuid(), PokeMode::Drain).await;
/// Pokes the simulated instance `id`, forwarding `mode` unchanged to the
/// instance collection's `sim_poke` along with the untyped form of the id.
// NOTE(review): presumably `PokeMode::Drain` runs every queued transition
// while `PokeMode::SingleStep` runs just one -- confirm against
// `sim::collection`.
pub async fn instance_poke(&self, id: InstanceUuid, mode: PokeMode) {
self.instances.sim_poke(id.into_untyped_uuid(), mode).await;
}

pub async fn disk_poke(&self, id: Uuid) {
Expand Down

0 comments on commit 1788920

Please sign in to comment.