From 8f54cb7d05a803190730042d211d00bcbf842497 Mon Sep 17 00:00:00 2001 From: lif <> Date: Tue, 19 Dec 2023 01:58:42 -0800 Subject: [PATCH] Since the instance creation request no longer blocks, we need to wait before attempting to send serial console data requests --- end-to-end-tests/src/instance_launch.rs | 15 ++++++++++- nexus/src/app/instance.rs | 33 +++++++++++-------------- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index b3d14060703..b6523bac9ce 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -7,7 +7,7 @@ use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ ByteCount, DiskCreate, DiskSource, ExternalIpCreate, InstanceCpuCount, InstanceCreate, InstanceDiskAttachment, InstanceNetworkInterfaceAttachment, - SshKeyCreate, + InstanceState, SshKeyCreate, }; use oxide_client::{ClientDisksExt, ClientInstancesExt, ClientSessionExt}; use russh::{ChannelMsg, Disconnect}; @@ -98,6 +98,19 @@ async fn instance_launch() -> Result<()> { type Error = CondCheckError>; + let instance_state = ctx + .client + .instance_view() + .project(ctx.project_name.clone()) + .instance(instance.name.clone()) + .send() + .await? + .run_state; + + if instance_state == InstanceState::Starting { + return Err(Error::NotYet); + } + let data = String::from_utf8_lossy( &ctx.client .instance_serial_console() diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 81adbaa53fd..ec5e21a0fca 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -971,7 +971,14 @@ impl super::Nexus { } } - /// TODO describe how this relates to [Self::instance_request_state] (above) + /// For calls to [sled_agent_client::Client::instance_put_state] (such as + /// made by [Self::instance_request_state]) that involve a long-running + /// task such as creating a propolis zone (i.e. during instance creation + /// or migration target provisioning), sled-agent may send the resulting + /// instance state to Nexus via the internal API instead of blocking + /// during the request handler and risking an HTTP request timeout. This + /// function writes the asynchronously-returned updated instance state + /// to the database. pub(crate) async fn instance_handle_creation_result( &self, opctx: &OpContext, @@ -983,30 +990,20 @@ impl super::Nexus { .lookup_for(authz::Action::Modify) .await?; - let state = self - .db_datastore - .instance_fetch_with_vmm(opctx, &authz_instance) - .await?; - - // TODO: add param for sled-agent to show its 'previous' and compare with this - // to validate consistency between nexus and sled-agent - let prev_instance_runtime = &state.instance().runtime_state; - match result { Ok(new_state) => self - .db_datastore - .instance_and_vmm_update_runtime( - instance_id, - &new_state.instance_state.into(), - &new_state.propolis_id, - &new_state.vmm_state.into(), - ) + .write_returned_instance_state(instance_id, Some(new_state)) .await .map(|_| ()), Err(error) => { + let state = self + .db_datastore + .instance_fetch_with_vmm(opctx, &authz_instance) + .await?; + self.mark_instance_failed( instance_id, - prev_instance_runtime, + &state.instance().runtime_state, error, ) .await