factor out pieces needed for #5611
hawkw committed Apr 25, 2024
1 parent aec3400 commit c63b335
Showing 5 changed files with 1,198 additions and 860 deletions.
nexus/src/app/instance.rs (356 changes: 188 additions & 168 deletions)
@@ -28,6 +28,7 @@ use nexus_db_queries::db::datastore::InstanceAndActiveVmm;
use nexus_db_queries::db::identity::Resource;
use nexus_db_queries::db::lookup;
use nexus_db_queries::db::lookup::LookupPath;
use nexus_db_queries::db::DataStore;
use nexus_types::external_api::views;
use omicron_common::api::external::http_pagination::PaginatedBy;
use omicron_common::api::external::ByteCount;
@@ -1513,176 +1514,16 @@ impl super::Nexus {
instance_id: &Uuid,
new_runtime_state: &nexus::SledInstanceState,
) -> Result<(), Error> {
let log = &self.log;
let propolis_id = new_runtime_state.propolis_id;

info!(log, "received new runtime state from sled agent";
"instance_id" => %instance_id,
"instance_state" => ?new_runtime_state.instance_state,
"propolis_id" => %propolis_id,
"vmm_state" => ?new_runtime_state.vmm_state);

// Grab the current state of the instance in the DB to reason about
// whether this update is stale or not.
let (.., authz_instance, db_instance) =
LookupPath::new(&opctx, &self.db_datastore)
.instance_id(*instance_id)
.fetch()
.await?;

// Update OPTE and Dendrite if the instance's active sled assignment
// changed or a migration was retired. If these actions fail, sled agent
// is expected to retry this update.
//
// This configuration must be updated before updating any state in CRDB
// so that, if the instance was migrating or has shut down, it will not
// appear to be able to migrate or start again until the appropriate
// networking state has been written. Without this interlock, another
// thread or another Nexus can race with this routine to write
// conflicting configuration.
//
// In the future, this should be replaced by a call to trigger a
// networking state update RPW.
self.ensure_updated_instance_network_config(
notify_instance_updated(
&self.db_datastore,
&self.resolver().await,
&self.opctx_alloc,
opctx,
&authz_instance,
db_instance.runtime(),
&new_runtime_state.instance_state,
&self.log,
instance_id,
new_runtime_state,
)
.await?;

// If the supplied instance state indicates that the instance no longer
// has an active VMM, attempt to delete the virtual provisioning record,
// and the assignment of the Propolis metric producer to an oximeter
// collector.
//
// As with updating networking state, this must be done before
// committing the new runtime state to the database: once the DB is
// written, a new start saga can arrive and start the instance, which
// will try to create its own virtual provisioning charges, which will
// race with this operation.
if new_runtime_state.instance_state.propolis_id.is_none() {
self.db_datastore
.virtual_provisioning_collection_delete_instance(
opctx,
*instance_id,
db_instance.project_id,
i64::from(db_instance.ncpus.0 .0),
db_instance.memory,
(&new_runtime_state.instance_state.gen).into(),
)
.await?;

// TODO-correctness: The `notify_instance_updated` method can run
// concurrently with itself in some situations, such as where a
// sled-agent attempts to update Nexus about a stopped instance;
// that times out; and it makes another request to a different
// Nexus. The call to `unassign_producer` is racy in those
// situations, and we may end up with instances with no metrics.
//
// This unfortunate case should be handled as part of
// instance-lifecycle improvements, notably using a reliable
// persistent workflow to correctly update the oximeter assignment as
// an instance's state changes.
//
// Tracked in https://github.com/oxidecomputer/omicron/issues/3742.
self.unassign_producer(opctx, instance_id).await?;
}

// Write the new instance and VMM states back to CRDB. This needs to be
// done before trying to clean up the VMM, since the datastore will only
// allow a VMM to be marked as deleted if it is already in a terminal
// state.
let result = self
.db_datastore
.instance_and_vmm_update_runtime(
instance_id,
&db::model::InstanceRuntimeState::from(
new_runtime_state.instance_state.clone(),
),
&propolis_id,
&db::model::VmmRuntimeState::from(
new_runtime_state.vmm_state.clone(),
),
)
.await;

// If the VMM is now in a terminal state, make sure its resources get
// cleaned up.
//
// For idempotency, only check to see if the update was successfully
// processed and ignore whether the VMM record was actually updated.
// This is required to handle the case where this routine is called
// once, writes the terminal VMM state, fails before all per-VMM
// resources are released, returns a retriable error, and is retried:
// the per-VMM resources still need to be cleaned up, but the DB update
// will return Ok(_, false) because the database was already updated.
//
// Unlike the pre-update cases, it is legal to do this cleanup *after*
// committing state to the database, because a terminated VMM cannot be
// reused (restarting or migrating its former instance will use new VMM
// IDs).
if result.is_ok() {
let propolis_terminated = matches!(
new_runtime_state.vmm_state.state,
InstanceState::Destroyed | InstanceState::Failed
);

if propolis_terminated {
info!(log, "vmm is terminated, cleaning up resources";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);

self.db_datastore
.sled_reservation_delete(opctx, propolis_id)
.await?;

if !self
.db_datastore
.vmm_mark_deleted(opctx, &propolis_id)
.await?
{
warn!(log, "failed to mark vmm record as deleted";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"vmm_state" => ?new_runtime_state.vmm_state);
}
}
}

match result {
Ok((instance_updated, vmm_updated)) => {
info!(log, "instance and vmm updated by sled agent";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"instance_updated" => instance_updated,
"vmm_updated" => vmm_updated);
Ok(())
}

// The update command should swallow object-not-found errors and
// return them back as failures to update, so this error case is
// unexpected. There's no work to do if this occurs, however.
Err(Error::ObjectNotFound { .. }) => {
error!(log, "instance/vmm update unexpectedly returned \
an object not found error";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);
Ok(())
}

// If the datastore is unavailable, propagate that to the caller.
// TODO-robustness Really this should be any _transient_ error. How
// can we distinguish? Maybe datastore should emit something
// different from Error with an Into<Error>.
Err(error) => {
warn!(log, "failed to update instance from sled agent";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"error" => ?error);
Err(error)
}
}
.await
}

/// Returns the requested range of serial console output bytes,
@@ -2111,6 +1952,185 @@
}
}

/// Invoked by a sled agent to publish an updated runtime state for an
/// Instance.
pub(crate) async fn notify_instance_updated(
datastore: &DataStore,
resolver: &internal_dns::resolver::Resolver,
opctx_alloc: &OpContext,
opctx: &OpContext,
log: &slog::Logger,
instance_id: &Uuid,
new_runtime_state: &nexus::SledInstanceState,
) -> Result<(), Error> {
let propolis_id = new_runtime_state.propolis_id;

info!(log, "received new runtime state from sled agent";
"instance_id" => %instance_id,
"instance_state" => ?new_runtime_state.instance_state,
"propolis_id" => %propolis_id,
"vmm_state" => ?new_runtime_state.vmm_state);

// Grab the current state of the instance in the DB to reason about
// whether this update is stale or not.
let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore)
.instance_id(*instance_id)
.fetch()
.await?;

// Update OPTE and Dendrite if the instance's active sled assignment
// changed or a migration was retired. If these actions fail, sled agent
// is expected to retry this update.
//
// This configuration must be updated before updating any state in CRDB
// so that, if the instance was migrating or has shut down, it will not
// appear to be able to migrate or start again until the appropriate
// networking state has been written. Without this interlock, another
// thread or another Nexus can race with this routine to write
// conflicting configuration.
//
// In the future, this should be replaced by a call to trigger a
// networking state update RPW.
super::instance_network::ensure_updated_instance_network_config(
datastore,
log,
resolver,
opctx,
opctx_alloc,
&authz_instance,
db_instance.runtime(),
&new_runtime_state.instance_state,
)
.await?;

// If the supplied instance state indicates that the instance no longer
// has an active VMM, attempt to delete the virtual provisioning record,
// and the assignment of the Propolis metric producer to an oximeter
// collector.
//
// As with updating networking state, this must be done before
// committing the new runtime state to the database: once the DB is
// written, a new start saga can arrive and start the instance, which
// will try to create its own virtual provisioning charges, which will
// race with this operation.
if new_runtime_state.instance_state.propolis_id.is_none() {
datastore
.virtual_provisioning_collection_delete_instance(
opctx,
*instance_id,
db_instance.project_id,
i64::from(db_instance.ncpus.0 .0),
db_instance.memory,
(&new_runtime_state.instance_state.gen).into(),
)
.await?;

// TODO-correctness: The `notify_instance_updated` method can run
// concurrently with itself in some situations, such as where a
// sled-agent attempts to update Nexus about a stopped instance;
// that times out; and it makes another request to a different
// Nexus. The call to `unassign_producer` is racy in those
// situations, and we may end up with instances with no metrics.
//
// This unfortunate case should be handled as part of
// instance-lifecycle improvements, notably using a reliable
// persistent workflow to correctly update the oximeter assignment as
// an instance's state changes.
//
// Tracked in https://github.com/oxidecomputer/omicron/issues/3742.
super::oximeter::unassign_producer(datastore, log, opctx, instance_id)
.await?;
}

// Write the new instance and VMM states back to CRDB. This needs to be
// done before trying to clean up the VMM, since the datastore will only
// allow a VMM to be marked as deleted if it is already in a terminal
// state.
let result = datastore
.instance_and_vmm_update_runtime(
instance_id,
&db::model::InstanceRuntimeState::from(
new_runtime_state.instance_state.clone(),
),
&propolis_id,
&db::model::VmmRuntimeState::from(
new_runtime_state.vmm_state.clone(),
),
)
.await;

// If the VMM is now in a terminal state, make sure its resources get
// cleaned up.
//
// For idempotency, only check to see if the update was successfully
// processed and ignore whether the VMM record was actually updated.
// This is required to handle the case where this routine is called
// once, writes the terminal VMM state, fails before all per-VMM
// resources are released, returns a retriable error, and is retried:
// the per-VMM resources still need to be cleaned up, but the DB update
// will return Ok(_, false) because the database was already updated.
//
// Unlike the pre-update cases, it is legal to do this cleanup *after*
// committing state to the database, because a terminated VMM cannot be
// reused (restarting or migrating its former instance will use new VMM
// IDs).
if result.is_ok() {
let propolis_terminated = matches!(
new_runtime_state.vmm_state.state,
InstanceState::Destroyed | InstanceState::Failed
);

if propolis_terminated {
info!(log, "vmm is terminated, cleaning up resources";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);

datastore.sled_reservation_delete(opctx, propolis_id).await?;

if !datastore.vmm_mark_deleted(opctx, &propolis_id).await? {
warn!(log, "failed to mark vmm record as deleted";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"vmm_state" => ?new_runtime_state.vmm_state);
}
}
}

match result {
Ok((instance_updated, vmm_updated)) => {
info!(log, "instance and vmm updated by sled agent";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"instance_updated" => instance_updated,
"vmm_updated" => vmm_updated);
Ok(())
}

// The update command should swallow object-not-found errors and
// return them back as failures to update, so this error case is
// unexpected. There's no work to do if this occurs, however.
Err(Error::ObjectNotFound { .. }) => {
error!(log, "instance/vmm update unexpectedly returned \
an object not found error";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);
Ok(())
}

// If the datastore is unavailable, propagate that to the caller.
// TODO-robustness Really this should be any _transient_ error. How
// can we distinguish? Maybe datastore should emit something
// different from Error with an Into<Error>.
Err(error) => {
warn!(log, "failed to update instance from sled agent";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"error" => ?error);
Err(error)
}
}
}

#[cfg(test)]
mod tests {
use super::super::Nexus;
[Diffs for the remaining four changed files are not shown.]
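The first hunk above interleaves the removed inline body with the new call site, which is hard to read without the diff markers. As a rough guide, the slimmed-down Nexus::notify_instance_updated method appears to reduce to a thin wrapper that delegates to the new free function. This is a reconstruction from the hunk above, not part of the commit text as rendered here; the surrounding impl super::Nexus block and imports are elided, and the &self/opctx parameters are assumed unchanged from the pre-existing signature:

    // Sketch reconstructed from the diff: the method signature is presumably
    // unchanged; only the body now forwards to the free-standing
    // notify_instance_updated function added later in this file.
    pub(crate) async fn notify_instance_updated(
        &self,
        opctx: &OpContext,
        instance_id: &Uuid,
        new_runtime_state: &nexus::SledInstanceState,
    ) -> Result<(), Error> {
        notify_instance_updated(
            &self.db_datastore,
            &self.resolver().await,
            &self.opctx_alloc,
            opctx,
            &self.log,
            instance_id,
            new_runtime_state,
        )
        .await
    }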
