[nexus] factor out instance management into free functions #5622

Merged
merged 4 commits on Apr 26, 2024
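
The change turns `Nexus::notify_instance_updated` (and the helpers it relies on) into free functions that take their dependencies — datastore, resolver, op-contexts, and logger — as explicit parameters, leaving only a thin delegating method on `Nexus`. Below is a minimal, hypothetical sketch of that pattern; the types and fields are simplified placeholders, not the real omicron definitions.

struct DataStore;
struct Logger;

impl DataStore {
    fn update_instance(&self, id: u32) -> Result<(), String> {
        // Stand-in for the real database update.
        println!("updating instance {id}");
        Ok(())
    }
}

struct Nexus {
    datastore: DataStore,
    log: Logger,
}

impl Nexus {
    // The method keeps its public signature but now only forwards to the
    // free function, passing its own fields as explicit arguments.
    fn notify_instance_updated(&self, id: u32) -> Result<(), String> {
        notify_instance_updated(&self.datastore, &self.log, id)
    }
}

// The factored-out free function: every dependency is a parameter, so it can
// be called (and unit-tested) without constructing a full `Nexus`.
fn notify_instance_updated(
    datastore: &DataStore,
    _log: &Logger,
    id: u32,
) -> Result<(), String> {
    datastore.update_instance(id)
}

fn main() {
    let nexus = Nexus { datastore: DataStore, log: Logger };
    nexus.notify_instance_updated(42).unwrap();
}

In the actual diff below, the wrapper takes the same shape: the old method body in instance.rs is reduced to a single call to notify_instance_updated(&self.db_datastore, &self.resolver().await, &self.opctx_alloc, opctx, &self.log, instance_id, new_runtime_state).await.
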
356 changes: 188 additions & 168 deletions nexus/src/app/instance.rs
@@ -28,6 +28,7 @@ use nexus_db_queries::db::datastore::InstanceAndActiveVmm;
use nexus_db_queries::db::identity::Resource;
use nexus_db_queries::db::lookup;
use nexus_db_queries::db::lookup::LookupPath;
use nexus_db_queries::db::DataStore;
use nexus_types::external_api::views;
use omicron_common::api::external::http_pagination::PaginatedBy;
use omicron_common::api::external::ByteCount;
@@ -1513,176 +1514,16 @@ impl super::Nexus {
instance_id: &Uuid,
new_runtime_state: &nexus::SledInstanceState,
) -> Result<(), Error> {
let log = &self.log;
let propolis_id = new_runtime_state.propolis_id;

info!(log, "received new runtime state from sled agent";
"instance_id" => %instance_id,
"instance_state" => ?new_runtime_state.instance_state,
"propolis_id" => %propolis_id,
"vmm_state" => ?new_runtime_state.vmm_state);

// Grab the current state of the instance in the DB to reason about
// whether this update is stale or not.
let (.., authz_instance, db_instance) =
LookupPath::new(&opctx, &self.db_datastore)
.instance_id(*instance_id)
.fetch()
.await?;

// Update OPTE and Dendrite if the instance's active sled assignment
// changed or a migration was retired. If these actions fail, sled agent
// is expected to retry this update.
//
// This configuration must be updated before updating any state in CRDB
// so that, if the instance was migrating or has shut down, it will not
// appear to be able to migrate or start again until the appropriate
// networking state has been written. Without this interlock, another
// thread or another Nexus can race with this routine to write
// conflicting configuration.
//
// In the future, this should be replaced by a call to trigger a
// networking state update RPW.
self.ensure_updated_instance_network_config(
notify_instance_updated(
&self.db_datastore,
&self.resolver().await,
&self.opctx_alloc,
opctx,
&authz_instance,
db_instance.runtime(),
&new_runtime_state.instance_state,
&self.log,
instance_id,
new_runtime_state,
)
.await?;

// If the supplied instance state indicates that the instance no longer
// has an active VMM, attempt to delete the virtual provisioning record,
// and the assignment of the Propolis metric producer to an oximeter
// collector.
//
// As with updating networking state, this must be done before
// committing the new runtime state to the database: once the DB is
// written, a new start saga can arrive and start the instance, which
// will try to create its own virtual provisioning charges, which will
// race with this operation.
if new_runtime_state.instance_state.propolis_id.is_none() {
self.db_datastore
.virtual_provisioning_collection_delete_instance(
opctx,
*instance_id,
db_instance.project_id,
i64::from(db_instance.ncpus.0 .0),
db_instance.memory,
(&new_runtime_state.instance_state.gen).into(),
)
.await?;

// TODO-correctness: The `notify_instance_updated` method can run
// concurrently with itself in some situations, such as where a
// sled-agent attempts to update Nexus about a stopped instance;
// that times out; and it makes another request to a different
// Nexus. The call to `unassign_producer` is racy in those
// situations, and we may end up with instances with no metrics.
//
// This unfortunate case should be handled as part of
// instance-lifecycle improvements, notably using a reliable
// persistent workflow to correctly update the oximeter assignment as
// an instance's state changes.
//
// Tracked in https://github.com/oxidecomputer/omicron/issues/3742.
self.unassign_producer(opctx, instance_id).await?;
}

// Write the new instance and VMM states back to CRDB. This needs to be
// done before trying to clean up the VMM, since the datastore will only
// allow a VMM to be marked as deleted if it is already in a terminal
// state.
let result = self
.db_datastore
.instance_and_vmm_update_runtime(
instance_id,
&db::model::InstanceRuntimeState::from(
new_runtime_state.instance_state.clone(),
),
&propolis_id,
&db::model::VmmRuntimeState::from(
new_runtime_state.vmm_state.clone(),
),
)
.await;

// If the VMM is now in a terminal state, make sure its resources get
// cleaned up.
//
// For idempotency, only check to see if the update was successfully
// processed and ignore whether the VMM record was actually updated.
// This is required to handle the case where this routine is called
// once, writes the terminal VMM state, fails before all per-VMM
// resources are released, returns a retriable error, and is retried:
// the per-VMM resources still need to be cleaned up, but the DB update
// will return Ok(_, false) because the database was already updated.
//
// Unlike the pre-update cases, it is legal to do this cleanup *after*
// committing state to the database, because a terminated VMM cannot be
// reused (restarting or migrating its former instance will use new VMM
// IDs).
if result.is_ok() {
let propolis_terminated = matches!(
new_runtime_state.vmm_state.state,
InstanceState::Destroyed | InstanceState::Failed
);

if propolis_terminated {
info!(log, "vmm is terminated, cleaning up resources";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);

self.db_datastore
.sled_reservation_delete(opctx, propolis_id)
.await?;

if !self
.db_datastore
.vmm_mark_deleted(opctx, &propolis_id)
.await?
{
warn!(log, "failed to mark vmm record as deleted";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"vmm_state" => ?new_runtime_state.vmm_state);
}
}
}

match result {
Ok((instance_updated, vmm_updated)) => {
info!(log, "instance and vmm updated by sled agent";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"instance_updated" => instance_updated,
"vmm_updated" => vmm_updated);
Ok(())
}

// The update command should swallow object-not-found errors and
// return them back as failures to update, so this error case is
// unexpected. There's no work to do if this occurs, however.
Err(Error::ObjectNotFound { .. }) => {
error!(log, "instance/vmm update unexpectedly returned \
an object not found error";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);
Ok(())
}

// If the datastore is unavailable, propagate that to the caller.
// TODO-robustness Really this should be any _transient_ error. How
// can we distinguish? Maybe datastore should emit something
// different from Error with an Into<Error>.
Err(error) => {
warn!(log, "failed to update instance from sled agent";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"error" => ?error);
Err(error)
}
}
.await
}

/// Returns the requested range of serial console output bytes,
@@ -2111,6 +1952,185 @@ impl super::Nexus {
}
}

/// Invoked by a sled agent to publish an updated runtime state for an
/// Instance.
pub(crate) async fn notify_instance_updated(
datastore: &DataStore,
resolver: &internal_dns::resolver::Resolver,
opctx_alloc: &OpContext,
opctx: &OpContext,
log: &slog::Logger,
instance_id: &Uuid,
new_runtime_state: &nexus::SledInstanceState,
) -> Result<(), Error> {
let propolis_id = new_runtime_state.propolis_id;

info!(log, "received new runtime state from sled agent";
"instance_id" => %instance_id,
"instance_state" => ?new_runtime_state.instance_state,
"propolis_id" => %propolis_id,
"vmm_state" => ?new_runtime_state.vmm_state);

// Grab the current state of the instance in the DB to reason about
// whether this update is stale or not.
let (.., authz_instance, db_instance) = LookupPath::new(&opctx, &datastore)
.instance_id(*instance_id)
.fetch()
.await?;

// Update OPTE and Dendrite if the instance's active sled assignment
// changed or a migration was retired. If these actions fail, sled agent
// is expected to retry this update.
//
// This configuration must be updated before updating any state in CRDB
// so that, if the instance was migrating or has shut down, it will not
// appear to be able to migrate or start again until the appropriate
// networking state has been written. Without this interlock, another
// thread or another Nexus can race with this routine to write
// conflicting configuration.
//
// In the future, this should be replaced by a call to trigger a
// networking state update RPW.
super::instance_network::ensure_updated_instance_network_config(
datastore,
log,
resolver,
opctx,
opctx_alloc,
&authz_instance,
db_instance.runtime(),
&new_runtime_state.instance_state,
)
.await?;

// If the supplied instance state indicates that the instance no longer
// has an active VMM, attempt to delete the virtual provisioning record,
// and the assignment of the Propolis metric producer to an oximeter
// collector.
//
// As with updating networking state, this must be done before
// committing the new runtime state to the database: once the DB is
// written, a new start saga can arrive and start the instance, which
// will try to create its own virtual provisioning charges, which will
// race with this operation.
if new_runtime_state.instance_state.propolis_id.is_none() {
datastore
.virtual_provisioning_collection_delete_instance(
opctx,
*instance_id,
db_instance.project_id,
i64::from(db_instance.ncpus.0 .0),
db_instance.memory,
(&new_runtime_state.instance_state.gen).into(),
)
.await?;

// TODO-correctness: The `notify_instance_updated` method can run
// concurrently with itself in some situations, such as where a
// sled-agent attempts to update Nexus about a stopped instance;
// that times out; and it makes another request to a different
// Nexus. The call to `unassign_producer` is racy in those
// situations, and we may end up with instances with no metrics.
//
// This unfortunate case should be handled as part of
// instance-lifecycle improvements, notably using a reliable
// persistent workflow to correctly update the oximeter assignment as
// an instance's state changes.
//
// Tracked in https://github.com/oxidecomputer/omicron/issues/3742.
super::oximeter::unassign_producer(datastore, log, opctx, instance_id)
.await?;
}

// Write the new instance and VMM states back to CRDB. This needs to be
// done before trying to clean up the VMM, since the datastore will only
// allow a VMM to be marked as deleted if it is already in a terminal
// state.
let result = datastore
.instance_and_vmm_update_runtime(
instance_id,
&db::model::InstanceRuntimeState::from(
new_runtime_state.instance_state.clone(),
),
&propolis_id,
&db::model::VmmRuntimeState::from(
new_runtime_state.vmm_state.clone(),
),
)
.await;

// If the VMM is now in a terminal state, make sure its resources get
// cleaned up.
//
// For idempotency, only check to see if the update was successfully
// processed and ignore whether the VMM record was actually updated.
// This is required to handle the case where this routine is called
// once, writes the terminal VMM state, fails before all per-VMM
// resources are released, returns a retriable error, and is retried:
// the per-VMM resources still need to be cleaned up, but the DB update
// will return Ok(_, false) because the database was already updated.
//
// Unlike the pre-update cases, it is legal to do this cleanup *after*
// committing state to the database, because a terminated VMM cannot be
// reused (restarting or migrating its former instance will use new VMM
// IDs).
if result.is_ok() {
let propolis_terminated = matches!(
new_runtime_state.vmm_state.state,
InstanceState::Destroyed | InstanceState::Failed
);

if propolis_terminated {
info!(log, "vmm is terminated, cleaning up resources";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);

datastore.sled_reservation_delete(opctx, propolis_id).await?;

if !datastore.vmm_mark_deleted(opctx, &propolis_id).await? {
warn!(log, "failed to mark vmm record as deleted";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"vmm_state" => ?new_runtime_state.vmm_state);
}
}
}

match result {
Ok((instance_updated, vmm_updated)) => {
info!(log, "instance and vmm updated by sled agent";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"instance_updated" => instance_updated,
"vmm_updated" => vmm_updated);
Ok(())
}

// The update command should swallow object-not-found errors and
// return them back as failures to update, so this error case is
// unexpected. There's no work to do if this occurs, however.
Err(Error::ObjectNotFound { .. }) => {
error!(log, "instance/vmm update unexpectedly returned \
an object not found error";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id);
Ok(())
}

// If the datastore is unavailable, propagate that to the caller.
// TODO-robustness Really this should be any _transient_ error. How
// can we distinguish? Maybe datastore should emit something
// different from Error with an Into<Error>.
Err(error) => {
warn!(log, "failed to update instance from sled agent";
"instance_id" => %instance_id,
"propolis_id" => %propolis_id,
"error" => ?error);
Err(error)
}
}
}

#[cfg(test)]
mod tests {
use super::super::Nexus;