diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 61a05754c6..9189b6db7b 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -812,6 +812,11 @@ table! { } } +allow_tables_to_appear_in_same_query! { + virtual_provisioning_resource, + instance +} + table! { zpool (id) { id -> Uuid, diff --git a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs index 18ff58735e..83856e10c7 100644 --- a/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs +++ b/nexus/db-queries/src/db/datastore/virtual_provisioning_collection.rs @@ -272,7 +272,11 @@ impl DataStore { Ok(provisions) } - /// Transitively updates all CPU/RAM provisions from project -> fleet. + /// Transitively removes the CPU and memory charges for an instance from the + /// instance's project, silo, and fleet, provided that the instance's state + /// generation is less than `max_instance_gen`. This allows a caller who is + /// about to apply generation G to an instance to avoid deleting resources + /// if its update was superseded. pub async fn virtual_provisioning_collection_delete_instance( &self, opctx: &OpContext, @@ -280,10 +284,15 @@ impl DataStore { project_id: Uuid, cpus_diff: i64, ram_diff: ByteCount, + max_instance_gen: i64, ) -> Result<Vec<VirtualProvisioningCollection>, Error> { let provisions = VirtualProvisioningCollectionUpdate::new_delete_instance( - id, cpus_diff, ram_diff, project_id, + id, + max_instance_gen, + cpus_diff, + ram_diff, + project_id, ) .get_results_async(&*self.pool_connection_authorized(opctx).await?) 
.await diff --git a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs index b7271f3f49..0a383eb6f1 100644 --- a/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs +++ b/nexus/db-queries/src/db/queries/virtual_provisioning_collection_update.rs @@ -368,10 +368,12 @@ impl VirtualProvisioningCollectionUpdate { pub fn new_delete_instance( id: uuid::Uuid, + max_instance_gen: i64, cpus_diff: i64, ram_diff: ByteCount, project_id: uuid::Uuid, ) -> Self { + use crate::db::schema::instance::dsl as instance_dsl; use virtual_provisioning_collection::dsl as collection_dsl; use virtual_provisioning_resource::dsl as resource_dsl; @@ -379,9 +381,36 @@ impl VirtualProvisioningCollectionUpdate { // We should delete the record if it exists. DoUpdate::new_for_delete(id), // The query to actually delete the record. + // + // The filter condition here ensures that the provisioning record is + // only deleted if the corresponding instance has a generation + // number less than the supplied `max_instance_gen`. This allows a + // caller that is about to apply an instance update that will stop + // the instance and that bears generation G to avoid deleting + // resources if the instance generation was already advanced to or + // past G. + // + // If the relevant instance ID is not in the database, then some + // other operation must have ensured the instance was previously + // stopped (because that's the only way it could have been deleted), + // and that operation should have cleaned up the resources already, + // in which case there's nothing to do here. + // + // There is an additional "direct" filter on the target resource ID + // to avoid a full scan of the resource table. 
UnreferenceableSubquery( diesel::delete(resource_dsl::virtual_provisioning_resource) .filter(resource_dsl::id.eq(id)) + .filter( + resource_dsl::id.nullable().eq(instance_dsl::instance + .filter(instance_dsl::id.eq(id)) + .filter( + instance_dsl::state_generation + .lt(max_instance_gen), + ) + .select(instance_dsl::id) + .single_value()), + ) .returning(virtual_provisioning_resource::all_columns), ), // Within this project, silo, fleet... diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 8f4b1b320a..2b7ef6b8fb 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1313,20 +1313,15 @@ impl super::Nexus { ) .await?; - // If the supplied instance state is at least as new as what's currently - // in the database, and it indicates the instance has no active VMM, the - // instance has been stopped and should have its virtual provisioning - // charges released. + // If the supplied instance state indicates that the instance no longer + // has an active VMM, attempt to delete the virtual provisioning record // // As with updating networking state, this must be done before // committing the new runtime state to the database: once the DB is // written, a new start saga can arrive and start the instance, which // will try to create its own virtual provisioning charges, which will // race with this operation. 
- if new_runtime_state.instance_state.propolis_id.is_none() - && new_runtime_state.instance_state.gen - >= db_instance.runtime().gen.0 - { + if new_runtime_state.instance_state.propolis_id.is_none() { self.db_datastore .virtual_provisioning_collection_delete_instance( opctx, @@ -1334,6 +1329,7 @@ impl super::Nexus { db_instance.project_id, i64::from(db_instance.ncpus.0 .0), db_instance.memory, + (&new_runtime_state.instance_state.gen).into(), ) .await?; } diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 06c34fd473..76773d6369 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -42,11 +42,6 @@ declare_saga_actions! { + sis_alloc_propolis_ip } - ADD_VIRTUAL_RESOURCES -> "virtual_resources" { - + sis_account_virtual_resources - - sis_account_virtual_resources_undo - } - CREATE_VMM_RECORD -> "vmm_record" { + sis_create_vmm_record - sis_destroy_vmm_record @@ -57,6 +52,11 @@ declare_saga_actions! { - sis_move_to_starting_undo } + ADD_VIRTUAL_RESOURCES -> "virtual_resources" { + + sis_account_virtual_resources + - sis_account_virtual_resources_undo + } + // TODO(#3879) This can be replaced with an action that triggers the NAT RPW // once such an RPW is available. 
DPD_ENSURE -> "dpd_ensure" { @@ -101,9 +101,9 @@ impl NexusSaga for SagaInstanceStart { builder.append(alloc_server_action()); builder.append(alloc_propolis_ip_action()); - builder.append(add_virtual_resources_action()); builder.append(create_vmm_record_action()); builder.append(mark_as_starting_action()); + builder.append(add_virtual_resources_action()); builder.append(dpd_ensure_action()); builder.append(v2p_ensure_action()); builder.append(ensure_registered_action()); @@ -155,56 +155,6 @@ async fn sis_alloc_propolis_ip( allocate_sled_ipv6(&opctx, sagactx.user_data().datastore(), sled_uuid).await } -async fn sis_account_virtual_resources( - sagactx: NexusActionContext, -) -> Result<(), ActionError> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::<Params>()?; - let instance_id = params.db_instance.id(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - osagactx - .datastore() - .virtual_provisioning_collection_insert_instance( - &opctx, - instance_id, - params.db_instance.project_id, - i64::from(params.db_instance.ncpus.0 .0), - nexus_db_model::ByteCount(*params.db_instance.memory), - ) - .await - .map_err(ActionError::action_failed)?; - Ok(()) -} - -async fn sis_account_virtual_resources_undo( - sagactx: NexusActionContext, -) -> Result<(), anyhow::Error> { - let osagactx = sagactx.user_data(); - let params = sagactx.saga_params::<Params>()?; - let instance_id = params.db_instance.id(); - - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - &params.serialized_authn, - ); - osagactx - .datastore() - .virtual_provisioning_collection_delete_instance( - &opctx, - instance_id, - params.db_instance.project_id, - i64::from(params.db_instance.ncpus.0 .0), - nexus_db_model::ByteCount(*params.db_instance.memory), - ) - .await - .map_err(ActionError::action_failed)?; - Ok(()) -} - async fn sis_create_vmm_record( sagactx: NexusActionContext, ) -> Result<db::model::Vmm, ActionError> { @@ -361,6 +311,66 @@ async fn 
sis_move_to_starting_undo( Ok(()) } +async fn sis_account_virtual_resources( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::<Params>()?; + let instance_id = params.db_instance.id(); + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + &params.serialized_authn, + ); + + osagactx + .datastore() + .virtual_provisioning_collection_insert_instance( + &opctx, + instance_id, + params.db_instance.project_id, + i64::from(params.db_instance.ncpus.0 .0), + nexus_db_model::ByteCount(*params.db_instance.memory), + ) + .await + .map_err(ActionError::action_failed)?; + Ok(()) +} + +async fn sis_account_virtual_resources_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::<Params>()?; + let instance_id = params.db_instance.id(); + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + &params.serialized_authn, + ); + + let started_record = + sagactx.lookup::<db::model::Instance>("started_record")?; + + osagactx + .datastore() + .virtual_provisioning_collection_delete_instance( + &opctx, + instance_id, + params.db_instance.project_id, + i64::from(params.db_instance.ncpus.0 .0), + nexus_db_model::ByteCount(*params.db_instance.memory), + // Use the next instance generation number as the generation limit + // to ensure the provisioning counters are released. (The "mark as + // starting" undo step will "publish" this new state generation when + // it moves the instance back to Stopped.) + (&started_record.runtime().gen.next()).into(), + ) + .await + .map_err(ActionError::action_failed)?; + Ok(()) +} + async fn sis_dpd_ensure( sagactx: NexusActionContext, ) -> Result<(), ActionError> {