diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 5c861c56bf..236b9b5023 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@0fc560009ad92371154ca652dcf2620d19331eee # v2 + uses: taiki-e/install-action@7491b900536dd0dae2e47ce7c17f140e46328dc4 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 22fe1894cf..09ae82b5d9 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -996,6 +996,59 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { eprintln!(" unexpected return value from task: {:?}", val) } }; + } else if name == "abandoned_vmm_reaper" { + #[derive(Deserialize)] + struct TaskSuccess { + /// total number of abandoned VMMs found + found: usize, + + /// number of abandoned VMM records that were deleted + vmms_deleted: usize, + + /// number of abandoned VMM records that were already deleted when + /// we tried to delete them. + vmms_already_deleted: usize, + + /// sled resource reservations that were released + sled_reservations_deleted: usize, + + /// number of errors that occurred during the activation + error_count: usize, + + /// the last error that occurred during execution. + error: Option, + } + match serde_json::from_value::(details.clone()) { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + Ok(TaskSuccess { + found, + vmms_deleted, + vmms_already_deleted, + sled_reservations_deleted, + error_count, + error, + }) => { + if let Some(error) = error { + println!(" task did not complete successfully!"); + println!(" total errors: {error_count}"); + println!(" most recent error: {error}"); + } + + println!(" total abandoned VMMs found: {found}"); + println!(" VMM records deleted: {vmms_deleted}"); + println!( + " VMM records already deleted by another Nexus: {}", + vmms_already_deleted, + ); + println!( + " sled resource reservations deleted: {}", + sled_reservations_deleted, + ); + } + }; } else { println!( "warning: unknown background task: {:?} \ diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index d187c47d18..ccb824cda4 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -25,6 +25,11 @@ EXECUTING COMMAND: omdb ["nexus", "--nexus-internal-url", "http://127.0.0.1:REDA termination: Exited(0) --------------------------------------------- stdout: +task: "abandoned_vmm_reaper" + deletes sled reservations for VMMs that have been abandoned by their + instances + + task: "bfd_manager" Manages bidirectional fowarding detection (BFD) configuration on rack switches @@ -140,6 +145,11 @@ EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] termination: Exited(0) --------------------------------------------- stdout: +task: "abandoned_vmm_reaper" + deletes sled reservations for VMMs that have been abandoned by their + instances + + task: "bfd_manager" Manages bidirectional fowarding detection (BFD) configuration on rack switches @@ -242,6 +252,11 @@ EXECUTING COMMAND: omdb ["--dns-server", "[::1]:REDACTED_PORT", "nexus", "backgr termination: Exited(0) --------------------------------------------- stdout: +task: "abandoned_vmm_reaper" + deletes sled reservations for VMMs that have been abandoned by their + instances + + task: 
"bfd_manager" Manages bidirectional fowarding detection (BFD) configuration on rack switches diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 8f7dbb4c1b..07ebeb10bf 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -202,6 +202,11 @@ EXECUTING COMMAND: omdb ["nexus", "background-tasks", "doc"] termination: Exited(0) --------------------------------------------- stdout: +task: "abandoned_vmm_reaper" + deletes sled reservations for VMMs that have been abandoned by their + instances + + task: "bfd_manager" Manages bidirectional fowarding detection (BFD) configuration on rack switches @@ -380,6 +385,16 @@ task: "blueprint_executor" started at (s ago) and ran for ms last completion reported error: no blueprint +task: "abandoned_vmm_reaper" + configured period: every 1m + currently executing: no + last completed activation: , triggered by an explicit signal + started at (s ago) and ran for ms + total abandoned VMMs found: 0 + VMM records deleted: 0 + VMM records already deleted by another Nexus: 0 + sled resource reservations deleted: 0 + task: "bfd_manager" configured period: every 30s currently executing: no diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 08517026ef..321064df49 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -381,6 +381,8 @@ pub struct BackgroundTaskConfig { pub service_firewall_propagation: ServiceFirewallPropagationConfig, /// configuration for v2p mapping propagation task pub v2p_mapping_propagation: V2PMappingPropagationConfig, + /// configuration for abandoned VMM reaper task + pub abandoned_vmm_reaper: AbandonedVmmReaperConfig, } #[serde_as] @@ -549,6 +551,14 @@ pub struct V2PMappingPropagationConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct AbandonedVmmReaperConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -788,6 +798,7 @@ mod test { instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 + abandoned_vmm_reaper.period_secs = 60 [default_region_allocation_strategy] type = "random" seed = 0 @@ -926,6 +937,9 @@ mod test { v2p_mapping_propagation: V2PMappingPropagationConfig { period_secs: Duration::from_secs(30) }, + abandoned_vmm_reaper: AbandonedVmmReaperConfig { + period_secs: Duration::from_secs(60), + } }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -995,6 +1009,7 @@ mod test { instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 + abandoned_vmm_reaper.period_secs = 60 [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs index a837d1289b..b8fb47de26 100644 --- a/nexus/db-queries/src/db/datastore/vmm.rs +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -9,8 +9,10 @@ use crate::authz; use crate::context::OpContext; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; +use crate::db::model::InstanceState as DbInstanceState; use crate::db::model::Vmm; use crate::db::model::VmmRuntimeState; +use 
crate::db::pagination::paginated; use crate::db::schema::vmm::dsl; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; @@ -18,7 +20,10 @@ use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; +use omicron_common::api::external::InstanceState as ApiInstanceState; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; @@ -50,9 +55,6 @@ impl DataStore { opctx: &OpContext, vmm_id: &Uuid, ) -> UpdateResult { - use crate::db::model::InstanceState as DbInstanceState; - use omicron_common::api::external::InstanceState as ApiInstanceState; - let valid_states = vec![ DbInstanceState::new(ApiInstanceState::Destroyed), DbInstanceState::new(ApiInstanceState::Failed), @@ -61,9 +63,15 @@ impl DataStore { let updated = diesel::update(dsl::vmm) .filter(dsl::id.eq(*vmm_id)) .filter(dsl::state.eq_any(valid_states)) + .filter(dsl::time_deleted.is_null()) .set(dsl::time_deleted.eq(Utc::now())) - .execute_async(&*self.pool_connection_authorized(opctx).await?) + .check_if_exists::(*vmm_id) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) .await + .map(|r| match r.status { + UpdateStatus::Updated => true, + UpdateStatus::NotUpdatedButExists => false, + }) .map_err(|e| { public_error_from_diesel( e, @@ -74,7 +82,7 @@ impl DataStore { ) })?; - Ok(updated != 0) + Ok(updated) } pub async fn vmm_fetch( @@ -164,4 +172,63 @@ impl DataStore { Ok(vmm) } + + /// Lists VMMs which have been abandoned by their instances after a + /// migration and are in need of cleanup. + /// + /// A VMM is considered "abandoned" if (and only if): + /// + /// - It is in the `Destroyed` state. + /// - It is not currently running an instance, and it is also not the + /// migration target of any instance (i.e. it is not pointed to by + /// any instance record's `active_propolis_id` and `target_propolis_id` + /// fields). + /// - It has not been deleted yet. + pub async fn vmm_list_abandoned( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use crate::db::schema::instance::dsl as instance_dsl; + let destroyed = DbInstanceState::new(ApiInstanceState::Destroyed); + paginated(dsl::vmm, dsl::id, pagparams) + // In order to be considered "abandoned", a VMM must be: + // - in the `Destroyed` state + .filter(dsl::state.eq(destroyed)) + // - not deleted yet + .filter(dsl::time_deleted.is_null()) + // - not pointed to by any instance's `active_propolis_id` or + // `target_propolis_id`. + // + .left_join( + // Left join with the `instance` table on the VMM's instance ID, so + // that we can check if the instance pointed to by this VMM (if + // any exists) has this VMM pointed to by its + // `active_propolis_id` or `target_propolis_id` fields. + instance_dsl::instance + .on(instance_dsl::id.eq(dsl::instance_id)), + ) + .filter( + dsl::id + .nullable() + .ne(instance_dsl::active_propolis_id) + // In SQL, *all* comparisons with NULL are `false`, even `!= + // NULL`, so we have to explicitly check for nulls here, or + // else VMMs whose instances have no `active_propolis_id` + // will not be considered abandoned (incorrectly). 
+ .or(instance_dsl::active_propolis_id.is_null()), + ) + .filter( + dsl::id + .nullable() + .ne(instance_dsl::target_propolis_id) + // As above, we must add this clause because SQL nulls have + // the most irritating behavior possible. + .or(instance_dsl::target_propolis_id.is_null()), + ) + .select(Vmm::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index cba2edb7e6..d90c240e8e 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -117,6 +117,7 @@ region_replacement.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 +abandoned_vmm_reaper.period_secs = 60 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. diff --git a/nexus/src/app/background/abandoned_vmm_reaper.rs b/nexus/src/app/background/abandoned_vmm_reaper.rs new file mode 100644 index 0000000000..b24c543575 --- /dev/null +++ b/nexus/src/app/background/abandoned_vmm_reaper.rs @@ -0,0 +1,467 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Ensures abandoned VMMs are fully destroyed. +//! +//! A VMM is considered "abandoned" if (and only if): +//! +//! - It is in the `Destroyed` state. +//! - It is not currently running an instance, and it is also not the +//! migration target of any instance (i.e. it is not pointed to by +//! any instance record's `active_propolis_id` and `target_propolis_id` +//! fields). +//! - It has not been deleted yet. +//! +//! VMMs are abandoned when the instance they are responsible for migrates. +//! Should the migration succeed, the previously occupied VMM process is now +//! abandoned. If a migration is attempted but fails, the *target* VMM is now +//! abandoned, as the instance remains on the source VMM. +//! +//! Such VMMs may be deleted fairly simply: any sled resources reserved for the +//! VMM process can be deallocated, and the VMM record in the database is then +//! marked as deleted. Note that reaping abandoned VMMs does not require +//! deallocating virtual provisioning resources, NAT entries, and other such +//! resources which are owned by the *instance*, rather than the VMM process; +//! this task is only responsible for cleaning up VMMs left behind by an +//! instance that has moved to *another* VMM process. The instance itself +//! remains alive and continues to own its virtual provisioning resources. +//! +//! Cleanup of instance resources when an instance's *active* VMM is destroyed +//! is handled elsewhere, by `notify_instance_updated` and (eventually) the +//! `instance-update` saga. + +use super::common::BackgroundTask; +use anyhow::Context; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::Vmm; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::pagination::Paginator; +use nexus_db_queries::db::DataStore; +use std::num::NonZeroU32; +use std::sync::Arc; + +/// Background task that searches for abandoned VMM records and deletes them. 
+pub struct AbandonedVmmReaper { + datastore: Arc, +} + +#[derive(Debug, Default)] +struct ActivationResults { + found: usize, + sled_reservations_deleted: usize, + vmms_deleted: usize, + vmms_already_deleted: usize, + error_count: usize, +} + +const MAX_BATCH: NonZeroU32 = unsafe { + // Safety: last time I checked, 100 was greater than zero. + NonZeroU32::new_unchecked(100) +}; + +impl AbandonedVmmReaper { + pub fn new(datastore: Arc) -> Self { + Self { datastore } + } + + /// List abandoned VMMs and clean up all of their database records. + async fn reap_all( + &mut self, + results: &mut ActivationResults, + opctx: &OpContext, + ) -> Result<(), anyhow::Error> { + slog::info!(opctx.log, "Abandoned VMM reaper running"); + + let mut paginator = Paginator::new(MAX_BATCH); + let mut last_err = Ok(()); + while let Some(p) = paginator.next() { + let vmms = self + .datastore + .vmm_list_abandoned(opctx, &p.current_pagparams()) + .await + .context("failed to list abandoned VMMs")?; + paginator = p.found_batch(&vmms, &|vmm| vmm.id); + self.reap_batch(results, &mut last_err, opctx, &vmms).await; + } + + last_err + } + + /// Clean up a batch of abandoned VMMs. + /// + /// This is separated out from `reap_all` to facilitate testing situations + /// where we race with another Nexus instance to delete an abandoned VMM. In + /// order to deterministically simulate such cases, we have to perform the + /// query to list abandoned VMMs, ensure that the VMM record is deleted, and + /// *then* perform the cleanup with the stale list of abandoned VMMs, rather + /// than doing it all in one go. Thus, this is factored out. + async fn reap_batch( + &mut self, + results: &mut ActivationResults, + last_err: &mut Result<(), anyhow::Error>, + opctx: &OpContext, + vmms: &[Vmm], + ) { + results.found += vmms.len(); + slog::debug!(opctx.log, "Found abandoned VMMs"; "count" => vmms.len()); + + for vmm in vmms { + let vmm_id = vmm.id; + slog::trace!(opctx.log, "Deleting abandoned VMM"; "vmm" => %vmm_id); + // Attempt to remove the abandoned VMM's sled resource reservation. + match self.datastore.sled_reservation_delete(opctx, vmm_id).await { + Ok(_) => { + slog::trace!( + opctx.log, + "Deleted abandoned VMM's sled reservation"; + "vmm" => %vmm_id, + ); + results.sled_reservations_deleted += 1; + } + Err(e) => { + slog::warn!( + opctx.log, + "Failed to delete sled reservation for abandoned VMM"; + "vmm" => %vmm_id, + "error" => %e, + ); + results.error_count += 1; + *last_err = Err(e).with_context(|| { + format!( + "failed to delete sled reservation for VMM {vmm_id}" + ) + }); + } + } + + // Now, attempt to mark the VMM record as deleted. 
+ match self.datastore.vmm_mark_deleted(opctx, &vmm_id).await { + Ok(true) => { + slog::trace!( + opctx.log, + "Deleted abandoned VMM"; + "vmm" => %vmm_id, + ); + results.vmms_deleted += 1; + } + Ok(false) => { + slog::trace!( + opctx.log, + "Abandoned VMM was already deleted"; + "vmm" => %vmm_id, + ); + results.vmms_already_deleted += 1; + } + Err(e) => { + slog::warn!( + opctx.log, + "Failed to mark abandoned VMM as deleted"; + "vmm" => %vmm_id, + "error" => %e, + ); + results.error_count += 1; + *last_err = Err(e).with_context(|| { + format!("failed to mark VMM {vmm_id} as deleted") + }); + } + } + } + } +} + +impl BackgroundTask for AbandonedVmmReaper { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async move { + let mut results = ActivationResults::default(); + let error = match self.reap_all(&mut results, opctx).await { + Ok(_) => { + slog::info!(opctx.log, "Abandoned VMMs reaped"; + "found" => results.found, + "sled_reservations_deleted" => results.sled_reservations_deleted, + "vmms_deleted" => results.vmms_deleted, + "vmms_already_deleted" => results.vmms_already_deleted, + ); + None + } + Err(err) => { + slog::error!(opctx.log, "Abandoned VMM reaper activation failed"; + "error" => %err, + "found" => results.found, + "sled_reservations_deleted" => results.sled_reservations_deleted, + "vmms_deleted" => results.vmms_deleted, + "vmms_already_deleted" => results.vmms_already_deleted, + ); + Some(err.to_string()) + } + }; + serde_json::json!({ + "found": results.found, + "vmms_deleted": results.vmms_deleted, + "vmms_already_deleted": results.vmms_already_deleted, + "sled_reservations_deleted": results.sled_reservations_deleted, + "error_count": results.error_count, + "error": error, + }) + } + .boxed() + } +} + +#[cfg(test)] +mod tests { + + use super::*; + use chrono::Utc; + use nexus_db_model::ByteCount; + use nexus_db_model::Generation; + use nexus_db_model::InstanceState; + use nexus_db_model::Resources; + use nexus_db_model::SledResource; + use nexus_db_model::SledResourceKind; + use nexus_db_model::Vmm; + use nexus_db_model::VmmRuntimeState; + use nexus_test_utils::resource_helpers; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::InstanceState as ApiInstanceState; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "carcosa"; + + struct TestFixture { + destroyed_vmm_id: Uuid, + } + + impl TestFixture { + async fn setup( + client: &dropshot::test_util::ClientTestContext, + datastore: &Arc, + opctx: &OpContext, + ) -> Self { + resource_helpers::create_default_ip_pool(&client).await; + + let _project = + resource_helpers::create_project(client, PROJECT_NAME).await; + let instance = resource_helpers::create_instance( + client, + PROJECT_NAME, + "cassilda", + ) + .await; + + let destroyed_vmm_id = Uuid::new_v4(); + datastore + .vmm_insert( + &opctx, + dbg!(Vmm { + id: destroyed_vmm_id, + time_created: Utc::now(), + time_deleted: None, + instance_id: instance.identity.id, + sled_id: Uuid::new_v4(), + propolis_ip: "::1".parse().unwrap(), + propolis_port: 12345.into(), + runtime: VmmRuntimeState { + state: InstanceState::new( + ApiInstanceState::Destroyed + ), + time_state_updated: Utc::now(), + gen: Generation::new(), + } + }), + ) + .await + .expect("destroyed vmm record should be created successfully"); + let resources = Resources::new( + 1, + // Just require the bare non-zero amount of RAM. 
+ ByteCount::try_from(1024).unwrap(), + ByteCount::try_from(1024).unwrap(), + ); + let constraints = + nexus_db_model::SledReservationConstraints::none(); + dbg!(datastore + .sled_reservation_create( + &opctx, + destroyed_vmm_id, + SledResourceKind::Instance, + resources.clone(), + constraints, + ) + .await + .expect("sled reservation should be created successfully")); + Self { destroyed_vmm_id } + } + + async fn assert_reaped(&self, datastore: &DataStore) { + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, + SelectableHelper, + }; + use nexus_db_queries::db::schema::sled_resource::dsl as sled_resource_dsl; + use nexus_db_queries::db::schema::vmm::dsl as vmm_dsl; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + let fetched_vmm = vmm_dsl::vmm + .filter(vmm_dsl::id.eq(self.destroyed_vmm_id)) + .filter(vmm_dsl::time_deleted.is_null()) + .select(Vmm::as_select()) + .first_async::(&*conn) + .await + .optional() + .expect("VMM query should succeed"); + assert!( + dbg!(fetched_vmm).is_none(), + "VMM record should have been deleted" + ); + + let fetched_sled_resource = sled_resource_dsl::sled_resource + .filter(sled_resource_dsl::id.eq(self.destroyed_vmm_id)) + .select(SledResource::as_select()) + .first_async::(&*conn) + .await + .optional() + .expect("sled resource query should succeed"); + assert!( + dbg!(fetched_sled_resource).is_none(), + "sled resource record should have been deleted" + ); + } + } + + #[nexus_test(server = crate::Server)] + async fn test_abandoned_vmms_are_reaped( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let fixture = + TestFixture::setup(&cptestctx.external_client, datastore, &opctx) + .await; + + let mut task = AbandonedVmmReaper::new(datastore.clone()); + + let mut results = ActivationResults::default(); + dbg!(task.reap_all(&mut results, &opctx,).await) + .expect("activation completes successfully"); + dbg!(&results); + + assert_eq!(results.vmms_deleted, 1); + assert_eq!(results.sled_reservations_deleted, 1); + assert_eq!(results.vmms_already_deleted, 0); + assert_eq!(results.error_count, 0); + fixture.assert_reaped(datastore).await; + } + + #[nexus_test(server = crate::Server)] + async fn vmm_already_deleted(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let fixture = + TestFixture::setup(&cptestctx.external_client, datastore, &opctx) + .await; + + // For this test, we separate the database query run by the background + // task to list abandoned VMMs from the actual cleanup of those VMMs, in + // order to simulate a condition where the VMM record was deleted + // between when the listing query was run and when the bg task attempted + // to delete the VMM record. 
+ let paginator = Paginator::new(MAX_BATCH); + let p = paginator.next().unwrap(); + let abandoned_vmms = datastore + .vmm_list_abandoned(&opctx, &p.current_pagparams()) + .await + .expect("must list abandoned vmms"); + + assert!(!abandoned_vmms.is_empty()); + + datastore + .vmm_mark_deleted(&opctx, &fixture.destroyed_vmm_id) + .await + .expect("simulate another nexus marking the VMM deleted"); + + let mut results = ActivationResults::default(); + let mut last_err = Ok(()); + let mut task = AbandonedVmmReaper::new(datastore.clone()); + task.reap_batch(&mut results, &mut last_err, &opctx, &abandoned_vmms) + .await; + dbg!(last_err).expect("should not have errored"); + dbg!(&results); + + assert_eq!(results.found, 1); + assert_eq!(results.vmms_deleted, 0); + assert_eq!(results.sled_reservations_deleted, 1); + assert_eq!(results.vmms_already_deleted, 1); + assert_eq!(results.error_count, 0); + + fixture.assert_reaped(datastore).await + } + + #[nexus_test(server = crate::Server)] + async fn sled_resource_already_deleted( + cptestctx: &ControlPlaneTestContext, + ) { + let nexus = &cptestctx.server.server_context().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + let fixture = + TestFixture::setup(&cptestctx.external_client, datastore, &opctx) + .await; + + // For this test, we separate the database query run by the background + // task to list abandoned VMMs from the actual cleanup of those VMMs, in + // order to simulate a condition where the sled reservation record was + // deleted between when the listing query was run and when the bg task + // attempted to delete the sled reservation.. + let paginator = Paginator::new(MAX_BATCH); + let p = paginator.next().unwrap(); + let abandoned_vmms = datastore + .vmm_list_abandoned(&opctx, &p.current_pagparams()) + .await + .expect("must list abandoned vmms"); + + assert!(!abandoned_vmms.is_empty()); + + datastore + .sled_reservation_delete(&opctx, fixture.destroyed_vmm_id) + .await + .expect( + "simulate another nexus marking the sled reservation deleted", + ); + + let mut results = ActivationResults::default(); + let mut last_err = Ok(()); + let mut task = AbandonedVmmReaper::new(datastore.clone()); + task.reap_batch(&mut results, &mut last_err, &opctx, &abandoned_vmms) + .await; + dbg!(last_err).expect("should not have errored"); + dbg!(&results); + + assert_eq!(results.found, 1); + assert_eq!(results.vmms_deleted, 1); + assert_eq!(results.sled_reservations_deleted, 1); + assert_eq!(results.vmms_already_deleted, 0); + assert_eq!(results.error_count, 0); + + fixture.assert_reaped(datastore).await + } +} diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index f7b7291c59..a87c53860d 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -4,6 +4,7 @@ //! Background task initialization +use super::abandoned_vmm_reaper; use super::bfd; use super::blueprint_execution; use super::blueprint_load; @@ -104,6 +105,10 @@ pub struct BackgroundTasks { /// task handle for propagation of VPC firewall rules for Omicron services /// with external network connectivity, pub task_service_firewall_propagation: common::TaskHandle, + + /// task handle for deletion of database records for VMMs abandoned by their + /// instances. 
+ pub task_abandoned_vmm_reaper: common::TaskHandle, } impl BackgroundTasks { @@ -397,12 +402,26 @@ impl BackgroundTasks { ), config.service_firewall_propagation.period_secs, Box::new(service_firewall_rules::ServiceRulePropagator::new( - datastore, + datastore.clone(), )), opctx.child(BTreeMap::new()), vec![], ); + // Background task: abandoned VMM reaping + let task_abandoned_vmm_reaper = driver.register( + String::from("abandoned_vmm_reaper"), + String::from( + "deletes sled reservations for VMMs that have been abandoned by their instances", + ), + config.abandoned_vmm_reaper.period_secs, + Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new( + datastore, + )), + opctx.child(BTreeMap::new()), + vec![], + ); + BackgroundTasks { driver, task_internal_dns_config, @@ -425,6 +444,7 @@ impl BackgroundTasks { task_region_replacement, task_instance_watcher, task_service_firewall_propagation, + task_abandoned_vmm_reaper, } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 38bde3c048..6de9e6f4d3 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -4,6 +4,7 @@ //! Background tasks +mod abandoned_vmm_reaper; mod bfd; mod blueprint_execution; mod blueprint_load; diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index 49a61cfa36..861d78e20c 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -112,6 +112,7 @@ region_replacement.period_secs = 30 instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 +abandoned_vmm_reaper.period_secs = 60 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 0ed7a0562b..3827cbb38c 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -58,6 +58,7 @@ region_replacement.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 +abandoned_vmm_reaper.period_secs = 60 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index c57d2d3ba2..ee04f88e59 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -58,6 +58,7 @@ region_replacement.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 +abandoned_vmm_reaper.period_secs = 60 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds. diff --git a/tools/permslip_staging b/tools/permslip_staging index 20a362ade0..d6ca94b280 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,4 +1,4 @@ 03df89d44ad8b653abbeb7fbb83821869f008733e9da946457e72a13cb11d6cc manifest-gimlet-v1.0.19.toml b973cc9feb20f7bba447e7f5291c4070387fa9992deab81301f67f0a3844cd0c manifest-oxide-rot-1-v1.0.11.toml -aae829e02d79ec0fe19019c783b6426c6fcc1fe4427aea70b65afc2884f53db8 manifest-psc-v1.0.17.toml +f5118f97d92cd0d56566114ca50ed149b7d5e71c452cabb33367651d86975471 manifest-psc-v1.0.18.toml ae00003c288ec4f520167c68de4999e1dfa15b63afe2f89e5ed1cfb8d707ebb9 manifest-sidecar-v1.0.19.toml
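The pieces below are standalone illustrations of mechanisms this patch relies on; none of them are part of the diff itself. First, the JSON contract between the task and omdb: `AbandonedVmmReaper::activate` emits `found`, `vmms_deleted`, `vmms_already_deleted`, `sled_reservations_deleted`, `error_count`, and `error`, and the new omdb arm deserializes that object into a local `TaskSuccess` struct before printing it. A minimal sketch of that round trip; the `Option<String>` type for `error` is an assumption here:

```rust
use serde::Deserialize;

/// Stand-in for omdb's `TaskSuccess`; `Option<String>` for `error` is an
/// assumption about the concrete type.
#[derive(Debug, Deserialize)]
struct TaskSuccess {
    found: usize,
    vmms_deleted: usize,
    vmms_already_deleted: usize,
    sled_reservations_deleted: usize,
    error_count: usize,
    error: Option<String>,
}

fn main() {
    // The same object shape `AbandonedVmmReaper::activate` builds with
    // `serde_json::json!` at the end of every activation.
    let details = serde_json::json!({
        "found": 1,
        "vmms_deleted": 1,
        "vmms_already_deleted": 0,
        "sled_reservations_deleted": 1,
        "error_count": 0,
        "error": null,
    });

    let parsed: TaskSuccess =
        serde_json::from_value(details).expect("details should deserialize");
    assert_eq!(parsed.found, 1);
    assert_eq!(parsed.vmms_deleted, 1);
    assert!(parsed.error.is_none());
    println!("{parsed:?}");
}
```

When `error` is non-null, the omdb arm prints the "task did not complete successfully!" lines (total errors, most recent error) before the counters, matching the branch in `print_task_details` above.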
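Every config file in the patch gains the same `abandoned_vmm_reaper.period_secs = 60` line, and `nexus_config.rs` maps it onto `AbandonedVmmReaperConfig`, whose `period_secs` field goes through a `serde_with` `DurationSeconds` adapter so the integer in the TOML surfaces as a `Duration`. A sketch of that mapping, assuming the adapter is `DurationSeconds<u64>`; the `Tasks` wrapper is hypothetical and only stands in for the table of background task settings in the real config:

```rust
use std::time::Duration;

use serde::Deserialize;
use serde_with::{serde_as, DurationSeconds};

/// Illustration-only stand-in for `AbandonedVmmReaperConfig`: the TOML value
/// is a plain integer, but the field surfaces as a `Duration`.
#[serde_as]
#[derive(Debug, Deserialize)]
struct AbandonedVmmReaperConfig {
    #[serde_as(as = "DurationSeconds<u64>")]
    period_secs: Duration,
}

/// Hypothetical wrapper playing the role of the background-task section of
/// the real Nexus config.
#[derive(Debug, Deserialize)]
struct Tasks {
    abandoned_vmm_reaper: AbandonedVmmReaperConfig,
}

fn main() {
    // The same line added to nexus/examples/config.toml, the test config,
    // and both SMF config partials.
    let config: Tasks =
        toml::from_str("abandoned_vmm_reaper.period_secs = 60").unwrap();
    assert_eq!(
        config.abandoned_vmm_reaper.period_secs,
        Duration::from_secs(60)
    );
}
```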
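In `vmm_mark_deleted`, replacing the bare `execute_async` row count with `check_if_exists` plus `execute_and_check` (together with the new `time_deleted.is_null()` filter) is what lets callers tell "this call soft-deleted the VMM" apart from "it was already gone", which the reaper reports as `vmms_deleted` versus `vmms_already_deleted`. A toy model of that distinction, with the database row reduced to a mutable `Option`; only the names mirror the diff:

```rust
/// Mirrors `UpdateStatus` from `update_and_check`: `Updated` maps to the
/// `Ok(true)` counted as `vmms_deleted`, `NotUpdatedButExists` to the
/// `Ok(false)` counted as `vmms_already_deleted`.
#[derive(Debug, PartialEq)]
enum UpdateStatus {
    Updated,
    NotUpdatedButExists,
}

/// Toy soft-delete: the row is just a mutable `time_deleted` column.
fn vmm_mark_deleted(time_deleted: &mut Option<&'static str>) -> UpdateStatus {
    match time_deleted {
        // `.filter(dsl::time_deleted.is_null())`: only a live row is updated.
        None => {
            *time_deleted = Some("now()");
            UpdateStatus::Updated
        }
        // The row exists but was already soft-deleted, e.g. by another
        // Nexus racing on the same abandoned VMM.
        Some(_) => UpdateStatus::NotUpdatedButExists,
    }
}

fn main() {
    let mut row = None;
    assert_eq!(vmm_mark_deleted(&mut row), UpdateStatus::Updated);
    // Retrying (or a second Nexus) is a harmless no-op rather than an error,
    // which is exactly the case the `vmm_already_deleted` test exercises.
    assert_eq!(vmm_mark_deleted(&mut row), UpdateStatus::NotUpdatedButExists);
}
```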
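The longest comment in `vmm_list_abandoned` is about SQL NULL semantics: `id != NULL` never evaluates to true, so without the extra `.or(...is_null())` clauses, VMMs whose instances have no `active_propolis_id` or `target_propolis_id` would incorrectly escape the abandoned set. The same predicate modelled in plain Rust, with `Option` standing in for a nullable column and `u32` ids standing in for the UUIDs:

```rust
/// Models the filter from `vmm_list_abandoned`: a VMM stays in the candidate
/// set only if the instance's Propolis pointer is either a different id or
/// NULL.
fn not_pointed_at(vmm_id: u32, propolis_id: Option<u32>) -> bool {
    match propolis_id {
        // `.or(instance_dsl::active_propolis_id.is_null())`: in SQL,
        // `id != NULL` is not true, so the NULL case needs its own arm.
        None => true,
        // `dsl::id.nullable().ne(instance_dsl::active_propolis_id)`
        Some(p) => p != vmm_id,
    }
}

fn main() {
    // The instance has moved on to VMM 7, so VMM 3 is abandoned.
    assert!(not_pointed_at(3, Some(7)));
    // The instance still points at this VMM: not abandoned.
    assert!(!not_pointed_at(3, Some(3)));
    // The instance has no active VMM at all; without the `is_null()` arm
    // this comparison would be NULL (treated as false) and the abandoned
    // VMM would never be reaped.
    assert!(not_pointed_at(3, None));
}
```

The query applies this pattern twice, once against `active_propolis_id` and once against `target_propolis_id`, so a VMM is only listed when neither pointer refers to it.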
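`reap_all` pages through the abandoned set `MAX_BATCH` (100) rows at a time, handing each page to `reap_batch` and advancing the pagination marker via `found_batch`. A dependency-free sketch of the same loop over an in-memory id list; `list_page` is a made-up helper playing the role of `vmm_list_abandoned` behind the real `Paginator`:

```rust
const MAX_BATCH: usize = 100;

/// Made-up stand-in for one paginated `vmm_list_abandoned` call: return up
/// to MAX_BATCH ids strictly greater than the marker.
fn list_page(table: &[u32], after: Option<u32>) -> Vec<u32> {
    table
        .iter()
        .copied()
        .filter(|id| after.map_or(true, |marker| *id > marker))
        .take(MAX_BATCH)
        .collect()
}

fn main() {
    let table: Vec<u32> = (0..250).collect();
    let mut marker = None;
    let mut reaped = 0;
    loop {
        let batch = list_page(&table, marker);
        if batch.is_empty() {
            break;
        }
        reaped += batch.len();
        // Advance the marker to the last id seen, as `found_batch` does
        // with the id of the last VMM in the batch.
        marker = batch.last().copied();
        // A short page means there is nothing left to fetch.
        if batch.len() < MAX_BATCH {
            break;
        }
    }
    assert_eq!(reaped, 250);
}
```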
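Finally, `reap_batch` deliberately keeps going after a failure: every VMM gets both cleanup attempts, each failure increments `error_count`, and only the most recent error is retained for `activate` to surface (omdb then prints it as "total errors" / "most recent error"). A small sketch of that accumulate-and-continue pattern, with a fallible `cleanup` closure standing in for the two datastore calls:

```rust
#[derive(Debug, Default)]
struct ActivationResults {
    found: usize,
    vmms_deleted: usize,
    error_count: usize,
}

/// Process every item even when some fail, tracking an error count and the
/// most recent error, the way `reap_batch` threads `last_err` through.
fn reap_batch<F>(
    results: &mut ActivationResults,
    last_err: &mut Result<(), String>,
    vmms: &[u32],
    mut cleanup: F,
) where
    F: FnMut(u32) -> Result<(), String>,
{
    results.found += vmms.len();
    for &vmm_id in vmms {
        match cleanup(vmm_id) {
            Ok(()) => results.vmms_deleted += 1,
            Err(e) => {
                results.error_count += 1;
                *last_err = Err(format!("failed to delete VMM {vmm_id}: {e}"));
            }
        }
    }
}

fn main() {
    let mut results = ActivationResults::default();
    let mut last_err = Ok(());
    // VMM 2 fails to clean up; VMMs 1 and 3 are still processed.
    reap_batch(&mut results, &mut last_err, &[1, 2, 3], |id| {
        if id == 2 {
            Err("sled reservation delete failed".to_string())
        } else {
            Ok(())
        }
    });
    assert_eq!(results.found, 3);
    assert_eq!(results.vmms_deleted, 2);
    assert_eq!(results.error_count, 1);
    assert!(last_err.is_err());
}
```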