diff --git a/Cargo.lock b/Cargo.lock index 2bcc144406..ee0804559a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6609,7 +6609,7 @@ dependencies = [ "indoc 2.0.3", "itertools 0.11.0", "paste", - "strum 0.25.0", + "strum", "unicode-segmentation", "unicode-width", ] diff --git a/nexus/db-model/src/region_snapshot.rs b/nexus/db-model/src/region_snapshot.rs index 9addeb83e3..af1cf8b2b3 100644 --- a/nexus/db-model/src/region_snapshot.rs +++ b/nexus/db-model/src/region_snapshot.rs @@ -32,4 +32,7 @@ pub struct RegionSnapshot { // how many volumes reference this? pub volume_references: i64, + + // true if part of a volume's `resources_to_clean_up` already + pub deleting: bool, } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 3fde9ee715..befde1bf4a 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -856,6 +856,7 @@ table! { snapshot_id -> Uuid, snapshot_addr -> Text, volume_references -> Int8, + deleting -> Bool, } } @@ -1130,7 +1131,7 @@ table! { /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(4, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(5, 0, 0); allow_tables_to_appear_in_same_query!( system_update, diff --git a/nexus/db-queries/src/db/datastore/region_snapshot.rs b/nexus/db-queries/src/db/datastore/region_snapshot.rs index dab3a90bcb..7100820c23 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot.rs @@ -10,9 +10,11 @@ use crate::db::error::public_error_from_diesel_pool; use crate::db::error::ErrorHandler; use crate::db::model::RegionSnapshot; use async_bb8_diesel::AsyncRunQueryDsl; +use async_bb8_diesel::OptionalExtension; use diesel::prelude::*; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; +use omicron_common::api::external::LookupResult; use uuid::Uuid; impl DataStore { @@ -31,6 +33,25 @@ impl DataStore { .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) } + pub async fn region_snapshot_get( + &self, + dataset_id: Uuid, + region_id: Uuid, + snapshot_id: Uuid, + ) -> LookupResult> { + use db::schema::region_snapshot::dsl; + + dsl::region_snapshot + .filter(dsl::dataset_id.eq(dataset_id)) + .filter(dsl::region_id.eq(region_id)) + .filter(dsl::snapshot_id.eq(snapshot_id)) + .select(RegionSnapshot::as_select()) + .first_async::(self.pool()) + .await + .optional() + .map_err(|e| public_error_from_diesel_pool(e, ErrorHandler::Server)) + } + pub async fn region_snapshot_remove( &self, dataset_id: Uuid, diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index 901cf16f63..2551e8e76e 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -120,6 +120,7 @@ impl DataStore { .filter( rs_dsl::snapshot_addr.eq(read_only_target.clone()), ) + .filter(rs_dsl::deleting.eq(false)) .set( rs_dsl::volume_references .eq(rs_dsl::volume_references + 1), @@ -637,7 +638,9 @@ impl DataStore { } }; - // Decrease the number of uses for each referenced region snapshot. + // Decrease the number of uses for each non-deleted referenced + // region snapshot. + use db::schema::region_snapshot::dsl; diesel::update(dsl::region_snapshot) @@ -645,9 +648,34 @@ impl DataStore { dsl::snapshot_addr .eq_any(&crucible_targets.read_only_targets), ) + .filter(dsl::volume_references.gt(0)) + .filter(dsl::deleting.eq(false)) .set(dsl::volume_references.eq(dsl::volume_references - 1)) .execute(conn)?; + // Then, note anything that was set to zero from the above + // UPDATE, and then mark all those as deleted. + let snapshots_to_delete: Vec = + dsl::region_snapshot + .filter( + dsl::snapshot_addr + .eq_any(&crucible_targets.read_only_targets), + ) + .filter(dsl::volume_references.eq(0)) + .filter(dsl::deleting.eq(false)) + .select(RegionSnapshot::as_select()) + .load(conn)?; + + diesel::update(dsl::region_snapshot) + .filter( + dsl::snapshot_addr + .eq_any(&crucible_targets.read_only_targets), + ) + .filter(dsl::volume_references.eq(0)) + .filter(dsl::deleting.eq(false)) + .set(dsl::deleting.eq(true)) + .execute(conn)?; + // Return what results can be cleaned up let result = CrucibleResources::V1(CrucibleResourcesV1 { // The only use of a read-write region will be at the top level of a @@ -673,6 +701,7 @@ impl DataStore { .eq(region_dsl::id) .and(dsl::dataset_id.eq(dataset_dsl::id))), ) + .filter(dsl::deleting.eq(true)) .filter( dsl::volume_references .eq(0) @@ -707,12 +736,41 @@ impl DataStore { // delete a read-only downstairs running for a // snapshot that doesn't exist will return a 404, // causing the saga to error and unwind. + // + // XXX are any other filters required, except the + // three from snapshots_to_delete? .filter( dsl::snapshot_addr.eq_any( &crucible_targets.read_only_targets, ), ) - .filter(dsl::volume_references.eq(0)) + // XXX is there a better way to do this, rather than + // one filter per struct field? + .filter( + dsl::dataset_id.eq_any( + snapshots_to_delete + .iter() + .map(|x| x.dataset_id), + ), + ) + .filter(dsl::region_id.eq_any( + snapshots_to_delete.iter().map(|x| x.region_id), + )) + .filter( + dsl::snapshot_id.eq_any( + snapshots_to_delete + .iter() + .map(|x| x.snapshot_id), + ), + ) + .filter(dsl::deleting.eq(true)) + .filter( + dsl::volume_references + .eq(0) + // Despite the SQL specifying that this column is NOT NULL, + // this null check is required for this function to work! + .or(dsl::volume_references.is_null()), + ) .inner_join( dataset_dsl::dataset .on(dsl::dataset_id.eq(dataset_dsl::id)), @@ -960,7 +1018,7 @@ impl DataStore { #[derive(Default, Debug, Serialize, Deserialize)] pub struct CrucibleTargets { - read_only_targets: Vec, + pub read_only_targets: Vec, } // Serialize this enum into the `resources_to_clean_up` column to handle diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index d9a7d3f812..6b59daafa4 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1275,6 +1275,7 @@ async fn ssc_start_running_snapshot( snapshot_id, snapshot_addr, volume_references: 0, // to be filled later + deleting: false, }) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index d212175415..026479559f 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -1094,6 +1094,7 @@ async fn test_region_snapshot_create_idempotent( snapshot_addr: "[::]:12345".to_string(), volume_references: 1, + deleting: false, }; datastore.region_snapshot_create(region_snapshot.clone()).await.unwrap(); diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 2e62c498d2..df68ad2041 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -19,6 +19,7 @@ use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::views; +use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -1813,6 +1814,303 @@ async fn test_volume_checkout_updates_sparse_mid_multiple_gen( volume_match_gen(new_vol, vec![Some(8), None, Some(10)]); } +/// Test that the Crucible agent's port reuse does not confuse +/// `decrease_crucible_resource_count_and_soft_delete_volume`, due to the +/// `[ipv6]:port` targets being reused. +#[nexus_test] +async fn test_keep_your_targets_straight(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + + // Four zpools, one dataset each + let mut disk_test = DiskTest::new(&cptestctx).await; + disk_test + .add_zpool_with_dataset(&cptestctx, DiskTest::DEFAULT_ZPOOL_SIZE_GIB) + .await; + + // This bug occurs when region_snapshot records share a snapshot_addr, so + // insert those here manually. + + // (dataset_id, region_id, snapshot_id, snapshot_addr) + let region_snapshots = vec![ + // first snapshot-create + ( + disk_test.zpools[0].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:101:7]:19016"), + ), + ( + disk_test.zpools[1].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:102:7]:19016"), + ), + ( + disk_test.zpools[2].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:103:7]:19016"), + ), + // second snapshot-create + ( + disk_test.zpools[0].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:101:7]:19016"), // duplicate! + ), + ( + disk_test.zpools[3].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:104:7]:19016"), + ), + ( + disk_test.zpools[2].datasets[0].id, + Uuid::new_v4(), + Uuid::new_v4(), + String::from("[fd00:1122:3344:103:7]:19017"), + ), + ]; + + // First, three `region_snapshot` records created in the snapshot-create + // saga, which are then used to make snapshot's volume construction request + + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, snapshot_addr) = + ®ion_snapshots[i]; + datastore + .region_snapshot_create(nexus_db_model::RegionSnapshot { + dataset_id: *dataset_id, + region_id: *region_id, + snapshot_id: *snapshot_id, + snapshot_addr: snapshot_addr.clone(), + volume_references: 0, + deleting: false, + }) + .await + .unwrap(); + } + + let volume_id = Uuid::new_v4(); + let volume = datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + gen: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + region_snapshots[0].3.clone(), + region_snapshots[1].3.clone(), + region_snapshots[2].3.clone(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }) + .unwrap(), + )) + .await + .unwrap(); + + // Sanity check + + assert_eq!(volume.id(), volume_id); + + // Make sure the volume has only three read-only targets: + + let crucible_targets = datastore + .read_only_resources_associated_with_volume(volume_id) + .await + .unwrap(); + assert_eq!(crucible_targets.read_only_targets.len(), 3); + + // Also validate the volume's region_snapshots got incremented by + // volume_create + + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 1); + assert_eq!(region_snapshot.deleting, false); + } + + // Soft delete the volume, and validate that only three region_snapshot + // records are returned. + + let cr = datastore + .decrease_crucible_resource_count_and_soft_delete_volume(volume_id) + .await + .unwrap(); + + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 0); + assert_eq!(region_snapshot.deleting, true); + } + + match cr { + nexus_db_queries::db::datastore::CrucibleResources::V1(cr) => { + assert!(cr.datasets_and_regions.is_empty()); + assert_eq!(cr.datasets_and_snapshots.len(), 3); + } + } + + // Now, let's say we're at a spot where the running snapshots have been + // deleted, but before volume_hard_delete or region_snapshot_remove are + // called. Pretend another snapshot-create and snapshot-delete snuck in + // here, and the second snapshot hits a agent that reuses the first target. + + for i in 3..6 { + let (dataset_id, region_id, snapshot_id, snapshot_addr) = + ®ion_snapshots[i]; + datastore + .region_snapshot_create(nexus_db_model::RegionSnapshot { + dataset_id: *dataset_id, + region_id: *region_id, + snapshot_id: *snapshot_id, + snapshot_addr: snapshot_addr.clone(), + volume_references: 0, + deleting: false, + }) + .await + .unwrap(); + } + + let volume_id = Uuid::new_v4(); + let volume = datastore + .volume_create(nexus_db_model::Volume::new( + volume_id, + serde_json::to_string(&VolumeConstructionRequest::Volume { + id: volume_id, + block_size: 512, + sub_volumes: vec![], + read_only_parent: Some(Box::new( + VolumeConstructionRequest::Region { + block_size: 512, + blocks_per_extent: 1, + extent_count: 1, + gen: 1, + opts: CrucibleOpts { + id: Uuid::new_v4(), + target: vec![ + region_snapshots[3].3.clone(), + region_snapshots[4].3.clone(), + region_snapshots[5].3.clone(), + ], + lossy: false, + flush_timeout: None, + key: None, + cert_pem: None, + key_pem: None, + root_cert_pem: None, + control: None, + read_only: true, + }, + }, + )), + }) + .unwrap(), + )) + .await + .unwrap(); + + // Sanity check + + assert_eq!(volume.id(), volume_id); + + // Make sure the volume has only three read-only targets: + + let crucible_targets = datastore + .read_only_resources_associated_with_volume(volume_id) + .await + .unwrap(); + assert_eq!(crucible_targets.read_only_targets.len(), 3); + + // Also validate only the volume's region_snapshots got incremented by + // volume_create. + + for i in 0..3 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 0); + assert_eq!(region_snapshot.deleting, true); + } + for i in 3..6 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 1); + assert_eq!(region_snapshot.deleting, false); + } + + // Soft delete the volume, and validate that only three region_snapshot + // records are returned. + + let cr = datastore + .decrease_crucible_resource_count_and_soft_delete_volume(volume_id) + .await + .unwrap(); + + // Make sure every region_snapshot is now 0, and deleting + + for i in 0..6 { + let (dataset_id, region_id, snapshot_id, _) = region_snapshots[i]; + let region_snapshot = datastore + .region_snapshot_get(dataset_id, region_id, snapshot_id) + .await + .unwrap() + .unwrap(); + + assert_eq!(region_snapshot.volume_references, 0); + assert_eq!(region_snapshot.deleting, true); + } + + match cr { + nexus_db_queries::db::datastore::CrucibleResources::V1(cr) => { + assert!(cr.datasets_and_regions.is_empty()); + assert_eq!(cr.datasets_and_snapshots.len(), 3); + } + } +} + // Test function that creates a VolumeConstructionRequest::Region With gen, // and UUID you passed in. fn create_region( diff --git a/schema/crdb/5.0.0/up.sql b/schema/crdb/5.0.0/up.sql new file mode 100644 index 0000000000..2d551d3be4 --- /dev/null +++ b/schema/crdb/5.0.0/up.sql @@ -0,0 +1,25 @@ +-- CRDB documentation recommends the following: +-- "Execute schema changes either as single statements (as an implicit transaction), +-- or in an explicit transaction consisting of the single schema change statement." +-- +-- For each schema change, we transactionally: +-- 1. Check the current version +-- 2. Apply the idempotent update + +BEGIN; +SELECT CAST( + IF( + ( + SELECT version = '4.0.0' and target_version = '5.0.0' + FROM omicron.public.db_metadata WHERE singleton = true + ), + 'true', + 'Invalid starting version for schema change' + ) AS BOOL +); + +ALTER TABLE omicron.public.region_snapshot + ADD COLUMN IF NOT EXISTS deleting BOOL NOT NULL; + +COMMIT; + diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 4b38b7dfe4..e6034288ef 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -493,6 +493,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.region_snapshot ( /* How many volumes reference this? */ volume_references INT8 NOT NULL, + /* Is this currently part of some resources_to_delete? */ + deleting BOOL NOT NULL, + PRIMARY KEY (dataset_id, region_id, snapshot_id) ); @@ -2562,7 +2565,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '4.0.0', NULL) + ( TRUE, NOW(), NOW(), '5.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;