From 1bb75f28043e4e4d8c522b7dfaeb7df43a32cbd7 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Mon, 29 Jul 2024 13:28:20 -0400 Subject: [PATCH] Regions can now be read-only (#6150) Up until this point, regions were only ever read-write, and region snapshots were read-only. In order to support snapshot replacement, Crucible recently gained support for read-only downstairs that performs a "clone" operation to copy blocks from another read-only downstairs. This commit adds a "read-only" flag to Region, and adds support for Nexus initializing a downstairs with this new clone option. --- nexus/db-model/src/region.rs | 8 ++ nexus/db-model/src/schema.rs | 2 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/region.rs | 10 +- nexus/db-queries/src/db/datastore/volume.rs | 1 + .../src/db/queries/region_allocation.rs | 74 ++++++++------- .../output/region_allocate_distinct_sleds.sql | 31 ++++--- .../output/region_allocate_random_sleds.sql | 31 ++++--- ..._allocate_with_snapshot_distinct_sleds.sql | 31 ++++--- ...on_allocate_with_snapshot_random_sleds.sql | 31 ++++--- .../execution/src/omicron_physical_disks.rs | 1 + .../tasks/decommissioned_disk_cleaner.rs | 1 + .../tasks/region_replacement_driver.rs | 6 ++ nexus/src/app/crucible.rs | 93 ++++++++++--------- .../app/sagas/region_replacement_finish.rs | 1 + .../src/app/sagas/region_replacement_start.rs | 11 +++ schema/crdb/dbinit.sql | 6 +- schema/crdb/region-read-only/up01.sql | 4 + schema/crdb/region-read-only/up02.sql | 2 + 19 files changed, 214 insertions(+), 133 deletions(-) create mode 100644 schema/crdb/region-read-only/up01.sql create mode 100644 schema/crdb/region-read-only/up02.sql diff --git a/nexus/db-model/src/region.rs b/nexus/db-model/src/region.rs index 666a7ef456..e38c5543ef 100644 --- a/nexus/db-model/src/region.rs +++ b/nexus/db-model/src/region.rs @@ -43,6 +43,9 @@ pub struct Region { // The port that was returned when the region was created. This field didn't // originally exist, so records may not have it filled in. port: Option, + + // A region may be read-only + read_only: bool, } impl Region { @@ -53,6 +56,7 @@ impl Region { blocks_per_extent: u64, extent_count: u64, port: u16, + read_only: bool, ) -> Self { Self { identity: RegionIdentity::new(Uuid::new_v4()), @@ -62,6 +66,7 @@ impl Region { blocks_per_extent: blocks_per_extent as i64, extent_count: extent_count as i64, port: Some(port.into()), + read_only, } } @@ -91,4 +96,7 @@ impl Region { pub fn port(&self) -> Option { self.port.map(|port| port.into()) } + pub fn read_only(&self) -> bool { + self.read_only + } } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index dc57de9263..81c8712279 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1046,6 +1046,8 @@ table! { extent_count -> Int8, port -> Nullable, + + read_only -> Bool, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index cc34a3581c..97c933c987 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(83, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(84, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(84, "region-read-only"), KnownVersion::new(83, "dataset-address-optional"), KnownVersion::new(82, "region-port"), KnownVersion::new(81, "add-nullable-filesystem-pool"), diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 3b1c20c1df..df733af5b5 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -19,6 +19,7 @@ use crate::db::model::Region; use crate::db::model::SqlU16; use crate::db::pagination::paginated; use crate::db::pagination::Paginator; +use crate::db::queries::region_allocation::RegionParameters; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use crate::transaction_retry::OptionalError; @@ -259,9 +260,12 @@ impl DataStore { let query = crate::db::queries::region_allocation::allocation_query( volume_id, maybe_snapshot_id, - block_size, - blocks_per_extent, - extent_count, + RegionParameters { + block_size, + blocks_per_extent, + extent_count, + read_only: false, + }, allocation_strategy, num_regions_required, ); diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index b13006aa95..4ecba530b1 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -2349,6 +2349,7 @@ mod tests { 10, 10, 10001, + false, ); region_and_volume_ids[i].0 = region.id(); diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index cefca5914f..7cf378d53b 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -67,6 +67,18 @@ type SelectableSql = < >::SelectExpression as diesel::Expression >::SqlType; +/// Parameters for the region(s) being allocated +#[derive(Debug, Clone, Copy)] +pub struct RegionParameters { + pub block_size: u64, + pub blocks_per_extent: u64, + pub extent_count: u64, + + /// True if the region will be filled with a Clone operation and is meant to + /// be read-only. + pub read_only: bool, +} + /// For a given volume, idempotently allocate enough regions (according to some /// allocation strategy) to meet some redundancy level. This should only be used /// for the region set that is in the top level of the Volume (not the deeper @@ -75,9 +87,7 @@ type SelectableSql = < pub fn allocation_query( volume_id: uuid::Uuid, snapshot_id: Option, - block_size: u64, - blocks_per_extent: u64, - extent_count: u64, + params: RegionParameters, allocation_strategy: &RegionAllocationStrategy, redundancy: usize, ) -> TypedSqlQuery<(SelectableSql, SelectableSql)> { @@ -104,7 +114,8 @@ pub fn allocation_query( let seed = seed.to_le_bytes().to_vec(); - let size_delta = block_size * blocks_per_extent * extent_count; + let size_delta = + params.block_size * params.blocks_per_extent * params.extent_count; let redundancy: i64 = i64::try_from(redundancy).unwrap(); let builder = QueryBuilder::new().sql( @@ -243,7 +254,8 @@ pub fn allocation_query( ").param().sql(" AS block_size, ").param().sql(" AS blocks_per_extent, ").param().sql(" AS extent_count, - NULL AS port + NULL AS port, + ").param().sql(" AS read_only FROM shuffled_candidate_datasets") // Only select the *additional* number of candidate regions for the required // redundancy level @@ -253,9 +265,10 @@ pub fn allocation_query( )) ),") .bind::(volume_id) - .bind::(block_size as i64) - .bind::(blocks_per_extent as i64) - .bind::(extent_count as i64) + .bind::(params.block_size as i64) + .bind::(params.blocks_per_extent as i64) + .bind::(params.extent_count as i64) + .bind::(params.read_only) .bind::(redundancy) // A subquery which summarizes the changes we intend to make, showing: @@ -355,7 +368,7 @@ pub fn allocation_query( .sql(" inserted_regions AS ( INSERT INTO region - (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count, port) + (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count, port, read_only) SELECT ").sql(AllColumnsOfRegion::with_prefix("candidate_regions")).sql(" FROM candidate_regions WHERE @@ -405,9 +418,12 @@ mod test { #[tokio::test] async fn expectorate_query() { let volume_id = Uuid::nil(); - let block_size = 512; - let blocks_per_extent = 4; - let extent_count = 8; + let params = RegionParameters { + block_size: 512, + blocks_per_extent: 4, + extent_count: 8, + read_only: false, + }; // Start with snapshot_id = None @@ -418,14 +434,13 @@ mod test { let region_allocate = allocation_query( volume_id, snapshot_id, - block_size, - blocks_per_extent, - extent_count, + params, &RegionAllocationStrategy::RandomWithDistinctSleds { seed: Some(1), }, REGION_REDUNDANCY_THRESHOLD, ); + expectorate_query_contents( ®ion_allocate, "tests/output/region_allocate_distinct_sleds.sql", @@ -437,9 +452,7 @@ mod test { let region_allocate = allocation_query( volume_id, snapshot_id, - block_size, - blocks_per_extent, - extent_count, + params, &RegionAllocationStrategy::Random { seed: Some(1) }, REGION_REDUNDANCY_THRESHOLD, ); @@ -458,9 +471,7 @@ mod test { let region_allocate = allocation_query( volume_id, snapshot_id, - block_size, - blocks_per_extent, - extent_count, + params, &RegionAllocationStrategy::RandomWithDistinctSleds { seed: Some(1), }, @@ -477,9 +488,7 @@ mod test { let region_allocate = allocation_query( volume_id, snapshot_id, - block_size, - blocks_per_extent, - extent_count, + params, &RegionAllocationStrategy::Random { seed: Some(1) }, REGION_REDUNDANCY_THRESHOLD, ); @@ -502,18 +511,19 @@ mod test { let conn = pool.pool().get().await.unwrap(); let volume_id = Uuid::new_v4(); - let block_size = 512; - let blocks_per_extent = 4; - let extent_count = 8; + let params = RegionParameters { + block_size: 512, + blocks_per_extent: 4, + extent_count: 8, + read_only: false, + }; // First structure: Explain the query with "RandomWithDistinctSleds" let region_allocate = allocation_query( volume_id, None, - block_size, - blocks_per_extent, - extent_count, + params, &RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }, REGION_REDUNDANCY_THRESHOLD, ); @@ -527,9 +537,7 @@ mod test { let region_allocate = allocation_query( volume_id, None, - block_size, - blocks_per_extent, - extent_count, + params, &RegionAllocationStrategy::Random { seed: None }, REGION_REDUNDANCY_THRESHOLD, ); diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index 624428c217..6331770ef5 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -10,7 +10,8 @@ WITH region.block_size, region.blocks_per_extent, region.extent_count, - region.port + region.port, + region.read_only FROM region WHERE @@ -99,11 +100,12 @@ WITH $8 AS block_size, $9 AS blocks_per_extent, $10 AS extent_count, - NULL AS port + NULL AS port, + $11 AS read_only FROM shuffled_candidate_datasets LIMIT - $11 - (SELECT count(*) FROM old_regions) + $12 - (SELECT count(*) FROM old_regions) ), proposed_dataset_changes AS ( @@ -122,7 +124,7 @@ WITH SELECT ( ( - (SELECT count(*) FROM old_regions LIMIT 1) < $12 + (SELECT count(*) FROM old_regions LIMIT 1) < $13 AND CAST( IF( ( @@ -132,7 +134,7 @@ WITH + (SELECT count(*) FROM existing_zpools LIMIT 1) ) ) - >= $13 + >= $14 ), 'TRUE', 'Not enough space' @@ -149,7 +151,7 @@ WITH + (SELECT count(*) FROM old_regions LIMIT 1) ) ) - >= $14 + >= $15 ), 'TRUE', 'Not enough datasets' @@ -185,7 +187,7 @@ WITH 1 ) ) - >= $15 + >= $16 ), 'TRUE', 'Not enough unique zpools selected' @@ -208,7 +210,8 @@ WITH block_size, blocks_per_extent, extent_count, - port + port, + read_only ) SELECT candidate_regions.id, @@ -219,7 +222,8 @@ WITH candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count, - candidate_regions.port + candidate_regions.port, + candidate_regions.read_only FROM candidate_regions WHERE @@ -233,7 +237,8 @@ WITH region.block_size, region.blocks_per_extent, region.extent_count, - region.port + region.port, + region.read_only ), updated_datasets AS ( @@ -287,7 +292,8 @@ WITH old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count, - old_regions.port + old_regions.port, + old_regions.read_only FROM old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) @@ -312,7 +318,8 @@ UNION inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count, - inserted_regions.port + inserted_regions.port, + inserted_regions.read_only FROM inserted_regions INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index 748e0d7b15..e713121d34 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -10,7 +10,8 @@ WITH region.block_size, region.blocks_per_extent, region.extent_count, - region.port + region.port, + region.read_only FROM region WHERE @@ -97,11 +98,12 @@ WITH $7 AS block_size, $8 AS blocks_per_extent, $9 AS extent_count, - NULL AS port + NULL AS port, + $10 AS read_only FROM shuffled_candidate_datasets LIMIT - $10 - (SELECT count(*) FROM old_regions) + $11 - (SELECT count(*) FROM old_regions) ), proposed_dataset_changes AS ( @@ -120,7 +122,7 @@ WITH SELECT ( ( - (SELECT count(*) FROM old_regions LIMIT 1) < $11 + (SELECT count(*) FROM old_regions LIMIT 1) < $12 AND CAST( IF( ( @@ -130,7 +132,7 @@ WITH + (SELECT count(*) FROM existing_zpools LIMIT 1) ) ) - >= $12 + >= $13 ), 'TRUE', 'Not enough space' @@ -147,7 +149,7 @@ WITH + (SELECT count(*) FROM old_regions LIMIT 1) ) ) - >= $13 + >= $14 ), 'TRUE', 'Not enough datasets' @@ -183,7 +185,7 @@ WITH 1 ) ) - >= $14 + >= $15 ), 'TRUE', 'Not enough unique zpools selected' @@ -206,7 +208,8 @@ WITH block_size, blocks_per_extent, extent_count, - port + port, + read_only ) SELECT candidate_regions.id, @@ -217,7 +220,8 @@ WITH candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count, - candidate_regions.port + candidate_regions.port, + candidate_regions.read_only FROM candidate_regions WHERE @@ -231,7 +235,8 @@ WITH region.block_size, region.blocks_per_extent, region.extent_count, - region.port + region.port, + region.read_only ), updated_datasets AS ( @@ -285,7 +290,8 @@ WITH old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count, - old_regions.port + old_regions.port, + old_regions.read_only FROM old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) @@ -310,7 +316,8 @@ UNION inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count, - inserted_regions.port + inserted_regions.port, + inserted_regions.read_only FROM inserted_regions INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql index 8faca9ed4f..0b8dc4fca6 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql @@ -10,7 +10,8 @@ WITH region.block_size, region.blocks_per_extent, region.extent_count, - region.port + region.port, + region.read_only FROM region WHERE @@ -110,11 +111,12 @@ WITH $9 AS block_size, $10 AS blocks_per_extent, $11 AS extent_count, - NULL AS port + NULL AS port, + $12 AS read_only FROM shuffled_candidate_datasets LIMIT - $12 - (SELECT count(*) FROM old_regions) + $13 - (SELECT count(*) FROM old_regions) ), proposed_dataset_changes AS ( @@ -133,7 +135,7 @@ WITH SELECT ( ( - (SELECT count(*) FROM old_regions LIMIT 1) < $13 + (SELECT count(*) FROM old_regions LIMIT 1) < $14 AND CAST( IF( ( @@ -143,7 +145,7 @@ WITH + (SELECT count(*) FROM existing_zpools LIMIT 1) ) ) - >= $14 + >= $15 ), 'TRUE', 'Not enough space' @@ -160,7 +162,7 @@ WITH + (SELECT count(*) FROM old_regions LIMIT 1) ) ) - >= $15 + >= $16 ), 'TRUE', 'Not enough datasets' @@ -196,7 +198,7 @@ WITH 1 ) ) - >= $16 + >= $17 ), 'TRUE', 'Not enough unique zpools selected' @@ -219,7 +221,8 @@ WITH block_size, blocks_per_extent, extent_count, - port + port, + read_only ) SELECT candidate_regions.id, @@ -230,7 +233,8 @@ WITH candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count, - candidate_regions.port + candidate_regions.port, + candidate_regions.read_only FROM candidate_regions WHERE @@ -244,7 +248,8 @@ WITH region.block_size, region.blocks_per_extent, region.extent_count, - region.port + region.port, + region.read_only ), updated_datasets AS ( @@ -298,7 +303,8 @@ WITH old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count, - old_regions.port + old_regions.port, + old_regions.read_only FROM old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) @@ -323,7 +329,8 @@ UNION inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count, - inserted_regions.port + inserted_regions.port, + inserted_regions.read_only FROM inserted_regions INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql index c1c03672a4..9ac945f71d 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql @@ -10,7 +10,8 @@ WITH region.block_size, region.blocks_per_extent, region.extent_count, - region.port + region.port, + region.read_only FROM region WHERE @@ -108,11 +109,12 @@ WITH $8 AS block_size, $9 AS blocks_per_extent, $10 AS extent_count, - NULL AS port + NULL AS port, + $11 AS read_only FROM shuffled_candidate_datasets LIMIT - $11 - (SELECT count(*) FROM old_regions) + $12 - (SELECT count(*) FROM old_regions) ), proposed_dataset_changes AS ( @@ -131,7 +133,7 @@ WITH SELECT ( ( - (SELECT count(*) FROM old_regions LIMIT 1) < $12 + (SELECT count(*) FROM old_regions LIMIT 1) < $13 AND CAST( IF( ( @@ -141,7 +143,7 @@ WITH + (SELECT count(*) FROM existing_zpools LIMIT 1) ) ) - >= $13 + >= $14 ), 'TRUE', 'Not enough space' @@ -158,7 +160,7 @@ WITH + (SELECT count(*) FROM old_regions LIMIT 1) ) ) - >= $14 + >= $15 ), 'TRUE', 'Not enough datasets' @@ -194,7 +196,7 @@ WITH 1 ) ) - >= $15 + >= $16 ), 'TRUE', 'Not enough unique zpools selected' @@ -217,7 +219,8 @@ WITH block_size, blocks_per_extent, extent_count, - port + port, + read_only ) SELECT candidate_regions.id, @@ -228,7 +231,8 @@ WITH candidate_regions.block_size, candidate_regions.blocks_per_extent, candidate_regions.extent_count, - candidate_regions.port + candidate_regions.port, + candidate_regions.read_only FROM candidate_regions WHERE @@ -242,7 +246,8 @@ WITH region.block_size, region.blocks_per_extent, region.extent_count, - region.port + region.port, + region.read_only ), updated_datasets AS ( @@ -296,7 +301,8 @@ WITH old_regions.block_size, old_regions.blocks_per_extent, old_regions.extent_count, - old_regions.port + old_regions.port, + old_regions.read_only FROM old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) @@ -321,7 +327,8 @@ UNION inserted_regions.block_size, inserted_regions.blocks_per_extent, inserted_regions.extent_count, - inserted_regions.port + inserted_regions.port, + inserted_regions.read_only FROM inserted_regions INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs index 9ae72d2049..7adc41213e 100644 --- a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs +++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs @@ -450,6 +450,7 @@ mod test { 10, 10, 1, + false, ) }; let conn = datastore.pool_connection_for_tests().await.unwrap(); diff --git a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs index cb3ef9a569..602f3f85e8 100644 --- a/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs +++ b/nexus/src/app/background/tasks/decommissioned_disk_cleaner.rs @@ -261,6 +261,7 @@ mod tests { 10, 10, 1, + false, ) }; let region_id = region.id(); diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index c3af324f44..284ed2c368 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -351,6 +351,7 @@ mod test { 10, 10, 27015, + false, ) }; @@ -364,6 +365,7 @@ mod test { 10, 10, 27016, + false, ) }; @@ -448,6 +450,7 @@ mod test { 10, 10, 27015, + false, ) }; @@ -461,6 +464,7 @@ mod test { 10, 10, 27016, + false, ) }; @@ -595,6 +599,7 @@ mod test { 10, 10, 27015, + false, ) }; @@ -608,6 +613,7 @@ mod test { 10, 10, 27016, + false, ) }; diff --git a/nexus/src/app/crucible.rs b/nexus/src/app/crucible.rs index 72a5c80baf..fdd67da617 100644 --- a/nexus/src/app/crucible.rs +++ b/nexus/src/app/crucible.rs @@ -26,7 +26,8 @@ use slog::Logger; // within a disk at the same time. const MAX_CONCURRENT_REGION_REQUESTS: usize = 3; -/// Provides a way for (with BackoffError) Permanent errors to have a different error type than +/// Provides a way for (with BackoffError) Permanent errors to have a different +/// error type than /// Transient errors. #[derive(Debug, thiserror::Error)] enum WaitError { @@ -146,12 +147,14 @@ impl super::Nexus { } } - /// Call out to Crucible agent and perform region creation. - async fn ensure_region_in_dataset( + /// Call out to Crucible agent and perform region creation. Optionally, + /// supply a read-only source to invoke a clone. + pub async fn ensure_region_in_dataset( &self, log: &Logger, dataset: &db::model::Dataset, region: &db::model::Region, + source: Option, ) -> Result { let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); @@ -173,7 +176,7 @@ impl super::Nexus { cert_pem: None, key_pem: None, root_pem: None, - source: None, + source, }; let create_region = || async { @@ -593,9 +596,10 @@ impl super::Nexus { .await .map_err(|e| match e { WaitError::Transient(e) => { - // The backoff crate can be configured with a maximum elapsed time - // before giving up, which means that Transient could be returned - // here. Our current policies do **not** set this though. + // The backoff crate can be configured with a maximum elapsed + // time before giving up, which means that Transient could be + // returned here. Our current policies do **not** set this + // though. Error::internal_error(&e.to_string()) } @@ -625,12 +629,10 @@ impl super::Nexus { backoff::retry_notify( backoff::retry_policy_internal_service_aggressive(), || async { - let response = match self.get_crucible_region_snapshots( - log, - dataset, - region_id, - ) - .await { + let response = match self + .get_crucible_region_snapshots(log, dataset, region_id) + .await + { Ok(v) => Ok(v), // Return Ok if the dataset's agent is gone, no @@ -645,7 +647,9 @@ impl super::Nexus { return Ok(()); } - Err(e) => Err(BackoffError::Permanent(WaitError::Permanent(e))), + Err(e) => { + Err(BackoffError::Permanent(WaitError::Permanent(e))) + } }?; match response.running_snapshots.get(&snapshot_id.to_string()) { @@ -662,27 +666,23 @@ impl super::Nexus { RegionState::Tombstoned => { Err(BackoffError::transient( WaitError::Transient(anyhow!( - "running_snapshot tombstoned, not deleted yet", - ) - ))) + "running_snapshot tombstoned, not \ + deleted yet", + )), + )) } RegionState::Destroyed => { - info!( - log, - "running_snapshot deleted", - ); + info!(log, "running_snapshot deleted",); Ok(()) } - _ => { - Err(BackoffError::transient( - WaitError::Transient(anyhow!( - "running_snapshot unexpected state", - ) - ))) - } + _ => Err(BackoffError::transient( + WaitError::Transient(anyhow!( + "running_snapshot unexpected state", + )), + )), } } @@ -710,20 +710,19 @@ impl super::Nexus { "region_id" => %region_id, "snapshot_id" => %snapshot_id, ); - } + }, ) .await .map_err(|e| match e { WaitError::Transient(e) => { - // The backoff crate can be configured with a maximum elapsed time - // before giving up, which means that Transient could be returned - // here. Our current policies do **not** set this though. + // The backoff crate can be configured with a maximum elapsed + // time before giving up, which means that Transient could be + // returned here. Our current policies do **not** set this + // though. Error::internal_error(&e.to_string()) } - WaitError::Permanent(e) => { - e - } + WaitError::Permanent(e) => e, }) } @@ -745,11 +744,12 @@ impl super::Nexus { region_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - // Unlike other Crucible agent endpoints, this one is synchronous in that it - // is not only a request to the Crucible agent: `zfs destroy` is performed - // right away. However this is still a request to illumos that may not take - // effect right away. Wait until the snapshot no longer appears in the list - // of region snapshots, meaning it was not returned from `zfs list`. + // Unlike other Crucible agent endpoints, this one is synchronous in + // that it is not only a request to the Crucible agent: `zfs destroy` is + // performed right away. However this is still a request to illumos that + // may not take effect right away. Wait until the snapshot no longer + // appears in the list of region snapshots, meaning it was not returned + // from `zfs list`. let dataset_id = dataset.id(); @@ -838,9 +838,10 @@ impl super::Nexus { .await .map_err(|e| match e { WaitError::Transient(e) => { - // The backoff crate can be configured with a maximum elapsed time - // before giving up, which means that Transient could be returned - // here. Our current policies do **not** set this though. + // The backoff crate can be configured with a maximum elapsed + // time before giving up, which means that Transient could be + // returned here. Our current policies do **not** set this + // though. Error::internal_error(&e.to_string()) } @@ -860,13 +861,13 @@ impl super::Nexus { return Ok(vec![]); } - // Allocate regions, and additionally return the dataset that the region was - // allocated in. + // Allocate regions, and additionally return the dataset that the region + // was allocated in. let datasets_and_regions: Vec<(db::model::Dataset, Region)> = futures::stream::iter(datasets_and_regions) .map(|(dataset, region)| async move { match self - .ensure_region_in_dataset(log, &dataset, ®ion) + .ensure_region_in_dataset(log, &dataset, ®ion, None) .await { Ok(result) => Ok((dataset, result)), diff --git a/nexus/src/app/sagas/region_replacement_finish.rs b/nexus/src/app/sagas/region_replacement_finish.rs index a6964a8d06..e17f3405a0 100644 --- a/nexus/src/app/sagas/region_replacement_finish.rs +++ b/nexus/src/app/sagas/region_replacement_finish.rs @@ -249,6 +249,7 @@ pub(crate) mod test { 10, 10, 12345, + false, ) }; diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index 1297158b24..d4d455f927 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -221,6 +221,13 @@ async fn srrs_get_existing_datasets_and_regions( .await .map_err(ActionError::action_failed)?; + // XXX for now, bail out if requesting the replacement of a read-only region + if db_region.read_only() { + return Err(ActionError::action_failed(String::from( + "replacing read-only region currently unsupported", + ))); + } + // Find out the existing datasets and regions that back the volume let datasets_and_regions = osagactx .datastore() @@ -925,6 +932,7 @@ pub(crate) mod test { 10, 10, 1001, + false, ), Region::new( datasets[1].id(), @@ -933,6 +941,7 @@ pub(crate) mod test { 10, 10, 1002, + false, ), Region::new( datasets[2].id(), @@ -941,6 +950,7 @@ pub(crate) mod test { 10, 10, 1003, + false, ), Region::new( datasets[3].id(), @@ -949,6 +959,7 @@ pub(crate) mod test { 10, 10, 1004, + false, ), ]; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 7fc83ad5d0..f6ded221cd 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -581,7 +581,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.region ( blocks_per_extent INT NOT NULL, extent_count INT NOT NULL, - port INT4 + port INT4, + + read_only BOOL NOT NULL ); /* @@ -4145,7 +4147,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '83.0.0', NULL) + (TRUE, NOW(), NOW(), '84.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/region-read-only/up01.sql b/schema/crdb/region-read-only/up01.sql new file mode 100644 index 0000000000..f69f585562 --- /dev/null +++ b/schema/crdb/region-read-only/up01.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.region + ADD COLUMN IF NOT EXISTS read_only BOOLEAN NOT NULL + DEFAULT false; + diff --git a/schema/crdb/region-read-only/up02.sql b/schema/crdb/region-read-only/up02.sql new file mode 100644 index 0000000000..b8c0aff92a --- /dev/null +++ b/schema/crdb/region-read-only/up02.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.region + ALTER COLUMN read_only DROP DEFAULT;