From bb083ae3ed2c62381e76706e7aaabbd6dbeb8692 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Thu, 4 Jul 2024 18:14:25 -0400 Subject: [PATCH] Record Crucible region ports in CRDB (#5955) It was a mistake not to record the port that an Agent assigns a Region when it's created, and this PR rectifies that: ports are recorded when regions are ensured, and a background task will scan for regions that are missing a port, and attempt to contact the appropriate Agent in order to fill that in. Fixes #5846 --------- Co-authored-by: Sean Klein --- dev-tools/omdb/src/bin/omdb/db.rs | 31 ++++ dev-tools/omdb/src/bin/omdb/nexus.rs | 21 +++ dev-tools/omdb/tests/env.out | 12 ++ dev-tools/omdb/tests/successes.out | 12 ++ dev-tools/omdb/tests/usage_errors.out | 2 + nexus-config/src/nexus_config.rs | 17 +- nexus/db-model/src/region.rs | 10 ++ nexus/db-model/src/schema.rs | 2 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/region.rs | 94 +++++++++++ nexus/db-queries/src/db/datastore/volume.rs | 1 + .../src/db/queries/region_allocation.rs | 5 +- .../output/region_allocate_distinct_sleds.sql | 21 ++- .../output/region_allocate_random_sleds.sql | 21 ++- ..._allocate_with_snapshot_distinct_sleds.sql | 21 ++- ...on_allocate_with_snapshot_random_sleds.sql | 21 ++- nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 18 ++- .../background/tasks/lookup_region_port.rs | 149 ++++++++++++++++++ nexus/src/app/background/tasks/mod.rs | 1 + .../tasks/region_replacement_driver.rs | 6 + nexus/src/app/crucible.rs | 60 ++++++- nexus/src/app/sagas/common_storage.rs | 40 ----- .../app/sagas/region_replacement_finish.rs | 1 + .../src/app/sagas/region_replacement_start.rs | 125 +++++++-------- nexus/tests/config.test.toml | 1 + nexus/types/src/internal_api/background.rs | 7 + schema/crdb/dbinit.sql | 10 +- schema/crdb/region-port/up01.sql | 2 + schema/crdb/region-port/up02.sql | 2 + schema/crdb/region-port/up03.sql | 3 + smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 33 files changed, 574 insertions(+), 148 deletions(-) create mode 100644 nexus/src/app/background/tasks/lookup_region_port.rs create mode 100644 schema/crdb/region-port/up01.sql create mode 100644 schema/crdb/region-port/up02.sql create mode 100644 schema/crdb/region-port/up03.sql diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 7df457b247..26c7ddd3b9 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -289,6 +289,8 @@ enum DbCommands { Inventory(InventoryArgs), /// Save the current Reconfigurator inputs to a file ReconfiguratorSave(ReconfiguratorSaveArgs), + /// Print information about regions + Region(RegionArgs), /// Query for information about region replacements, optionally manually /// triggering one. 
RegionReplacement(RegionReplacementArgs), @@ -456,6 +458,18 @@ struct SledsArgs { filter: Option<SledFilter>, } +#[derive(Debug, Args)] +struct RegionArgs { + #[command(subcommand)] + command: RegionCommands, +} + +#[derive(Debug, Subcommand)] +enum RegionCommands { + /// List regions that are still missing ports + ListRegionsMissingPorts, +} + #[derive(Debug, Args)] struct RegionReplacementArgs { #[command(subcommand)] @@ -605,6 +619,9 @@ impl DbArgs { ) .await } + DbCommands::Region(RegionArgs { + command: RegionCommands::ListRegionsMissingPorts, + }) => cmd_db_region_missing_ports(&opctx, &datastore).await, DbCommands::RegionReplacement(RegionReplacementArgs { command: RegionReplacementCommands::List(args), }) => { @@ -1523,6 +1540,20 @@ async fn cmd_db_snapshot_info( Ok(()) } +/// List all regions still missing ports +async fn cmd_db_region_missing_ports( + opctx: &OpContext, + datastore: &DataStore, +) -> Result<(), anyhow::Error> { + let regions: Vec<Region> = datastore.regions_missing_ports(opctx).await?; + + for region in regions { + println!("{:?}", region.id()); + } + + Ok(()) +} + /// List all region replacement requests async fn cmd_db_region_replacement_list( datastore: &DataStore, diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 0b1c6c77f4..fb74ddd89b 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -28,6 +28,7 @@ use nexus_client::types::SledSelector; use nexus_client::types::UninitializedSledId; use nexus_db_queries::db::lookup::LookupPath; use nexus_types::deployment::Blueprint; +use nexus_types::internal_api::background::LookupRegionPortStatus; use nexus_types::internal_api::background::RegionReplacementDriverStatus; use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::CollectionUuid; @@ -1082,6 +1083,26 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { } } }; + } else if name == "lookup_region_port" { + match serde_json::from_value::<LookupRegionPortStatus>(details.clone()) + { + Err(error) => eprintln!( + "warning: failed to interpret task details: {:?}: {:?}", + error, details + ), + + Ok(LookupRegionPortStatus { found_port_ok, errors }) => { + println!(" total filled in ports: {}", found_port_ok.len()); + for line in &found_port_ok { + println!(" > {line}"); + } + + println!(" errors: {}", errors.len()); + for line in &errors { + println!(" > {line}"); + } + } + }; } else { println!( "warning: unknown background task: {:?} \ diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 252313e6c8..348ff5e9ac 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -89,6 +89,10 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "lookup_region_port" + fill in missing ports for region records + + task: "metrics_producer_gc" unregisters Oximeter metrics producers that have not renewed their lease @@ -221,6 +225,10 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "lookup_region_port" + fill in missing ports for region records + + task: "metrics_producer_gc" unregisters Oximeter metrics producers that have not renewed their lease @@ -340,6 +348,10 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "lookup_region_port" + fill in missing ports for region records + + task: "metrics_producer_gc" unregisters Oximeter metrics producers that have not renewed their lease diff --git
a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 47d091443d..b5147b66f9 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -290,6 +290,10 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "lookup_region_port" + fill in missing ports for region records + + task: "metrics_producer_gc" unregisters Oximeter metrics producers that have not renewed their lease @@ -483,6 +487,14 @@ task: "inventory_collection" last collection started: last collection done: +task: "lookup_region_port" + configured period: every 1m + currently executing: no + last completed activation: , triggered by a periodic timer firing + started at (s ago) and ran for ms + total filled in ports: 0 + errors: 0 + task: "metrics_producer_gc" configured period: every 1m currently executing: no diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 9524d217c9..b4679001fa 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -107,6 +107,7 @@ Commands: dns Print information about internal and external DNS inventory Print information about collected hardware/software inventory reconfigurator-save Save the current Reconfigurator inputs to a file + region Print information about regions region-replacement Query for information about region replacements, optionally manually triggering one sleds Print information about sleds @@ -147,6 +148,7 @@ Commands: dns Print information about internal and external DNS inventory Print information about collected hardware/software inventory reconfigurator-save Save the current Reconfigurator inputs to a file + region Print information about regions region-replacement Query for information about region replacements, optionally manually triggering one sleds Print information about sleds diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index a8c863298e..5b7069d06f 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -385,6 +385,8 @@ pub struct BackgroundTaskConfig { pub v2p_mapping_propagation: V2PMappingPropagationConfig, /// configuration for abandoned VMM reaper task pub abandoned_vmm_reaper: AbandonedVmmReaperConfig, + /// configuration for lookup region port task + pub lookup_region_port: LookupRegionPortConfig, } #[serde_as] @@ -574,6 +576,14 @@ pub struct RegionReplacementDriverConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct LookupRegionPortConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + /// Configuration for a nexus server #[derive(Clone, Debug, Deserialize, PartialEq, Serialize)] pub struct PackageConfig { @@ -816,6 +826,7 @@ mod test { service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 + lookup_region_port.period_secs = 60 [default_region_allocation_strategy] type = "random" seed = 0 @@ -962,7 +973,10 @@ mod test { }, abandoned_vmm_reaper: AbandonedVmmReaperConfig { period_secs: Duration::from_secs(60), - } + }, + lookup_region_port: LookupRegionPortConfig { + period_secs: Duration::from_secs(60), + }, }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -1035,6 +1049,7 @@ mod test { 
service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 + lookup_region_port.period_secs = 60 [default_region_allocation_strategy] type = "random" "##, diff --git a/nexus/db-model/src/region.rs b/nexus/db-model/src/region.rs index 441f928405..666a7ef456 100644 --- a/nexus/db-model/src/region.rs +++ b/nexus/db-model/src/region.rs @@ -4,6 +4,7 @@ use super::ByteCount; use crate::schema::region; +use crate::SqlU16; use db_macros::Asset; use omicron_common::api::external; use serde::{Deserialize, Serialize}; @@ -38,6 +39,10 @@ pub struct Region { // never expect them to be negative. blocks_per_extent: i64, extent_count: i64, + + // The port that was returned when the region was created. This field didn't + // originally exist, so records may not have it filled in. + port: Option<SqlU16>, } impl Region { @@ -47,6 +52,7 @@ impl Region { block_size: ByteCount, blocks_per_extent: u64, extent_count: u64, + port: u16, ) -> Self { Self { identity: RegionIdentity::new(Uuid::new_v4()), @@ -55,6 +61,7 @@ impl Region { block_size, blocks_per_extent: blocks_per_extent as i64, extent_count: extent_count as i64, + port: Some(port.into()), } } @@ -81,4 +88,7 @@ impl Region { // external, customer-supplied keys is a non-requirement. true } + pub fn port(&self) -> Option<u16> { + self.port.map(|port| port.into()) + } } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 990aab151c..d47c21cd1d 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1044,6 +1044,8 @@ table! { block_size -> Int8, blocks_per_extent -> Int8, extent_count -> Int8, + + port -> Nullable<Int4>, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 8255d684cb..3e740590c5 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(81, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(82, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(82, "region-port"), KnownVersion::new(81, "add-nullable-filesystem-pool"), KnownVersion::new(80, "add-instance-id-to-migrations"), KnownVersion::new(79, "nic-spoof-allow"), diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index ac86c6a7d3..6832665944 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -9,12 +9,18 @@ use super::RunnableQuery; use crate::context::OpContext; use crate::db; use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; +use crate::db::datastore::SQL_BATCH_SIZE; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::lookup::LookupPath; use crate::db::model::Dataset; use crate::db::model::PhysicalDiskPolicy; use crate::db::model::Region; +use crate::db::model::SqlU16; +use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; +use crate::db::update_and_check::UpdateAndCheck; +use crate::db::update_and_check::UpdateStatus; use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; @@ -23,8 +29,11 @@ use nexus_types::external_api::params; use omicron_common::api::external; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; +use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; +use omicron_common::api::external::UpdateResult; use slog::Logger; +use std::net::SocketAddrV6; use uuid::Uuid; pub enum RegionAllocationFor { @@ -434,6 +443,91 @@ impl DataStore { .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + + pub async fn region_set_port( + &self, + region_id: Uuid, + region_port: u16, + ) -> UpdateResult<()> { + use db::schema::region::dsl; + + let conn = self.pool_connection_unauthorized().await?; + + let updated = diesel::update(dsl::region) + .filter(dsl::id.eq(region_id)) + .set(dsl::port.eq(Some::<SqlU16>(region_port.into()))) + .check_if_exists::<Region>(region_id) + .execute_and_check(&conn) + .await; + + match updated { + Ok(result) => match result.status { + UpdateStatus::Updated => Ok(()), + + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.port() == Some(region_port) { + Ok(()) + } else { + Err(Error::conflict(format!( + "region {region_id} port set to {:?}", + record.port(), + ))) + } + } + }, + + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), + } + } + + /// If a region's port was recorded, return its associated address, + /// otherwise return None. 
+ pub async fn region_addr( + &self, + region_id: Uuid, + ) -> LookupResult<Option<SocketAddrV6>> { + let region = self.get_region(region_id).await?; + + let Some(port) = region.port() else { + return Ok(None); + }; + + let dataset = self.dataset_get(region.dataset_id()).await?; + + Ok(Some(SocketAddrV6::new(*dataset.address().ip(), port, 0, 0))) + } + + pub async fn regions_missing_ports( + &self, + opctx: &OpContext, + ) -> ListResultVec<Region> { + opctx.check_complex_operations_allowed()?; + + let mut records = Vec::new(); + + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + let conn = self.pool_connection_authorized(opctx).await?; + + while let Some(p) = paginator.next() { + use db::schema::region::dsl; + + let batch = paginated(dsl::region, dsl::id, &p.current_pagparams()) + .filter(dsl::port.is_null()) + .select(Region::as_select()) + .load_async::<Region>(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|r| r.id()); + records.extend(batch); + } + + Ok(records) + } } #[cfg(test)] diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index f8cfd4789c..84f8e211a8 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -2346,6 +2346,7 @@ mod tests { 512_i64.try_into().unwrap(), 10, 10, + 10001, ); region_and_volume_ids[i].0 = region.id(); diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 44a89376d4..cefca5914f 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -242,7 +242,8 @@ pub fn allocation_query( ").param().sql(" AS volume_id, ").param().sql(" AS block_size, ").param().sql(" AS blocks_per_extent, - ").param().sql(" AS extent_count + ").param().sql(" AS extent_count, + NULL AS port FROM shuffled_candidate_datasets") // Only select the *additional* number of candidate regions for the required // redundancy level @@ -354,7 +355,7 @@ pub fn allocation_query( .sql(" inserted_regions AS ( INSERT INTO region - (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) + (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count, port) SELECT ").sql(AllColumnsOfRegion::with_prefix("candidate_regions")).sql(" FROM candidate_regions WHERE diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index b797e0bef7..624428c217 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -9,7 +9,8 @@ WITH region.volume_id, region.block_size, region.blocks_per_extent, - region.extent_count + region.extent_count, + region.port FROM region WHERE @@ -97,7 +98,8 @@ WITH $7 AS volume_id, $8 AS block_size, $9 AS blocks_per_extent, - $10 AS extent_count + $10 AS extent_count, + NULL AS port FROM shuffled_candidate_datasets LIMIT @@ -205,7 +207,8 @@ WITH volume_id, block_size, blocks_per_extent, - extent_count + extent_count, + port ) SELECT candidate_regions.id, @@ -215,7 +218,8 @@ WITH candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, - candidate_regions.extent_count + candidate_regions.extent_count, + candidate_regions.port FROM candidate_regions WHERE @@ -228,7 +232,8 @@ WITH region.volume_id, 
region.block_size, region.blocks_per_extent, - region.extent_count + region.extent_count, + region.port ), updated_datasets AS ( @@ -281,7 +286,8 @@ WITH old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, - old_regions.extent_count + old_regions.extent_count, + old_regions.port FROM old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) @@ -305,7 +311,8 @@ UNION inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, - inserted_regions.extent_count + inserted_regions.extent_count, + inserted_regions.port FROM inserted_regions INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index 4f60ddf5fe..748e0d7b15 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -9,7 +9,8 @@ WITH region.volume_id, region.block_size, region.blocks_per_extent, - region.extent_count + region.extent_count, + region.port FROM region WHERE @@ -95,7 +96,8 @@ WITH $6 AS volume_id, $7 AS block_size, $8 AS blocks_per_extent, - $9 AS extent_count + $9 AS extent_count, + NULL AS port FROM shuffled_candidate_datasets LIMIT @@ -203,7 +205,8 @@ WITH volume_id, block_size, blocks_per_extent, - extent_count + extent_count, + port ) SELECT candidate_regions.id, @@ -213,7 +216,8 @@ WITH candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, - candidate_regions.extent_count + candidate_regions.extent_count, + candidate_regions.port FROM candidate_regions WHERE @@ -226,7 +230,8 @@ WITH region.volume_id, region.block_size, region.blocks_per_extent, - region.extent_count + region.extent_count, + region.port ), updated_datasets AS ( @@ -279,7 +284,8 @@ WITH old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, - old_regions.extent_count + old_regions.extent_count, + old_regions.port FROM old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) @@ -303,7 +309,8 @@ UNION inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, - inserted_regions.extent_count + inserted_regions.extent_count, + inserted_regions.port FROM inserted_regions INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql index b90b2f2adf..8faca9ed4f 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_distinct_sleds.sql @@ -9,7 +9,8 @@ WITH region.volume_id, region.block_size, region.blocks_per_extent, - region.extent_count + region.extent_count, + region.port FROM region WHERE @@ -108,7 +109,8 @@ WITH $8 AS volume_id, $9 AS block_size, $10 AS blocks_per_extent, - $11 AS extent_count + $11 AS extent_count, + NULL AS port FROM shuffled_candidate_datasets LIMIT @@ -216,7 +218,8 @@ WITH volume_id, block_size, blocks_per_extent, - extent_count + extent_count, + port ) SELECT candidate_regions.id, @@ -226,7 +229,8 @@ WITH candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, - candidate_regions.extent_count + candidate_regions.extent_count, + candidate_regions.port FROM 
candidate_regions WHERE @@ -239,7 +243,8 @@ WITH region.volume_id, region.block_size, region.blocks_per_extent, - region.extent_count + region.extent_count, + region.port ), updated_datasets AS ( @@ -292,7 +297,8 @@ WITH old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, - old_regions.extent_count + old_regions.extent_count, + old_regions.port FROM old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) @@ -316,7 +322,8 @@ UNION inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, - inserted_regions.extent_count + inserted_regions.extent_count, + inserted_regions.port FROM inserted_regions INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id diff --git a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql index daf13f18ce..c1c03672a4 100644 --- a/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_with_snapshot_random_sleds.sql @@ -9,7 +9,8 @@ WITH region.volume_id, region.block_size, region.blocks_per_extent, - region.extent_count + region.extent_count, + region.port FROM region WHERE @@ -106,7 +107,8 @@ WITH $7 AS volume_id, $8 AS block_size, $9 AS blocks_per_extent, - $10 AS extent_count + $10 AS extent_count, + NULL AS port FROM shuffled_candidate_datasets LIMIT @@ -214,7 +216,8 @@ WITH volume_id, block_size, blocks_per_extent, - extent_count + extent_count, + port ) SELECT candidate_regions.id, @@ -224,7 +227,8 @@ WITH candidate_regions.volume_id, candidate_regions.block_size, candidate_regions.blocks_per_extent, - candidate_regions.extent_count + candidate_regions.extent_count, + candidate_regions.port FROM candidate_regions WHERE @@ -237,7 +241,8 @@ WITH region.volume_id, region.block_size, region.blocks_per_extent, - region.extent_count + region.extent_count, + region.port ), updated_datasets AS ( @@ -290,7 +295,8 @@ WITH old_regions.volume_id, old_regions.block_size, old_regions.blocks_per_extent, - old_regions.extent_count + old_regions.extent_count, + old_regions.port FROM old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id ) @@ -314,7 +320,8 @@ UNION inserted_regions.volume_id, inserted_regions.block_size, inserted_regions.blocks_per_extent, - inserted_regions.extent_count + inserted_regions.extent_count, + inserted_regions.port FROM inserted_regions INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 407f5479d5..8c1ab5ca5f 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -120,6 +120,7 @@ instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 +lookup_region_port.period_secs = 60 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. 
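For orientation, and not taken verbatim from this patch: the scan performed by the new `regions_missing_ports` datastore method and the `ListRegionsMissingPorts` omdb subcommand boils down to a query along these lines (a sketch against the `region` table as defined in dbinit.sql later in the patch; the paginated Diesel code above is the authoritative version):

SELECT id, dataset_id
FROM omicron.public.region
WHERE port IS NULL
ORDER BY id;

The partial index `lookup_regions_missing_ports` added later in this patch covers exactly this predicate, so the backfill scan stays cheap even as the region table grows.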
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 3c66d3242b..5f420773e0 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -99,6 +99,7 @@ use super::tasks::dns_servers; use super::tasks::external_endpoints; use super::tasks::instance_watcher; use super::tasks::inventory_collection; +use super::tasks::lookup_region_port; use super::tasks::metrics_producer_gc; use super::tasks::nat_cleanup; use super::tasks::phantom_disks; @@ -152,6 +153,7 @@ pub struct BackgroundTasks { pub task_service_firewall_propagation: Activator, pub task_abandoned_vmm_reaper: Activator, pub task_vpc_route_manager: Activator, + pub task_lookup_region_port: Activator, // Handles to activate background tasks that do not get used by Nexus // at-large. These background tasks are implementation details as far as @@ -229,6 +231,7 @@ impl BackgroundTasksInitializer { task_service_firewall_propagation: Activator::new(), task_abandoned_vmm_reaper: Activator::new(), task_vpc_route_manager: Activator::new(), + task_lookup_region_port: Activator::new(), task_internal_dns_propagation: Activator::new(), task_external_dns_propagation: Activator::new(), @@ -288,6 +291,7 @@ impl BackgroundTasksInitializer { task_service_firewall_propagation, task_abandoned_vmm_reaper, task_vpc_route_manager, + task_lookup_region_port, // Add new background tasks here. Be sure to use this binding in a // call to `Driver::register()` below. That's what actually wires // up the Activator to the corresponding background task. @@ -640,13 +644,25 @@ impl BackgroundTasksInitializer { by their instances", period: config.abandoned_vmm_reaper.period_secs, task_impl: Box::new(abandoned_vmm_reaper::AbandonedVmmReaper::new( - datastore, + datastore.clone(), )), opctx: opctx.child(BTreeMap::new()), watchers: vec![], activator: task_abandoned_vmm_reaper, }); + driver.register(TaskDefinition { + name: "lookup_region_port", + description: "fill in missing ports for region records", + period: config.lookup_region_port.period_secs, + task_impl: Box::new(lookup_region_port::LookupRegionPort::new( + datastore, + )), + opctx: opctx.child(BTreeMap::new()), + watchers: vec![], + activator: task_lookup_region_port, + }); + driver } } diff --git a/nexus/src/app/background/tasks/lookup_region_port.rs b/nexus/src/app/background/tasks/lookup_region_port.rs new file mode 100644 index 0000000000..b0f13ac986 --- /dev/null +++ b/nexus/src/app/background/tasks/lookup_region_port.rs @@ -0,0 +1,149 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task that fills in the Region record's port if it is missing. +//! +//! Originally the Region model did not contain the port that the Agent selected +//! for it, and this wasn't required early on. However, that column was added, +//! and this background task is responsible for filling in Regions that don't +//! have a recorded port. 
+ +use crate::app::background::BackgroundTask; +use anyhow::Result; +use crucible_agent_client::types::Region; +use crucible_agent_client::types::RegionId; +use crucible_agent_client::Client as CrucibleAgentClient; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use nexus_types::internal_api::background::LookupRegionPortStatus; +use serde_json::json; +use std::net::SocketAddrV6; +use std::sync::Arc; +use uuid::Uuid; + +pub struct LookupRegionPort { + datastore: Arc<DataStore>, +} + +impl LookupRegionPort { + pub fn new(datastore: Arc<DataStore>) -> Self { + LookupRegionPort { datastore } + } +} + +async fn get_region_from_agent( + agent_address: &SocketAddrV6, + region_id: Uuid, +) -> Result<Region> { + let url = format!("http://{}", agent_address); + let client = CrucibleAgentClient::new(&url); + + let result = client.region_get(&RegionId(region_id.to_string())).await?; + + Ok(result.into_inner()) +} + +impl BackgroundTask for LookupRegionPort { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let log = &opctx.log; + info!(&log, "lookup region port task started"); + + let mut status = LookupRegionPortStatus::default(); + + let regions_missing_ports = + match self.datastore.regions_missing_ports(opctx).await { + Ok(regions) => regions, + + Err(e) => { + let s = format!( + "could not find regions missing ports: {e}" + ); + + error!(log, "{s}"); + status.errors.push(s); + + return json!(status); + } + }; + + for region in regions_missing_ports { + let dataset_id = region.dataset_id(); + + let dataset = match self.datastore.dataset_get(dataset_id).await + { + Ok(dataset) => dataset, + + Err(e) => { + let s = + format!("could not get dataset {dataset_id}: {e}"); + + error!(log, "{s}"); + status.errors.push(s); + + continue; + } + }; + + let returned_region = match get_region_from_agent( + &dataset.address(), + region.id(), + ) + .await + { + Ok(returned_region) => returned_region, + + Err(e) => { + let s = format!( + "could not get region {} from agent: {e}", + region.id(), + ); + + error!(log, "{s}"); + status.errors.push(s); + + continue; + } + }; + + match self + .datastore + .region_set_port(region.id(), returned_region.port_number) + .await + { + Ok(()) => { + let s = format!( + "set region {} port as {}", + region.id(), + returned_region.port_number, + ); + + info!(log, "{s}"); + status.found_port_ok.push(s); + } + + Err(e) => { + let s = format!( + "could not set region {} port: {e}", + region.id(), + ); + + error!(log, "{s}"); + status.errors.push(s); + } + } + } + + info!(&log, "lookup region port task done"); + + json!(status) + } + .boxed() + } +} diff --git a/nexus/src/app/background/tasks/mod.rs b/nexus/src/app/background/tasks/mod.rs index cb2ab46c2a..5eb44ed7c3 100644 --- a/nexus/src/app/background/tasks/mod.rs +++ b/nexus/src/app/background/tasks/mod.rs @@ -15,6 +15,7 @@ pub mod dns_servers; pub mod external_endpoints; pub mod instance_watcher; pub mod inventory_collection; +pub mod lookup_region_port; pub mod metrics_producer_gc; pub mod nat_cleanup; pub mod networking; diff --git a/nexus/src/app/background/tasks/region_replacement_driver.rs b/nexus/src/app/background/tasks/region_replacement_driver.rs index 9dd8b6055f..c3af324f44 100644 --- a/nexus/src/app/background/tasks/region_replacement_driver.rs +++ b/nexus/src/app/background/tasks/region_replacement_driver.rs @@ -350,6 +350,7 @@ mod test { 512_i64.try_into().unwrap(), 10, 10, + 27015, ) }; @@ 
-362,6 +363,7 @@ mod test { 512_i64.try_into().unwrap(), 10, 10, + 27016, ) }; @@ -445,6 +447,7 @@ mod test { 512_i64.try_into().unwrap(), 10, 10, + 27015, ) }; @@ -457,6 +460,7 @@ mod test { 512_i64.try_into().unwrap(), 10, 10, + 27016, ) }; @@ -590,6 +594,7 @@ mod test { 512_i64.try_into().unwrap(), 10, 10, + 27015, ) }; @@ -602,6 +607,7 @@ mod test { 512_i64.try_into().unwrap(), 10, 10, + 27016, ) }; diff --git a/nexus/src/app/crucible.rs b/nexus/src/app/crucible.rs index 9acb4ee492..caa65255e5 100644 --- a/nexus/src/app/crucible.rs +++ b/nexus/src/app/crucible.rs @@ -91,6 +91,55 @@ impl super::Nexus { Ok(!on_in_service_physical_disk) } + /// Return a region's associated address + pub async fn region_addr( + &self, + log: &Logger, + region_id: Uuid, + ) -> Result<SocketAddrV6, Error> { + // If a region port was previously recorded, return the address right + // away + + if let Some(addr) = self.datastore().region_addr(region_id).await? { + return Ok(addr); + } + + // Otherwise, ask the appropriate Crucible agent + + let dataset = { + let region = self.datastore().get_region(region_id).await?; + self.datastore().dataset_get(region.dataset_id()).await? + }; + + let Some(returned_region) = + self.maybe_get_crucible_region(log, &dataset, region_id).await? + else { + // The Crucible agent didn't think the region exists? It could have + // been concurrently deleted, or otherwise garbage collected. + warn!(log, "no region for id {region_id} from Crucible Agent"); + return Err(Error::Gone); + }; + + // Record the returned port + self.datastore() + .region_set_port(region_id, returned_region.port_number) + .await?; + + // Return the address with the port that was just recorded - guard again + // against the case where the region record could have been concurrently + // deleted + match self.datastore().region_addr(region_id).await { + Ok(Some(addr)) => Ok(addr), + + Ok(None) => { + warn!(log, "region {region_id} deleted"); + Err(Error::Gone) + } + + Err(e) => Err(e), + } + } + /// Call out to Crucible agent and perform region creation. 
async fn ensure_region_in_dataset( &self, @@ -176,7 +225,7 @@ impl super::Nexus { ); }; - let region = backoff::retry_notify( + let returned_region = backoff::retry_notify( backoff::retry_policy_internal_service(), create_region, log_create_failure, @@ -194,7 +243,14 @@ impl super::Nexus { WaitError::Permanent(e) => e, })?; - Ok(region.into_inner()) + let returned_region = returned_region.into_inner(); + + // Record the returned port + self.datastore() + .region_set_port(region.id(), returned_region.port_number) + .await?; + + Ok(returned_region) } /// Returns a Ok(Some(Region)) if a region with id {region_id} exists, diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index eaf144eaa9..592463f5bb 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -7,9 +7,6 @@ use super::*; use crate::Nexus; -use crucible_agent_client::types::Region; -use crucible_agent_client::types::RegionId; -use crucible_agent_client::Client as CrucibleAgentClient; use crucible_pantry_client::types::VolumeConstructionRequest; use internal_dns::ServiceName; use nexus_db_queries::authz; @@ -110,40 +107,3 @@ pub(crate) async fn call_pantry_detach_for_disk( Ok(()) } - -/// GET a Region from a Crucible Agent -pub(crate) async fn get_region_from_agent( - agent_address: &SocketAddrV6, - region_id: Uuid, -) -> Result { - let url = format!("http://{}", agent_address); - let client = CrucibleAgentClient::new(&url); - - let result = client.region_get(&RegionId(region_id.to_string())).await; - - match result { - Ok(v) => Ok(v.into_inner()), - - Err(e) => match e { - crucible_agent_client::Error::ErrorResponse(rv) => { - match rv.status() { - http::StatusCode::NOT_FOUND => { - Err(Error::non_resourcetype_not_found(format!( - "{region_id} not found" - ))) - } - - status if status.is_client_error() => { - Err(Error::invalid_request(&rv.message)) - } - - _ => Err(Error::internal_error(&rv.message)), - } - } - - _ => Err(Error::internal_error( - "unexpected failure during `region_get`", - )), - }, - } -} diff --git a/nexus/src/app/sagas/region_replacement_finish.rs b/nexus/src/app/sagas/region_replacement_finish.rs index f200156ce6..a6964a8d06 100644 --- a/nexus/src/app/sagas/region_replacement_finish.rs +++ b/nexus/src/app/sagas/region_replacement_finish.rs @@ -248,6 +248,7 @@ pub(crate) mod test { 512_i64.try_into().unwrap(), 10, 10, + 12345, ) }; diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index be16656813..a4ba10775a 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -48,7 +48,6 @@ use super::{ ActionRegistry, NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID, }; -use crate::app::sagas::common_storage::get_region_from_agent; use crate::app::sagas::declare_saga_actions; use crate::app::RegionAllocationStrategy; use crate::app::{authn, db}; @@ -450,76 +449,54 @@ async fn srrs_get_old_region_address( let osagactx = sagactx.user_data(); let params = sagactx.saga_params::()?; - // It was a mistake not to record the port of a region in the Region record. - // However, we haven't needed it until now! If the Crucible agent is gone - // (which it will be if the disk is expunged), assume that the region in the - // read/write portion of the volume with the same dataset address (there - // should only be one due to the allocation strategy!) is the old region. 
- - let opctx = crate::context::op_context_for_saga_action( - &sagactx, - ¶ms.serialized_authn, - ); - - let db_region = osagactx - .datastore() - .get_region(params.request.old_region_id) - .await - .map_err(ActionError::action_failed)?; - - let targets = osagactx - .datastore() - .get_dataset_rw_regions_in_volume( - &opctx, - db_region.dataset_id(), - db_region.volume_id(), - ) - .await - .map_err(ActionError::action_failed)?; - - if targets.len() == 1 { - // If there's a single RW region in the volume that matches this - // region's dataset, then it must match. Return the target - Ok(targets[0]) - } else { - // Otherwise, Nexus cannot know which region to target for replacement. - // Attempt grabbing the id from the corresponding Crucible agent: the - // sled or disk may not be physically gone, or we may be running in a - // test where the allocation strategy does not mandate distinct sleds. - - let db_dataset = osagactx - .datastore() - .dataset_get(db_region.dataset_id()) - .await - .map_err(ActionError::action_failed)?; - - match get_region_from_agent( - &db_dataset.address(), - params.request.old_region_id, - ) - .await - { - Ok(region) => { - // If the Crucible agent is still answering (i.e. if a region - // replacement was requested and the sled is still there, or if - // this is running in a test), then we know the port number for - // the region. - Ok(SocketAddrV6::new( - *db_dataset.address().ip(), - region.port_number, - 0, - 0, - )) - } - - Err(e) => { - error!( - log, - "error contacting crucible agent: {e}"; - "address" => ?db_dataset.address(), - ); - - // Bail out here! + // Either retrieve the address from the database (because the port was + // previously recorded), or attempt grabbing the port from the corresponding + // Crucible agent: the sled or disk may not be physically gone, or we may be + // running in a test where the allocation strategy does not mandate distinct + // sleds. + + let maybe_addr = + osagactx.nexus().region_addr(log, params.request.old_region_id).await; + + match maybe_addr { + Ok(addr) => Ok(addr), + + Err(Error::Gone) => { + // It was a mistake not to record the port of a region in the Region + // record. However, we haven't needed it until now! If the Crucible + // agent is gone (which it will be if the disk is expunged), assume + // that the region in the read/write portion of the volume with the + // same dataset address (there should only be one due to the + // allocation strategy!) is the old region. + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let db_region = osagactx + .datastore() + .get_region(params.request.old_region_id) + .await + .map_err(ActionError::action_failed)?; + + let targets = osagactx + .datastore() + .get_dataset_rw_regions_in_volume( + &opctx, + db_region.dataset_id(), + db_region.volume_id(), + ) + .await + .map_err(ActionError::action_failed)?; + + if targets.len() == 1 { + // If there's a single RW region in the volume that matches this + // region's dataset, then it must match. Return the target. + Ok(targets[0]) + } else { + // Otherwise, Nexus cannot know the region's port. Return an + // error. 
Err(ActionError::action_failed(format!( "{} regions match dataset {} in volume {}", targets.len(), @@ -528,6 +505,8 @@ async fn srrs_get_old_region_address( ))) } } + + Err(e) => Err(ActionError::action_failed(e)), } } @@ -945,6 +924,7 @@ pub(crate) mod test { 512_i64.try_into().unwrap(), 10, 10, + 1001, ), Region::new( datasets[1].id(), @@ -952,6 +932,7 @@ 512_i64.try_into().unwrap(), 10, 10, + 1002, ), Region::new( datasets[2].id(), @@ -959,6 +940,7 @@ 512_i64.try_into().unwrap(), 10, 10, + 1003, ), Region::new( datasets[3].id(), @@ -966,6 +948,7 @@ 512_i64.try_into().unwrap(), 10, 10, + 1004, ), ]; diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index f90a035de6..8415a192b1 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -119,6 +119,7 @@ instance_watcher.period_secs = 30 service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 +lookup_region_port.period_secs = 60 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/nexus/types/src/internal_api/background.rs b/nexus/types/src/internal_api/background.rs index be4c2ec9c0..6463aa8ab6 100644 --- a/nexus/types/src/internal_api/background.rs +++ b/nexus/types/src/internal_api/background.rs @@ -12,3 +12,10 @@ pub struct RegionReplacementDriverStatus { pub finish_invoked_ok: Vec<String>, pub errors: Vec<String>, } + +/// The status of a `lookup_region_port` background task activation +#[derive(Serialize, Deserialize, Default)] +pub struct LookupRegionPortStatus { + pub found_port_ok: Vec<String>, + pub errors: Vec<String>, +} diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 57f6838a55..7d93a5d5bd 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -574,7 +574,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.region ( /* Metadata describing the region */ block_size INT NOT NULL, blocks_per_extent INT NOT NULL, - extent_count INT NOT NULL + extent_count INT NOT NULL, + + port INT4 ); /* @@ -593,6 +595,10 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_region_by_dataset on omicron.public.reg id ); +CREATE INDEX IF NOT EXISTS lookup_regions_missing_ports + on omicron.public.region (id) + WHERE port IS NULL; + /* * A snapshot of a region, within a dataset. 
*/ @@ -4137,7 +4143,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '81.0.0', NULL) + (TRUE, NOW(), NOW(), '82.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/region-port/up01.sql b/schema/crdb/region-port/up01.sql new file mode 100644 index 0000000000..9605cf8a65 --- /dev/null +++ b/schema/crdb/region-port/up01.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.region + ADD COLUMN IF NOT EXISTS port INT4 DEFAULT NULL; diff --git a/schema/crdb/region-port/up02.sql b/schema/crdb/region-port/up02.sql new file mode 100644 index 0000000000..12cb566e27 --- /dev/null +++ b/schema/crdb/region-port/up02.sql @@ -0,0 +1,2 @@ +ALTER TABLE omicron.public.region + ALTER COLUMN port DROP DEFAULT; diff --git a/schema/crdb/region-port/up03.sql b/schema/crdb/region-port/up03.sql new file mode 100644 index 0000000000..4247a88696 --- /dev/null +++ b/schema/crdb/region-port/up03.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS lookup_regions_missing_ports + on omicron.public.region (id) + WHERE port IS NULL; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index e63eb411c3..92d3d6e392 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -61,6 +61,7 @@ service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 +lookup_region_port.period_secs = 60 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index ced1da17b3..8de9b6cb79 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -61,6 +61,7 @@ service_firewall_propagation.period_secs = 300 v2p_mapping_propagation.period_secs = 30 instance_watcher.period_secs = 30 abandoned_vmm_reaper.period_secs = 60 +lookup_region_port.period_secs = 60 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds.
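As a post-deploy sanity check, and not part of this patch itself: the following CockroachDB statements are a sketch of how an operator might confirm the new column and partial index exist and watch the backfill converge (object names are taken from dbinit.sql and the region-port migration above):

SHOW COLUMNS FROM omicron.public.region;
SHOW INDEXES FROM omicron.public.region;

SELECT count(*) AS regions_missing_ports
FROM omicron.public.region
WHERE port IS NULL;

The count should drop toward zero as the lookup_region_port background task contacts each Crucible agent and records the missing ports.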