diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 44b34b0220..98669ddc06 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2879,7 +2879,12 @@ async fn cmd_db_validate_region_snapshots( use crucible_agent_client::types::State; use crucible_agent_client::Client as CrucibleAgentClient; - let url = format!("http://{}", dataset.address()); + let Some(dataset_addr) = dataset.address() else { + eprintln!("Dataset {} missing an IP address", dataset.id()); + continue; + }; + + let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); let actual_region_snapshots = client @@ -2940,7 +2945,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: region_snapshot.dataset_id, region_id: region_snapshot.region_id, snapshot_id: region_snapshot.snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: String::from( "region snapshot was deleted, please remove its record", ), @@ -2955,7 +2960,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: region_snapshot.dataset_id, region_id: region_snapshot.region_id, snapshot_id: region_snapshot.snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: String::from( "NEXUS BUG: region snapshot was deleted, but the higher level snapshot was not!", ), @@ -2984,7 +2989,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: region_snapshot.dataset_id, region_id: region_snapshot.region_id, snapshot_id: region_snapshot.snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: format!( "AGENT BUG: region snapshot was deleted but has a running snapshot in state {:?}!", running_snapshot.state, @@ -3034,7 +3039,12 @@ async fn cmd_db_validate_region_snapshots( use crucible_agent_client::types::State; use crucible_agent_client::Client as CrucibleAgentClient; - let url = format!("http://{}", dataset.address()); + let Some(dataset_addr) = dataset.address() else { + eprintln!("Dataset {} missing an IP address", dataset.id()); + continue; + }; + + let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); let actual_region_snapshots = client @@ -3052,7 +3062,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: dataset.id(), region_id: region.id(), snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: String::from( "Nexus does not know about this snapshot!", ), @@ -3077,7 +3087,7 @@ async fn cmd_db_validate_region_snapshots( dataset_id: dataset.id(), region_id: region.id(), snapshot_id, - dataset_addr: dataset.address(), + dataset_addr, error: String::from( "Nexus does not know about this running snapshot!" ), diff --git a/nexus/db-model/src/dataset.rs b/nexus/db-model/src/dataset.rs index 65c0070509..a9dee990b9 100644 --- a/nexus/db-model/src/dataset.rs +++ b/nexus/db-model/src/dataset.rs @@ -36,8 +36,8 @@ pub struct Dataset { pub pool_id: Uuid, - ip: ipv6::Ipv6Addr, - port: SqlU16, + ip: Option, + port: Option, pub kind: DatasetKind, pub size_used: Option, @@ -47,7 +47,7 @@ impl Dataset { pub fn new( id: Uuid, pool_id: Uuid, - addr: SocketAddrV6, + addr: Option, kind: DatasetKind, ) -> Self { let size_used = match kind { @@ -59,19 +59,19 @@ impl Dataset { time_deleted: None, rcgen: Generation::new(), pool_id, - ip: addr.ip().into(), - port: addr.port().into(), + ip: addr.map(|addr| addr.ip().into()), + port: addr.map(|addr| addr.port().into()), kind, size_used, } } - pub fn address(&self) -> SocketAddrV6 { - self.address_with_port(self.port.into()) + pub fn address(&self) -> Option { + self.address_with_port(self.port?.into()) } - pub fn address_with_port(&self, port: u16) -> SocketAddrV6 { - SocketAddrV6::new(Ipv6Addr::from(self.ip), port, 0, 0) + pub fn address_with_port(&self, port: u16) -> Option { + Some(SocketAddrV6::new(Ipv6Addr::from(self.ip?), port, 0, 0)) } } diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 89ae6c18c5..dc57de9263 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1022,8 +1022,8 @@ table! { pool_id -> Uuid, - ip -> Inet, - port -> Int4, + ip -> Nullable, + port -> Nullable, kind -> crate::DatasetKindEnum, size_used -> Nullable, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 3e740590c5..cc34a3581c 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(82, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(83, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(83, "dataset-address-optional"), KnownVersion::new(82, "region-port"), KnownVersion::new(81, "add-nullable-filesystem-pool"), KnownVersion::new(80, "add-instance-id-to-migrations"), diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs index 3f1df24e45..a08e346fe8 100644 --- a/nexus/db-queries/src/db/datastore/dataset.rs +++ b/nexus/db-queries/src/db/datastore/dataset.rs @@ -290,7 +290,7 @@ mod test { .dataset_insert_if_not_exists(Dataset::new( Uuid::new_v4(), zpool_id, - "[::1]:0".parse().unwrap(), + Some("[::1]:0".parse().unwrap()), DatasetKind::Crucible, )) .await @@ -323,7 +323,7 @@ mod test { .dataset_insert_if_not_exists(Dataset::new( dataset1.id(), zpool_id, - "[::1]:12345".parse().unwrap(), + Some("[::1]:12345".parse().unwrap()), DatasetKind::Cockroach, )) .await @@ -339,7 +339,7 @@ mod test { .dataset_upsert(Dataset::new( Uuid::new_v4(), zpool_id, - "[::1]:0".parse().unwrap(), + Some("[::1]:0".parse().unwrap()), DatasetKind::Cockroach, )) .await @@ -371,7 +371,7 @@ mod test { .dataset_insert_if_not_exists(Dataset::new( dataset1.id(), zpool_id, - "[::1]:12345".parse().unwrap(), + Some("[::1]:12345".parse().unwrap()), DatasetKind::Cockroach, )) .await diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 461e71d88a..07b98c0542 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -892,7 +892,8 @@ mod test { .collect() .await; - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let bogus_addr = + Some(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0)); let datasets = stream::iter(zpools) .map(|zpool| { @@ -1266,7 +1267,8 @@ mod test { .collect() .await; - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let bogus_addr = + Some(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0)); // 1 dataset per zpool stream::iter(zpool_ids.clone()) @@ -1365,7 +1367,8 @@ mod test { .collect() .await; - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let bogus_addr = + Some(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0)); // 1 dataset per zpool stream::iter(zpool_ids) @@ -1444,7 +1447,8 @@ mod test { physical_disk_id, ) .await; - let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let bogus_addr = + Some(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0)); let dataset = Dataset::new( Uuid::new_v4(), zpool_id, diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 6832665944..3b1c20c1df 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -496,7 +496,13 @@ impl DataStore { let dataset = self.dataset_get(region.dataset_id()).await?; - Ok(Some(SocketAddrV6::new(*dataset.address().ip(), port, 0, 0))) + let Some(address) = dataset.address() else { + return Err(Error::internal_error( + "Dataset for Crucible region does know IP address", + )); + }; + + Ok(Some(SocketAddrV6::new(*address.ip(), port, 0, 0))) } pub async fn regions_missing_ports( diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index 84f8e211a8..b13006aa95 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -1164,12 +1164,14 @@ impl DataStore { let mut targets: Vec = vec![]; - find_matching_rw_regions_in_volume( - &vcr, - dataset.address().ip(), - &mut targets, - ) - .map_err(|e| Error::internal_error(&e.to_string()))?; + let Some(address) = dataset.address() else { + return Err(Error::internal_error( + "Crucible Dataset missing IP address", + )); + }; + + find_matching_rw_regions_in_volume(&vcr, address.ip(), &mut targets) + .map_err(|e| Error::internal_error(&e.to_string()))?; Ok(targets) } diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 51ac45c9df..139c94c53f 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -66,7 +66,7 @@ pub(crate) async fn ensure_dataset_records_exist( let dataset = Dataset::new( id.into_untyped_uuid(), pool_id.into_untyped_uuid(), - address, + Some(address), kind.into(), ); let maybe_inserted = datastore diff --git a/nexus/src/app/background/tasks/lookup_region_port.rs b/nexus/src/app/background/tasks/lookup_region_port.rs index b0f13ac986..fbfc5c5af2 100644 --- a/nexus/src/app/background/tasks/lookup_region_port.rs +++ b/nexus/src/app/background/tasks/lookup_region_port.rs @@ -91,26 +91,33 @@ impl BackgroundTask for LookupRegionPort { } }; - let returned_region = match get_region_from_agent( - &dataset.address(), - region.id(), - ) - .await - { - Ok(returned_region) => returned_region, + let Some(dataset_addr) = dataset.address() else { + let s = format!( + "Missing dataset address for dataset: {dataset_id}" + ); + error!(log, "{s}"); + status.errors.push(s); + continue; + }; - Err(e) => { - let s = format!( - "could not get region {} from agent: {e}", - region.id(), - ); + let returned_region = + match get_region_from_agent(&dataset_addr, region.id()) + .await + { + Ok(returned_region) => returned_region, - error!(log, "{s}"); - status.errors.push(s); + Err(e) => { + let s = format!( + "could not get region {} from agent: {e}", + region.id(), + ); - continue; - } - }; + error!(log, "{s}"); + status.errors.push(s); + + continue; + } + }; match self .datastore diff --git a/nexus/src/app/crucible.rs b/nexus/src/app/crucible.rs index caa65255e5..72a5c80baf 100644 --- a/nexus/src/app/crucible.rs +++ b/nexus/src/app/crucible.rs @@ -69,11 +69,17 @@ impl super::Nexus { fn crucible_agent_client_for_dataset( &self, dataset: &db::model::Dataset, - ) -> CrucibleAgentClient { - CrucibleAgentClient::new_with_client( - &format!("http://{}", dataset.address()), + ) -> Result { + let Some(addr) = dataset.address() else { + return Err(Error::internal_error( + "Missing crucible dataset address", + )); + }; + + Ok(CrucibleAgentClient::new_with_client( + &format!("http://{}", addr), self.reqwest_client.clone(), - ) + )) } /// Return if the Crucible agent is expected to be there and answer Nexus: @@ -147,7 +153,7 @@ impl super::Nexus { dataset: &db::model::Dataset, region: &db::model::Region, ) -> Result { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let Ok(extent_count) = u32::try_from(region.extent_count()) else { @@ -261,7 +267,7 @@ impl super::Nexus { dataset: &db::model::Dataset, region_id: Uuid, ) -> Result, Error> { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( @@ -303,7 +309,7 @@ impl super::Nexus { dataset: &db::model::Dataset, region_id: Uuid, ) -> Result { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( @@ -343,7 +349,7 @@ impl super::Nexus { dataset: &db::model::Dataset, region_id: Uuid, ) -> Result<(), Error> { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( @@ -386,7 +392,7 @@ impl super::Nexus { region_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( @@ -435,7 +441,7 @@ impl super::Nexus { region_id: Uuid, snapshot_id: Uuid, ) -> Result<(), Error> { - let client = self.crucible_agent_client_for_dataset(dataset); + let client = self.crucible_agent_client_for_dataset(dataset)?; let dataset_id = dataset.id(); let result = ProgenitorOperationRetry::new( diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 422540c0b8..13b30fd47a 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -145,7 +145,7 @@ impl super::Nexus { db::model::Dataset::new( dataset.dataset_id, dataset.zpool_id, - dataset.request.address, + Some(dataset.request.address), dataset.request.kind.into(), ) }) diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index bdccd7f79b..c350534617 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -498,9 +498,17 @@ async fn sdc_regions_ensure( .map(|(dataset, region)| { dataset .address_with_port(region.port_number) - .to_string() + .ok_or_else(|| { + ActionError::action_failed( + Error::internal_error(&format!( + "missing IP address for dataset {}", + dataset.id(), + )), + ) + }) + .map(|addr| addr.to_string()) }) - .collect(), + .collect::, ActionError>>()?, lossy: false, flush_timeout: None, diff --git a/nexus/src/app/sagas/region_replacement_start.rs b/nexus/src/app/sagas/region_replacement_start.rs index a4ba10775a..1297158b24 100644 --- a/nexus/src/app/sagas/region_replacement_start.rs +++ b/nexus/src/app/sagas/region_replacement_start.rs @@ -534,12 +534,13 @@ async fn srrs_replace_region_in_volume( "ensured_dataset_and_region", )?; - let new_region_address = SocketAddrV6::new( - *new_dataset.address().ip(), - ensured_region.port_number, - 0, - 0, - ); + let Some(new_address) = new_dataset.address() else { + return Err(ActionError::action_failed(Error::internal_error( + "Dataset missing IP address", + ))); + }; + let new_region_address = + SocketAddrV6::new(*new_address.ip(), ensured_region.port_number, 0, 0); // If this node is rerun, the forward action will have overwritten // db_region's volume id, so get the cached copy. @@ -611,12 +612,11 @@ async fn srrs_replace_region_in_volume_undo( "ensured_dataset_and_region", )?; - let new_region_address = SocketAddrV6::new( - *new_dataset.address().ip(), - ensured_region.port_number, - 0, - 0, - ); + let Some(new_address) = new_dataset.address() else { + anyhow::bail!("Dataset missing IP address"); + }; + let new_region_address = + SocketAddrV6::new(*new_address.ip(), ensured_region.port_number, 0, 0); // The forward action will have overwritten db_region's volume id, so get // the cached copy. @@ -894,25 +894,25 @@ pub(crate) mod test { Dataset::new( Uuid::new_v4(), Uuid::new_v4(), - "[fd00:1122:3344:101::1]:12345".parse().unwrap(), + Some("[fd00:1122:3344:101::1]:12345".parse().unwrap()), DatasetKind::Crucible, ), Dataset::new( Uuid::new_v4(), Uuid::new_v4(), - "[fd00:1122:3344:102::1]:12345".parse().unwrap(), + Some("[fd00:1122:3344:102::1]:12345".parse().unwrap()), DatasetKind::Crucible, ), Dataset::new( Uuid::new_v4(), Uuid::new_v4(), - "[fd00:1122:3344:103::1]:12345".parse().unwrap(), + Some("[fd00:1122:3344:103::1]:12345".parse().unwrap()), DatasetKind::Crucible, ), Dataset::new( Uuid::new_v4(), Uuid::new_v4(), - "[fd00:1122:3344:104::1]:12345".parse().unwrap(), + Some("[fd00:1122:3344:104::1]:12345".parse().unwrap()), DatasetKind::Crucible, ), ]; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 9e665a1de1..5a8313229a 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -411,9 +411,17 @@ async fn ssc_regions_ensure( .map(|(dataset, region)| { dataset .address_with_port(region.port_number) - .to_string() + .ok_or_else(|| { + ActionError::action_failed( + Error::internal_error(&format!( + "missing IP address for dataset {}", + dataset.id(), + )), + ) + }) + .map(|addr| addr.to_string()) }) - .collect(), + .collect::, ActionError>>()?, lossy: false, flush_timeout: None, @@ -1232,8 +1240,14 @@ async fn ssc_start_running_snapshot( let mut map: BTreeMap = BTreeMap::new(); for (dataset, region) in datasets_and_regions { + let Some(dataset_addr) = dataset.address() else { + return Err(ActionError::action_failed(Error::internal_error( + &format!("Missing IP address for dataset {}", dataset.id(),), + ))); + }; + // Create a Crucible agent client - let url = format!("http://{}", dataset.address()); + let url = format!("http://{}", dataset_addr); let client = CrucibleAgentClient::new(&url); info!( @@ -1299,11 +1313,21 @@ async fn ssc_start_running_snapshot( // Map from the region to the snapshot let region_addr = format!( "{}", - dataset.address_with_port(crucible_region.port_number) + SocketAddrV6::new( + *dataset_addr.ip(), + crucible_region.port_number, + 0, + 0 + ) ); let snapshot_addr = format!( "{}", - dataset.address_with_port(crucible_running_snapshot.port_number) + SocketAddrV6::new( + *dataset_addr.ip(), + crucible_running_snapshot.port_number, + 0, + 0 + ) ); info!(log, "map {} to {}", region_addr, snapshot_addr); map.insert(region_addr, snapshot_addr.clone()); diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 0165b2d261..6e21470368 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -306,7 +306,8 @@ impl super::Nexus { "dataset_id" => id.to_string(), "address" => address.to_string() ); - let dataset = db::model::Dataset::new(id, zpool_id, address, kind); + let dataset = + db::model::Dataset::new(id, zpool_id, Some(address), kind); self.db_datastore.dataset_upsert(dataset).await?; Ok(()) } diff --git a/schema/crdb/dataset-address-optional/up01.sql b/schema/crdb/dataset-address-optional/up01.sql new file mode 100644 index 0000000000..e29215251d --- /dev/null +++ b/schema/crdb/dataset-address-optional/up01.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.dataset ALTER COLUMN ip DROP NOT NULL; diff --git a/schema/crdb/dataset-address-optional/up02.sql b/schema/crdb/dataset-address-optional/up02.sql new file mode 100644 index 0000000000..997294fa12 --- /dev/null +++ b/schema/crdb/dataset-address-optional/up02.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.dataset ALTER COLUMN port DROP NOT NULL; diff --git a/schema/crdb/dataset-address-optional/up03.sql b/schema/crdb/dataset-address-optional/up03.sql new file mode 100644 index 0000000000..0af212e320 --- /dev/null +++ b/schema/crdb/dataset-address-optional/up03.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.dataset ADD CONSTRAINT IF NOT EXISTS ip_and_port_set_for_crucible CHECK ( + (kind != 'crucible') OR + (kind = 'crucible' AND ip IS NOT NULL and port IS NOT NULL) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index a40e148683..7fc83ad5d0 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -525,8 +525,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( pool_id UUID NOT NULL, /* Contact information for the dataset */ - ip INET NOT NULL, - port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, + ip INET, + port INT4 CHECK (port BETWEEN 0 AND 65535), kind omicron.public.dataset_kind NOT NULL, @@ -537,6 +537,11 @@ CREATE TABLE IF NOT EXISTS omicron.public.dataset ( CONSTRAINT size_used_column_set_for_crucible CHECK ( (kind != 'crucible') OR (kind = 'crucible' AND size_used IS NOT NULL) + ), + + CONSTRAINT ip_and_port_set_for_crucible CHECK ( + (kind != 'crucible') OR + (kind = 'crucible' AND ip IS NOT NULL and port IS NOT NULL) ) ); @@ -4140,7 +4145,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '82.0.0', NULL) + (TRUE, NOW(), NOW(), '83.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT;