From 53d8d3a6c3e250b6ad4b4c4072b15c7d73917548 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Tue, 26 Mar 2024 12:58:22 -0700 Subject: [PATCH 001/334] Periodically refresh a collector's list of assigned producers (#5326) - Add an endpoint in Nexus's internal API for listing the assigned producers for a collector. - Spawn a task in the `oximeter-collector` which will periodically fetch the list; remove any producers not in that list; and ensure any that are. - Adds the time of this last refresh to the `oximeter-collector` server's API for fetching info about the collector - Remove old use of `reqwest` directly to register collector, opt-in to the generated Nexus client. - Adds some type conversions in the nexus client crate to simplify these new interfaces --- clients/nexus-client/src/lib.rs | 32 +++ dev-tools/omdb/src/bin/omdb/oximeter.rs | 5 + nexus/db-model/src/producer_endpoint.rs | 15 ++ nexus/db-queries/src/db/datastore/oximeter.rs | 19 +- nexus/src/app/instance.rs | 2 +- nexus/src/app/oximeter.rs | 97 +++----- nexus/src/internal_api/http_entrypoints.rs | 63 ++++- nexus/test-utils/src/lib.rs | 1 + nexus/tests/integration_tests/oximeter.rs | 92 ------- openapi/nexus-internal.json | 87 +++++++ openapi/oximeter.json | 6 + oximeter/collector/Cargo.toml | 2 +- oximeter/collector/src/agent.rs | 226 +++++++++++++++++- oximeter/collector/src/http_entrypoints.rs | 8 +- oximeter/collector/src/lib.rs | 53 +++- 15 files changed, 509 insertions(+), 199 deletions(-) diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index ad8269e675..0a1a569f42 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -382,3 +382,35 @@ impl From } } } + +impl From + for omicron_common::api::internal::nexus::ProducerKind +{ + fn from(kind: types::ProducerKind) -> Self { + use omicron_common::api::internal::nexus::ProducerKind; + match kind { + types::ProducerKind::SledAgent => ProducerKind::SledAgent, + types::ProducerKind::Instance => ProducerKind::Instance, + types::ProducerKind::Service => ProducerKind::Service, + } + } +} + +impl TryFrom + for omicron_common::api::internal::nexus::ProducerEndpoint +{ + type Error = String; + + fn try_from(ep: types::ProducerEndpoint) -> Result { + let Ok(address) = ep.address.parse() else { + return Err(format!("Invalid IP address: {}", ep.address)); + }; + Ok(Self { + id: ep.id, + kind: ep.kind.into(), + address, + base_route: ep.base_route, + interval: ep.interval.into(), + }) + } +} diff --git a/dev-tools/omdb/src/bin/omdb/oximeter.rs b/dev-tools/omdb/src/bin/omdb/oximeter.rs index e0f20556a2..29491bb083 100644 --- a/dev-tools/omdb/src/bin/omdb/oximeter.rs +++ b/dev-tools/omdb/src/bin/omdb/oximeter.rs @@ -67,6 +67,11 @@ impl OximeterArgs { .with(tabled::settings::Padding::new(0, 1, 0, 0)) .to_string(); println!("Collector ID: {}\n", info.id); + let last_refresh = info + .last_refresh + .map(|r| r.to_string()) + .unwrap_or(String::from("Never")); + println!("Last refresh: {}\n", last_refresh); println!("{table}"); Ok(()) } diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index 55533690f1..1a38781ce5 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -2,6 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+use std::net::SocketAddr; +use std::time::Duration; + use super::SqlU16; use crate::impl_enum_type; use crate::schema::metric_producer; @@ -44,6 +47,18 @@ impl From for internal::nexus::ProducerKind { } } +impl From for internal::nexus::ProducerEndpoint { + fn from(ep: ProducerEndpoint) -> Self { + internal::nexus::ProducerEndpoint { + id: ep.id(), + kind: ep.kind.into(), + address: SocketAddr::new(ep.ip.ip(), *ep.port), + base_route: ep.base_route.clone(), + interval: Duration::from_secs_f64(ep.interval), + } + } +} + /// Information announced by a metric server, used so that clients can contact it and collect /// available metric data from it. #[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] diff --git a/nexus/db-queries/src/db/datastore/oximeter.rs b/nexus/db-queries/src/db/datastore/oximeter.rs index 116e8586b0..55e8e0f5f6 100644 --- a/nexus/db-queries/src/db/datastore/oximeter.rs +++ b/nexus/db-queries/src/db/datastore/oximeter.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods related to Oximeter. use super::DataStore; +use crate::context::OpContext; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; @@ -24,12 +25,13 @@ impl DataStore { /// Lookup an oximeter instance by its ID. pub async fn oximeter_lookup( &self, + opctx: &OpContext, id: &Uuid, ) -> Result { use db::schema::oximeter::dsl; dsl::oximeter .find(*id) - .first_async(&*self.pool_connection_unauthorized().await?) + .first_async(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } @@ -37,6 +39,7 @@ impl DataStore { /// Create a record for a new Oximeter instance pub async fn oximeter_create( &self, + opctx: &OpContext, info: &OximeterInfo, ) -> Result<(), Error> { use db::schema::oximeter::dsl; @@ -54,7 +57,7 @@ impl DataStore { dsl::ip.eq(info.ip), dsl::port.eq(info.port), )) - .execute_async(&*self.pool_connection_unauthorized().await?) + .execute_async(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| { public_error_from_diesel( @@ -71,12 +74,13 @@ impl DataStore { /// List the oximeter collector instances pub async fn oximeter_list( &self, + opctx: &OpContext, page_params: &DataPageParams<'_, Uuid>, ) -> ListResultVec { use db::schema::oximeter::dsl; paginated(dsl::oximeter, dsl::id, page_params) .load_async::( - &*self.pool_connection_unauthorized().await?, + &*self.pool_connection_authorized(opctx).await?, ) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) @@ -85,6 +89,7 @@ impl DataStore { /// Create a record for a new producer endpoint pub async fn producer_endpoint_create( &self, + opctx: &OpContext, producer: &ProducerEndpoint, ) -> Result<(), Error> { use db::schema::metric_producer::dsl; @@ -102,7 +107,7 @@ impl DataStore { dsl::interval.eq(producer.interval), dsl::base_route.eq(producer.base_route.clone()), )) - .execute_async(&*self.pool_connection_unauthorized().await?) + .execute_async(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| { public_error_from_diesel( @@ -123,13 +128,14 @@ impl DataStore { /// returned. If there was no record, `None` is returned. 
pub async fn producer_endpoint_delete( &self, + opctx: &OpContext, id: &Uuid, ) -> Result, Error> { use db::schema::metric_producer::dsl; diesel::delete(dsl::metric_producer.find(*id)) .returning(dsl::oximeter_id) .get_result_async::( - &*self.pool_connection_unauthorized().await?, + &*self.pool_connection_authorized(opctx).await?, ) .await .optional() @@ -139,6 +145,7 @@ impl DataStore { /// List the producer endpoint records by the oximeter instance to which they're assigned. pub async fn producers_list_by_oximeter_id( &self, + opctx: &OpContext, oximeter_id: Uuid, pagparams: &DataPageParams<'_, Uuid>, ) -> ListResultVec { @@ -147,7 +154,7 @@ impl DataStore { .filter(dsl::oximeter_id.eq(oximeter_id)) .order_by((dsl::oximeter_id, dsl::id)) .select(ProducerEndpoint::as_select()) - .load_async(&*self.pool_connection_unauthorized().await?) + .load_async(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| { public_error_from_diesel( diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 2300bd56f2..e29ed21192 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1562,7 +1562,7 @@ impl super::Nexus { // an instance's state changes. // // Tracked in https://github.com/oxidecomputer/omicron/issues/3742. - self.unassign_producer(instance_id).await?; + self.unassign_producer(opctx, instance_id).await?; } // Write the new instance and VMM states back to CRDB. This needs to be diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index a168b35293..f178bffc8c 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -9,13 +9,12 @@ use crate::internal_api::params::OximeterInfo; use dropshot::PaginationParams; use internal_dns::resolver::{ResolveError, Resolver}; use internal_dns::ServiceName; +use nexus_db_queries::context::OpContext; use nexus_db_queries::db; -use nexus_db_queries::db::identity::Asset; use omicron_common::address::CLICKHOUSE_PORT; -use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::api::external::PaginationOrder; -use omicron_common::api::internal::nexus; +use omicron_common::api::external::{DataPageParams, ListResultVec}; +use omicron_common::api::internal::nexus::{self, ProducerEndpoint}; use omicron_common::backoff; use oximeter_client::Client as OximeterClient; use oximeter_db::query::Timestamp; @@ -73,77 +72,34 @@ impl super::Nexus { /// Insert a new record of an Oximeter collector server. pub(crate) async fn upsert_oximeter_collector( &self, + opctx: &OpContext, oximeter_info: &OximeterInfo, ) -> Result<(), Error> { // Insert the Oximeter instance into the DB. Note that this _updates_ the record, // specifically, the time_modified, ip, and port columns, if the instance has already been // registered. let db_info = db::model::OximeterInfo::new(&oximeter_info); - self.db_datastore.oximeter_create(&db_info).await?; + self.db_datastore.oximeter_create(opctx, &db_info).await?; info!( self.log, "registered new oximeter metric collection server"; "collector_id" => ?oximeter_info.collector_id, "address" => oximeter_info.address, ); + Ok(()) + } - // Regardless, notify the collector of any assigned metric producers. - // - // This should be empty if this Oximeter collector is registering for - // the first time, but may not be if the service is re-registering after - // failure. 
- let client = self.build_oximeter_client( - &oximeter_info.collector_id, - oximeter_info.address, - ); - let mut last_producer_id = None; - loop { - let pagparams = DataPageParams { - marker: last_producer_id.as_ref(), - direction: PaginationOrder::Ascending, - limit: std::num::NonZeroU32::new(100).unwrap(), - }; - let producers = self - .db_datastore - .producers_list_by_oximeter_id( - oximeter_info.collector_id, - &pagparams, - ) - .await?; - if producers.is_empty() { - return Ok(()); - } - debug!( - self.log, - "re-assigning existing metric producers to a collector"; - "n_producers" => producers.len(), - "collector_id" => ?oximeter_info.collector_id, - ); - // Be sure to continue paginating from the last producer. - // - // Safety: We check just above if the list is empty, so there is a - // last element. - last_producer_id.replace(producers.last().unwrap().id()); - for producer in producers.into_iter() { - let producer_info = oximeter_client::types::ProducerEndpoint { - id: producer.id(), - kind: nexus::ProducerKind::from(producer.kind).into(), - address: SocketAddr::new( - producer.ip.ip(), - producer.port.try_into().unwrap(), - ) - .to_string(), - base_route: producer.base_route, - interval: oximeter_client::types::Duration::from( - Duration::from_secs_f64(producer.interval), - ), - }; - client - .producers_post(&producer_info) - .await - .map_err(Error::from)?; - } - } + /// List the producers assigned to an oximeter collector. + pub(crate) async fn list_assigned_producers( + &self, + opctx: &OpContext, + collector_id: Uuid, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + self.db_datastore + .producers_list_by_oximeter_id(opctx, collector_id, pagparams) + .await + .map(|list| list.into_iter().map(ProducerEndpoint::from).collect()) } /// Register as a metric producer with the oximeter metric collection server. @@ -179,11 +135,12 @@ impl super::Nexus { /// Assign a newly-registered metric producer to an oximeter collector server. pub(crate) async fn assign_producer( &self, + opctx: &OpContext, producer_info: nexus::ProducerEndpoint, ) -> Result<(), Error> { - let (collector, id) = self.next_collector().await?; + let (collector, id) = self.next_collector(opctx).await?; let db_info = db::model::ProducerEndpoint::new(&producer_info, id); - self.db_datastore.producer_endpoint_create(&db_info).await?; + self.db_datastore.producer_endpoint_create(opctx, &db_info).await?; collector .producers_post(&oximeter_client::types::ProducerEndpoint::from( &producer_info, @@ -202,10 +159,11 @@ impl super::Nexus { /// Idempotently un-assign a producer from an oximeter collector. pub(crate) async fn unassign_producer( &self, + opctx: &OpContext, id: &Uuid, ) -> Result<(), Error> { if let Some(collector_id) = - self.db_datastore.producer_endpoint_delete(id).await? + self.db_datastore.producer_endpoint_delete(opctx, id).await? 
{ debug!( self.log, @@ -214,7 +172,7 @@ impl super::Nexus { "collector_id" => %collector_id, ); let oximeter_info = - self.db_datastore.oximeter_lookup(&collector_id).await?; + self.db_datastore.oximeter_lookup(opctx, &collector_id).await?; let address = SocketAddr::new(oximeter_info.ip.ip(), *oximeter_info.port); let client = self.build_oximeter_client(&id, address); @@ -380,14 +338,17 @@ impl super::Nexus { } // Return an oximeter collector to assign a newly-registered producer - async fn next_collector(&self) -> Result<(OximeterClient, Uuid), Error> { + async fn next_collector( + &self, + opctx: &OpContext, + ) -> Result<(OximeterClient, Uuid), Error> { // TODO-robustness Replace with a real load-balancing strategy. let page_params = DataPageParams { marker: None, direction: dropshot::PaginationOrder::Ascending, limit: std::num::NonZeroU32::new(1).unwrap(), }; - let oxs = self.db_datastore.oximeter_list(&page_params).await?; + let oxs = self.db_datastore.oximeter_list(opctx, &page_params).await?; let info = oxs.first().ok_or_else(|| Error::ServiceUnavailable { internal_message: String::from("no oximeter collectors available"), })?; diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 0676ace70c..3758b5289b 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -83,6 +83,7 @@ pub(crate) fn internal_api() -> NexusApiDescription { api.register(cpapi_volume_remove_read_only_parent)?; api.register(cpapi_disk_remove_read_only_parent)?; api.register(cpapi_producers_post)?; + api.register(cpapi_assigned_producers_list)?; api.register(cpapi_collectors_post)?; api.register(cpapi_metrics_collect)?; api.register(cpapi_artifact_download)?; @@ -454,10 +455,12 @@ async fn cpapi_producers_post( producer_info: TypedBody, ) -> Result { let context = request_context.context(); - let nexus = &context.nexus; - let producer_info = producer_info.into_inner(); let handler = async { - nexus.assign_producer(producer_info).await?; + let nexus = &context.nexus; + let producer_info = producer_info.into_inner(); + let opctx = + crate::context::op_context_for_internal_api(&request_context).await; + nexus.assign_producer(&opctx, producer_info).await?; Ok(HttpResponseUpdatedNoContent()) }; context @@ -466,6 +469,52 @@ async fn cpapi_producers_post( .await } +#[derive( + Clone, + Copy, + Debug, + serde::Deserialize, + schemars::JsonSchema, + serde::Serialize, +)] +pub struct CollectorIdPathParams { + /// The ID of the oximeter collector. + pub collector_id: Uuid, +} + +/// List all metric producers assigned to an oximeter collector. 
+#[endpoint { + method = GET, + path = "/metrics/collectors/{collector_id}/producers", + }] +async fn cpapi_assigned_producers_list( + request_context: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result>, HttpError> { + let context = request_context.context(); + let handler = async { + let nexus = &context.nexus; + let collector_id = path_params.into_inner().collector_id; + let query = query_params.into_inner(); + let pagparams = data_page_params_for(&request_context, &query)?; + let opctx = + crate::context::op_context_for_internal_api(&request_context).await; + let producers = nexus + .list_assigned_producers(&opctx, collector_id, &pagparams) + .await?; + Ok(HttpResponseOk(ScanById::results_page( + &query, + producers, + &|_, producer: &ProducerEndpoint| producer.id, + )?)) + }; + context + .internal_latencies + .instrument_dropshot_handler(&request_context, handler) + .await +} + /// Accept a notification of a new oximeter collection server. #[endpoint { method = POST, @@ -476,10 +525,12 @@ async fn cpapi_collectors_post( oximeter_info: TypedBody, ) -> Result { let context = request_context.context(); - let nexus = &context.nexus; - let oximeter_info = oximeter_info.into_inner(); let handler = async { - nexus.upsert_oximeter_collector(&oximeter_info).await?; + let nexus = &context.nexus; + let oximeter_info = oximeter_info.into_inner(); + let opctx = + crate::context::op_context_for_internal_api(&request_context).await; + nexus.upsert_oximeter_collector(&opctx, &oximeter_info).await?; Ok(HttpResponseUpdatedNoContent()) }; context diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index cc9c8c43df..e5616a4641 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -1411,6 +1411,7 @@ pub async fn start_oximeter( let config = oximeter_collector::Config { nexus_address: Some(nexus_address), db, + refresh_interval: oximeter_collector::default_refresh_interval(), log: ConfigLogging::StderrTerminal { level: ConfigLoggingLevel::Error }, }; let args = oximeter_collector::OximeterArguments { diff --git a/nexus/tests/integration_tests/oximeter.rs b/nexus/tests/integration_tests/oximeter.rs index 9663e10fa0..20e098ec08 100644 --- a/nexus/tests/integration_tests/oximeter.rs +++ b/nexus/tests/integration_tests/oximeter.rs @@ -4,18 +4,11 @@ //! Integration tests for oximeter collectors and producers. -use dropshot::Method; -use http::StatusCode; use nexus_test_interface::NexusServer; use nexus_test_utils_macros::nexus_test; -use omicron_common::api::internal::nexus::ProducerEndpoint; -use omicron_common::api::internal::nexus::ProducerKind; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oximeter_db::DbWrite; -use std::collections::BTreeSet; use std::net; -use std::net::Ipv6Addr; -use std::net::SocketAddr; use std::time::Duration; use uuid::Uuid; @@ -339,88 +332,3 @@ async fn test_oximeter_reregistration() { ); context.teardown().await; } - -// A regression test for https://github.com/oxidecomputer/omicron/issues/4498 -#[tokio::test] -async fn test_oximeter_collector_reregistration_gets_all_assignments() { - let mut context = nexus_test_utils::test_setup::( - "test_oximeter_collector_reregistration_gets_all_assignments", - ) - .await; - let oximeter_id = nexus_test_utils::OXIMETER_UUID.parse().unwrap(); - - // Create a bunch of producer records. 
- // - // Note that the actual count is arbitrary, but it should be larger than the - // internal pagination limit used in `Nexus::upsert_oximeter_collector()`, - // which is currently 100. - const N_PRODUCERS: usize = 150; - let mut ids = BTreeSet::new(); - for _ in 0..N_PRODUCERS { - let id = Uuid::new_v4(); - ids.insert(id); - let info = ProducerEndpoint { - id, - kind: ProducerKind::Service, - address: SocketAddr::new(Ipv6Addr::LOCALHOST.into(), 12345), - base_route: String::from("/collect"), - interval: Duration::from_secs(1), - }; - context - .internal_client - .make_request( - Method::POST, - "/metrics/producers", - Some(&info), - StatusCode::NO_CONTENT, - ) - .await - .expect("failed to register test producer"); - } - - // Check that `oximeter` has these registered. - let producers = - context.oximeter.list_producers(None, N_PRODUCERS * 2).await; - let actual_ids: BTreeSet<_> = - producers.iter().map(|info| info.id).collect(); - - // There is an additional producer that's created as part of the normal test - // setup, so we'll check that all of the new producers exist, and that - // there's exactly 1 additional one. - assert!( - ids.is_subset(&actual_ids), - "oximeter did not get the right set of producers" - ); - assert_eq!( - ids.len(), - actual_ids.len() - 1, - "oximeter did not get the right set of producers" - ); - - // Drop and restart oximeter, which should result in the exact same set of - // producers again. - drop(context.oximeter); - context.oximeter = nexus_test_utils::start_oximeter( - context.logctx.log.new(o!("component" => "oximeter")), - context.server.get_http_server_internal_address().await, - context.clickhouse.port(), - oximeter_id, - ) - .await - .expect("failed to restart oximeter"); - - let producers = - context.oximeter.list_producers(None, N_PRODUCERS * 2).await; - let actual_ids: BTreeSet<_> = - producers.iter().map(|info| info.id).collect(); - assert!( - ids.is_subset(&actual_ids), - "oximeter did not get the right set of producers after re-registering" - ); - assert_eq!( - ids.len(), - actual_ids.len() - 1, - "oximeter did not get the right set of producers after re-registering" - ); - context.teardown().await; -} diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index cba8063b7e..db3199833e 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -775,6 +775,72 @@ } } }, + "/metrics/collectors/{collector_id}/producers": { + "get": { + "summary": "List all metric producers assigned to an oximeter collector.", + "operationId": "cpapi_assigned_producers_list", + "parameters": [ + { + "in": "path", + "name": "collector_id", + "description": "The ID of the oximeter collector.", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/IdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProducerEndpointResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, 
+ "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/metrics/producers": { "post": { "summary": "Accept a registration from a new metric producer", @@ -6171,6 +6237,27 @@ "kind" ] }, + "ProducerEndpointResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/ProducerEndpoint" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "ProducerKind": { "description": "The kind of metric producer this is.", "oneOf": [ diff --git a/openapi/oximeter.json b/openapi/oximeter.json index f5c78d53cd..4c609474ca 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json @@ -142,6 +142,12 @@ "description": "The collector's UUID.", "type": "string", "format": "uuid" + }, + "last_refresh": { + "nullable": true, + "description": "Last time we refreshed our producer list with Nexus.", + "type": "string", + "format": "date-time" } }, "required": [ diff --git a/oximeter/collector/Cargo.toml b/oximeter/collector/Cargo.toml index 92c91ca101..b7dac716c6 100644 --- a/oximeter/collector/Cargo.toml +++ b/oximeter/collector/Cargo.toml @@ -13,7 +13,6 @@ clap.workspace = true dropshot.workspace = true futures.workspace = true internal-dns.workspace = true -nexus-client.workspace = true nexus-types.workspace = true omicron-common.workspace = true oximeter.workspace = true @@ -33,6 +32,7 @@ tokio.workspace = true toml.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true +nexus-client.workspace = true [dev-dependencies] expectorate.workspace = true diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 8fff44bb2d..33146b3579 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -11,9 +11,16 @@ use crate::DbConfig; use crate::Error; use crate::ProducerEndpoint; use anyhow::anyhow; +use chrono::DateTime; +use chrono::Utc; +use futures::TryStreamExt; use internal_dns::resolver::Resolver; use internal_dns::ServiceName; +use nexus_client::types::IdSortMode; use omicron_common::address::CLICKHOUSE_PORT; +use omicron_common::address::NEXUS_INTERNAL_PORT; +use omicron_common::backoff; +use omicron_common::backoff::BackoffError; use oximeter::types::ProducerResults; use oximeter::types::ProducerResultsItem; use oximeter_db::Client; @@ -29,12 +36,15 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::net::SocketAddr; use std::net::SocketAddrV6; +use std::num::NonZeroU32; use std::ops::Bound; use std::sync::Arc; +use std::sync::Mutex as StdMutex; use std::time::Duration; use tokio::sync::mpsc; use tokio::sync::oneshot; use tokio::sync::Mutex; +use tokio::sync::MutexGuard; use tokio::task::JoinHandle; use tokio::time::interval; use uuid::Uuid; @@ -343,7 +353,7 @@ async fn results_sink( } /// The internal agent the oximeter server uses to collect metrics from producers. -#[derive(Debug)] +#[derive(Clone, Debug)] pub struct OximeterAgent { /// The collector ID for this agent pub id: Uuid, @@ -355,6 +365,12 @@ pub struct OximeterAgent { // The actual tokio tasks running the collection on a timer. 
collection_tasks: Arc>>, + // The interval on which we refresh our list of producers from Nexus + refresh_interval: Duration, + // Handle to the task used to periodically refresh the list of producers. + refresh_task: Arc>>>, + /// The last time we've refreshed our list of producers from Nexus. + pub last_refresh_time: Arc>>>, } impl OximeterAgent { @@ -362,6 +378,7 @@ impl OximeterAgent { pub async fn with_id( id: Uuid, address: SocketAddrV6, + refresh_interval: Duration, db_config: DbConfig, resolver: &Resolver, log: &Logger, @@ -435,13 +452,30 @@ impl OximeterAgent { ) .await }); - Ok(Self { + + let self_ = Self { id, log, collection_target, result_sender, collection_tasks: Arc::new(Mutex::new(BTreeMap::new())), - }) + refresh_interval, + refresh_task: Arc::new(StdMutex::new(None)), + last_refresh_time: Arc::new(StdMutex::new(None)), + }; + + Ok(self_) + } + + /// Ensure the backgrouund task that polls Nexus periodically for our list of + /// assigned producers is running. + pub(crate) fn ensure_producer_refresh_task(&self, resolver: Resolver) { + let mut task = self.refresh_task.lock().unwrap(); + if task.is_none() { + let refresh_task = + tokio::spawn(refresh_producer_list(self.clone(), resolver)); + *task = Some(refresh_task); + } } /// Construct a new standalone `oximeter` collector. @@ -455,6 +489,7 @@ impl OximeterAgent { pub async fn new_standalone( id: Uuid, address: SocketAddrV6, + refresh_interval: Duration, db_config: Option, log: &Logger, ) -> Result { @@ -503,12 +538,21 @@ impl OximeterAgent { collector_ip: (*address.ip()).into(), collector_port: address.port(), }; + + // We don't spawn the task to periodically refresh producers when run + // in standalone mode. We can just pretend we registered once, and + // that's it. + let last_refresh_time = Arc::new(StdMutex::new(Some(Utc::now()))); + Ok(Self { id, log, collection_target, result_sender, collection_tasks: Arc::new(Mutex::new(BTreeMap::new())), + refresh_interval, + refresh_task: Arc::new(StdMutex::new(None)), + last_refresh_time, }) } @@ -517,8 +561,23 @@ impl OximeterAgent { &self, info: ProducerEndpoint, ) -> Result<(), Error> { + let mut tasks = self.collection_tasks.lock().await; + self.register_producer_locked(&mut tasks, info).await; + Ok(()) + } + + // Internal implementation that registers a producer, assuming the lock on + // the map is held. + async fn register_producer_locked( + &self, + tasks: &mut MutexGuard< + '_, + BTreeMap, + >, + info: ProducerEndpoint, + ) { let id = info.id; - match self.collection_tasks.lock().await.entry(id) { + match tasks.entry(id) { Entry::Vacant(value) => { debug!( self.log, @@ -557,7 +616,6 @@ impl OximeterAgent { .unwrap(); } } - Ok(()) } /// Forces a collection from all producers. @@ -607,12 +665,22 @@ impl OximeterAgent { /// Delete a producer by ID, stopping its collection task. pub async fn delete_producer(&self, id: Uuid) -> Result<(), Error> { - let (_info, task) = self - .collection_tasks - .lock() - .await - .remove(&id) - .ok_or_else(|| Error::NoSuchProducer(id))?; + let mut tasks = self.collection_tasks.lock().await; + self.delete_producer_locked(&mut tasks, id).await + } + + // Internal implementation that deletes a producer, assuming the lock on + // the map is held. 
+ async fn delete_producer_locked( + &self, + tasks: &mut MutexGuard< + '_, + BTreeMap, + >, + id: Uuid, + ) -> Result<(), Error> { + let (_info, task) = + tasks.remove(&id).ok_or_else(|| Error::NoSuchProducer(id))?; debug!( self.log, "removed collection task from set"; @@ -633,6 +701,139 @@ impl OximeterAgent { } Ok(()) } + + // Ensure that exactly the set of producers is registered with `self`. + // + // Errors logged, but not returned, and an attempt to register all producers + // is made, even if an error is encountered part-way through. + // + // This returns the number of pruned tasks. + async fn ensure_producers( + &self, + expected_producers: BTreeMap, + ) -> usize { + let mut tasks = self.collection_tasks.lock().await; + + // First prune unwanted collection tasks. + // + // This is set of all producers that we currently have, which are not in + // the new list from Nexus. + let ids_to_prune: Vec<_> = tasks + .keys() + .filter(|id| !expected_producers.contains_key(id)) + .copied() + .collect(); + let n_pruned = ids_to_prune.len(); + for id in ids_to_prune.into_iter() { + // This method only returns an error if the provided ID does not + // exist in the current tasks. That is impossible, because we hold + // the lock, and we've just computed this as the set that _is_ in + // the map, and not in the new set from Nexus. + self.delete_producer_locked(&mut tasks, id).await.unwrap(); + } + + // And then ensure everything in the list. + // + // This will insert new tasks, and update any that we already know + // about. + for info in expected_producers.into_values() { + self.register_producer_locked(&mut tasks, info).await; + } + n_pruned + } +} + +// A task which periodically updates our list of producers from Nexus. +async fn refresh_producer_list(agent: OximeterAgent, resolver: Resolver) { + let mut interval = tokio::time::interval(agent.refresh_interval); + let page_size = Some(NonZeroU32::new(100).unwrap()); + loop { + interval.tick().await; + info!(agent.log, "refreshing list of producers from Nexus"); + let nexus_addr = + resolve_nexus_with_backoff(&agent.log, &resolver).await; + let url = format!("http://{}", nexus_addr); + let client = nexus_client::Client::new(&url, agent.log.clone()); + let mut stream = client.cpapi_assigned_producers_list_stream( + &agent.id, + page_size, + Some(IdSortMode::IdAscending), + ); + let mut expected_producers = BTreeMap::new(); + loop { + match stream.try_next().await { + Err(e) => { + error!( + agent.log, + "error fetching next assigned producer"; + "err" => ?e, + ); + } + Ok(Some(p)) => { + let endpoint = match ProducerEndpoint::try_from(p) { + Ok(ep) => ep, + Err(e) => { + error!( + agent.log, + "failed to convert producer description \ + from Nexus, skipping producer"; + "err" => e + ); + continue; + } + }; + let old = expected_producers.insert(endpoint.id, endpoint); + if let Some(ProducerEndpoint { id, .. 
}) = old { + error!( + agent.log, + "Nexus appears to have sent duplicate producer info"; + "producer_id" => %id, + ); + } + } + Ok(None) => break, + } + } + let n_current_tasks = expected_producers.len(); + let n_pruned_tasks = agent.ensure_producers(expected_producers).await; + *agent.last_refresh_time.lock().unwrap() = Some(Utc::now()); + info!( + agent.log, + "refreshed list of producers from Nexus"; + "n_pruned_tasks" => n_pruned_tasks, + "n_current_tasks" => n_current_tasks, + ); + } +} + +async fn resolve_nexus_with_backoff( + log: &Logger, + resolver: &Resolver, +) -> SocketAddr { + let log_failure = |error, delay| { + warn!( + log, + "failed to lookup Nexus IP, will retry"; + "delay" => ?delay, + "error" => ?error, + ); + }; + let do_lookup = || async { + resolver + .lookup_ipv6(ServiceName::Nexus) + .await + .map_err(|e| BackoffError::transient(e.to_string())) + .map(|ip| { + SocketAddr::V6(SocketAddrV6::new(ip, NEXUS_INTERNAL_PORT, 0, 0)) + }) + }; + backoff::retry_notify( + backoff::retry_policy_internal_service(), + do_lookup, + log_failure, + ) + .await + .expect("Expected infinite retry loop resolving Nexus address") } #[cfg(test)] @@ -696,6 +897,7 @@ mod tests { let collector = OximeterAgent::new_standalone( Uuid::new_v4(), SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + crate::default_refresh_interval(), None, log, ) @@ -772,6 +974,7 @@ mod tests { let collector = OximeterAgent::new_standalone( Uuid::new_v4(), SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + crate::default_refresh_interval(), None, log, ) @@ -842,6 +1045,7 @@ mod tests { let collector = OximeterAgent::new_standalone( Uuid::new_v4(), SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + crate::default_refresh_interval(), None, log, ) diff --git a/oximeter/collector/src/http_entrypoints.rs b/oximeter/collector/src/http_entrypoints.rs index 493083a40d..e876ed047d 100644 --- a/oximeter/collector/src/http_entrypoints.rs +++ b/oximeter/collector/src/http_entrypoints.rs @@ -7,6 +7,8 @@ // Copyright 2023 Oxide Computer Company use crate::OximeterAgent; +use chrono::DateTime; +use chrono::Utc; use dropshot::endpoint; use dropshot::ApiDescription; use dropshot::EmptyScanParams; @@ -117,6 +119,8 @@ async fn producer_delete( pub struct CollectorInfo { /// The collector's UUID. pub id: Uuid, + /// Last time we refreshed our producer list with Nexus. + pub last_refresh: Option>, } // Return identifying information about this collector @@ -128,6 +132,8 @@ async fn collector_info( request_context: RequestContext>, ) -> Result, HttpError> { let agent = request_context.context(); - let info = CollectorInfo { id: agent.id }; + let id = agent.id; + let last_refresh = *agent.last_refresh_time.lock().unwrap(); + let info = CollectorInfo { id, last_refresh }; Ok(HttpResponseOk(info)) } diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index f3c793d5c2..596c0dc785 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -31,6 +31,7 @@ use std::net::SocketAddr; use std::net::SocketAddrV6; use std::path::Path; use std::sync::Arc; +use std::time::Duration; use thiserror::Error; use uuid::Uuid; @@ -114,6 +115,11 @@ impl DbConfig { } } +/// Default interval on which we refresh our list of producers from Nexus. 
+pub const fn default_refresh_interval() -> Duration { + Duration::from_secs(60 * 10) +} + /// Configuration used to initialize an oximeter server #[derive(Clone, Debug, Deserialize, Serialize)] pub struct Config { @@ -123,6 +129,11 @@ pub struct Config { #[serde(default, skip_serializing_if = "Option::is_none")] pub nexus_address: Option, + /// The interval on which we periodically refresh our list of producers from + /// Nexus. + #[serde(default = "default_refresh_interval")] + pub refresh_interval: Duration, + /// Configuration for working with ClickHouse pub db: DbConfig, @@ -202,6 +213,7 @@ impl Oximeter { OximeterAgent::with_id( args.id, args.address, + config.refresh_interval, config.db, &resolver, &log, @@ -239,7 +251,10 @@ impl Oximeter { .start(); // Notify Nexus that this oximeter instance is available. - let client = reqwest::Client::new(); + let our_info = nexus_client::types::OximeterInfo { + address: server.local_addr().to_string(), + collector_id: agent.id, + }; let notify_nexus = || async { debug!(log, "contacting nexus"); let nexus_address = if let Some(address) = config.nexus_address { @@ -254,18 +269,25 @@ impl Oximeter { 0, )) }; - - client - .post(format!("http://{}/metrics/collectors", nexus_address,)) - .json(&nexus_client::types::OximeterInfo { - address: server.local_addr().to_string(), - collector_id: agent.id, - }) - .send() - .await - .map_err(|e| backoff::BackoffError::transient(e.to_string()))? - .error_for_status() - .map_err(|e| backoff::BackoffError::transient(e.to_string())) + let client = nexus_client::Client::new( + &format!("http://{nexus_address}"), + log.clone(), + ); + client.cpapi_collectors_post(&our_info).await.map_err(|e| { + match &e { + // Failures to reach nexus, or server errors on its side + // are retryable. Everything else is permanent. + nexus_client::Error::CommunicationError(_) => { + backoff::BackoffError::transient(e.to_string()) + } + nexus_client::Error::ErrorResponse(inner) + if inner.status().is_server_error() => + { + backoff::BackoffError::transient(e.to_string()) + } + _ => backoff::BackoffError::permanent(e.to_string()), + } + }) }; let log_notification_failure = |error, delay| { warn!( @@ -282,6 +304,10 @@ impl Oximeter { .await .expect("Expected an infinite retry loop contacting Nexus"); + // Now that we've successfully registered, we'll start periodically + // polling for our list of producers from Nexus. + agent.ensure_producer_refresh_task(resolver); + info!(log, "oximeter registered with nexus"; "id" => ?agent.id); Ok(Self { agent, server }) } @@ -298,6 +324,7 @@ impl Oximeter { OximeterAgent::new_standalone( args.id, args.address, + crate::default_refresh_interval(), db_config, &log, ) From 8fae16ae9e23615dd481b236866df24e77c87a43 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Tue, 26 Mar 2024 12:58:53 -0700 Subject: [PATCH 002/334] Add a --running option to omdb db instances (#5291) Added a flag to the `omdb db instances` command: `--running`. It will only show running instances (or instances that are in that state, specifically). 
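For example, to show only instances whose effective state is Running (invocation shown for illustration; the flag simply filters the rows the command already prints):

    omdb db instances --running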
Co-authored-by: Alan Hanson --- dev-tools/omdb/src/bin/omdb/db.rs | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index a4243fff31..5b5e23ea8d 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -24,6 +24,7 @@ use async_bb8_diesel::AsyncRunQueryDsl; use async_bb8_diesel::AsyncSimpleConnection; use camino::Utf8PathBuf; use chrono::SecondsFormat; +use clap::ArgAction; use clap::Args; use clap::Subcommand; use clap::ValueEnum; @@ -92,6 +93,7 @@ use nexus_types::inventory::RotPageWhich; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; +use omicron_common::api::external::InstanceState; use omicron_common::api::external::LookupType; use omicron_common::api::external::MacAddr; use sled_agent_client::types::VolumeConstructionRequest; @@ -257,7 +259,7 @@ enum DbCommands { /// Print information about sleds Sleds, /// Print information about customer instances - Instances, + Instances(InstancesOptions), /// Print information about the network Network(NetworkArgs), /// Print information about snapshots @@ -346,6 +348,13 @@ impl CliDnsGroup { } } +#[derive(Debug, Args)] +struct InstancesOptions { + /// Only show the running instances + #[arg(short, long, action=ArgAction::SetTrue)] + running: bool, +} + #[derive(Debug, Args)] struct InventoryArgs { #[command(subcommand)] @@ -539,8 +548,14 @@ impl DbArgs { DbCommands::Sleds => { cmd_db_sleds(&opctx, &datastore, &self.fetch_opts).await } - DbCommands::Instances => { - cmd_db_instances(&opctx, &datastore, &self.fetch_opts).await + DbCommands::Instances(instances_options) => { + cmd_db_instances( + &opctx, + &datastore, + &self.fetch_opts, + instances_options.running, + ) + .await } DbCommands::Network(NetworkArgs { command: NetworkCommands::ListEips, @@ -1628,6 +1643,7 @@ async fn cmd_db_instances( opctx: &OpContext, datastore: &DataStore, fetch_opts: &DbFetchOptions, + running: bool, ) -> Result<(), anyhow::Error> { use db::schema::instance::dsl; use db::schema::vmm::dsl as vmm_dsl; @@ -1681,6 +1697,10 @@ async fn cmd_db_instances( "-".to_string() }; + if running && i.effective_state() != InstanceState::Running { + continue; + } + let cir = CustomerInstanceRow { id: i.instance().id().to_string(), name: i.instance().name().to_string(), From dad3f11500500957fdefd7607d80534601605dcd Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Tue, 26 Mar 2024 20:42:00 -0700 Subject: [PATCH 003/334] disable a4x2 ci job (#5334) --- .github/buildomat/jobs/a4x2-deploy.sh | 2 +- .github/buildomat/jobs/a4x2-prepare.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/buildomat/jobs/a4x2-deploy.sh b/.github/buildomat/jobs/a4x2-deploy.sh index dfc9191611..8f0f24c8d1 100755 --- a/.github/buildomat/jobs/a4x2-deploy.sh +++ b/.github/buildomat/jobs/a4x2-deploy.sh @@ -13,7 +13,7 @@ #: "%/out/dhcp-server.log", #: ] #: skip_clone = true -#: enable = true +#: enable = false #: #: [dependencies.a4x2] #: job = "a4x2-prepare" diff --git a/.github/buildomat/jobs/a4x2-prepare.sh b/.github/buildomat/jobs/a4x2-prepare.sh index 79fa037139..bc88ddd4c0 100755 --- a/.github/buildomat/jobs/a4x2-prepare.sh +++ b/.github/buildomat/jobs/a4x2-prepare.sh @@ -20,7 +20,7 @@ #: access_repos = [ #: "oxidecomputer/testbed", #: ] -#: enable = true +#: enable = false source ./env.sh From 4ed3933309cdf507b952420ae049f682fa796670 Mon 
Sep 17 00:00:00 2001 From: David Crespo Date: Wed, 27 Mar 2024 06:15:52 -0500 Subject: [PATCH 004/334] Bump web console (minor) (#5336) https://github.com/oxidecomputer/console/compare/01142889...c0dd895e * [c0dd895e](https://github.com/oxidecomputer/console/commit/c0dd895e) oxidecomputer/console#2099 * [fe78c655](https://github.com/oxidecomputer/console/commit/fe78c655) oxidecomputer/console#2098 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 38a4cab257..33d87ded32 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="0114288974fb9e36bf392277fe59b0de357ec137" -SHA2="12e4e0205665127b6f99911484b7d61e50ef740f7a94ac6c95e11918f0ce7ada" +COMMIT="c0dd895eb5f1bfe7f4824a09998cd0b34594e253" +SHA2="c66ec4a376b011e000cf8396d7dc43b2f044cb173ff91585357de267ccee9398" From cf185c558347a894056c154087442914c4820905 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 27 Mar 2024 10:31:03 -0400 Subject: [PATCH 005/334] sled-agent: Configure mgd and mg-ddm even if we don't have rack/sled IDs yet (#5323) --- Cargo.lock | 6 +-- Cargo.toml | 4 +- package-manifest.toml | 12 ++--- sled-agent/src/services.rs | 89 +++++++++++++++++++++++++++++--------- 4 files changed, 79 insertions(+), 32 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f7b9f27b1f..8188a1feb7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1651,7 +1651,7 @@ dependencies = [ [[package]] name = "ddm-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=de065a84831e66c829603d9a098e237e8f5faaa1#de065a84831e66c829603d9a098e237e8f5faaa1" +source = "git+https://github.com/oxidecomputer/maghemite?rev=8207cb9c90cd7144c3f351823bfb2ae3e221ad10#8207cb9c90cd7144c3f351823bfb2ae3e221ad10" dependencies = [ "percent-encoding", "progenitor", @@ -4062,7 +4062,7 @@ dependencies = [ "colored", "dlpi", "libc", - "num_enum 0.5.11", + "num_enum", "nvpair", "nvpair-sys", "rusty-doors", @@ -4314,7 +4314,7 @@ dependencies = [ [[package]] name = "mg-admin-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/maghemite?rev=de065a84831e66c829603d9a098e237e8f5faaa1#de065a84831e66c829603d9a098e237e8f5faaa1" +source = "git+https://github.com/oxidecomputer/maghemite?rev=8207cb9c90cd7144c3f351823bfb2ae3e221ad10#8207cb9c90cd7144c3f351823bfb2ae3e221ad10" dependencies = [ "anyhow", "chrono", diff --git a/Cargo.toml b/Cargo.toml index 98a397978d..018941f081 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -266,8 +266,8 @@ maplit = "1.0.2" mime_guess = "2.0.4" mockall = "0.12" newtype_derive = "0.1.6" -mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "de065a84831e66c829603d9a098e237e8f5faaa1" } -ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "de065a84831e66c829603d9a098e237e8f5faaa1" } +mg-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "8207cb9c90cd7144c3f351823bfb2ae3e221ad10" } +ddm-admin-client = { git = "https://github.com/oxidecomputer/maghemite", rev = "8207cb9c90cd7144c3f351823bfb2ae3e221ad10" } multimap = "0.10.0" nexus-client = { path = "clients/nexus-client" } nexus-config = { path = "nexus-config" } diff --git a/package-manifest.toml b/package-manifest.toml index e3198cef8b..0987280906 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -535,10 +535,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. 
Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "de065a84831e66c829603d9a098e237e8f5faaa1" +source.commit = "8207cb9c90cd7144c3f351823bfb2ae3e221ad10" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "8a7525f8329c5178ebf07cecc623a017806b81d5d1ca55cf76b88e737ae57dec" +source.sha256 = "dc58a0b4b1fe739e535e881e5e0678067fb8661e61cb837841224dd14608d54c" output.type = "tarball" [package.mg-ddm] @@ -551,10 +551,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "de065a84831e66c829603d9a098e237e8f5faaa1" +source.commit = "8207cb9c90cd7144c3f351823bfb2ae3e221ad10" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "0cc9cbef39103d3e651334574ebdd0e6ef71670cbe6a720d22b1efb005b5a71c" +source.sha256 = "4221a80d6ffb16b0f4d8b67a198a3da517154c3e7d8c1f0eaebb4eda6c36bdeb" output.type = "zone" output.intermediate_only = true @@ -566,10 +566,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "de065a84831e66c829603d9a098e237e8f5faaa1" +source.commit = "8207cb9c90cd7144c3f351823bfb2ae3e221ad10" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "4256d320b1ec310d56679eca5f65c9149072fe647f66021fd0cce1411fc39e0c" +source.sha256 = "ee3ef45706641784a8cfb093310bf5603755b59714db92bce058bb7cc1483099" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c0dbaebcc2..e23cdf58b9 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2755,16 +2755,13 @@ impl ServiceManager { SwitchService::Mgd => { info!(self.inner.log, "Setting up mgd service"); smfh.delpropvalue("config/dns_servers", "*")?; - let info = self - .inner - .sled_info - .get() - .ok_or(Error::SledAgentNotReady)?; - smfh.setprop("config/rack_uuid", info.rack_id)?; - smfh.setprop( - "config/sled_uuid", - info.config.sled_id, - )?; + if let Some(info) = self.inner.sled_info.get() { + smfh.setprop("config/rack_uuid", info.rack_id)?; + smfh.setprop( + "config/sled_uuid", + info.config.sled_id, + )?; + } for address in &request.zone.addresses { if *address != Ipv6Addr::LOCALHOST { let az_prefix = @@ -2785,16 +2782,13 @@ impl ServiceManager { SwitchService::MgDdm { mode } => { info!(self.inner.log, "Setting up mg-ddm service"); smfh.setprop("config/mode", &mode)?; - let info = self - .inner - .sled_info - .get() - .ok_or(Error::SledAgentNotReady)?; - smfh.setprop("config/rack_uuid", info.rack_id)?; - smfh.setprop( - "config/sled_uuid", - info.config.sled_id, - )?; + if let Some(info) = self.inner.sled_info.get() { + smfh.setprop("config/rack_uuid", info.rack_id)?; + smfh.setprop( + "config/sled_uuid", + info.config.sled_id, + )?; + } smfh.delpropvalue("config/dns_servers", "*")?; for address in &request.zone.addresses { if *address != Ipv6Addr::LOCALHOST { @@ 
-3993,12 +3987,65 @@ impl ServiceManager { // Only configured in // `ensure_switch_zone_uplinks_configured` } + SwitchService::SpSim => { + // nothing to configure + } + SwitchService::Mgd => { + info!(self.inner.log, "configuring mgd service"); + smfh.delpropvalue("config/dns_servers", "*")?; + if let Some(info) = self.inner.sled_info.get() { + smfh.setprop("config/rack_uuid", info.rack_id)?; + smfh.setprop( + "config/sled_uuid", + info.config.sled_id, + )?; + } + for address in &request.addresses { + if *address != Ipv6Addr::LOCALHOST { + let az_prefix = + Ipv6Subnet::::new(*address); + for addr in + Resolver::servers_from_subnet(az_prefix) + { + smfh.addpropvalue( + "config/dns_servers", + &format!("{addr}"), + )?; + } + break; + } + } + smfh.refresh()?; + } SwitchService::MgDdm { mode } => { + info!(self.inner.log, "configuring mg-ddm service"); smfh.delpropvalue("config/mode", "*")?; smfh.addpropvalue("config/mode", &mode)?; + if let Some(info) = self.inner.sled_info.get() { + smfh.setprop("config/rack_uuid", info.rack_id)?; + smfh.setprop( + "config/sled_uuid", + info.config.sled_id, + )?; + } + smfh.delpropvalue("config/dns_servers", "*")?; + for address in &request.addresses { + if *address != Ipv6Addr::LOCALHOST { + let az_prefix = + Ipv6Subnet::::new(*address); + for addr in + Resolver::servers_from_subnet(az_prefix) + { + smfh.addpropvalue( + "config/dns_servers", + &format!("{addr}"), + )?; + } + break; + } + } smfh.refresh()?; } - _ => (), } } } From 7b29090576a985c87b0c17487395ea7804394d3c Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 27 Mar 2024 09:54:07 -0700 Subject: [PATCH 006/334] [db-queries] Decouples CTE usage from Diesel (RegionAllocation) (#5063) Diesel's complex type have been a pain point in the region allocation CTE in particular: - It relies on several JOINs, which requires explicitly adding Diesel macros to make the type system happy - With ongoing work by @jmpesp , it would require additional `alias!` calls to allow a table to appear in a `SELECT` clause in multiple spots - Generally, the disconnect between "what SQL do I want to write" and "what invocations will make Diesel happy" has been "not really worth it" in this space. This PR does the following: - It relies heavily on https://docs.diesel.rs/master/diesel/query_builder/struct.SqlQuery.html , which is Diesel's "you do whatever you want" query type. Although this is still using Diesel, this usage is actually pretty aligned with other simpler DB interfaces - other crates (see: [tokio_postgres](https://docs.rs/tokio-postgres/latest/tokio_postgres/struct.Client.html#method.query)) take arguments like "a String + list of bind parameters", in some form. - It adds support in `raw_query_builder.rs` for a wrapper around Diesel's `SqlQuery` object, to make SQL injections less possible and to track bind parameter counting. - It fully converts the `RegionAllocation` CTE to use this builder, interleaved with raw SQL. - Since a large portion of the CTE was rote "repeated columns", I also added a function, accessible as `AllColumnsOf::with_prefix(&'static str)`, to enumerate all the columns of a table as strings. - I also added a simple `EXPLAIN` test to the CTE, to quickly validate that CockroachDB thinks it's producing valid output. Here are my thoughts for future improvements: - [ ] I spent a while trying to make the "query construction" a compile-time operation. I think that this is possible with nightly features. I think this is extremely difficult with stable rust. 
However, I think this is a great direction for future work, as statically-known queries would be easier to cache and validate. - [ ] I'd like to encapsulate more type information about the constructed query, as an "input/output" object. Right now, we're relying on existing integration tests for validation, but it seems possible to send these "example" queries (like the ones I'm using in my `EXPLAIN` tests) to ask CockroachDB to validate type information for us. - [ ] I want to make this format as digestible as possible. If there's anything I can do to make this easier to read, write, and validate, I'm totally on-board. I have been debating adding macro support for SQL formatting the raw strings, but I'm on the fence about whether or not that would make interleaved code harder to parse by humans. - As a follow-up: I'm auto-formatting the output of these queries in the EXPECTORATE-d output files --- Cargo.lock | 21 + Cargo.toml | 1 + nexus/db-model/src/queries/mod.rs | 1 - .../db-model/src/queries/region_allocation.rs | 195 ---- nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/db/cast_uuid_as_bytea.rs | 62 -- nexus/db-queries/src/db/column_walker.rs | 27 +- nexus/db-queries/src/db/datastore/mod.rs | 11 + nexus/db-queries/src/db/datastore/region.rs | 8 +- nexus/db-queries/src/db/mod.rs | 2 +- .../src/db/queries/region_allocation.rs | 941 ++++++------------ nexus/db-queries/src/db/raw_query_builder.rs | 195 ++++ .../output/region_allocate_distinct_sleds.sql | 267 +++++ .../output/region_allocate_random_sleds.sql | 265 +++++ nexus/src/app/sagas/snapshot_create.rs | 28 +- test-utils/src/dev/db.rs | 49 + 16 files changed, 1198 insertions(+), 876 deletions(-) delete mode 100644 nexus/db-model/src/queries/region_allocation.rs delete mode 100644 nexus/db-queries/src/db/cast_uuid_as_bytea.rs create mode 100644 nexus/db-queries/src/db/raw_query_builder.rs create mode 100644 nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql create mode 100644 nexus/db-queries/tests/output/region_allocate_random_sleds.sql diff --git a/Cargo.lock b/Cargo.lock index 8188a1feb7..0421ec6653 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1147,6 +1147,26 @@ version = "0.9.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2459377285ad874054d797f3ccebf984978aa39129f6eafde5cdc8315b612f8" +[[package]] +name = "const_format" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3a214c7af3d04997541b18d432afaff4c455e79e2029079647e72fc2bd27673" +dependencies = [ + "const_format_proc_macros", +] + +[[package]] +name = "const_format_proc_macros" +version = "0.2.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7f6ff08fd20f4f299298a28e2dfa8a8ba1036e6cd2460ac1de7b425d76f2500" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + [[package]] name = "constant_time_eq" version = "0.2.6" @@ -4590,6 +4610,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", + "const_format", "cookie 0.18.0", "db-macros", "diesel", diff --git a/Cargo.toml b/Cargo.toml index 018941f081..6546100e3f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -188,6 +188,7 @@ cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } clap = { version = "4.5", features = ["cargo", "derive", "env", "wrap_help"] } colored = "2.1" +const_format = "0.2.32" cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" diff --git a/nexus/db-model/src/queries/mod.rs 
b/nexus/db-model/src/queries/mod.rs index 7724d48bab..e138508f84 100644 --- a/nexus/db-model/src/queries/mod.rs +++ b/nexus/db-model/src/queries/mod.rs @@ -4,5 +4,4 @@ //! Subqueries used in CTEs. -pub mod region_allocation; pub mod virtual_provisioning_collection_update; diff --git a/nexus/db-model/src/queries/region_allocation.rs b/nexus/db-model/src/queries/region_allocation.rs deleted file mode 100644 index a1b9e0373a..0000000000 --- a/nexus/db-model/src/queries/region_allocation.rs +++ /dev/null @@ -1,195 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Describes subqueries which may be issues as a part of CTEs. -//! -//! When possible, it's preferable to define subqueries close to their -//! usage. However, certain Diesel traits (such as those enabling joins) -//! require the table structures to be defined in the same crate. - -// TODO: We're currently piggy-backing on the table macro for convenience. -// We actually do not want to generate an entire table for each subquery - we'd -// like to have a query source (which we can use to generate SELECT statements, -// JOIN, etc), but we don't want this to be an INSERT/UPDATE/DELETE target. -// -// Similarly, we don't want to force callers to supply a "primary key". -// -// I've looked into Diesel's `alias!` macro for this purpose, but unfortunately -// that implementation is too opinionated about the output QueryFragment. -// It expects to use the form: -// -// " as ", which is actually the opposite of what we want in -// a CTE (where we want the alias name to come first). - -use crate::schema::dataset; -use crate::schema::sled; -use crate::schema::zpool; - -table! { - old_regions { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - dataset_id -> Uuid, - volume_id -> Uuid, - - block_size -> Int8, - blocks_per_extent -> Int8, - extent_count -> Int8, - } -} - -table! { - candidate_datasets { - id -> Uuid, - pool_id -> Uuid, - } -} - -table! { - shuffled_candidate_datasets { - id -> Uuid, - pool_id -> Uuid, - } -} - -table! { - candidate_regions { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - dataset_id -> Uuid, - volume_id -> Uuid, - - block_size -> Int8, - blocks_per_extent -> Int8, - extent_count -> Int8, - } -} - -table! { - proposed_dataset_changes { - id -> Uuid, - pool_id -> Uuid, - size_used_delta -> Int8, - } -} - -table! { - old_zpool_usage (pool_id) { - pool_id -> Uuid, - size_used -> Numeric, - } -} - -table! { - candidate_zpools (pool_id) { - pool_id -> Uuid - } -} - -table! { - do_insert (insert) { - insert -> Bool, - } -} - -table! { - one_zpool_per_sled (pool_id) { - pool_id -> Uuid - } -} - -table! { - one_dataset_per_zpool { - id -> Uuid, - pool_id -> Uuid - } -} - -table! { - inserted_regions { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - dataset_id -> Uuid, - volume_id -> Uuid, - - block_size -> Int8, - blocks_per_extent -> Int8, - extent_count -> Int8, - } -} - -table! 
{ - updated_datasets (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - time_deleted -> Nullable, - rcgen -> Int8, - - pool_id -> Uuid, - - ip -> Inet, - port -> Int4, - - kind -> crate::DatasetKindEnum, - size_used -> Nullable, - } -} - -diesel::allow_tables_to_appear_in_same_query!( - proposed_dataset_changes, - dataset, -); - -diesel::allow_tables_to_appear_in_same_query!( - do_insert, - candidate_regions, - dataset, - zpool, -); - -diesel::allow_tables_to_appear_in_same_query!( - old_zpool_usage, - zpool, - sled, - proposed_dataset_changes, -); - -diesel::allow_tables_to_appear_in_same_query!(old_regions, dataset,); -diesel::allow_tables_to_appear_in_same_query!(old_regions, zpool,); - -diesel::allow_tables_to_appear_in_same_query!( - inserted_regions, - updated_datasets, -); - -diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, dataset,); -diesel::allow_tables_to_appear_in_same_query!(candidate_zpools, zpool,); -diesel::allow_tables_to_appear_in_same_query!(candidate_datasets, dataset); - -// == Needed for random region allocation == - -pub mod cockroach_md5 { - pub mod functions { - use diesel::sql_types::*; - diesel::sql_function!(fn md5(x: Bytea) -> Bytea); - } - - pub mod helper_types { - pub type Md5 = super::functions::md5::HelperType; - } - - pub mod dsl { - pub use super::functions::*; - pub use super::helper_types::*; - } -} - -// == End random region allocation dependencies == diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 595280780e..5f99b904fc 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -15,6 +15,7 @@ base64.workspace = true bb8.workspace = true camino.workspace = true chrono.workspace = true +const_format.workspace = true cookie.workspace = true diesel.workspace = true diesel-dtrace.workspace = true diff --git a/nexus/db-queries/src/db/cast_uuid_as_bytea.rs b/nexus/db-queries/src/db/cast_uuid_as_bytea.rs deleted file mode 100644 index c50c88971f..0000000000 --- a/nexus/db-queries/src/db/cast_uuid_as_bytea.rs +++ /dev/null @@ -1,62 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Cast UUID to BYTES - -use diesel::expression::ValidGrouping; -use diesel::pg::Pg; -use diesel::query_builder::AstPass; -use diesel::query_builder::QueryFragment; -use diesel::query_builder::QueryId; -use diesel::Expression; -use diesel::SelectableExpression; - -/// Cast an expression which evaluates to a Uuid and cast it to a Bytea. It's -/// that simple! 
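What replaces this deleted helper: the rewrite no longer needs a Diesel wrapper type for the cast, because the seeded md5 shuffle is now emitted directly as SQL text with a bound seed. The corresponding fragment from the new `allocation_query()` later in this diff looks roughly like this (quoted for orientation, with the bind type spelled out; it is not additional code):

```rust
builder
    .sql("ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || ")
    .param()
    .sql("))")
    .bind::<sql_types::Bytea, _>(seed.clone())
```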
-#[derive(ValidGrouping, QueryId)] -pub struct CastUuidToBytea { - expression: E, -} - -impl CastUuidToBytea -where - E: Expression, -{ - pub const fn new(expression: E) -> Self { - Self { expression } - } -} - -impl Expression for CastUuidToBytea -where - E: Expression, -{ - type SqlType = diesel::sql_types::Bytea; -} - -impl diesel::AppearsOnTable for CastUuidToBytea where - E: diesel::AppearsOnTable -{ -} - -impl SelectableExpression for CastUuidToBytea where - E: SelectableExpression -{ -} - -impl QueryFragment for CastUuidToBytea -where - E: QueryFragment, -{ - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.push_sql("CAST("); - self.expression.walk_ast(out.reborrow())?; - out.push_sql(" as BYTEA)"); - - Ok(()) - } -} diff --git a/nexus/db-queries/src/db/column_walker.rs b/nexus/db-queries/src/db/column_walker.rs index 64c3b450c8..cace2ba5fb 100644 --- a/nexus/db-queries/src/db/column_walker.rs +++ b/nexus/db-queries/src/db/column_walker.rs @@ -4,6 +4,7 @@ //! CTE utility for iterating over all columns in a table. +use crate::db::raw_query_builder::TrustedStr; use diesel::prelude::*; use std::marker::PhantomData; @@ -17,14 +18,30 @@ pub(crate) struct ColumnWalker { remaining: PhantomData, } +pub type AllColumnsOf = ColumnWalker<::AllColumns>; + impl ColumnWalker { - pub fn new() -> Self { + pub const fn new() -> Self { Self { remaining: PhantomData } } } macro_rules! impl_column_walker { ( $len:literal $($column:ident)+ ) => ( + #[allow(dead_code)] + impl<$($column: Column),+> ColumnWalker<($($column,)+)> { + pub fn with_prefix(prefix: &'static str) -> TrustedStr { + // This string is derived from: + // - The "table" type, with associated columns, which + // are not controlled by an arbitrary user, and + // - The "prefix" type, which is a "&'static str" (AKA, + // hopefully known at compile-time, and not leaked). + TrustedStr::i_take_responsibility_for_validating_this_string( + [$([prefix, $column::NAME].join("."),)+].join(", ") + ) + } + } + impl<$($column: Column),+> IntoIterator for ColumnWalker<($($column,)+)> { type Item = &'static str; type IntoIter = std::array::IntoIter; @@ -109,4 +126,12 @@ mod test { assert_eq!(iter.next(), Some("value")); assert_eq!(iter.next(), None); } + + #[test] + fn test_all_columns_with_prefix() { + assert_eq!( + AllColumnsOf::::with_prefix("foo").as_str(), + "foo.id, foo.value, foo.time_deleted" + ); + } } diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 2a3f969183..0020cf99b3 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -949,10 +949,21 @@ mod test { assert_eq!(expected_region_count, dataset_and_regions.len()); let mut disk_datasets = HashSet::new(); let mut disk_zpools = HashSet::new(); + let mut regions = HashSet::new(); for (dataset, region) in dataset_and_regions { // Must be 3 unique datasets assert!(disk_datasets.insert(dataset.id())); + // All regions should be unique + assert!(regions.insert(region.id())); + + // Check there's no cross contamination between returned UUIDs + // + // This is a little goofy, but it catches a bug that has + // happened before. The returned columns share names (like + // "id"), so we need to process them in-order. + assert!(regions.get(&dataset.id()).is_none()); + assert!(disk_datasets.get(®ion.id()).is_none()); // Dataset must not be eligible for provisioning. 
if let Some(kind) = diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 52e0ce4d88..ad89a9ca93 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -128,7 +128,7 @@ impl DataStore { let (blocks_per_extent, extent_count) = Self::get_crucible_allocation(&block_size, size); - let query = crate::db::queries::region_allocation::RegionAllocate::new( + let query = crate::db::queries::region_allocation::allocation_query( volume_id, block_size.to_bytes() as u64, blocks_per_extent, @@ -141,6 +141,12 @@ impl DataStore { crate::db::queries::region_allocation::from_diesel(e) })?; + info!( + self.log, + "Allocated regions for volume"; + "volume_id" => %volume_id, + "datasets_and_regions" => ?dataset_and_regions, + ); Ok(dataset_and_regions) } diff --git a/nexus/db-queries/src/db/mod.rs b/nexus/db-queries/src/db/mod.rs index d5262166ee..9b3d71970c 100644 --- a/nexus/db-queries/src/db/mod.rs +++ b/nexus/db-queries/src/db/mod.rs @@ -5,7 +5,6 @@ //! Facilities for working with the Omicron database pub(crate) mod alias; -pub(crate) mod cast_uuid_as_bytea; // This is not intended to be public, but this is necessary to use it from // doctests pub mod collection_attach; @@ -29,6 +28,7 @@ mod pool_connection; // This is marked public because the error types are used elsewhere, e.g., in // sagas. pub mod queries; +mod raw_query_builder; mod saga_recovery; mod sec_store; pub mod subquery; diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index a657d21c97..2e4f4cd776 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -4,35 +4,22 @@ //! Implementation of queries for provisioning regions. 
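The hunk below replaces the Diesel `Subquery`-typed CTE with a single raw SQL string assembled by the new `QueryBuilder` (introduced in `raw_query_builder.rs` further down in this diff). For orientation, here is a minimal sketch of that builder pattern; the query and column choice are made up for illustration and are not part of this patch:

```rust
use crate::db::raw_query_builder::{QueryBuilder, TypedSqlQuery};
use diesel::sql_types;

/// Illustrative only: count the regions attached to a volume.
/// allocation_query() below follows the same shape, just with the full
/// CTE text and one .bind() for every .param().
fn count_regions_query(
    volume_id: uuid::Uuid,
) -> TypedSqlQuery<sql_types::BigInt> {
    QueryBuilder::new()
        .sql("SELECT COUNT(*) FROM region WHERE region.volume_id = ")
        .param() // emits "$1" into the SQL text
        .bind::<sql_types::Uuid, _>(volume_id) // supplies the value for $1
        .query()
}
```

Keeping `.param()` (which only affects the SQL text) separate from `.bind()` (which only attaches a value) is what lets the generated statement text stay stable for the prepared-statement cache, as the builder's own documentation notes.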
-use crate::db::alias::ExpressionAlias; -use crate::db::cast_uuid_as_bytea::CastUuidToBytea; +use crate::db::column_walker::AllColumnsOf; use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; -use crate::db::model::{Dataset, DatasetKind, Region}; -use crate::db::pool::DbConnection; -use crate::db::subquery::{AsQuerySource, Cte, CteBuilder, CteQuery}; -use crate::db::true_or_cast_error::{matches_sentinel, TrueOrCastError}; -use db_macros::Subquery; +use crate::db::model::{Dataset, Region}; +use crate::db::raw_query_builder::{QueryBuilder, TypedSqlQuery}; +use crate::db::schema; +use crate::db::true_or_cast_error::matches_sentinel; +use const_format::concatcp; use diesel::pg::Pg; -use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; use diesel::result::Error as DieselError; -use diesel::PgBinaryExpressionMethods; -use diesel::{ - sql_types, BoolExpressionMethods, CombineDsl, ExpressionMethods, - Insertable, IntoSql, JoinOnDsl, NullableExpressionMethods, QueryDsl, - RunQueryDsl, -}; +use diesel::sql_types; use nexus_config::RegionAllocationStrategy; -use nexus_db_model::queries::region_allocation::{ - candidate_datasets, candidate_regions, candidate_zpools, cockroach_md5, - do_insert, inserted_regions, old_regions, old_zpool_usage, - proposed_dataset_changes, shuffled_candidate_datasets, updated_datasets, -}; -use nexus_db_model::schema; -use nexus_db_model::to_db_sled_policy; -use nexus_db_model::SledState; -use nexus_types::external_api::views::SledPolicy; use omicron_common::api::external; +type AllColumnsOfRegion = AllColumnsOf; +type AllColumnsOfDataset = AllColumnsOf; + const NOT_ENOUGH_DATASETS_SENTINEL: &'static str = "Not enough datasets"; const NOT_ENOUGH_ZPOOL_SPACE_SENTINEL: &'static str = "Not enough space"; const NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL: &'static str = @@ -77,611 +64,345 @@ pub fn from_diesel(e: DieselError) -> external::Error { error::public_error_from_diesel(e, error::ErrorHandler::Server) } -/// A subquery to find all old regions associated with a particular volume. -#[derive(Subquery, QueryId)] -#[subquery(name = old_regions)] -struct OldRegions { - query: Box>, -} - -impl OldRegions { - fn new(volume_id: uuid::Uuid) -> Self { - use crate::db::schema::region::dsl; - Self { - query: Box::new(dsl::region.filter(dsl::volume_id.eq(volume_id))), - } - } -} - -/// A subquery to find datasets which could be used for provisioning regions. -/// -/// We only consider datasets which are already allocated as "Crucible". -/// This implicitly distinguishes between "M.2s" and "U.2s" -- Nexus needs to -/// determine during dataset provisioning which devices should be considered for -/// usage as Crucible storage. -/// -/// We select only one dataset from each zpool. 
-#[derive(Subquery, QueryId)] -#[subquery(name = candidate_datasets)] -struct CandidateDatasets { - query: Box>, -} - -impl CandidateDatasets { - fn new(candidate_zpools: &CandidateZpools, seed: u128) -> Self { - use crate::db::schema::dataset::dsl as dataset_dsl; - use candidate_zpools::dsl as candidate_zpool_dsl; - - let seed_bytes = seed.to_le_bytes(); - - let query: Box> = - Box::new( - dataset_dsl::dataset - .inner_join(candidate_zpools.query_source().on( - dataset_dsl::pool_id.eq(candidate_zpool_dsl::pool_id), - )) - .filter(dataset_dsl::time_deleted.is_null()) - .filter(dataset_dsl::size_used.is_not_null()) - .filter(dataset_dsl::kind.eq(DatasetKind::Crucible)) - .distinct_on(dataset_dsl::pool_id) - .order_by(( - dataset_dsl::pool_id, - cockroach_md5::dsl::md5( - CastUuidToBytea::new(dataset_dsl::id) - .concat(seed_bytes.to_vec()), - ), - )) - .select((dataset_dsl::id, dataset_dsl::pool_id)), - ); - Self { query } - } -} - -/// Shuffle the candidate datasets, and select REGION_REDUNDANCY_THRESHOLD -/// regions from it. -#[derive(Subquery, QueryId)] -#[subquery(name = shuffled_candidate_datasets)] -struct ShuffledCandidateDatasets { - query: Box>, -} - -impl ShuffledCandidateDatasets { - fn new(candidate_datasets: &CandidateDatasets, seed: u128) -> Self { - use candidate_datasets::dsl as candidate_datasets_dsl; - - let seed_bytes = seed.to_le_bytes(); - - let query: Box> = - Box::new( - candidate_datasets - .query_source() - // We order by md5 to shuffle the ordering of the datasets. - // md5 has a uniform output distribution so it does the job. - .order(cockroach_md5::dsl::md5( - CastUuidToBytea::new(candidate_datasets_dsl::id) - .concat(seed_bytes.to_vec()), - )) - .select(( - candidate_datasets_dsl::id, - candidate_datasets_dsl::pool_id, - )) - .limit(REGION_REDUNDANCY_THRESHOLD.try_into().unwrap()), - ); - Self { query } - } -} - -/// A subquery to create the regions-to-be-inserted for the volume. -#[derive(Subquery, QueryId)] -#[subquery(name = candidate_regions)] -struct CandidateRegions { - query: Box>, -} - -diesel::sql_function!(fn gen_random_uuid() -> Uuid); -diesel::sql_function!(fn now() -> Timestamptz); - -impl CandidateRegions { - fn new( - shuffled_candidate_datasets: &ShuffledCandidateDatasets, - volume_id: uuid::Uuid, - block_size: u64, - blocks_per_extent: u64, - extent_count: u64, - ) -> Self { - use schema::region; - use shuffled_candidate_datasets::dsl as shuffled_candidate_datasets_dsl; - - let volume_id = volume_id.into_sql::(); - let block_size = (block_size as i64).into_sql::(); - let blocks_per_extent = - (blocks_per_extent as i64).into_sql::(); - let extent_count = - (extent_count as i64).into_sql::(); - Self { - query: Box::new(shuffled_candidate_datasets.query_source().select( - ( - ExpressionAlias::new::(gen_random_uuid()), - ExpressionAlias::new::(now()), - ExpressionAlias::new::(now()), - ExpressionAlias::new::( - shuffled_candidate_datasets_dsl::id, - ), - ExpressionAlias::new::(volume_id), - ExpressionAlias::new::(block_size), - ExpressionAlias::new::( - blocks_per_extent, - ), - ExpressionAlias::new::(extent_count), - ), - )), - } - } -} - -/// A subquery which summarizes the changes we intend to make, showing: -/// -/// 1. Which datasets will have size adjustments -/// 2. Which pools those datasets belong to -/// 3. 
The delta in size-used -#[derive(Subquery, QueryId)] -#[subquery(name = proposed_dataset_changes)] -struct ProposedChanges { - query: Box>, -} - -impl ProposedChanges { - fn new(candidate_regions: &CandidateRegions) -> Self { - use crate::db::schema::dataset::dsl as dataset_dsl; - use candidate_regions::dsl as candidate_regions_dsl; - Self { - query: Box::new( - candidate_regions.query_source() - .inner_join( - dataset_dsl::dataset.on(dataset_dsl::id.eq(candidate_regions_dsl::dataset_id)) - ) - .select(( - ExpressionAlias::new::(candidate_regions_dsl::dataset_id), - ExpressionAlias::new::(dataset_dsl::pool_id), - ExpressionAlias::new::( - candidate_regions_dsl::block_size * - candidate_regions_dsl::blocks_per_extent * - candidate_regions_dsl::extent_count - ), - )) - ), - } - } -} - -/// A subquery which calculates the old size being used by zpools -/// under consideration as targets for region allocation. -#[derive(Subquery, QueryId)] -#[subquery(name = old_zpool_usage)] -struct OldPoolUsage { - query: Box>, -} - -impl OldPoolUsage { - fn new() -> Self { - use crate::db::schema::dataset::dsl as dataset_dsl; - Self { - query: Box::new( - dataset_dsl::dataset - .group_by(dataset_dsl::pool_id) - .filter(dataset_dsl::size_used.is_not_null()) - .filter(dataset_dsl::time_deleted.is_null()) - .select(( - dataset_dsl::pool_id, - ExpressionAlias::new::( - diesel::dsl::sum(dataset_dsl::size_used) - .assume_not_null(), - ), - )), - ), - } - } -} - -/// A subquery which identifies zpools with enough space for a region allocation. -#[derive(Subquery, QueryId)] -#[subquery(name = candidate_zpools)] -struct CandidateZpools { - query: Box>, -} - -impl CandidateZpools { - fn new( - old_zpool_usage: &OldPoolUsage, - zpool_size_delta: u64, - seed: u128, - distinct_sleds: bool, - ) -> Self { - use schema::sled::dsl as sled_dsl; - use schema::zpool::dsl as zpool_dsl; - - // Why are we using raw `diesel::dsl::sql` here? - // - // When SQL performs the "SUM" operation on "bigint" type, the result - // is promoted to "numeric" (see: old_zpool_usage::dsl::size_used). - // - // However, we'd like to compare that value with a different value - // (zpool_dsl::total_size) which is still a "bigint". This comparison - // is safe (after all, we basically want to promote "total_size" to a - // Numeric too) but Diesel demands that the input and output SQL types - // of expression methods like ".le" match exactly. - // - // For similar reasons, we use `diesel::dsl::sql` with zpool_size_delta. - // We would like to add it, but diesel only permits us to `to_sql()` it - // into a BigInt, not a Numeric. I welcome a better solution. - let it_will_fit = (old_zpool_usage::dsl::size_used - + diesel::dsl::sql(&zpool_size_delta.to_string())) - .le(diesel::dsl::sql( - "(SELECT total_size FROM omicron.public.inv_zpool WHERE - inv_zpool.id = old_zpool_usage.pool_id - ORDER BY inv_zpool.time_collected DESC LIMIT 1)", - )); - - // We need to join on the sled table to access provision_state. 
- let with_sled = sled_dsl::sled.on(zpool_dsl::sled_id.eq(sled_dsl::id)); - let with_zpool = zpool_dsl::zpool - .on(zpool_dsl::id.eq(old_zpool_usage::dsl::pool_id)) - .inner_join(with_sled); - - let sled_is_provisionable = sled_dsl::sled_policy - .eq(to_db_sled_policy(SledPolicy::provisionable())); - let sled_is_active = sled_dsl::sled_state.eq(SledState::Active); - - let base_query = old_zpool_usage - .query_source() - .inner_join(with_zpool) - .filter(it_will_fit) - .filter(sled_is_provisionable) - .filter(sled_is_active) - .select((old_zpool_usage::dsl::pool_id,)); - - let query = if distinct_sleds { - let seed_bytes = seed.to_le_bytes(); - - let query: Box> = - Box::new( - base_query - .order_by(( - zpool_dsl::sled_id, - cockroach_md5::dsl::md5( - CastUuidToBytea::new(zpool_dsl::id) - .concat(seed_bytes.to_vec()), - ), - )) - .distinct_on(zpool_dsl::sled_id), - ); - - query - } else { - let query: Box> = - Box::new(base_query); +type SelectableSql = < + >::SelectExpression as diesel::Expression +>::SqlType; - query +pub fn allocation_query( + volume_id: uuid::Uuid, + block_size: u64, + blocks_per_extent: u64, + extent_count: u64, + allocation_strategy: &RegionAllocationStrategy, +) -> TypedSqlQuery<(SelectableSql, SelectableSql)> { + let (seed, distinct_sleds) = { + let (input_seed, distinct_sleds) = match allocation_strategy { + RegionAllocationStrategy::Random { seed } => (seed, false), + RegionAllocationStrategy::RandomWithDistinctSleds { seed } => { + (seed, true) + } }; - - Self { query } - } -} - -diesel::sql_function! { - #[aggregate] - fn bool_and(b: sql_types::Bool) -> sql_types::Bool; -} - -/// A subquery which confirms whether or not the insertion and updates should -/// occur. -/// -/// This subquery additionally exits the CTE early with an error if either: -/// 1. Not enough datasets exist to provision regions with our required -/// redundancy, or -/// 2. Not enough space exists on zpools to perform the provisioning. -#[derive(Subquery, QueryId)] -#[subquery(name = do_insert)] -struct DoInsert { - query: Box>, -} - -impl DoInsert { - fn new( - old_regions: &OldRegions, - candidate_regions: &CandidateRegions, - candidate_zpools: &CandidateZpools, - ) -> Self { - let redundancy = REGION_REDUNDANCY_THRESHOLD as i64; - let not_allocated_yet = old_regions - .query_source() - .count() - .single_value() - .assume_not_null() - .lt(redundancy); - - let enough_candidate_zpools = candidate_zpools - .query_source() - .count() - .single_value() - .assume_not_null() - .ge(redundancy); - - let enough_candidate_regions = candidate_regions - .query_source() - .count() - .single_value() - .assume_not_null() - .ge(redundancy); - - // We want to ensure that we do not allocate on two datasets in the same - // zpool, for two reasons - // - Data redundancy: If a drive fails it should only take one of the 3 - // regions with it - // - Risk of overallocation: We only check that each zpool as enough - // room for one region, so we should not allocate more than one region - // to it. - // - // Selecting two datasets on the same zpool will not initially be - // possible, as at the time of writing each zpool only has one dataset. - // Additionally, we intend to modify the allocation strategy to select - // from 3 distinct sleds, removing the possibility entirely. But, if we - // introduce a change that adds another crucible dataset to zpools - // before we improve the allocation strategy, this check will make sure - // we don't violate drive redundancy, and generate an error instead. 
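One behavior worth calling out, since it is easy to miss across the rewrite: the old `TrueOrCastError` wrappers and the new `CAST(IF(<check>, 'TRUE', '<sentinel>') AS BOOL)` expressions implement the same trick. When a precondition fails, CockroachDB refuses to cast the sentinel string to `BOOL`, the whole statement errors out, and the caller recognizes the sentinel text in the error message via `matches_sentinel`/`from_diesel` rather than receiving a structured error code. A rough sketch of that recognition step, using only the sentinel constants already defined at the top of this module (the helper itself is made up, not part of this patch):

```rust
/// Illustrative only: identify which allocation precondition failed by
/// searching a database error message for one of the sentinel strings.
fn failed_precondition(message: &str) -> Option<&'static str> {
    [
        NOT_ENOUGH_DATASETS_SENTINEL,
        NOT_ENOUGH_ZPOOL_SPACE_SENTINEL,
        NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL,
    ]
    .into_iter()
    .find(|sentinel| message.contains(*sentinel))
}
```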
- use crate::db::schema::dataset::dsl as dataset_dsl; - use candidate_regions::dsl as candidate_dsl; - let enough_unique_candidate_zpools = candidate_regions - .query_source() - .inner_join( - dataset_dsl::dataset - .on(candidate_dsl::dataset_id.eq(dataset_dsl::id)), - ) - .select(diesel::dsl::count_distinct(dataset_dsl::pool_id)) - .single_value() - .assume_not_null() - .ge(redundancy); - - Self { - query: Box::new(diesel::select((ExpressionAlias::new::< - do_insert::insert, - >( - not_allocated_yet - .and(TrueOrCastError::new( - enough_candidate_zpools, - NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, - )) - .and(TrueOrCastError::new( - enough_candidate_regions, - NOT_ENOUGH_DATASETS_SENTINEL, - )) - .and(TrueOrCastError::new( - enough_unique_candidate_zpools, - NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, - )), - ),))), - } - } -} - -/// A subquery which actually inserts the regions. -#[derive(Subquery, QueryId)] -#[subquery(name = inserted_regions)] -struct InsertRegions { - query: Box>, -} - -impl InsertRegions { - fn new(do_insert: &DoInsert, candidate_regions: &CandidateRegions) -> Self { - use crate::db::schema::region; - - Self { - query: Box::new( - candidate_regions - .query_source() - .select(candidate_regions::all_columns) - .filter( - do_insert - .query_source() - .select(do_insert::insert) - .single_value() - .assume_not_null(), - ) - .insert_into(region::table) - .returning(region::all_columns), + ( + input_seed.map_or_else( + || { + std::time::SystemTime::now() + .duration_since(std::time::UNIX_EPOCH) + .unwrap() + .as_nanos() + }, + |seed| seed as u128, ), - } - } -} - -/// A subquery which updates dataset size usage based on inserted regions. -#[derive(Subquery, QueryId)] -#[subquery(name = updated_datasets)] -struct UpdateDatasets { - query: Box>, -} - -impl UpdateDatasets { - fn new( - do_insert: &DoInsert, - proposed_dataset_changes: &ProposedChanges, - ) -> Self { - use crate::db::schema::dataset::dsl as dataset_dsl; - - let datasets_with_updates = proposed_dataset_changes - .query_source() - .select(proposed_dataset_changes::columns::id) - .into_boxed(); - - Self { - query: Box::new( - diesel::update( - dataset_dsl::dataset.filter( - dataset_dsl::id.eq_any(datasets_with_updates) - ) - ) - .filter( - do_insert.query_source() - .select(do_insert::insert) - .single_value() - .assume_not_null() - ) - .set( - dataset_dsl::size_used.eq( - dataset_dsl::size_used + proposed_dataset_changes.query_source() - .filter(proposed_dataset_changes::columns::id.eq(dataset_dsl::id)) - .select(proposed_dataset_changes::columns::size_used_delta) - .single_value() - ) - ) - .returning(crate::db::schema::dataset::all_columns) - ) - } + distinct_sleds, + ) + }; + + let seed = seed.to_le_bytes().to_vec(); + + let size_delta = block_size * blocks_per_extent * extent_count; + let redundancy: i64 = i64::try_from(REGION_REDUNDANCY_THRESHOLD).unwrap(); + + let builder = QueryBuilder::new().sql( + // Find all old regions associated with a particular volume +"WITH + old_regions AS ( + SELECT ").sql(AllColumnsOfRegion::with_prefix("region")).sql(" + FROM region WHERE (region.volume_id = ").param().sql(")),") + .bind::(volume_id) + + // Calculates the old size being used by zpools under consideration as targets for region + // allocation. 
+ .sql(" + old_zpool_usage AS ( + SELECT + dataset.pool_id, + sum(dataset.size_used) AS size_used + FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id),") + .sql(" + candidate_zpools AS ("); + + // Identifies zpools with enough space for region allocation. + // + // NOTE: 'distinct_sleds' changes the format of the underlying SQL query, as it uses + // distinct bind parameters depending on the conditional branch. + let builder = if distinct_sleds { + builder.sql("SELECT DISTINCT ON (zpool.sled_id) ") + } else { + builder.sql("SELECT ") + }; + let builder = builder.sql(" + old_zpool_usage.pool_id + FROM ( + old_zpool_usage + INNER JOIN + (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id) + ) + WHERE ( + ((old_zpool_usage.size_used + ").param().sql(" ) <= + (SELECT total_size FROM omicron.public.inv_zpool WHERE + inv_zpool.id = old_zpool_usage.pool_id + ORDER BY inv_zpool.time_collected DESC LIMIT 1) + ) + AND + (sled.sled_policy = 'in_service') + AND + (sled.sled_state = 'active') + )" + ).bind::(size_delta as i64); + + let builder = if distinct_sleds { + builder + .sql("ORDER BY zpool.sled_id, md5((CAST(zpool.id as BYTEA) || ") + .param() + .sql("))") + .bind::(seed.clone()) + } else { + builder } + .sql("),"); + + // Find datasets which could be used for provisioning regions. + // + // We only consider datasets which are already allocated as "Crucible". + // This implicitly distinguishes between "M.2s" and "U.2s" -- Nexus needs to + // determine during dataset provisioning which devices should be considered for + // usage as Crucible storage. + // + // We select only one dataset from each zpool. + builder.sql(" + candidate_datasets AS ( + SELECT DISTINCT ON (dataset.pool_id) + dataset.id, + dataset.pool_id + FROM (dataset INNER JOIN candidate_zpools ON (dataset.pool_id = candidate_zpools.pool_id)) + WHERE ( + ((dataset.time_deleted IS NULL) AND + (dataset.size_used IS NOT NULL)) AND + (dataset.kind = 'crucible') + ) + ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || ").param().sql(")) + ),") + .bind::(seed.clone()) + // We order by md5 to shuffle the ordering of the datasets. + // md5 has a uniform output distribution so it does the job. + .sql(" + shuffled_candidate_datasets AS ( + SELECT + candidate_datasets.id, + candidate_datasets.pool_id + FROM candidate_datasets + ORDER BY md5((CAST(candidate_datasets.id as BYTEA) || ").param().sql(")) LIMIT ").param().sql(" + ),") + .bind::(seed) + .bind::(redundancy) + // Create the regions-to-be-inserted for the volume. + .sql(" + candidate_regions AS ( + SELECT + gen_random_uuid() AS id, + now() AS time_created, + now() AS time_modified, + shuffled_candidate_datasets.id AS dataset_id, + ").param().sql(" AS volume_id, + ").param().sql(" AS block_size, + ").param().sql(" AS blocks_per_extent, + ").param().sql(" AS extent_count + FROM shuffled_candidate_datasets + ),") + .bind::(volume_id) + .bind::(block_size as i64) + .bind::(blocks_per_extent as i64) + .bind::(extent_count as i64) + // A subquery which summarizes the changes we intend to make, showing: + // + // 1. Which datasets will have size adjustments + // 2. Which pools those datasets belong to + // 3. 
The delta in size-used + .sql(" + proposed_dataset_changes AS ( + SELECT + candidate_regions.dataset_id AS id, + dataset.pool_id AS pool_id, + ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta + FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id)) + ),") + // Confirms whether or not the insertion and updates should + // occur. + // + // This subquery additionally exits the CTE early with an error if either: + // 1. Not enough datasets exist to provision regions with our required + // redundancy, or + // 2. Not enough space exists on zpools to perform the provisioning. + // + // We want to ensure that we do not allocate on two datasets in the same + // zpool, for two reasons + // - Data redundancy: If a drive fails it should only take one of the 3 + // regions with it + // - Risk of overallocation: We only check that each zpool as enough + // room for one region, so we should not allocate more than one region + // to it. + // + // Selecting two datasets on the same zpool will not initially be + // possible, as at the time of writing each zpool only has one dataset. + // Additionally, provide a configuration option ("distinct_sleds") to modify + // the allocation strategy to select from 3 distinct sleds, removing the + // possibility entirely. But, if we introduce a change that adds another + // crucible dataset to zpools before we improve the allocation strategy, + // this check will make sure we don't violate drive redundancy, and generate + // an error instead. + .sql(" + do_insert AS ( + SELECT ((( + ((SELECT COUNT(*) FROM old_regions LIMIT 1) < ").param().sql(") AND + CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, "') AS BOOL)) AND + CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_DATASETS_SENTINEL, "') AS BOOL)) AND + CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, "') AS BOOL) + ) AS insert + ),")) + .bind::(redundancy) + .bind::(redundancy) + .bind::(redundancy) + .bind::(redundancy) + .sql(" + inserted_regions AS ( + INSERT INTO region + (id, time_created, time_modified, dataset_id, volume_id, block_size, blocks_per_extent, extent_count) + SELECT ").sql(AllColumnsOfRegion::with_prefix("candidate_regions")).sql(" + FROM candidate_regions + WHERE + (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING ").sql(AllColumnsOfRegion::with_prefix("region")).sql(" + ), + updated_datasets AS ( + UPDATE dataset SET + size_used = (dataset.size_used + (SELECT proposed_dataset_changes.size_used_delta FROM proposed_dataset_changes WHERE (proposed_dataset_changes.id = dataset.id) LIMIT 1)) + WHERE ( + (dataset.id = ANY(SELECT proposed_dataset_changes.id FROM proposed_dataset_changes)) AND + (SELECT do_insert.insert FROM do_insert LIMIT 1)) + RETURNING ").sql(AllColumnsOfDataset::with_prefix("dataset")).sql(" + ) +( + SELECT ") + .sql(AllColumnsOfDataset::with_prefix("dataset")) + .sql(", ") + .sql(AllColumnsOfRegion::with_prefix("old_regions")).sql(" + FROM + (old_regions INNER JOIN dataset ON (old_regions.dataset_id = dataset.id)) +) +UNION +( + SELECT ") + .sql(AllColumnsOfDataset::with_prefix("updated_datasets")) + .sql(", ") + 
.sql(AllColumnsOfRegion::with_prefix("inserted_regions")).sql(" + FROM (inserted_regions INNER JOIN updated_datasets ON (inserted_regions.dataset_id = updated_datasets.id)) +)" + ).query() } -/// Constructs a CTE for allocating new regions, and updating the datasets to -/// which those regions belong. -#[derive(QueryId)] -pub struct RegionAllocate { - cte: Cte, -} - -impl RegionAllocate { - pub fn new( - volume_id: uuid::Uuid, - block_size: u64, - blocks_per_extent: u64, - extent_count: u64, - allocation_strategy: &RegionAllocationStrategy, - ) -> Self { - let (seed, distinct_sleds) = { - let (input_seed, distinct_sleds) = match allocation_strategy { - RegionAllocationStrategy::Random { seed } => (seed, false), - RegionAllocationStrategy::RandomWithDistinctSleds { seed } => { - (seed, true) - } - }; - ( - input_seed.map_or_else( - || { - std::time::SystemTime::now() - .duration_since(std::time::UNIX_EPOCH) - .unwrap() - .as_nanos() - }, - |seed| seed as u128, - ), - distinct_sleds, - ) - }; - - let size_delta = block_size * blocks_per_extent * extent_count; - - let old_regions = OldRegions::new(volume_id); - - let old_pool_usage = OldPoolUsage::new(); - let candidate_zpools = CandidateZpools::new( - &old_pool_usage, - size_delta, - seed, - distinct_sleds, +#[cfg(test)] +mod test { + use super::*; + use crate::db::explain::ExplainableAsync; + use nexus_test_utils::db::test_setup_database; + use omicron_test_utils::dev; + use uuid::Uuid; + + // This test is a bit of a "change detector", but it's here to help with + // debugging too. If you change this query, it can be useful to see exactly + // how the output SQL has been altered. + #[tokio::test] + async fn expectorate_query() { + let volume_id = Uuid::nil(); + let block_size = 512; + let blocks_per_extent = 4; + let extent_count = 8; + + // First structure: "RandomWithDistinctSleds" + + let region_allocate = allocation_query( + volume_id, + block_size, + blocks_per_extent, + extent_count, + &RegionAllocationStrategy::RandomWithDistinctSleds { + seed: Some(1), + }, + ); + let s = dev::db::format_sql( + &diesel::debug_query::(®ion_allocate).to_string(), + ) + .await + .unwrap(); + expectorate::assert_contents( + "tests/output/region_allocate_distinct_sleds.sql", + &s, ); - let candidate_datasets = - CandidateDatasets::new(&candidate_zpools, seed); - - let shuffled_candidate_datasets = - ShuffledCandidateDatasets::new(&candidate_datasets, seed); + // Second structure: "Random" - let candidate_regions = CandidateRegions::new( - &shuffled_candidate_datasets, + let region_allocate = allocation_query( volume_id, block_size, blocks_per_extent, extent_count, + &RegionAllocationStrategy::Random { seed: Some(1) }, ); - let proposed_changes = ProposedChanges::new(&candidate_regions); - let do_insert = - DoInsert::new(&old_regions, &candidate_regions, &candidate_zpools); - let insert_regions = InsertRegions::new(&do_insert, &candidate_regions); - let updated_datasets = - UpdateDatasets::new(&do_insert, &proposed_changes); - - // Gather together all "(dataset, region)" rows for all regions which - // are allocated to the volume. - // - // This roughly translates to: - // - // old_regions INNER JOIN old_datasets - // UNION - // new_regions INNER JOIN updated_datasets - // - // Note that we cannot simply JOIN the old + new regions, and query for - // their associated datasets: doing so would return the pre-UPDATE - // values of datasets that are updated by this CTE. 
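A note on the two `tests/output/*.sql` files compared against by the `expectorate_query` test above: assuming the usual expectorate workflow used in this repository, they are regenerated by re-running the test with `EXPECTORATE=overwrite` set in the environment. Because `format_sql` (added to `test-utils` at the end of this patch) pipes the query text through `cockroach sqlfmt`, regenerating them also requires the same `cockroach` binary the rest of the test suite already uses.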
- let final_select = Box::new( - old_regions - .query_source() - .inner_join( - crate::db::schema::dataset::dsl::dataset - .on(old_regions::dataset_id - .eq(crate::db::schema::dataset::dsl::id)), - ) - .select(( - crate::db::schema::dataset::all_columns, - old_regions::all_columns, - )) - .union( - insert_regions - .query_source() - .inner_join( - updated_datasets::dsl::updated_datasets - .on(inserted_regions::dataset_id - .eq(updated_datasets::id)), - ) - .select(( - updated_datasets::all_columns, - inserted_regions::all_columns, - )), - ), + let s = dev::db::format_sql( + &diesel::debug_query::(®ion_allocate).to_string(), + ) + .await + .unwrap(); + expectorate::assert_contents( + "tests/output/region_allocate_random_sleds.sql", + &s, ); - - let cte = CteBuilder::new() - .add_subquery(old_regions) - .add_subquery(old_pool_usage) - .add_subquery(candidate_zpools) - .add_subquery(candidate_datasets) - .add_subquery(shuffled_candidate_datasets) - .add_subquery(candidate_regions) - .add_subquery(proposed_changes) - .add_subquery(do_insert) - .add_subquery(insert_regions) - .add_subquery(updated_datasets) - .build(final_select); - - Self { cte } } -} -impl QueryFragment for RegionAllocate { - fn walk_ast<'a>( - &'a self, - mut out: AstPass<'_, 'a, Pg>, - ) -> diesel::QueryResult<()> { - out.unsafe_to_cache_prepared(); + // Explain the possible forms of the SQL query to ensure that it + // creates a valid SQL string. + #[tokio::test] + async fn explainable() { + let logctx = dev::test_setup_log("explainable"); + let log = logctx.log.new(o!()); + let mut db = test_setup_database(&log).await; + let cfg = crate::db::Config { url: db.pg_config().clone() }; + let pool = crate::db::Pool::new(&logctx.log, &cfg); + let conn = pool.pool().get().await.unwrap(); + + let volume_id = Uuid::new_v4(); + let block_size = 512; + let blocks_per_extent = 4; + let extent_count = 8; + + // First structure: Explain the query with "RandomWithDistinctSleds" + + let region_allocate = allocation_query( + volume_id, + block_size, + blocks_per_extent, + extent_count, + &RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }, + ); + let _ = region_allocate + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); - self.cte.walk_ast(out.reborrow())?; - Ok(()) - } -} + // Second structure: Explain the query with "Random" -type SelectableSql = < - >::SelectExpression as diesel::Expression ->::SqlType; + let region_allocate = allocation_query( + volume_id, + block_size, + blocks_per_extent, + extent_count, + &RegionAllocationStrategy::Random { seed: None }, + ); + let _ = region_allocate + .explain_async(&conn) + .await + .expect("Failed to explain query - is it valid SQL?"); -impl Query for RegionAllocate { - type SqlType = (SelectableSql, SelectableSql); + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } - -impl RunQueryDsl for RegionAllocate {} diff --git a/nexus/db-queries/src/db/raw_query_builder.rs b/nexus/db-queries/src/db/raw_query_builder.rs new file mode 100644 index 0000000000..5c803e20ac --- /dev/null +++ b/nexus/db-queries/src/db/raw_query_builder.rs @@ -0,0 +1,195 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for building string-based queries. +//! +//! These largely side-step Diesel's type system, +//! 
and are recommended for more complex CTE + +use crate::db::pool::DbConnection; +use diesel::pg::Pg; +use diesel::query_builder::{AstPass, Query, QueryFragment, QueryId}; +use diesel::sql_types; +use diesel::RunQueryDsl; +use std::cell::Cell; +use std::marker::PhantomData; + +// Keeps a counter to "how many bind parameters have been used" to +// aid in the construction of the query string. +struct BindParamCounter(Cell); +impl BindParamCounter { + fn new() -> Self { + Self(0.into()) + } + fn next(&self) -> i32 { + self.0.set(self.0.get() + 1); + self.0.get() + } +} + +/// A "trusted" string, which can be used to construct SQL queries even +/// though it isn't static. We use "trust" to refer to "protection from +/// SQL injections". +/// +/// This is basically a workaround for cases where we haven't yet been +/// able to construct a query at compile-time. +pub struct TrustedStr(TrustedStrVariants); + +impl TrustedStr { + /// Explicitly constructs a string, with a name that hopefully + /// gives callers some pause when calling this API. + /// + /// If arbitrary user input is provided here, this string COULD + /// cause SQL injection attacks, so each call-site should have a + /// justification for "why it's safe". + pub fn i_take_responsibility_for_validating_this_string(s: String) -> Self { + Self(TrustedStrVariants::ValidatedExplicitly(s)) + } + + #[cfg(test)] + pub fn as_str(&self) -> &str { + match &self.0 { + TrustedStrVariants::Static(s) => s, + TrustedStrVariants::ValidatedExplicitly(s) => s.as_str(), + } + } +} + +impl From<&'static str> for TrustedStr { + fn from(s: &'static str) -> Self { + Self(TrustedStrVariants::Static(s)) + } +} + +// This enum should be kept non-pub to make it harder to accidentally +// construct a "ValidatedExplicitly" variant. +enum TrustedStrVariants { + Static(&'static str), + ValidatedExplicitly(String), +} + +trait SqlQueryBinds { + fn add_bind(self, bind_counter: &BindParamCounter) -> Self; +} + +impl<'a, Query> SqlQueryBinds + for diesel::query_builder::BoxedSqlQuery<'a, Pg, Query> +{ + fn add_bind(self, bind_counter: &BindParamCounter) -> Self { + self.sql("$").sql(bind_counter.next().to_string()) + } +} + +type BoxedQuery = diesel::query_builder::BoxedSqlQuery< + 'static, + Pg, + diesel::query_builder::SqlQuery, +>; + +/// A small wrapper around [diesel::query_builder::BoxedSqlQuery] which +/// assists with counting bind parameters and recommends avoiding the usage of +/// any non-static strings in query construction. +// NOTE: I'd really like to eventually be able to construct SQL statements +// entirely at compile-time, but the combination of "const generics" and "const +// fns" in stable Rust just isn't there yet. +// +// It's definitely possible to create static string builders that operate +// entirely at compile-time, like: +// https://play.rust-lang.org/?version=nightly&mode=debug&edition=2021&gist=26d0276648c3315f285372a19d0d492f +// +// But this relies on nightly features. +pub struct QueryBuilder { + query: BoxedQuery, + bind_counter: BindParamCounter, +} + +impl QueryBuilder { + pub fn new() -> Self { + Self { + query: diesel::sql_query("").into_boxed(), + bind_counter: BindParamCounter::new(), + } + } + + /// Identifies that a bind parameter should exist in this location within + /// the SQL string. + /// + /// This should be called the same number of times as [Self::bind]. It is, + /// however, a distinct method, as "identifying bind params" should be + /// decoupled from "using bind parameters" to have an efficient statement + /// cache. 
+ pub fn param(self) -> Self { + Self { + query: self + .query + .sql("$") + .sql(self.bind_counter.next().to_string()), + bind_counter: self.bind_counter, + } + } + + /// Slightly more strict than the "sql" method of Diesel's SqlQuery. + /// Only permits strings which have been validated intentionally to limit + /// susceptibility to SQL injection. + /// + /// See the documentation of [TrustedStr] for more details. + pub fn sql>(self, s: S) -> Self { + let query = match s.into().0 { + TrustedStrVariants::Static(s) => self.query.sql(s), + TrustedStrVariants::ValidatedExplicitly(s) => self.query.sql(s), + }; + Self { query, bind_counter: self.bind_counter } + } + + /// A call-through function to [diesel::query_builder::BoxedSqlQuery]. + pub fn bind(self, b: Value) -> Self + where + Pg: sql_types::HasSqlType, + Value: diesel::serialize::ToSql + Send + 'static, + BindSt: Send + 'static, + { + Self { query: self.query.bind(b), bind_counter: self.bind_counter } + } + + /// Takes the final boxed query + pub fn query(self) -> TypedSqlQuery { + TypedSqlQuery { inner: self.query, _phantom: PhantomData } + } +} + +/// Diesel's [diesel::query_builder::BoxedSqlQuery] has a few drawbacks that +/// make this wrapper more palatable: +/// +/// - It always implements "Query" with SqlType = Untyped, so a caller could try to +/// execute this query and get back any type. +/// - It forces the usage of "QueryableByName", which acts wrong if we're +/// returning multiple columns with the same name (this is normal! If you want +/// to UNION two objects that both have "id" columns, this happens). +#[derive(QueryId)] +pub struct TypedSqlQuery { + inner: diesel::query_builder::BoxedSqlQuery< + 'static, + Pg, + diesel::query_builder::SqlQuery, + >, + _phantom: PhantomData, +} + +impl QueryFragment for TypedSqlQuery { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + out.unsafe_to_cache_prepared(); + + self.inner.walk_ast(out.reborrow())?; + Ok(()) + } +} + +impl RunQueryDsl for TypedSqlQuery {} + +impl Query for TypedSqlQuery { + type SqlType = T; +} diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql new file mode 100644 index 0000000000..7aa85458a6 --- /dev/null +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -0,0 +1,267 @@ +WITH + old_regions + AS ( + SELECT + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + FROM + region + WHERE + region.volume_id = $1 + ), + old_zpool_usage + AS ( + SELECT + dataset.pool_id, sum(dataset.size_used) AS size_used + FROM + dataset + WHERE + (dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL) + GROUP BY + dataset.pool_id + ), + candidate_zpools + AS ( + SELECT + DISTINCT ON (zpool.sled_id) old_zpool_usage.pool_id + FROM + old_zpool_usage + INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON + zpool.id = old_zpool_usage.pool_id + WHERE + (old_zpool_usage.size_used + $2) + <= ( + SELECT + total_size + FROM + omicron.public.inv_zpool + WHERE + inv_zpool.id = old_zpool_usage.pool_id + ORDER BY + inv_zpool.time_collected DESC + LIMIT + 1 + ) + AND sled.sled_policy = 'in_service' + AND sled.sled_state = 'active' + ORDER BY + zpool.sled_id, md5(CAST(zpool.id AS BYTES) || $3) + ), + candidate_datasets + AS ( + SELECT + DISTINCT ON (dataset.pool_id) dataset.id, dataset.pool_id + 
FROM + dataset INNER JOIN candidate_zpools ON dataset.pool_id = candidate_zpools.pool_id + WHERE + ((dataset.time_deleted IS NULL) AND (dataset.size_used IS NOT NULL)) + AND dataset.kind = 'crucible' + ORDER BY + dataset.pool_id, md5(CAST(dataset.id AS BYTES) || $4) + ), + shuffled_candidate_datasets + AS ( + SELECT + candidate_datasets.id, candidate_datasets.pool_id + FROM + candidate_datasets + ORDER BY + md5(CAST(candidate_datasets.id AS BYTES) || $5) + LIMIT + $6 + ), + candidate_regions + AS ( + SELECT + gen_random_uuid() AS id, + now() AS time_created, + now() AS time_modified, + shuffled_candidate_datasets.id AS dataset_id, + $7 AS volume_id, + $8 AS block_size, + $9 AS blocks_per_extent, + $10 AS extent_count + FROM + shuffled_candidate_datasets + ), + proposed_dataset_changes + AS ( + SELECT + candidate_regions.dataset_id AS id, + dataset.pool_id AS pool_id, + candidate_regions.block_size + * candidate_regions.blocks_per_extent + * candidate_regions.extent_count + AS size_used_delta + FROM + candidate_regions INNER JOIN dataset ON dataset.id = candidate_regions.dataset_id + ), + do_insert + AS ( + SELECT + ( + ( + (SELECT count(*) FROM old_regions LIMIT 1) < $11 + AND CAST( + IF( + ((SELECT count(*) FROM candidate_zpools LIMIT 1) >= $12), + 'TRUE', + 'Not enough space' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ((SELECT count(*) FROM candidate_regions LIMIT 1) >= $13), + 'TRUE', + 'Not enough datasets' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + ( + SELECT + count(DISTINCT dataset.pool_id) + FROM + candidate_regions + INNER JOIN dataset ON candidate_regions.dataset_id = dataset.id + LIMIT + 1 + ) + >= $14 + ), + 'TRUE', + 'Not enough unique zpools selected' + ) + AS BOOL + ) + AS insert + ), + inserted_regions + AS ( + INSERT + INTO + region + ( + id, + time_created, + time_modified, + dataset_id, + volume_id, + block_size, + blocks_per_extent, + extent_count + ) + SELECT + candidate_regions.id, + candidate_regions.time_created, + candidate_regions.time_modified, + candidate_regions.dataset_id, + candidate_regions.volume_id, + candidate_regions.block_size, + candidate_regions.blocks_per_extent, + candidate_regions.extent_count + FROM + candidate_regions + WHERE + (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + ), + updated_datasets + AS ( + UPDATE + dataset + SET + size_used + = dataset.size_used + + ( + SELECT + proposed_dataset_changes.size_used_delta + FROM + proposed_dataset_changes + WHERE + proposed_dataset_changes.id = dataset.id + LIMIT + 1 + ) + WHERE + dataset.id = ANY (SELECT proposed_dataset_changes.id FROM proposed_dataset_changes) + AND (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used + ) +( + SELECT + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used, + old_regions.id, + old_regions.time_created, + old_regions.time_modified, + old_regions.dataset_id, + old_regions.volume_id, + old_regions.block_size, + old_regions.blocks_per_extent, + old_regions.extent_count + FROM + old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id +) +UNION + ( 
+ SELECT + updated_datasets.id, + updated_datasets.time_created, + updated_datasets.time_modified, + updated_datasets.time_deleted, + updated_datasets.rcgen, + updated_datasets.pool_id, + updated_datasets.ip, + updated_datasets.port, + updated_datasets.kind, + updated_datasets.size_used, + inserted_regions.id, + inserted_regions.time_created, + inserted_regions.time_modified, + inserted_regions.dataset_id, + inserted_regions.volume_id, + inserted_regions.block_size, + inserted_regions.blocks_per_extent, + inserted_regions.extent_count + FROM + inserted_regions + INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id + ) diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql new file mode 100644 index 0000000000..0918c8f2d1 --- /dev/null +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -0,0 +1,265 @@ +WITH + old_regions + AS ( + SELECT + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + FROM + region + WHERE + region.volume_id = $1 + ), + old_zpool_usage + AS ( + SELECT + dataset.pool_id, sum(dataset.size_used) AS size_used + FROM + dataset + WHERE + (dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL) + GROUP BY + dataset.pool_id + ), + candidate_zpools + AS ( + SELECT + old_zpool_usage.pool_id + FROM + old_zpool_usage + INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON + zpool.id = old_zpool_usage.pool_id + WHERE + (old_zpool_usage.size_used + $2) + <= ( + SELECT + total_size + FROM + omicron.public.inv_zpool + WHERE + inv_zpool.id = old_zpool_usage.pool_id + ORDER BY + inv_zpool.time_collected DESC + LIMIT + 1 + ) + AND sled.sled_policy = 'in_service' + AND sled.sled_state = 'active' + ), + candidate_datasets + AS ( + SELECT + DISTINCT ON (dataset.pool_id) dataset.id, dataset.pool_id + FROM + dataset INNER JOIN candidate_zpools ON dataset.pool_id = candidate_zpools.pool_id + WHERE + ((dataset.time_deleted IS NULL) AND (dataset.size_used IS NOT NULL)) + AND dataset.kind = 'crucible' + ORDER BY + dataset.pool_id, md5(CAST(dataset.id AS BYTES) || $3) + ), + shuffled_candidate_datasets + AS ( + SELECT + candidate_datasets.id, candidate_datasets.pool_id + FROM + candidate_datasets + ORDER BY + md5(CAST(candidate_datasets.id AS BYTES) || $4) + LIMIT + $5 + ), + candidate_regions + AS ( + SELECT + gen_random_uuid() AS id, + now() AS time_created, + now() AS time_modified, + shuffled_candidate_datasets.id AS dataset_id, + $6 AS volume_id, + $7 AS block_size, + $8 AS blocks_per_extent, + $9 AS extent_count + FROM + shuffled_candidate_datasets + ), + proposed_dataset_changes + AS ( + SELECT + candidate_regions.dataset_id AS id, + dataset.pool_id AS pool_id, + candidate_regions.block_size + * candidate_regions.blocks_per_extent + * candidate_regions.extent_count + AS size_used_delta + FROM + candidate_regions INNER JOIN dataset ON dataset.id = candidate_regions.dataset_id + ), + do_insert + AS ( + SELECT + ( + ( + (SELECT count(*) FROM old_regions LIMIT 1) < $10 + AND CAST( + IF( + ((SELECT count(*) FROM candidate_zpools LIMIT 1) >= $11), + 'TRUE', + 'Not enough space' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ((SELECT count(*) FROM candidate_regions LIMIT 1) >= $12), + 'TRUE', + 'Not enough datasets' + ) + AS BOOL + ) + ) + AND CAST( + IF( + ( + ( + SELECT + count(DISTINCT dataset.pool_id) + FROM + candidate_regions 
+ INNER JOIN dataset ON candidate_regions.dataset_id = dataset.id + LIMIT + 1 + ) + >= $13 + ), + 'TRUE', + 'Not enough unique zpools selected' + ) + AS BOOL + ) + AS insert + ), + inserted_regions + AS ( + INSERT + INTO + region + ( + id, + time_created, + time_modified, + dataset_id, + volume_id, + block_size, + blocks_per_extent, + extent_count + ) + SELECT + candidate_regions.id, + candidate_regions.time_created, + candidate_regions.time_modified, + candidate_regions.dataset_id, + candidate_regions.volume_id, + candidate_regions.block_size, + candidate_regions.blocks_per_extent, + candidate_regions.extent_count + FROM + candidate_regions + WHERE + (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + region.id, + region.time_created, + region.time_modified, + region.dataset_id, + region.volume_id, + region.block_size, + region.blocks_per_extent, + region.extent_count + ), + updated_datasets + AS ( + UPDATE + dataset + SET + size_used + = dataset.size_used + + ( + SELECT + proposed_dataset_changes.size_used_delta + FROM + proposed_dataset_changes + WHERE + proposed_dataset_changes.id = dataset.id + LIMIT + 1 + ) + WHERE + dataset.id = ANY (SELECT proposed_dataset_changes.id FROM proposed_dataset_changes) + AND (SELECT do_insert.insert FROM do_insert LIMIT 1) + RETURNING + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used + ) +( + SELECT + dataset.id, + dataset.time_created, + dataset.time_modified, + dataset.time_deleted, + dataset.rcgen, + dataset.pool_id, + dataset.ip, + dataset.port, + dataset.kind, + dataset.size_used, + old_regions.id, + old_regions.time_created, + old_regions.time_modified, + old_regions.dataset_id, + old_regions.volume_id, + old_regions.block_size, + old_regions.blocks_per_extent, + old_regions.extent_count + FROM + old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id +) +UNION + ( + SELECT + updated_datasets.id, + updated_datasets.time_created, + updated_datasets.time_modified, + updated_datasets.time_deleted, + updated_datasets.rcgen, + updated_datasets.pool_id, + updated_datasets.ip, + updated_datasets.port, + updated_datasets.kind, + updated_datasets.size_used, + inserted_regions.id, + inserted_regions.time_created, + inserted_regions.time_modified, + inserted_regions.dataset_id, + inserted_regions.volume_id, + inserted_regions.block_size, + inserted_regions.blocks_per_extent, + inserted_regions.extent_count + FROM + inserted_regions + INNER JOIN updated_datasets ON inserted_regions.dataset_id = updated_datasets.id + ) diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index f1d1a2bd02..290868aae2 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -1175,7 +1175,7 @@ async fn ssc_start_running_snapshot( ); let snapshot_id = sagactx.lookup::("snapshot_id")?; - info!(log, "starting running snapshot for {snapshot_id}"); + info!(log, "starting running snapshot"; "snapshot_id" => %snapshot_id); let (.., disk) = LookupPath::new(&opctx, &osagactx.datastore()) .disk_id(params.disk_id) @@ -1198,7 +1198,13 @@ async fn ssc_start_running_snapshot( let url = format!("http://{}", dataset.address()); let client = CrucibleAgentClient::new(&url); - info!(log, "dataset {:?} region {:?} url {}", dataset, region, url); + info!( + log, + "contacting crucible agent to confirm region exists"; + "dataset" => ?dataset, + "region" => 
?region, + "url" => url, + ); // Validate with the Crucible agent that the snapshot exists let crucible_region = retry_until_known_result(log, || async { @@ -1208,7 +1214,11 @@ async fn ssc_start_running_snapshot( .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; - info!(log, "crucible region {:?}", crucible_region); + info!( + log, + "confirmed the region exists with crucible agent"; + "crucible region" => ?crucible_region + ); let crucible_snapshot = retry_until_known_result(log, || async { client @@ -1222,7 +1232,11 @@ async fn ssc_start_running_snapshot( .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; - info!(log, "crucible snapshot {:?}", crucible_snapshot); + info!( + log, + "successfully accessed crucible snapshot"; + "crucible snapshot" => ?crucible_snapshot + ); // Start the snapshot running let crucible_running_snapshot = @@ -1238,7 +1252,11 @@ async fn ssc_start_running_snapshot( .map_err(|e| e.to_string()) .map_err(ActionError::action_failed)?; - info!(log, "crucible running snapshot {:?}", crucible_running_snapshot); + info!( + log, + "successfully started running region snapshot"; + "crucible running snapshot" => ?crucible_running_snapshot + ); // Map from the region to the snapshot let region_addr = format!( diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index c148a60e1c..d8b15520a4 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -21,6 +21,7 @@ use std::time::Duration; use tempfile::tempdir; use tempfile::TempDir; use thiserror::Error; +use tokio::io::AsyncWriteExt; use tokio_postgres::config::Host; use tokio_postgres::config::SslMode; @@ -497,6 +498,15 @@ pub enum CockroachStartError { )] TimedOut { pid: u32, time_waited: Duration }, + #[error("failed to write input to cockroachdb")] + FailedToWrite(#[source] std::io::Error), + + #[error("failed to await cockroachdb completing")] + FailedToWait(#[source] std::io::Error), + + #[error("Invalid cockroachdb output")] + InvalidOutput(#[from] std::string::FromUtf8Error), + #[error("unknown error waiting for cockroach to start")] Unknown { #[source] @@ -653,6 +663,45 @@ impl Drop for CockroachInstance { } } +/// Uses cockroachdb to run the "sqlfmt" command. +pub async fn format_sql(input: &str) -> Result { + let mut cmd = tokio::process::Command::new(COCKROACHDB_BIN); + let mut child = cmd + .stdin(Stdio::piped()) + .stdout(Stdio::piped()) + .args(&[ + "sqlfmt", + "--tab-width", + "2", + "--use-spaces", + "true", + "--print-width", + "100", + ]) + .spawn() + .map_err(|source| CockroachStartError::BadCmd { + cmd: COCKROACHDB_BIN.to_string(), + source, + })?; + let stdin = child.stdin.as_mut().unwrap(); + stdin + .write_all(input.as_bytes()) + .await + .map_err(CockroachStartError::FailedToWrite)?; + let output = child + .wait_with_output() + .await + .map_err(CockroachStartError::FailedToWait)?; + + if !output.status.success() { + return Err(CockroachStartError::Exited { + exit_code: output.status.code().unwrap_or_else(|| -1), + }); + } + + Ok(String::from_utf8(output.stdout)?) +} + /// Verify that CockroachDB has the correct version pub async fn check_db_version() -> Result<(), CockroachStartError> { let mut cmd = tokio::process::Command::new(COCKROACHDB_BIN); From 951d74f88cddbd454ea84db173dc2516fd073274 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 27 Mar 2024 14:37:34 -0400 Subject: [PATCH 007/334] Add a (disabled) RPW to garbage collect stale Oximeter producers (#5311) This is the RPW portion of #5284. 
It's currently statically disabled (via the `if true {}` block at the top of `MetricProducerGc::activate()`); we should remove that block once [all Oximeter producers start refreshing leases](https://github.com/oxidecomputer/omicron/issues/5284#issuecomment-2008291919). This picks a 10 minute lease expiration time - a complete WAG. --- Cargo.lock | 28 ++ Cargo.toml | 3 + dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 11 + nexus-config/src/nexus_config.rs | 16 + nexus/Cargo.toml | 1 + nexus/db-model/src/producer_endpoint.rs | 2 +- nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/Cargo.toml | 7 + nexus/db-queries/src/db/datastore/mod.rs | 2 + nexus/db-queries/src/db/datastore/oximeter.rs | 187 +++++++++ .../src/db/datastore/pub_test_utils.rs | 66 ++++ .../db-queries/src/db/datastore/test_utils.rs | 45 +-- nexus/examples/config.toml | 1 + nexus/metrics-producer-gc/Cargo.toml | 37 ++ nexus/metrics-producer-gc/build.rs | 10 + nexus/metrics-producer-gc/src/lib.rs | 371 ++++++++++++++++++ nexus/src/app/background/init.rs | 24 ++ .../src/app/background/metrics_producer_gc.rs | 258 ++++++++++++ nexus/src/app/background/mod.rs | 1 + nexus/src/app/oximeter.rs | 6 + nexus/tests/config.test.toml | 1 + .../up.sql | 3 + schema/crdb/dbinit.sql | 6 +- smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 26 files changed, 1057 insertions(+), 46 deletions(-) create mode 100644 nexus/db-queries/src/db/datastore/pub_test_utils.rs create mode 100644 nexus/metrics-producer-gc/Cargo.toml create mode 100644 nexus/metrics-producer-gc/build.rs create mode 100644 nexus/metrics-producer-gc/src/lib.rs create mode 100644 nexus/src/app/background/metrics_producer_gc.rs create mode 100644 schema/crdb/add-metrics-producers-time-modified-index/up.sql diff --git a/Cargo.lock b/Cargo.lock index 0421ec6653..c8511806b7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4729,6 +4729,33 @@ dependencies = [ "syn 2.0.52", ] +[[package]] +name = "nexus-metrics-producer-gc" +version = "0.1.0" +dependencies = [ + "async-bb8-diesel", + "chrono", + "diesel", + "futures", + "httptest", + "ipnetwork", + "nexus-db-model", + "nexus-db-queries", + "nexus-test-utils", + "nexus-types", + "omicron-common", + "omicron-rpaths", + "omicron-test-utils", + "omicron-workspace-hack", + "oximeter-client", + "pq-sys", + "slog", + "slog-error-chain", + "thiserror", + "tokio", + "uuid 1.7.0", +] + [[package]] name = "nexus-networking" version = "0.1.0" @@ -5391,6 +5418,7 @@ dependencies = [ "nexus-db-queries", "nexus-defaults", "nexus-inventory", + "nexus-metrics-producer-gc", "nexus-networking", "nexus-reconfigurator-execution", "nexus-reconfigurator-planning", diff --git a/Cargo.toml b/Cargo.toml index 6546100e3f..9cfb3ed283 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,6 +44,7 @@ members = [ "nexus/defaults", "nexus/inventory", "nexus/macros-common", + "nexus/metrics-producer-gc", "nexus/networking", "nexus/reconfigurator/execution", "nexus/reconfigurator/planning", @@ -120,6 +121,7 @@ default-members = [ "nexus-config", "nexus/authz-macros", "nexus/macros-common", + "nexus/metrics-producer-gc", "nexus/networking", "nexus/db-macros", "nexus/db-model", @@ -277,6 +279,7 @@ nexus-db-queries = { path = "nexus/db-queries" } nexus-defaults = { path = "nexus/defaults" } nexus-inventory = { path = "nexus/inventory" } nexus-macros-common = { path = "nexus/macros-common" } +nexus-metrics-producer-gc = { path = "nexus/metrics-producer-gc" } nexus-networking = { path = "nexus/networking" 
} nexus-reconfigurator-execution = { path = "nexus/reconfigurator/execution" } nexus-reconfigurator-planning = { path = "nexus/reconfigurator/planning" } diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 0f0aff1789..512c05fc86 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -75,6 +75,10 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "metrics_producer_gc" + unregisters Oximeter metrics producers that have not renewed their lease + + task: "nat_v4_garbage_collector" prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a predetermined retention policy @@ -169,6 +173,10 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "metrics_producer_gc" + unregisters Oximeter metrics producers that have not renewed their lease + + task: "nat_v4_garbage_collector" prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a predetermined retention policy @@ -250,6 +258,10 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "metrics_producer_gc" + unregisters Oximeter metrics producers that have not renewed their lease + + task: "nat_v4_garbage_collector" prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a predetermined retention policy diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index dcdd3b3e26..8876a293a5 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -282,6 +282,10 @@ task: "inventory_collection" collects hardware and software inventory data from the whole system +task: "metrics_producer_gc" + unregisters Oximeter metrics producers that have not renewed their lease + + task: "nat_v4_garbage_collector" prunes soft-deleted IPV4 NAT entries from ipv4_nat_entry table based on a predetermined retention policy @@ -421,6 +425,13 @@ task: "inventory_collection" last collection started: last collection done: +task: "metrics_producer_gc" + configured period: every 1m + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms + last completion reported error: metric producer gc disabled (omicron#5284) + task: "phantom_disks" configured period: every 30s currently executing: no diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 8b94d0154b..93f7bcccdb 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -353,6 +353,8 @@ pub struct BackgroundTaskConfig { pub dns_internal: DnsTasksConfig, /// configuration for external DNS background tasks pub dns_external: DnsTasksConfig, + /// configuration for metrics producer garbage collection background task + pub metrics_producer_gc: MetricsProducerGcConfig, /// configuration for external endpoint list watcher pub external_endpoints: ExternalEndpointsConfig, /// configuration for nat table garbage collector @@ -395,6 +397,15 @@ pub struct DnsTasksConfig { pub max_concurrent_server_updates: usize, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct MetricsProducerGcConfig { + /// period (in seconds) for periodic activations of the background task that + /// garbage collects metrics producers whose leases have expired + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, 
Eq, PartialEq, Serialize)] pub struct ExternalEndpointsConfig { @@ -714,6 +725,7 @@ mod test { dns_external.period_secs_servers = 6 dns_external.period_secs_propagation = 7 dns_external.max_concurrent_server_updates = 8 + metrics_producer_gc.period_secs = 60 external_endpoints.period_secs = 9 nat_cleanup.period_secs = 30 bfd_manager.period_secs = 30 @@ -816,6 +828,9 @@ mod test { period_secs_propagation: Duration::from_secs(7), max_concurrent_server_updates: 8, }, + metrics_producer_gc: MetricsProducerGcConfig { + period_secs: Duration::from_secs(60) + }, external_endpoints: ExternalEndpointsConfig { period_secs: Duration::from_secs(9), }, @@ -899,6 +914,7 @@ mod test { dns_external.period_secs_servers = 6 dns_external.period_secs_propagation = 7 dns_external.max_concurrent_server_updates = 8 + metrics_producer_gc.period_secs = 60 external_endpoints.period_secs = 9 nat_cleanup.period_secs = 30 bfd_manager.period_secs = 30 diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 57d929d44d..1fc9f56b75 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -84,6 +84,7 @@ nexus-defaults.workspace = true nexus-db-model.workspace = true nexus-db-queries.workspace = true nexus-inventory.workspace = true +nexus-metrics-producer-gc.workspace = true nexus-reconfigurator-execution.workspace = true nexus-reconfigurator-planning.workspace = true nexus-reconfigurator-preparation.workspace = true diff --git a/nexus/db-model/src/producer_endpoint.rs b/nexus/db-model/src/producer_endpoint.rs index 1a38781ce5..aea087360b 100644 --- a/nexus/db-model/src/producer_endpoint.rs +++ b/nexus/db-model/src/producer_endpoint.rs @@ -61,7 +61,7 @@ impl From for internal::nexus::ProducerEndpoint { /// Information announced by a metric server, used so that clients can contact it and collect /// available metric data from it. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] +#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset, PartialEq)] #[diesel(table_name = metric_producer)] pub struct ProducerEndpoint { #[diesel(embed)] diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index e35cc3c38a..487d557c06 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(47, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(48, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(48, "add-metrics-producers-time-modified-index"), KnownVersion::new(47, "add-view-for-bgp-peer-configs"), KnownVersion::new(46, "first-named-migration"), // The first many schema versions only vary by major or patch number and diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 5f99b904fc..354a2b0ac0 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -65,6 +65,13 @@ omicron-uuid-kinds.workspace = true oximeter.workspace = true omicron-workspace-hack.workspace = true +# only enabled during tests or via the `testing` feature +omicron-test-utils = { workspace = true, optional = true } + +[features] +# Enable to export `datastore_test` +testing = ["omicron-test-utils"] + [dev-dependencies] assert_matches.workspace = true camino-tempfile.workspace = true diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 0020cf99b3..6db65f7173 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -71,6 +71,8 @@ mod oximeter; mod physical_disk; mod probe; mod project; +#[cfg(any(test, feature = "testing"))] +pub mod pub_test_utils; mod quota; mod rack; mod region; diff --git a/nexus/db-queries/src/db/datastore/oximeter.rs b/nexus/db-queries/src/db/datastore/oximeter.rs index 55e8e0f5f6..9ac16eafac 100644 --- a/nexus/db-queries/src/db/datastore/oximeter.rs +++ b/nexus/db-queries/src/db/datastore/oximeter.rs @@ -5,14 +5,18 @@ //! [`DataStore`] methods related to Oximeter. use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::context::OpContext; use crate::db; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; +use crate::db::identity::Asset; use crate::db::model::OximeterInfo; use crate::db::model::ProducerEndpoint; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use async_bb8_diesel::AsyncRunQueryDsl; +use chrono::DateTime; use chrono::Utc; use diesel::prelude::*; use omicron_common::api::external::DataPageParams; @@ -166,4 +170,187 @@ impl DataStore { ) }) } + + /// Fetches a page of the list of producer endpoint records with a + /// `time_modified` date older than `expiration` + pub async fn producers_list_expired( + &self, + opctx: &OpContext, + expiration: DateTime, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::metric_producer::dsl; + + paginated(dsl::metric_producer, dsl::id, pagparams) + .filter(dsl::time_modified.lt(expiration)) + .order_by((dsl::oximeter_id, dsl::id)) + .select(ProducerEndpoint::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// List all producer endpoint records with a `time_modified` date older + /// than `expiration`, making as many queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. 
+ pub async fn producers_list_expired_batched( + &self, + opctx: &OpContext, + expiration: DateTime, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; + + let mut producers = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .producers_list_expired( + opctx, + expiration, + &p.current_pagparams(), + ) + .await?; + paginator = p.found_batch(&batch, &|p: &ProducerEndpoint| p.id()); + producers.extend(batch); + } + + Ok(producers) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use db::datastore::pub_test_utils::datastore_test; + use nexus_test_utils::db::test_setup_database; + use nexus_types::internal_api::params; + use omicron_common::api::internal::nexus; + use omicron_test_utils::dev; + use std::time::Duration; + + async fn read_time_modified( + datastore: &DataStore, + producer_id: Uuid, + ) -> DateTime { + use db::schema::metric_producer::dsl; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + match dsl::metric_producer + .filter(dsl::id.eq(producer_id)) + .select(dsl::time_modified) + .first_async(&*conn) + .await + { + Ok(time_modified) => time_modified, + Err(err) => panic!( + "failed to read time_modified for producer {producer_id}: \ + {err}" + ), + } + } + + async fn read_expired_producers( + opctx: &OpContext, + datastore: &DataStore, + expiration: DateTime, + ) -> Vec { + let expired_one_page = datastore + .producers_list_expired( + opctx, + expiration, + &DataPageParams::max_page(), + ) + .await + .expect("failed to read max_page of expired producers"); + let expired_batched = datastore + .producers_list_expired_batched(opctx, expiration) + .await + .expect("failed to read batched expired producers"); + assert_eq!(expired_one_page, expired_batched); + expired_batched + } + + #[tokio::test] + async fn test_producers_list_expired() { + // Setup + let logctx = dev::test_setup_log("test_producers_list_expired"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = + datastore_test(&logctx, &db, Uuid::new_v4()).await; + + // Insert an Oximeter collector + let collector_info = OximeterInfo::new(¶ms::OximeterInfo { + collector_id: Uuid::new_v4(), + address: "[::1]:0".parse().unwrap(), // unused + }); + datastore + .oximeter_create(&opctx, &collector_info) + .await + .expect("failed to insert collector"); + + // Insert a producer + let producer = ProducerEndpoint::new( + &nexus::ProducerEndpoint { + id: Uuid::new_v4(), + kind: nexus::ProducerKind::Service, + address: "[::1]:0".parse().unwrap(), // unused + base_route: "/".to_string(), // unused + interval: Duration::from_secs(0), // unused + }, + collector_info.id, + ); + datastore + .producer_endpoint_create(&opctx, &producer) + .await + .expect("failed to insert producer"); + + // Our producer should show up when we list by its collector + let mut all_producers = datastore + .producers_list_by_oximeter_id( + &opctx, + collector_info.id, + &DataPageParams::max_page(), + ) + .await + .expect("failed to list all producers"); + assert_eq!(all_producers.len(), 1); + assert_eq!(all_producers[0].id(), producer.id()); + + // Steal this producer so we have a database-precision timestamp and can + // use full equality checks moving forward. 
+ let producer = all_producers.pop().unwrap(); + + let producer_time_modified = + read_time_modified(&datastore, producer.id()).await; + + // Whether it's expired depends on the expiration date we specify; it + // should show up if the expiration time is newer than the producer's + // time_modified... + let expired_producers = read_expired_producers( + &opctx, + &datastore, + producer_time_modified + Duration::from_secs(1), + ) + .await; + assert_eq!( + expired_producers.as_slice(), + std::slice::from_ref(&producer) + ); + + // ... but not if the the producer has been modified since the + // expiration. + let expired_producers = read_expired_producers( + &opctx, + &datastore, + producer_time_modified - Duration::from_secs(1), + ) + .await; + assert_eq!(expired_producers.as_slice(), &[]); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/datastore/pub_test_utils.rs b/nexus/db-queries/src/db/datastore/pub_test_utils.rs new file mode 100644 index 0000000000..5259a03656 --- /dev/null +++ b/nexus/db-queries/src/db/datastore/pub_test_utils.rs @@ -0,0 +1,66 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Test support code that can be enabled by dependencies via this crate's +//! `testing` feature. +//! +//! This feature should only be enabled under `dev-dependencies` to avoid this +//! test support code leaking into release binaries. + +use crate::authz; +use crate::context::OpContext; +use crate::db; +use crate::db::DataStore; +use dropshot::test_util::LogContext; +use omicron_test_utils::dev::db::CockroachInstance; +use std::sync::Arc; +use uuid::Uuid; + +/// Constructs a DataStore for use in test suites that has preloaded the +/// built-in users, roles, and role assignments that are needed for basic +/// operation +#[cfg(any(test, feature = "testing"))] +pub async fn datastore_test( + logctx: &LogContext, + db: &CockroachInstance, + rack_id: Uuid, +) -> (OpContext, Arc) { + use crate::authn; + + let cfg = db::Config { url: db.pg_config().clone() }; + let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); + let datastore = + Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap()); + + // Create an OpContext with the credentials of "db-init" just for the + // purpose of loading the built-in users, roles, and assignments. + let opctx = OpContext::for_background( + logctx.log.new(o!()), + Arc::new(authz::Authz::new(&logctx.log)), + authn::Context::internal_db_init(), + Arc::clone(&datastore), + ); + + // TODO: Can we just call "Populate" instead of doing this? + datastore.load_builtin_users(&opctx).await.unwrap(); + datastore.load_builtin_roles(&opctx).await.unwrap(); + datastore.load_builtin_role_asgns(&opctx).await.unwrap(); + datastore.load_builtin_silos(&opctx).await.unwrap(); + datastore.load_builtin_projects(&opctx).await.unwrap(); + datastore.load_builtin_vpcs(&opctx).await.unwrap(); + datastore.load_silo_users(&opctx).await.unwrap(); + datastore.load_silo_user_role_assignments(&opctx).await.unwrap(); + datastore + .load_builtin_fleet_virtual_provisioning_collection(&opctx) + .await + .unwrap(); + datastore.load_builtin_rack_data(&opctx, rack_id).await.unwrap(); + + // Create an OpContext with the credentials of "test-privileged" for general + // testing. 
+ let opctx = + OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore)); + + (opctx, datastore) +} diff --git a/nexus/db-queries/src/db/datastore/test_utils.rs b/nexus/db-queries/src/db/datastore/test_utils.rs index 6d26ad044b..a678bccd49 100644 --- a/nexus/db-queries/src/db/datastore/test_utils.rs +++ b/nexus/db-queries/src/db/datastore/test_utils.rs @@ -6,7 +6,6 @@ use crate::authz; use crate::context::OpContext; -use crate::db; use crate::db::datastore::ValidateTransition; use crate::db::lookup::LookupPath; use crate::db::DataStore; @@ -23,52 +22,12 @@ use std::sync::Arc; use strum::EnumCount; use uuid::Uuid; -/// Constructs a DataStore for use in test suites that has preloaded the -/// built-in users, roles, and role assignments that are needed for basic -/// operation -#[cfg(test)] -pub async fn datastore_test( +pub(crate) async fn datastore_test( logctx: &LogContext, db: &CockroachInstance, ) -> (OpContext, Arc) { - use crate::authn; - - let cfg = db::Config { url: db.pg_config().clone() }; - let pool = Arc::new(db::Pool::new(&logctx.log, &cfg)); - let datastore = - Arc::new(DataStore::new(&logctx.log, pool, None).await.unwrap()); - - // Create an OpContext with the credentials of "db-init" just for the - // purpose of loading the built-in users, roles, and assignments. - let opctx = OpContext::for_background( - logctx.log.new(o!()), - Arc::new(authz::Authz::new(&logctx.log)), - authn::Context::internal_db_init(), - Arc::clone(&datastore), - ); - - // TODO: Can we just call "Populate" instead of doing this? let rack_id = Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap(); - datastore.load_builtin_users(&opctx).await.unwrap(); - datastore.load_builtin_roles(&opctx).await.unwrap(); - datastore.load_builtin_role_asgns(&opctx).await.unwrap(); - datastore.load_builtin_silos(&opctx).await.unwrap(); - datastore.load_builtin_projects(&opctx).await.unwrap(); - datastore.load_builtin_vpcs(&opctx).await.unwrap(); - datastore.load_silo_users(&opctx).await.unwrap(); - datastore.load_silo_user_role_assignments(&opctx).await.unwrap(); - datastore - .load_builtin_fleet_virtual_provisioning_collection(&opctx) - .await - .unwrap(); - datastore.load_builtin_rack_data(&opctx, rack_id).await.unwrap(); - - // Create an OpContext with the credentials of "test-privileged" for general - // testing. - let opctx = - OpContext::for_tests(logctx.log.new(o!()), Arc::clone(&datastore)); - - (opctx, datastore) + super::pub_test_utils::datastore_test(logctx, db, rack_id).await } /// Denotes a specific way in which a sled is ineligible. diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 2e946a9c38..6ba7fd2089 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -91,6 +91,7 @@ dns_external.period_secs_config = 60 dns_external.period_secs_servers = 60 dns_external.period_secs_propagation = 60 dns_external.max_concurrent_server_updates = 5 +metrics_producer_gc.period_secs = 60 # How frequently we check the list of stored TLS certificates. 
This is # approximately an upper bound on how soon after updating the list of # certificates it will take _other_ Nexus instances to notice and stop serving diff --git a/nexus/metrics-producer-gc/Cargo.toml b/nexus/metrics-producer-gc/Cargo.toml new file mode 100644 index 0000000000..9daa52b543 --- /dev/null +++ b/nexus/metrics-producer-gc/Cargo.toml @@ -0,0 +1,37 @@ +[package] +name = "nexus-metrics-producer-gc" +version = "0.1.0" +edition = "2021" + +[build-dependencies] +omicron-rpaths.workspace = true + +[dependencies] +chrono.workspace = true +futures.workspace = true +nexus-db-queries.workspace = true +omicron-common.workspace = true +oximeter-client.workspace = true +slog.workspace = true +slog-error-chain.workspace = true +thiserror.workspace = true +uuid.workspace = true + +# See omicron-rpaths for more about the "pq-sys" dependency. This is needed +# because we use the database in the test suite, though it doesn't appear to +# work to put the pq-sys dependency only in dev-dependencies. +pq-sys = "*" + +omicron-workspace-hack.workspace = true + +[dev-dependencies] +async-bb8-diesel.workspace = true +diesel.workspace = true +httptest.workspace = true +ipnetwork.workspace = true +nexus-db-model.workspace = true +nexus-db-queries = { workspace = true, features = ["testing"] } +nexus-test-utils.workspace = true +nexus-types.workspace = true +omicron-test-utils.workspace = true +tokio.workspace = true diff --git a/nexus/metrics-producer-gc/build.rs b/nexus/metrics-producer-gc/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/nexus/metrics-producer-gc/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. +fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/nexus/metrics-producer-gc/src/lib.rs b/nexus/metrics-producer-gc/src/lib.rs new file mode 100644 index 0000000000..ba2cd0460b --- /dev/null +++ b/nexus/metrics-producer-gc/src/lib.rs @@ -0,0 +1,371 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Garbage collection of expired metrics producers +//! +//! A metrics producer is expected to reregister itself periodically. This crate +//! provides a mechanism to clean up any producers that have stopped +//! reregistering, both removing their registration records from the database +//! and notifying their assigned collector. It is expected to be invoked from a +//! Nexus background task. 
+ +use chrono::DateTime; +use chrono::Utc; +use futures::stream::FuturesUnordered; +use futures::StreamExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::identity::Asset; +use nexus_db_queries::db::model::ProducerEndpoint; +use nexus_db_queries::db::DataStore; +use omicron_common::api::external::Error as DbError; +use oximeter_client::Client as OximeterClient; +use slog::info; +use slog::o; +use slog::warn; +use slog::Logger; +use slog_error_chain::InlineErrorChain; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::net::SocketAddr; +use uuid::Uuid; + +#[derive(Debug, Clone)] +pub struct PrunedProducers { + pub successes: BTreeSet, + pub failures: BTreeMap, +} + +#[derive(Debug, thiserror::Error, slog_error_chain::SlogInlineError)] +pub enum Error { + #[error("failed to list expired producers")] + ListExpiredProducers(#[source] DbError), + #[error("failed to get Oximeter info for {id}")] + GetOximterInfo { + id: Uuid, + #[source] + err: DbError, + }, +} + +/// Make one garbage collection pass over the metrics producers. +pub async fn prune_expired_producers( + opctx: &OpContext, + datastore: &DataStore, + expiration: DateTime, +) -> Result { + // Get the list of expired producers we need to prune. + let expired_producers = + ExpiredProducers::new(opctx, datastore, expiration).await?; + + // Build a FuturesUnordered to prune each expired producer. + let mut all_prunes = expired_producers + .producer_client_pairs() + .map(|(producer, client)| async { + let result = unregister_producer( + opctx, datastore, producer, client, &opctx.log, + ) + .await; + (producer.id(), result) + }) + .collect::>(); + + // Collect all the results. + let mut successes = BTreeSet::new(); + let mut failures = BTreeMap::new(); + while let Some((id, result)) = all_prunes.next().await { + match result { + Ok(()) => { + successes.insert(id); + } + Err(err) => { + failures.insert(id, err); + } + } + } + Ok(PrunedProducers { successes, failures }) +} + +async fn unregister_producer( + opctx: &OpContext, + datastore: &DataStore, + producer: &ProducerEndpoint, + client: &OximeterClient, + log: &Logger, +) -> Result<(), DbError> { + // Attempt to notify this producer's collector that the producer's lease has + // expired. This is an optimistic notification: if it fails, we will still + // prune the producer from the database, so that the next time this + // collector asks Nexus for its list of producers, this expired producer is + // gone. + match client.producer_delete(&producer.id()).await { + Ok(_) => { + info!( + log, "successfully notified Oximeter of expired producer"; + "collector-id" => %producer.oximeter_id, + "producer-id" => %producer.id(), + ); + } + Err(err) => { + warn!( + log, "failed to notify Oximeter of expired producer"; + "collector-id" => %producer.oximeter_id, + "producer-id" => %producer.id(), + InlineErrorChain::new(&err), + ); + } + } + + datastore.producer_endpoint_delete(opctx, &producer.id()).await.map(|_| ()) +} + +// Internal combination of all expired producers and a set of OximeterClients +// for each producer. 
+struct ExpiredProducers { + producers: Vec, + clients: BTreeMap, +} + +impl ExpiredProducers { + async fn new( + opctx: &OpContext, + datastore: &DataStore, + expiration: DateTime, + ) -> Result { + let producers = datastore + .producers_list_expired_batched(opctx, expiration) + .await + .map_err(Error::ListExpiredProducers)?; + + let mut clients = BTreeMap::new(); + for producer in &producers { + let entry = match clients.entry(producer.oximeter_id) { + Entry::Vacant(entry) => entry, + Entry::Occupied(_) => continue, + }; + let info = datastore + .oximeter_lookup(opctx, &producer.oximeter_id) + .await + .map_err(|err| Error::GetOximterInfo { + id: producer.oximeter_id, + err, + })?; + let client_log = + opctx.log.new(o!("oximeter-collector" => info.id.to_string())); + let address = SocketAddr::new(info.ip.ip(), *info.port); + let client = + OximeterClient::new(&format!("http://{address}"), client_log); + entry.insert(client); + } + + Ok(Self { producers, clients }) + } + + fn producer_client_pairs( + &self, + ) -> impl Iterator { + self.producers.iter().map(|producer| { + // In `new()` we add a client for every producer.oximeter_id, so we + // can unwrap this lookup. + let client = self.clients.get(&producer.oximeter_id).unwrap(); + (producer, client) + }) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::ExpressionMethods; + use diesel::QueryDsl; + use httptest::matchers::request; + use httptest::responders::status_code; + use httptest::Expectation; + use nexus_db_model::OximeterInfo; + use nexus_db_queries::db::datastore::pub_test_utils::datastore_test; + use nexus_test_utils::db::test_setup_database; + use nexus_types::internal_api::params; + use omicron_common::api::internal::nexus; + use omicron_test_utils::dev; + use std::time::Duration; + + async fn read_time_modified( + datastore: &DataStore, + producer_id: Uuid, + ) -> DateTime { + use nexus_db_queries::db::schema::metric_producer::dsl; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + match dsl::metric_producer + .filter(dsl::id.eq(producer_id)) + .select(dsl::time_modified) + .first_async(&*conn) + .await + { + Ok(time_modified) => time_modified, + Err(err) => panic!( + "failed to read time_modified for producer {producer_id}: \ + {err}" + ), + } + } + + #[tokio::test] + async fn test_prune_expired_producers() { + // Setup + let logctx = dev::test_setup_log("test_prune_expired_producers"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = + datastore_test(&logctx, &db, Uuid::new_v4()).await; + + // Insert an Oximeter collector + let collector_info = OximeterInfo::new(¶ms::OximeterInfo { + collector_id: Uuid::new_v4(), + address: "[::1]:0".parse().unwrap(), + }); + datastore + .oximeter_create(&opctx, &collector_info) + .await + .expect("failed to insert collector"); + + // GC'ing expired producers should succeed if there are no producers at + // all. + let pruned = prune_expired_producers(&opctx, &datastore, Utc::now()) + .await + .expect("failed to prune expired producers"); + assert!(pruned.successes.is_empty()); + assert!(pruned.failures.is_empty()); + + // Insert a producer. 
+ let producer = ProducerEndpoint::new( + &nexus::ProducerEndpoint { + id: Uuid::new_v4(), + kind: nexus::ProducerKind::Service, + address: "[::1]:0".parse().unwrap(), // unused + base_route: "/".to_string(), // unused + interval: Duration::from_secs(0), // unused + }, + collector_info.id, + ); + datastore + .producer_endpoint_create(&opctx, &producer) + .await + .expect("failed to insert producer"); + + let producer_time_modified = + read_time_modified(&datastore, producer.id()).await; + + // GC'ing expired producers with an expiration time older than our + // producer's `time_modified` should not prune anything. + let pruned = prune_expired_producers( + &opctx, + &datastore, + producer_time_modified - Duration::from_secs(1), + ) + .await + .expect("failed to prune expired producers"); + assert!(pruned.successes.is_empty()); + assert!(pruned.failures.is_empty()); + + // GC'ing expired producers with an expiration time _newer_ than our + // producer's `time_modified` should prune our one producer. + let pruned = prune_expired_producers( + &opctx, + &datastore, + producer_time_modified + Duration::from_secs(1), + ) + .await + .expect("failed to prune expired producers"); + let expected_success = + [producer.id()].into_iter().collect::>(); + assert_eq!(pruned.successes, expected_success); + assert!(pruned.failures.is_empty()); + + // GC'ing again with the same expiration should do nothing, because we + // already pruned the producer. + let pruned = prune_expired_producers( + &opctx, + &datastore, + producer_time_modified + Duration::from_secs(1), + ) + .await + .expect("failed to prune expired producers"); + assert!(pruned.successes.is_empty()); + assert!(pruned.failures.is_empty()); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_prune_expired_producers_notifies_collector() { + // Setup + let logctx = dev::test_setup_log( + "test_prune_expired_producers_notifies_collector", + ); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = + datastore_test(&logctx, &db, Uuid::new_v4()).await; + + let mut collector = httptest::Server::run(); + + // Insert an Oximeter collector + let collector_info = OximeterInfo::new(¶ms::OximeterInfo { + collector_id: Uuid::new_v4(), + address: collector.addr(), + }); + datastore + .oximeter_create(&opctx, &collector_info) + .await + .expect("failed to insert collector"); + + // Insert a producer. + let producer = ProducerEndpoint::new( + &nexus::ProducerEndpoint { + id: Uuid::new_v4(), + kind: nexus::ProducerKind::Service, + address: "[::1]:0".parse().unwrap(), // unused + base_route: "/".to_string(), // unused + interval: Duration::from_secs(0), // unused + }, + collector_info.id, + ); + datastore + .producer_endpoint_create(&opctx, &producer) + .await + .expect("failed to insert producer"); + + let producer_time_modified = + read_time_modified(&datastore, producer.id()).await; + + // GC'ing expired producers with an expiration time _newer_ than our + // producer's `time_modified` should prune our one producer and notify + // the collector that it's doing so. 
+ collector.expect( + Expectation::matching(request::method_path( + "DELETE", + format!("/producers/{}", producer.id()), + )) + .respond_with(status_code(204)), + ); + + let pruned = prune_expired_producers( + &opctx, + &datastore, + producer_time_modified + Duration::from_secs(1), + ) + .await + .expect("failed to prune expired producers"); + let expected_success = + [producer.id()].into_iter().collect::>(); + assert_eq!(pruned.successes, expected_success); + assert!(pruned.failures.is_empty()); + + collector.verify_and_clear(); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index e3f2154046..e260e9a87b 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -13,11 +13,13 @@ use super::dns_propagation; use super::dns_servers; use super::external_endpoints; use super::inventory_collection; +use super::metrics_producer_gc; use super::nat_cleanup; use super::phantom_disks; use super::region_replacement; use super::sync_service_zone_nat::ServiceZoneNatTracker; use super::sync_switch_configuration::SwitchPortSettingsManager; +use crate::app::oximeter::PRODUCER_LEASE_DURATION; use crate::app::sagas::SagaRequest; use nexus_config::BackgroundTaskConfig; use nexus_config::DnsTasksConfig; @@ -48,6 +50,9 @@ pub struct BackgroundTasks { /// task handle for the external DNS servers background task pub task_external_dns_servers: common::TaskHandle, + /// task handle for pruning metrics producers with expired leases + pub task_metrics_producer_gc: common::TaskHandle, + /// task handle for the task that keeps track of external endpoints pub task_external_endpoints: common::TaskHandle, /// external endpoints read by the background task @@ -113,6 +118,24 @@ impl BackgroundTasks { &config.dns_external, ); + let task_metrics_producer_gc = { + let gc = metrics_producer_gc::MetricProducerGc::new( + datastore.clone(), + PRODUCER_LEASE_DURATION, + ); + driver.register( + String::from("metrics_producer_gc"), + String::from( + "unregisters Oximeter metrics producers that have not \ + renewed their lease", + ), + config.metrics_producer_gc.period_secs, + Box::new(gc), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + // Background task: External endpoints list watcher let (task_external_endpoints, external_endpoints) = { let watcher = external_endpoints::ExternalEndpointsWatcher::new( @@ -301,6 +324,7 @@ impl BackgroundTasks { task_internal_dns_servers, task_external_dns_config, task_external_dns_servers, + task_metrics_producer_gc, task_external_endpoints, external_endpoints, nat_cleanup, diff --git a/nexus/src/app/background/metrics_producer_gc.rs b/nexus/src/app/background/metrics_producer_gc.rs new file mode 100644 index 0000000000..edf3e00067 --- /dev/null +++ b/nexus/src/app/background/metrics_producer_gc.rs @@ -0,0 +1,258 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for garbage collecting metrics producers that have not +//! 
renewed their lease + +use super::common::BackgroundTask; +use chrono::TimeDelta; +use chrono::Utc; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use serde_json::json; +use slog_error_chain::InlineErrorChain; +use std::sync::Arc; +use std::time::Duration; + +/// Background task that prunes metrics producers that have failed to renew +/// their lease. +pub struct MetricProducerGc { + datastore: Arc, + lease_duration: Duration, + disabled: bool, +} + +impl MetricProducerGc { + pub fn new(datastore: Arc, lease_duration: Duration) -> Self { + Self { + datastore, + lease_duration, + // TODO We should turn this task on as a part of landing the rest of + // the move to metric producer leases. For now, we leave it disabled + // to avoid pruning producers that don't know to renew leases, but + // make this a boolean so our unit test can enable it. + disabled: true, + } + } + + async fn activate(&mut self, opctx: &OpContext) -> serde_json::Value { + if self.disabled { + warn!( + opctx.log, + "Metric producer GC: statically disabled pending omicron#5284" + ); + return json!({ + "error": "metric producer gc disabled (omicron#5284)", + }); + } + + let Some(expiration) = TimeDelta::from_std(self.lease_duration) + .ok() + .and_then(|delta| Utc::now().checked_sub_signed(delta)) + else { + error!( + opctx.log, + "Metric producer GC: out of bounds lease_duration"; + "lease_duration" => ?self.lease_duration, + ); + return json!({ + "error": "out of bounds lease duration", + "lease_duration": self.lease_duration, + }); + }; + + info!( + opctx.log, "Metric producer GC running"; + "expiration" => %expiration, + ); + let pruned = match nexus_metrics_producer_gc::prune_expired_producers( + opctx, + &self.datastore, + expiration, + ) + .await + { + Ok(pruned) => pruned, + Err(err) => { + warn!(opctx.log, "Metric producer GC failed"; &err); + return json!({ + "error": InlineErrorChain::new(&err).to_string(), + }); + } + }; + + if pruned.failures.is_empty() { + info!( + opctx.log, "Metric producer GC complete (no errors)"; + "expiration" => %expiration, + "pruned" => ?pruned.successes, + ); + json!({ + "expiration": expiration, + "pruned": pruned.successes, + }) + } else { + warn!( + opctx.log, + "Metric producer GC complete ({} errors)", + pruned.failures.len(); + "expiration" => %expiration, + "pruned" => ?pruned.successes, + "failures" => ?pruned.failures, + ); + json!({ + "expiration": expiration, + "pruned": pruned.successes, + "errors": pruned.failures, + }) + } + } +} + +impl BackgroundTask for MetricProducerGc { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + self.activate(opctx).boxed() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use async_bb8_diesel::AsyncRunQueryDsl; + use chrono::DateTime; + use chrono::Utc; + use diesel::ExpressionMethods; + use httptest::matchers::request; + use httptest::responders::status_code; + use httptest::Expectation; + use nexus_db_model::OximeterInfo; + use nexus_db_queries::context::OpContext; + use nexus_db_queries::db::model::ProducerEndpoint; + use nexus_test_utils_macros::nexus_test; + use nexus_types::identity::Asset; + use nexus_types::internal_api::params; + use omicron_common::api::internal::nexus; + use serde_json::json; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + async fn set_time_modified( + datastore: &DataStore, + producer_id: Uuid, + time_modified: 
DateTime, + ) { + use nexus_db_queries::db::schema::metric_producer::dsl; + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + if let Err(err) = diesel::update(dsl::metric_producer) + .filter(dsl::id.eq(producer_id)) + .set(dsl::time_modified.eq(time_modified)) + .execute_async(&*conn) + .await + { + panic!( + "failed to update time_modified for producer {producer_id}: \ + {err}" + ); + } + } + + #[nexus_test(server = crate::Server)] + async fn test_pruning(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + let mut collector = httptest::Server::run(); + + // Insert an Oximeter collector + let collector_info = OximeterInfo::new(¶ms::OximeterInfo { + collector_id: Uuid::new_v4(), + address: collector.addr(), + }); + datastore + .oximeter_create(&opctx, &collector_info) + .await + .expect("failed to insert collector"); + + // Insert a producer. + let producer = ProducerEndpoint::new( + &nexus::ProducerEndpoint { + id: Uuid::new_v4(), + kind: nexus::ProducerKind::Service, + address: "[::1]:0".parse().unwrap(), // unused + base_route: "/".to_string(), // unused + interval: Duration::from_secs(0), // unused + }, + collector_info.id, + ); + datastore + .producer_endpoint_create(&opctx, &producer) + .await + .expect("failed to insert producer"); + + // Activate the task. It should immediately return because our GC is + // currently statically disabled (remove this check once that is no + // longer true!). + let mut gc = + MetricProducerGc::new(datastore.clone(), Duration::from_secs(3600)); + let value = gc.activate(&opctx).await; + assert_eq!( + value, + json!({ + "error": "metric producer gc disabled (omicron#5284)", + }) + ); + + // Enable the task and activate it. Technically this is racy, but if + // it's been an hour since we inserted the producer in the previous + // statement, we have bigger problems. This should _not_ prune the + // producer, since it's been active within the last hour. + gc.disabled = false; + let value = gc.activate(&opctx).await; + let value = value.as_object().expect("non-object"); + assert!(!value.contains_key("failures")); + assert!(value.contains_key("expiration")); + assert_eq!(*value.get("pruned").expect("missing `pruned`"), json!([])); + + // Move our producer backwards in time: pretend it registered two hours + // ago, which should result in it being pruned. + set_time_modified( + &datastore, + producer.id(), + Utc::now() - chrono::TimeDelta::hours(2), + ) + .await; + + // Pruning should also notify the collector. 
+ collector.expect( + Expectation::matching(request::method_path( + "DELETE", + format!("/producers/{}", producer.id()), + )) + .respond_with(status_code(204)), + ); + + let value = gc.activate(&opctx).await; + let value = value.as_object().expect("non-object"); + assert!(!value.contains_key("failures")); + assert!(value.contains_key("expiration")); + assert_eq!( + *value.get("pruned").expect("missing `pruned`"), + json!([producer.id()]) + ); + + collector.verify_and_clear(); + } +} diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 9867f1dc6d..2b8db422b4 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -14,6 +14,7 @@ mod dns_servers; mod external_endpoints; mod init; mod inventory_collection; +mod metrics_producer_gc; mod nat_cleanup; mod networking; mod phantom_disks; diff --git a/nexus/src/app/oximeter.rs b/nexus/src/app/oximeter.rs index f178bffc8c..8b204392eb 100644 --- a/nexus/src/app/oximeter.rs +++ b/nexus/src/app/oximeter.rs @@ -27,6 +27,12 @@ use std::num::NonZeroU32; use std::time::Duration; use uuid::Uuid; +/// How long a metrics producer remains registered to a collector. +/// +/// Producers are expected to renew their registration lease periodically, at +/// some interval of this overall duration. +pub const PRODUCER_LEASE_DURATION: Duration = Duration::from_secs(10 * 60); + /// A client which knows how to connect to Clickhouse, but does so /// only when a request is actually made. /// diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index de3aa4c1f4..ba3f145bb6 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -85,6 +85,7 @@ dns_external.period_secs_config = 60 dns_external.period_secs_servers = 60 dns_external.period_secs_propagation = 60 dns_external.max_concurrent_server_updates = 5 +metrics_producer_gc.period_secs = 60 # How frequently we check the list of stored TLS certificates. This is # approximately an upper bound on how soon after updating the list of # certificates it will take _other_ Nexus instances to notice and stop serving diff --git a/schema/crdb/add-metrics-producers-time-modified-index/up.sql b/schema/crdb/add-metrics-producers-time-modified-index/up.sql new file mode 100644 index 0000000000..35136ca759 --- /dev/null +++ b/schema/crdb/add-metrics-producers-time-modified-index/up.sql @@ -0,0 +1,3 @@ +CREATE INDEX IF NOT EXISTS lookup_producer_by_time_modified ON omicron.public.metric_producer ( + time_modified +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index ec6a7c192f..89546415e7 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1286,6 +1286,10 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_producer_by_oximeter ON omicron.public. 
id ); +CREATE INDEX IF NOT EXISTS lookup_producer_by_time_modified ON omicron.public.metric_producer ( + time_modified +); + /* * VPCs and networking primitives */ @@ -3729,7 +3733,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '47.0.0', NULL) + ( TRUE, NOW(), NOW(), '48.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index 553cdb0aef..dcb8ea041c 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -33,6 +33,7 @@ dns_external.period_secs_config = 60 dns_external.period_secs_servers = 60 dns_external.period_secs_propagation = 60 dns_external.max_concurrent_server_updates = 5 +metrics_producer_gc.period_secs = 60 # How frequently we check the list of stored TLS certificates. This is # approximately an upper bound on how soon after updating the list of # certificates it will take _other_ Nexus instances to notice and stop serving diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 9f7cb959d3..2cd520653f 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -33,6 +33,7 @@ dns_external.period_secs_config = 60 dns_external.period_secs_servers = 60 dns_external.period_secs_propagation = 60 dns_external.max_concurrent_server_updates = 5 +metrics_producer_gc.period_secs = 60 # How frequently we check the list of stored TLS certificates. This is # approximately an upper bound on how soon after updating the list of # certificates it will take _other_ Nexus instances to notice and stop serving From 7e5afbe45a43a202ed0e94d6890675e35a909161 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 27 Mar 2024 16:13:09 -0400 Subject: [PATCH 008/334] Add MGS `sp_sensor_read_value` endpoint (#5312) This exposes the `faux-mgs read-sensor-value` command. Sensor IDs currently come from the host OS via topo. Example on `madrid`: ```console root@oxz_switch1:~# curl http://[::1]:12225/sp/sled/14/sensor/value/111 {"timestamp":1566837559,"result":{"kind":"success","value":60.0}} ``` --- gateway/src/http_entrypoints.rs | 76 +++++++++ gateway/src/http_entrypoints/conversions.rs | 27 +++ openapi/gateway.json | 174 ++++++++++++++++++++ 3 files changed, 277 insertions(+) diff --git a/gateway/src/http_entrypoints.rs b/gateway/src/http_entrypoints.rs index b5a765a8a8..727ba0950d 100644 --- a/gateway/src/http_entrypoints.rs +++ b/gateway/src/http_entrypoints.rs @@ -298,6 +298,49 @@ struct UpdatePreparationProgress { total: u32, } +/// Result of reading an SP sensor. +#[derive( + Debug, + Clone, + Copy, + PartialEq, + PartialOrd, + Serialize, + Deserialize, + JsonSchema, +)] +pub struct SpSensorReading { + /// SP-centric timestamp of when `result` was recorded from this sensor. + /// + /// Currently this value represents "milliseconds since the last SP boot" + /// and is primarily useful as a delta between sensors on this SP (assuming + /// no reboot in between). The meaning could change with future SP releases. + pub timestamp: u64, + /// Value (or error) from the sensor. + pub result: SpSensorReadingResult, +} + +/// Single reading (or error) from an SP sensor. 
+#[derive( + Debug, + Clone, + Copy, + PartialEq, + PartialOrd, + Deserialize, + Serialize, + JsonSchema, +)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum SpSensorReadingResult { + Success { value: f32 }, + DeviceOff, + DeviceError, + DeviceNotPresent, + DeviceUnavailable, + DeviceTimeout, +} + /// List of components from a single SP. #[derive(Debug, PartialEq, Eq, Serialize, Deserialize, JsonSchema)] pub struct SpComponentList { @@ -535,6 +578,16 @@ struct PathSp { sp: SpIdentifier, } +#[derive(Deserialize, JsonSchema)] +struct PathSpSensorId { + /// ID for the SP that the gateway service translates into the appropriate + /// port for communicating with the given SP. + #[serde(flatten)] + sp: SpIdentifier, + /// ID for the sensor on the SP. + sensor_id: u32, +} + #[derive(Serialize, Deserialize, JsonSchema)] struct PathSpComponent { /// ID for the SP that the gateway service translates into the appropriate @@ -625,6 +678,28 @@ async fn sp_startup_options_set( Ok(HttpResponseUpdatedNoContent {}) } +/// Read the current value of a sensor by ID +/// +/// Sensor IDs come from the host topo tree. +#[endpoint { + method = GET, + path = "/sp/{type}/{slot}/sensor/{sensor_id}/value", +}] +async fn sp_sensor_read_value( + rqctx: RequestContext>, + path: Path, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let PathSpSensorId { sp, sensor_id } = path.into_inner(); + let sp_id = sp.into(); + let sp = apictx.mgmt_switch.sp(sp_id)?; + let value = sp.read_sensor_value(sensor_id).await.map_err(|err| { + SpCommsError::SpCommunicationFailed { sp: sp_id, err } + })?; + + Ok(HttpResponseOk(value.into())) +} + /// List components of an SP /// /// A component is a distinct entity under an SP's direct control. This lists @@ -1511,6 +1586,7 @@ pub fn api() -> GatewayApiDescription { api.register(sp_power_state_set)?; api.register(sp_installinator_image_id_set)?; api.register(sp_installinator_image_id_delete)?; + api.register(sp_sensor_read_value)?; api.register(sp_component_list)?; api.register(sp_component_get)?; api.register(sp_component_caboose_get)?; diff --git a/gateway/src/http_entrypoints/conversions.rs b/gateway/src/http_entrypoints/conversions.rs index a4aef7425e..df3d1c5436 100644 --- a/gateway/src/http_entrypoints/conversions.rs +++ b/gateway/src/http_entrypoints/conversions.rs @@ -20,6 +20,8 @@ use super::SpComponentPresence; use super::SpIdentifier; use super::SpIgnition; use super::SpIgnitionSystemType; +use super::SpSensorReading; +use super::SpSensorReadingResult; use super::SpState; use super::SpType; use super::SpUpdateStatus; @@ -40,6 +42,31 @@ pub(super) fn component_from_str(s: &str) -> Result { }) } +impl From for SpSensorReading { + fn from(value: gateway_messages::SensorReading) -> Self { + Self { + timestamp: value.timestamp, + result: match value.value { + Ok(value) => SpSensorReadingResult::Success { value }, + Err(err) => err.into(), + }, + } + } +} + +impl From for SpSensorReadingResult { + fn from(value: gateway_messages::SensorDataMissing) -> Self { + use gateway_messages::SensorDataMissing; + match value { + SensorDataMissing::DeviceOff => Self::DeviceOff, + SensorDataMissing::DeviceError => Self::DeviceError, + SensorDataMissing::DeviceNotPresent => Self::DeviceNotPresent, + SensorDataMissing::DeviceUnavailable => Self::DeviceUnavailable, + SensorDataMissing::DeviceTimeout => Self::DeviceTimeout, + } + } +} + impl From for SpUpdateStatus { fn from(status: UpdateStatus) -> Self { match status { diff --git a/openapi/gateway.json b/openapi/gateway.json 
index 5961b670ed..f3a5642b6e 100644 --- a/openapi/gateway.json +++ b/openapi/gateway.json @@ -1300,6 +1300,62 @@ } } }, + "/sp/{type}/{slot}/sensor/{sensor_id}/value": { + "get": { + "summary": "Read the current value of a sensor by ID", + "description": "Sensor IDs come from the host topo tree.", + "operationId": "sp_sensor_read_value", + "parameters": [ + { + "in": "path", + "name": "sensor_id", + "description": "ID for the sensor on the SP.", + "required": true, + "schema": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "in": "path", + "name": "slot", + "required": true, + "schema": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + { + "in": "path", + "name": "type", + "required": true, + "schema": { + "$ref": "#/components/schemas/SpType" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SpSensorReading" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/sp/{type}/{slot}/startup-options": { "get": { "summary": "Get host startup options for a sled", @@ -2788,6 +2844,124 @@ } ] }, + "SpSensorReading": { + "description": "Result of reading an SP sensor.", + "type": "object", + "properties": { + "result": { + "description": "Value (or error) from the sensor.", + "allOf": [ + { + "$ref": "#/components/schemas/SpSensorReadingResult" + } + ] + }, + "timestamp": { + "description": "SP-centric timestamp of when `result` was recorded from this sensor.\n\nCurrently this value represents \"milliseconds since the last SP boot\" and is primarily useful as a delta between sensors on this SP (assuming no reboot in between). 
The meaning could change with future SP releases.", + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "result", + "timestamp" + ] + }, + "SpSensorReadingResult": { + "description": "Single reading (or error) from an SP sensor.", + "oneOf": [ + { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "success" + ] + }, + "value": { + "type": "number", + "format": "float" + } + }, + "required": [ + "kind", + "value" + ] + }, + { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "device_off" + ] + } + }, + "required": [ + "kind" + ] + }, + { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "device_error" + ] + } + }, + "required": [ + "kind" + ] + }, + { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "device_not_present" + ] + } + }, + "required": [ + "kind" + ] + }, + { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "device_unavailable" + ] + } + }, + "required": [ + "kind" + ] + }, + { + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "device_timeout" + ] + } + }, + "required": [ + "kind" + ] + } + ] + }, "SpState": { "type": "object", "properties": { From 4ca89cad08ba43f868d6d43f5b240a16020d82ed Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Thu, 28 Mar 2024 12:44:03 -0700 Subject: [PATCH 009/334] preview DNS changes in reconfigurator-cli (#5338) --- Cargo.lock | 4 + clients/dns-service-client/Cargo.toml | 1 + clients/dns-service-client/src/diff.rs | 329 ++++++++++------ .../tests/output/diff_example_different.out | 8 + .../diff_example_different_reversed.out | 8 + .../tests/output/diff_example_empty.out | 5 + dev-tools/omdb/src/bin/omdb/db.rs | 61 ++- dev-tools/reconfigurator-cli/Cargo.toml | 4 + dev-tools/reconfigurator-cli/build.rs | 10 + dev-tools/reconfigurator-cli/src/main.rs | 358 +++++++++++++++++- internal-dns/src/config.rs | 32 +- internal-dns/src/resolver.rs | 17 +- nexus/db-queries/src/db/datastore/dns.rs | 49 +++ nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/db-queries/src/db/datastore/rack.rs | 70 ++-- nexus/reconfigurator/execution/src/dns.rs | 317 +++++++--------- nexus/reconfigurator/execution/src/lib.rs | 19 +- nexus/src/app/silo.rs | 16 +- nexus/test-utils/src/lib.rs | 7 +- nexus/types/src/deployment.rs | 5 + sled-agent/src/fakes/nexus.rs | 3 +- sled-agent/src/instance.rs | 2 +- sled-agent/src/rack_setup/plan/service.rs | 2 +- sled-agent/src/sim/server.rs | 3 +- 24 files changed, 945 insertions(+), 386 deletions(-) create mode 100644 clients/dns-service-client/tests/output/diff_example_different.out create mode 100644 clients/dns-service-client/tests/output/diff_example_different_reversed.out create mode 100644 clients/dns-service-client/tests/output/diff_example_empty.out create mode 100644 dev-tools/reconfigurator-cli/build.rs diff --git a/Cargo.lock b/Cargo.lock index c8511806b7..eca65905da 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2031,6 +2031,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", + "expectorate", "http 0.2.12", "omicron-workspace-hack", "progenitor", @@ -7425,16 +7426,19 @@ dependencies = [ "camino", "camino-tempfile", "clap 4.5.1", + "dns-service-client", "dropshot", "expectorate", "humantime", "indexmap 2.2.5", + "nexus-reconfigurator-execution", "nexus-reconfigurator-planning", "nexus-types", "omicron-common", "omicron-rpaths", "omicron-test-utils", "omicron-workspace-hack", + "pq-sys", 
"reedline", "regex", "serde_json", diff --git a/clients/dns-service-client/Cargo.toml b/clients/dns-service-client/Cargo.toml index 27ffb66d88..fb393d77b1 100644 --- a/clients/dns-service-client/Cargo.toml +++ b/clients/dns-service-client/Cargo.toml @@ -7,6 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true chrono.workspace = true +expectorate.workspace = true http.workspace = true progenitor.workspace = true reqwest = { workspace = true, features = ["json", "rustls-tls", "stream"] } diff --git a/clients/dns-service-client/src/diff.rs b/clients/dns-service-client/src/diff.rs index ce04319dff..2ae7036c86 100644 --- a/clients/dns-service-client/src/diff.rs +++ b/clients/dns-service-client/src/diff.rs @@ -2,173 +2,252 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::types::DnsConfigParams; +use crate::types::DnsConfigZone; use crate::types::DnsRecord; +use crate::types::Srv; use crate::DnsRecords; use anyhow::ensure; -use anyhow::Context; +use std::collections::BTreeSet; + +#[derive(Debug)] +enum NameDiff<'a> { + Added(&'a str, &'a [DnsRecord]), + Removed(&'a str, &'a [DnsRecord]), + Changed(&'a str, &'a [DnsRecord], &'a [DnsRecord]), + Unchanged(&'a str, &'a [DnsRecord]), +} /// Compare the DNS records contained in two sets of DNS configuration #[derive(Debug)] pub struct DnsDiff<'a> { left: &'a DnsRecords, right: &'a DnsRecords, + zone_name: &'a str, + all_names: BTreeSet<&'a String>, } impl<'a> DnsDiff<'a> { - /// Compare the DNS records contained in two sets of DNS configuration + /// Compare the DNS records contained in two DNS zones' configs /// - /// Both configurations are expected to contain exactly one zone and they - /// should have the same name. + /// Both zones are expected to have the same name. pub fn new( - left: &'a DnsConfigParams, - right: &'a DnsConfigParams, + left_zone: &'a DnsConfigZone, + right_zone: &'a DnsConfigZone, ) -> Result, anyhow::Error> { - let left_zone = left.sole_zone().context("left side of diff")?; - let right_zone = right.sole_zone().context("right side of diff")?; - ensure!( left_zone.zone_name == right_zone.zone_name, "cannot compare DNS configuration from zones with different names: \ {:?} vs. 
{:?}",
            left_zone.zone_name,
            right_zone.zone_name,
        );

-        Ok(DnsDiff { left: &left_zone.records, right: &right_zone.records })
+        let all_names =
+            left_zone.records.keys().chain(right_zone.records.keys()).collect();
+
+        Ok(DnsDiff {
+            left: &left_zone.records,
+            right: &right_zone.records,
+            zone_name: &left_zone.zone_name,
+            all_names,
+        })
+    }
+
+    fn iter_names(&self) -> impl Iterator<Item = NameDiff<'_>> {
+        self.all_names.iter().map(|k| {
+            let name = k.as_str();
+            let v1 = self.left.get(*k);
+            let v2 = self.right.get(*k);
+            match (v1, v2) {
+                (None, Some(v2)) => NameDiff::Added(name, v2.as_ref()),
+                (Some(v1), None) => NameDiff::Removed(name, v1.as_ref()),
+                (Some(v1), Some(v2)) => {
+                    let mut v1_sorted = v1.clone();
+                    let mut v2_sorted = v2.clone();
+                    v1_sorted.sort();
+                    v2_sorted.sort();
+                    if v1_sorted == v2_sorted {
+                        NameDiff::Unchanged(name, v1.as_ref())
+                    } else {
+                        NameDiff::Changed(name, v1.as_ref(), v2.as_ref())
+                    }
+                }
+                (None, None) => unreachable!(),
+            }
+        })
     }
 
     /// Iterate over the names that are present in the `right` config but
     /// absent in the `left` one (i.e., added between `left` and `right`)
     pub fn names_added(&self) -> impl Iterator<Item = (&str, &[DnsRecord])> {
-        self.right
-            .iter()
-            .filter(|(k, _)| !self.left.contains_key(*k))
-            .map(|(k, v)| (k.as_ref(), v.as_ref()))
+        self.iter_names().filter_map(|nd| {
+            if let NameDiff::Added(k, v) = nd {
+                Some((k, v))
+            } else {
+                None
+            }
+        })
     }
 
     /// Iterate over the names that are present in the `left` config but
     /// absent in the `right` one (i.e., removed between `left` and `right`)
     pub fn names_removed(&self) -> impl Iterator<Item = (&str, &[DnsRecord])> {
-        self.left
-            .iter()
-            .filter(|(k, _)| !self.right.contains_key(*k))
-            .map(|(k, v)| (k.as_ref(), v.as_ref()))
+        self.iter_names().filter_map(|nd| {
+            if let NameDiff::Removed(k, v) = nd {
+                Some((k, v))
+            } else {
+                None
+            }
+        })
     }
 
     /// Iterate over the names whose records changed between `left` and `right`.
pub fn names_changed( &self, ) -> impl Iterator { - self.left.iter().filter_map(|(k, v1)| match self.right.get(k) { - Some(v2) => { - let mut v1_sorted = v1.clone(); - let mut v2_sorted = v2.clone(); - v1_sorted.sort(); - v2_sorted.sort(); - (v1_sorted != v2_sorted) - .then(|| (k.as_ref(), v1.as_ref(), v2.as_ref())) + self.iter_names().filter_map(|nd| { + if let NameDiff::Changed(k, v1, v2) = nd { + Some((k, v1, v2)) + } else { + None + } + }) + } + + /// Iterate over the names whose records were unchanged between `left` and + /// `right` + pub fn names_unchanged( + &self, + ) -> impl Iterator { + self.iter_names().filter_map(|nd| { + if let NameDiff::Unchanged(k, v) = nd { + Some((k, v)) + } else { + None } - _ => None, }) } /// Returns true iff there are no differences in the DNS names and records /// described by the given configurations pub fn is_empty(&self) -> bool { - self.names_added().next().is_none() - && self.names_removed().next().is_none() - && self.names_changed().next().is_none() + self.iter_names().all(|nd| matches!(nd, NameDiff::Unchanged(_, _))) + } +} + +impl<'a> std::fmt::Display for DnsDiff<'a> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + let names_changed = !self.is_empty(); + let zone_name = &self.zone_name; + + if !names_changed { + writeln!(f, " DNS zone: {:?} (unchanged)", zone_name,)?; + } else { + writeln!(f, "* DNS zone: {:?}: ", zone_name)?; + } + + let print_records = |f: &mut std::fmt::Formatter<'_>, + prefix, + records: &[DnsRecord]| + -> std::fmt::Result { + for r in records.iter() { + writeln!( + f, + "{} {}", + prefix, + match r { + DnsRecord::A(addr) => format!("A {}", addr), + DnsRecord::Aaaa(addr) => format!("AAAA {}", addr), + DnsRecord::Srv(Srv { port, target, .. }) => { + format!("SRV port {:5} {}", port, target) + } + } + )?; + } + + Ok(()) + }; + + for name_diff in self.iter_names() { + match name_diff { + NameDiff::Added(name, records) => { + writeln!( + f, + "+ name: {:50} (records: {})", + name, + records.len() + )?; + print_records(f, "+", records)?; + } + NameDiff::Removed(name, records) => { + writeln!( + f, + "- name: {:50} (records: {})", + name, + records.len() + )?; + print_records(f, "-", records)?; + } + NameDiff::Unchanged(name, records) => { + writeln!( + f, + " name: {:50} (records: {})", + name, + records.len() + )?; + print_records(f, " ", records)?; + } + NameDiff::Changed(name, records1, records2) => { + writeln!( + f, + "* name: {:50} (records: {} -> {})", + name, + records1.len(), + records2.len(), + )?; + print_records(f, "-", records1)?; + print_records(f, "+", records2)?; + } + } + } + + Ok(()) } } #[cfg(test)] mod test { use super::DnsDiff; - use crate::types::DnsConfigParams; use crate::types::DnsConfigZone; use crate::types::DnsRecord; - use chrono::Utc; use std::collections::HashMap; use std::net::Ipv4Addr; const ZONE_NAME: &str = "dummy"; - fn example() -> DnsConfigParams { - DnsConfigParams { - generation: 4, - time_created: Utc::now(), - zones: vec![DnsConfigZone { - zone_name: ZONE_NAME.to_string(), - records: HashMap::from([ - ( - "ex1".to_string(), - vec![DnsRecord::A(Ipv4Addr::LOCALHOST)], - ), - ( - "ex2".to_string(), - vec![DnsRecord::A("192.168.1.3".parse().unwrap())], - ), - ]), - }], + fn example() -> DnsConfigZone { + DnsConfigZone { + zone_name: ZONE_NAME.to_string(), + records: HashMap::from([ + ("ex1".to_string(), vec![DnsRecord::A(Ipv4Addr::LOCALHOST)]), + ( + "ex2".to_string(), + vec![DnsRecord::A("192.168.1.3".parse().unwrap())], + ), + ]), } } #[test] fn diff_invalid() 
{ - let example_empty = DnsConfigParams { - generation: 3, - time_created: Utc::now(), - zones: vec![], - }; - - // Configs must have at least one zone. - let error = DnsDiff::new(&example_empty, &example_empty) - .expect_err("unexpectedly succeeded comparing two empty configs"); - assert!( - format!("{:#}", error).contains("expected exactly one DNS zone") - ); - - let example = example(); - let error = DnsDiff::new(&example_empty, &example) - .expect_err("unexpectedly succeeded comparing an empty config"); - assert!( - format!("{:#}", error).contains("expected exactly one DNS zone") - ); - - // Configs must not have more than one zone. - let example_multiple = DnsConfigParams { - generation: 3, - time_created: Utc::now(), - zones: vec![ - DnsConfigZone { - zone_name: ZONE_NAME.to_string(), - records: HashMap::new(), - }, - DnsConfigZone { - zone_name: "two".to_string(), - records: HashMap::new(), - }, - ], - }; - let error = DnsDiff::new(&example_multiple, &example).expect_err( - "unexpectedly succeeded comparing config with multiple zones", - ); - assert!( - format!("{:#}", error).contains("expected exactly one DNS zone") - ); - // Cannot compare different zone names - let example_different_zone = DnsConfigParams { - generation: 3, - time_created: Utc::now(), - zones: vec![DnsConfigZone { - zone_name: format!("{}-other", ZONE_NAME), - records: HashMap::new(), - }], + let example_different_zone = DnsConfigZone { + zone_name: format!("{}-other", ZONE_NAME), + records: HashMap::new(), }; - let error = DnsDiff::new(&example_different_zone, &example).expect_err( - "unexpectedly succeeded comparing configs with \ + let error = DnsDiff::new(&example_different_zone, &example()) + .expect_err( + "unexpectedly succeeded comparing configs with \ different zone names", - ); + ); assert_eq!( format!("{:#}", error), "cannot compare DNS configuration from zones with different \ @@ -184,27 +263,27 @@ mod test { assert_eq!(diff.names_removed().count(), 0); assert_eq!(diff.names_added().count(), 0); assert_eq!(diff.names_changed().count(), 0); + expectorate::assert_contents( + "tests/output/diff_example_empty.out", + &diff.to_string(), + ); } #[test] fn diff_different() { let example = example(); - let example2 = DnsConfigParams { - generation: 4, - time_created: Utc::now(), - zones: vec![DnsConfigZone { - zone_name: ZONE_NAME.to_string(), - records: HashMap::from([ - ( - "ex2".to_string(), - vec![DnsRecord::A("192.168.1.4".parse().unwrap())], - ), - ( - "ex3".to_string(), - vec![DnsRecord::A(std::net::Ipv4Addr::LOCALHOST)], - ), - ]), - }], + let example2 = DnsConfigZone { + zone_name: ZONE_NAME.to_string(), + records: HashMap::from([ + ( + "ex2".to_string(), + vec![DnsRecord::A("192.168.1.4".parse().unwrap())], + ), + ( + "ex3".to_string(), + vec![DnsRecord::A(std::net::Ipv4Addr::LOCALHOST)], + ), + ]), }; let diff = DnsDiff::new(&example, &example2).unwrap(); @@ -231,5 +310,19 @@ mod test { changed[0].2, vec![DnsRecord::A("192.168.1.4".parse().unwrap())] ); + + expectorate::assert_contents( + "tests/output/diff_example_different.out", + &diff.to_string(), + ); + + // Diff'ing the reverse direction exercises different cases (e.g., what + // was added now appears as removed). Also, the generation number + // should really be different. 
+ let diff = DnsDiff::new(&example2, &example).unwrap(); + expectorate::assert_contents( + "tests/output/diff_example_different_reversed.out", + &diff.to_string(), + ); } } diff --git a/clients/dns-service-client/tests/output/diff_example_different.out b/clients/dns-service-client/tests/output/diff_example_different.out new file mode 100644 index 0000000000..7f2f73fed6 --- /dev/null +++ b/clients/dns-service-client/tests/output/diff_example_different.out @@ -0,0 +1,8 @@ +* DNS zone: "dummy": +- name: ex1 (records: 1) +- A 127.0.0.1 +* name: ex2 (records: 1 -> 1) +- A 192.168.1.3 ++ A 192.168.1.4 ++ name: ex3 (records: 1) ++ A 127.0.0.1 diff --git a/clients/dns-service-client/tests/output/diff_example_different_reversed.out b/clients/dns-service-client/tests/output/diff_example_different_reversed.out new file mode 100644 index 0000000000..ba52d4720c --- /dev/null +++ b/clients/dns-service-client/tests/output/diff_example_different_reversed.out @@ -0,0 +1,8 @@ +* DNS zone: "dummy": ++ name: ex1 (records: 1) ++ A 127.0.0.1 +* name: ex2 (records: 1 -> 1) +- A 192.168.1.4 ++ A 192.168.1.3 +- name: ex3 (records: 1) +- A 127.0.0.1 diff --git a/clients/dns-service-client/tests/output/diff_example_empty.out b/clients/dns-service-client/tests/output/diff_example_empty.out new file mode 100644 index 0000000000..1e3ba76bc9 --- /dev/null +++ b/clients/dns-service-client/tests/output/diff_example_empty.out @@ -0,0 +1,5 @@ + DNS zone: "dummy" (unchanged) + name: ex1 (records: 1) + A 127.0.0.1 + name: ex2 (records: 1) + A 192.168.1.3 diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 5b5e23ea8d..855bbe063b 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -73,7 +73,9 @@ use nexus_db_queries::db; use nexus_db_queries::db::datastore::read_only_resources_associated_with_volume; use nexus_db_queries::db::datastore::CrucibleTargets; use nexus_db_queries::db::datastore::DataStoreConnection; +use nexus_db_queries::db::datastore::DataStoreDnsTest; use nexus_db_queries::db::datastore::DataStoreInventoryTest; +use nexus_db_queries::db::datastore::Discoverability; use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup::LookupPath; @@ -3383,8 +3385,63 @@ async fn cmd_db_reconfigurator_save( .await; eprintln!("done."); - let state = - UnstableReconfiguratorState { policy: policy, collections, blueprints }; + // It's also useful to include information about any DNS generations + // mentioned in any blueprints. 
+ let blueprints_list = &blueprints; + let fetch_dns_group = |dns_group: DnsGroup| async move { + let latest_version = datastore + .dns_group_latest_version(&opctx, dns_group) + .await + .with_context(|| { + format!("reading latest {:?} version", dns_group) + })?; + let dns_generations_needed: BTreeSet<_> = blueprints_list + .iter() + .map(|blueprint| match dns_group { + DnsGroup::Internal => blueprint.internal_dns_version, + DnsGroup::External => blueprint.external_dns_version, + }) + .chain(std::iter::once(*latest_version.version)) + .collect(); + let mut rv = BTreeMap::new(); + for gen in dns_generations_needed { + let config = datastore + .dns_config_read_version(&opctx, dns_group, gen) + .await + .with_context(|| { + format!("reading {:?} DNS version {}", dns_group, gen) + })?; + rv.insert(gen, config); + } + + Ok::, anyhow::Error>(rv) + }; + + let internal_dns = fetch_dns_group(DnsGroup::Internal).await?; + let external_dns = fetch_dns_group(DnsGroup::External).await?; + let silo_names = datastore + .silo_list_all_batched(&opctx, Discoverability::All) + .await + .context("listing all Silos")? + .into_iter() + .map(|s| s.name().clone()) + .collect(); + let external_dns_zone_names = datastore + .dns_zones_list_all(&opctx, DnsGroup::External) + .await + .context("listing external DNS zone names")? + .into_iter() + .map(|dns_zone| dns_zone.zone_name) + .collect(); + let state = UnstableReconfiguratorState { + policy, + collections, + blueprints, + internal_dns, + external_dns, + silo_names, + external_dns_zone_names, + }; let output_path = &reconfig_save_args.output_file; let file = std::fs::OpenOptions::new() diff --git a/dev-tools/reconfigurator-cli/Cargo.toml b/dev-tools/reconfigurator-cli/Cargo.toml index 8a8ea85544..c8d32513c9 100644 --- a/dev-tools/reconfigurator-cli/Cargo.toml +++ b/dev-tools/reconfigurator-cli/Cargo.toml @@ -11,12 +11,16 @@ omicron-rpaths.workspace = true anyhow.workspace = true camino.workspace = true clap.workspace = true +dns-service-client.workspace = true dropshot.workspace = true humantime.workspace = true indexmap.workspace = true nexus-reconfigurator-planning.workspace = true +nexus-reconfigurator-execution.workspace = true nexus-types.workspace = true omicron-common.workspace = true +# See omicron-rpaths for more about the "pq-sys" dependency. +pq-sys = "*" reedline.workspace = true serde_json.workspace = true slog-error-chain.workspace = true diff --git a/dev-tools/reconfigurator-cli/build.rs b/dev-tools/reconfigurator-cli/build.rs new file mode 100644 index 0000000000..1ba9acd41c --- /dev/null +++ b/dev-tools/reconfigurator-cli/build.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +// See omicron-rpaths for documentation. +// NOTE: This file MUST be kept in sync with the other build.rs files in this +// repository. 
+fn main() { + omicron_rpaths::configure_default_omicron_rpaths(); +} diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index b59fc96703..8ba71d9819 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -8,18 +8,26 @@ use anyhow::{anyhow, bail, Context}; use camino::Utf8PathBuf; use clap::CommandFactory; use clap::FromArgMatches; +use clap::ValueEnum; use clap::{Args, Parser, Subcommand}; +use dns_service_client::DnsDiff; use indexmap::IndexMap; +use nexus_reconfigurator_execution::blueprint_external_dns_config; +use nexus_reconfigurator_execution::blueprint_internal_dns_config; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::planner::Planner; use nexus_reconfigurator_planning::system::{ SledBuilder, SledHwInventory, SystemDescription, }; use nexus_types::deployment::{Blueprint, UnstableReconfiguratorState}; +use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::inventory::Collection; use nexus_types::inventory::OmicronZonesConfig; +use nexus_types::inventory::SledRole; use omicron_common::api::external::Generation; +use omicron_common::api::external::Name; use reedline::{Reedline, Signal}; +use std::collections::BTreeMap; use std::io::BufRead; use swrite::{swriteln, SWrite}; use tabled::Tabled; @@ -41,6 +49,22 @@ struct ReconfiguratorSim { /// blueprints created by the user blueprints: IndexMap, + /// internal DNS configurations + internal_dns: BTreeMap, + /// external DNS configurations + external_dns: BTreeMap, + + /// Set of silo names configured + /// + /// These are used to determine the contents of external DNS. + silo_names: Vec, + + /// External DNS zone name configured + external_dns_zone_name: String, + + /// Policy overrides + num_nexus: Option, + log: slog::Logger, } @@ -65,7 +89,12 @@ fn main() -> anyhow::Result<()> { system: SystemDescription::new(), collections: IndexMap::new(), blueprints: IndexMap::new(), + internal_dns: BTreeMap::new(), + external_dns: BTreeMap::new(), log, + silo_names: vec!["example-silo".parse().unwrap()], + external_dns_zone_name: String::from("oxide.example"), + num_nexus: None, }; if let Some(input_file) = cmd.input_file { @@ -162,6 +191,9 @@ fn process_entry(sim: &mut ReconfiguratorSim, entry: String) -> LoopResult { Commands::SledList => cmd_sled_list(sim), Commands::SledAdd(args) => cmd_sled_add(sim, args), Commands::SledShow(args) => cmd_sled_show(sim, args), + Commands::SiloList => cmd_silo_list(sim), + Commands::SiloAdd(args) => cmd_silo_add(sim, args), + Commands::SiloRemove(args) => cmd_silo_remove(sim, args), Commands::InventoryList => cmd_inventory_list(sim), Commands::InventoryGenerate => cmd_inventory_generate(sim), Commands::BlueprintList => cmd_blueprint_list(sim), @@ -171,9 +203,12 @@ fn process_entry(sim: &mut ReconfiguratorSim, entry: String) -> LoopResult { Commands::BlueprintPlan(args) => cmd_blueprint_plan(sim, args), Commands::BlueprintShow(args) => cmd_blueprint_show(sim, args), Commands::BlueprintDiff(args) => cmd_blueprint_diff(sim, args), + Commands::BlueprintDiffDns(args) => cmd_blueprint_diff_dns(sim, args), Commands::BlueprintDiffInventory(args) => { cmd_blueprint_diff_inventory(sim, args) } + Commands::Show => cmd_show(sim), + Commands::Set(args) => cmd_set(sim, args), Commands::Load(args) => cmd_load(sim, args), Commands::FileContents(args) => cmd_file_contents(args), Commands::Save(args) => cmd_save(sim, args), @@ -206,6 +241,13 @@ enum Commands 
{ /// show details about one sled SledShow(SledArgs), + /// list silos + SiloList, + /// add a silo + SiloAdd(SiloAddRemoveArgs), + /// remove a silo + SiloRemove(SiloAddRemoveArgs), + /// list all inventory collections InventoryList, /// generates an inventory collection from the configured sleds @@ -221,9 +263,17 @@ enum Commands { BlueprintShow(BlueprintArgs), /// show differences between two blueprints BlueprintDiff(BlueprintDiffArgs), + /// show differences between a blueprint and a particular DNS version + BlueprintDiffDns(BlueprintDiffDnsArgs), /// show differences between a blueprint and an inventory collection BlueprintDiffInventory(BlueprintDiffInventoryArgs), + /// show system properties + Show, + /// set system properties + #[command(subcommand)] + Set(SetArgs), + /// save state to a file Save(SaveArgs), /// load state from a file @@ -244,6 +294,12 @@ struct SledArgs { sled_id: Uuid, } +#[derive(Debug, Args)] +struct SiloAddRemoveArgs { + /// name of the silo + silo_name: Name, +} + #[derive(Debug, Args)] struct InventoryArgs { /// id of the inventory collection to use in planning @@ -264,6 +320,22 @@ struct BlueprintArgs { blueprint_id: Uuid, } +#[derive(Debug, Args)] +struct BlueprintDiffDnsArgs { + /// DNS group (internal or external) + dns_group: CliDnsGroup, + /// DNS version to diff against + dns_version: u32, + /// id of the blueprint + blueprint_id: Uuid, +} + +#[derive(Clone, Copy, Debug, ValueEnum)] +enum CliDnsGroup { + Internal, + External, +} + #[derive(Debug, Args)] struct BlueprintDiffInventoryArgs { /// id of the inventory collection @@ -280,6 +352,14 @@ struct BlueprintDiffArgs { blueprint2_id: Uuid, } +#[derive(Debug, Subcommand)] +enum SetArgs { + /// target number of Nexus instances (for planning) + NumNexus { num_nexus: u16 }, + /// system's external DNS zone name (suffix) + ExternalDnsZoneName { zone_name: String }, +} + #[derive(Debug, Args)] struct LoadArgs { /// input file @@ -304,6 +384,40 @@ struct SaveArgs { // Command handlers +fn cmd_silo_list( + sim: &mut ReconfiguratorSim, +) -> anyhow::Result> { + let mut s = String::new(); + for silo_name in &sim.silo_names { + swriteln!(s, "{}", silo_name); + } + Ok(Some(s)) +} + +fn cmd_silo_add( + sim: &mut ReconfiguratorSim, + args: SiloAddRemoveArgs, +) -> anyhow::Result> { + if sim.silo_names.contains(&args.silo_name) { + bail!("silo already exists: {:?}", &args.silo_name); + } + + sim.silo_names.push(args.silo_name); + Ok(None) +} + +fn cmd_silo_remove( + sim: &mut ReconfiguratorSim, + args: SiloAddRemoveArgs, +) -> anyhow::Result> { + let size_before = sim.silo_names.len(); + sim.silo_names.retain(|n| *n != args.silo_name); + if sim.silo_names.len() == size_before { + bail!("no such silo: {:?}", &args.silo_name); + } + Ok(None) +} + fn cmd_sled_list( sim: &mut ReconfiguratorSim, ) -> anyhow::Result> { @@ -481,14 +595,39 @@ fn cmd_blueprint_plan( .collections .get(&collection_id) .ok_or_else(|| anyhow!("no such collection: {}", collection_id))?; - let dns_version = Generation::new(); let policy = sim.system.to_policy().context("generating policy")?; let creator = "reconfigurator-sim"; + let planner = Planner::new_based_on( sim.log.clone(), parent_blueprint, - dns_version, - dns_version, + // The internal and external DNS numbers that go here are supposed to be + // the _current_ internal and external DNS generations at the point + // when planning happened. 
This is racy (these generations can change + // immediately after they're fetched from the database) but correctness + // only requires that the values here be *no newer* than the real + // values so it's okay if the real values get changed. + // + // The problem is we have no real system here to fetch these values + // from. What should the value be? + // + // - If we assume that the parent blueprint here was successfully + // executed immediately before generating this plan, then the values + // here should come from the generation number produced by executing + // the parent blueprint. + // + // - If the parent blueprint was never executed, or execution is still + // in progress, or if other blueprints have been executed in the + // meantime that changed DNS, then the values here could be different + // (older if the blueprint was never executed or is currently + // executing and newer if other blueprints have changed DNS in the + // meantime). + // + // But in this CLI, there's no execution at all. As a result, there's + // no way to really choose between these -- and it doesn't really + // matter, either. We'll just pick the parent blueprint's. + parent_blueprint.internal_dns_version, + parent_blueprint.external_dns_version, &policy, creator, collection, @@ -518,6 +657,7 @@ fn cmd_blueprint_diff( sim: &mut ReconfiguratorSim, args: BlueprintDiffArgs, ) -> anyhow::Result> { + let mut rv = String::new(); let blueprint1_id = args.blueprint1_id; let blueprint2_id = args.blueprint2_id; let blueprint1 = sim @@ -529,8 +669,116 @@ fn cmd_blueprint_diff( .get(&blueprint2_id) .ok_or_else(|| anyhow!("no such blueprint: {}", blueprint2_id))?; - let diff = blueprint1.diff_sleds(&blueprint2); - Ok(Some(diff.display().to_string())) + let sled_diff = blueprint1.diff_sleds(&blueprint2).display().to_string(); + swriteln!(rv, "{}", sled_diff); + + // Diff'ing DNS is a little trickier. First, compute what DNS should be for + // each blueprint. To do that we need to construct a list of sleds suitable + // for the executor. + let sleds_by_id = make_sleds_by_id(&sim)?; + let internal_dns_config1 = blueprint_internal_dns_config( + &blueprint1, + &sleds_by_id, + &Default::default(), + )?; + let internal_dns_config2 = blueprint_internal_dns_config( + &blueprint2, + &sleds_by_id, + &Default::default(), + )?; + let dns_diff = DnsDiff::new(&internal_dns_config1, &internal_dns_config2) + .context("failed to assemble DNS diff")?; + swriteln!(rv, "internal DNS:\n{}", dns_diff); + + let external_dns_config1 = blueprint_external_dns_config( + &blueprint1, + &sim.silo_names, + sim.external_dns_zone_name.clone(), + ); + let external_dns_config2 = blueprint_external_dns_config( + &blueprint2, + &sim.silo_names, + sim.external_dns_zone_name.clone(), + ); + let dns_diff = DnsDiff::new(&external_dns_config1, &external_dns_config2) + .context("failed to assemble external DNS diff")?; + swriteln!(rv, "external DNS:\n{}", dns_diff); + + Ok(Some(rv)) +} + +fn make_sleds_by_id( + sim: &ReconfiguratorSim, +) -> Result, anyhow::Error> +{ + let collection = sim + .system + .to_collection_builder() + .context( + "unexpectedly failed to create collection for current set of sleds", + )? 
+ .build(); + let sleds_by_id: BTreeMap<_, _> = collection + .sled_agents + .iter() + .map(|(sled_id, sled_agent_info)| { + let sled = nexus_reconfigurator_execution::Sled::new( + *sled_id, + sled_agent_info.sled_agent_address, + sled_agent_info.sled_role == SledRole::Scrimlet, + ); + (*sled_id, sled) + }) + .collect(); + Ok(sleds_by_id) +} + +fn cmd_blueprint_diff_dns( + sim: &mut ReconfiguratorSim, + args: BlueprintDiffDnsArgs, +) -> anyhow::Result> { + let dns_group = args.dns_group; + let dns_version = Generation::from(args.dns_version); + let blueprint_id = args.blueprint_id; + let blueprint = sim + .blueprints + .get(&blueprint_id) + .ok_or_else(|| anyhow!("no such blueprint: {}", blueprint_id))?; + + let existing_dns_config = match dns_group { + CliDnsGroup::Internal => sim.internal_dns.get(&dns_version), + CliDnsGroup::External => sim.external_dns.get(&dns_version), + } + .ok_or_else(|| { + anyhow!("no such {:?} DNS version: {}", dns_group, dns_version) + })?; + + let blueprint_dns_zone = match dns_group { + CliDnsGroup::Internal => { + let sleds_by_id = make_sleds_by_id(sim)?; + blueprint_internal_dns_config( + &blueprint, + &sleds_by_id, + &Default::default(), + ) + .with_context(|| { + format!( + "computing internal DNS config for blueprint {}", + blueprint_id + ) + })? + } + CliDnsGroup::External => blueprint_external_dns_config( + &blueprint, + &sim.silo_names, + sim.external_dns_zone_name.clone(), + ), + }; + + let existing_dns_zone = existing_dns_config.sole_zone()?; + let dns_diff = DnsDiff::new(&existing_dns_zone, &blueprint_dns_zone) + .context("failed to assemble DNS diff")?; + Ok(Some(dns_diff.to_string())) } fn cmd_blueprint_diff_inventory( @@ -560,6 +808,10 @@ fn cmd_save( policy, collections: sim.collections.values().cloned().collect(), blueprints: sim.blueprints.values().cloned().collect(), + internal_dns: sim.internal_dns.clone(), + external_dns: sim.external_dns.clone(), + silo_names: sim.silo_names.clone(), + external_dns_zone_names: vec![sim.external_dns_zone_name.clone()], }; let output_path = &args.filename; @@ -577,6 +829,75 @@ fn cmd_save( ))) } +fn cmd_show(sim: &mut ReconfiguratorSim) -> anyhow::Result> { + let mut s = String::new(); + do_print_properties(&mut s, sim); + swriteln!( + s, + "target number of Nexus instances: {}", + match sim.num_nexus { + Some(n) => n.to_string(), + None => String::from("default"), + } + ); + Ok(Some(s)) +} + +fn do_print_properties(s: &mut String, sim: &ReconfiguratorSim) { + swriteln!( + s, + "configured external DNS zone name: {}", + sim.external_dns_zone_name, + ); + swriteln!( + s, + "configured silo names: {}", + sim.silo_names + .iter() + .map(|s| s.as_str()) + .collect::>() + .join(", ") + ); + swriteln!( + s, + "internal DNS generations: {}", + sim.internal_dns + .keys() + .map(|s| s.to_string()) + .collect::>() + .join(", "), + ); + swriteln!( + s, + "external DNS generations: {}", + sim.external_dns + .keys() + .map(|s| s.to_string()) + .collect::>() + .join(", "), + ); +} + +fn cmd_set( + sim: &mut ReconfiguratorSim, + args: SetArgs, +) -> anyhow::Result> { + Ok(Some(match args { + SetArgs::NumNexus { num_nexus } => { + let rv = format!("{:?} -> {}", sim.num_nexus, num_nexus); + sim.num_nexus = Some(num_nexus); + sim.system.target_nexus_zone_count(usize::from(num_nexus)); + rv + } + SetArgs::ExternalDnsZoneName { zone_name } => { + let rv = + format!("{:?} -> {:?}", sim.external_dns_zone_name, zone_name); + sim.external_dns_zone_name = zone_name; + rv + } + })) +} + fn read_file( input_path: &camino::Utf8Path, ) 
-> anyhow::Result { @@ -726,6 +1047,24 @@ fn cmd_load( } } + sim.internal_dns = loaded.internal_dns; + sim.external_dns = loaded.external_dns; + sim.silo_names = loaded.silo_names; + + let nnames = loaded.external_dns_zone_names.len(); + if nnames > 0 { + if nnames > 1 { + swriteln!( + s, + "warn: found {} external DNS names; using only the first one", + nnames + ); + } + sim.external_dns_zone_name = + loaded.external_dns_zone_names.into_iter().next().unwrap(); + } + do_print_properties(&mut s, sim); + swriteln!(s, "loaded data from {:?}", input_path); Ok(Some(s)) } @@ -765,5 +1104,14 @@ fn cmd_file_contents(args: FileContentsArgs) -> anyhow::Result> { ); } + swriteln!(s, "internal DNS generations: {:?}", loaded.internal_dns.keys(),); + swriteln!(s, "external DNS generations: {:?}", loaded.external_dns.keys(),); + swriteln!(s, "silo names: {:?}", loaded.silo_names); + swriteln!( + s, + "external DNS zone names: {}", + loaded.external_dns_zone_names.join(", ") + ); + Ok(Some(s)) } diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index 5eee34bd51..192a390afd 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -161,9 +161,6 @@ pub struct DnsConfigBuilder { /// similar to service_instances_zones, but for services that run on sleds service_instances_sleds: BTreeMap>, - - /// generation number for this config - generation: Generation, } /// Describes a host of type "sled" in the control plane DNS zone @@ -195,7 +192,6 @@ impl DnsConfigBuilder { zones: BTreeMap::new(), service_instances_zones: BTreeMap::new(), service_instances_sleds: BTreeMap::new(), - generation: Generation::new(), } } @@ -400,14 +396,9 @@ impl DnsConfigBuilder { self.service_backend_zone(ServiceName::Mgd, &zone, mgd_port) } - pub fn generation(&mut self, generation: Generation) { - self.generation = generation; - } - - /// Construct a complete [`DnsConfigParams`] (suitable for propagating to - /// our DNS servers) for the control plane DNS zone described up to this - /// point - pub fn build(self) -> DnsConfigParams { + /// Construct a `DnsConfigZone` describing the control plane zone described + /// up to this point + pub fn build_zone(self) -> DnsConfigZone { // Assemble the set of "AAAA" records for sleds. 
let sled_records = self.sleds.into_iter().map(|(sled, sled_ip)| { let name = Host::Sled(sled.0).dns_name(); @@ -465,13 +456,18 @@ impl DnsConfigBuilder { .chain(srv_records_zones) .collect(); + DnsConfigZone { zone_name: DNS_ZONE.to_owned(), records: all_records } + } + + /// Construct a complete [`DnsConfigParams`] (suitable for propagating to + /// our DNS servers) for the control plane DNS zone described up to this + /// point + pub fn build_full_config_for_initial_generation(self) -> DnsConfigParams { + let zone = self.build_zone(); DnsConfigParams { - generation: u64::from(self.generation), + generation: u64::from(Generation::new()), time_created: chrono::Utc::now(), - zones: vec![DnsConfigZone { - zone_name: DNS_ZONE.to_owned(), - records: all_records, - }], + zones: vec![zone], } } } @@ -609,7 +605,7 @@ mod test { ("zones_only", builder_zones_only), ("non_trivial", builder_non_trivial), ] { - let config = builder.build(); + let config = builder.build_full_config_for_initial_generation(); assert_eq!(config.generation, 1); assert_eq!(config.zones.len(), 1); assert_eq!(config.zones[0].zone_name, DNS_ZONE); diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index 114333cb61..f5987df7c6 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -530,7 +530,7 @@ mod test { dns_config .service_backend_zone(ServiceName::Cockroach, &zone, 12345) .unwrap(); - let dns_config = dns_config.build(); + let dns_config = dns_config.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); let resolver = dns_server.resolver().unwrap(); @@ -608,7 +608,8 @@ mod test { .service_backend_zone(srv_backend, &zone, crucible_addr.port()) .unwrap(); - let mut dns_config = dns_builder.build(); + let mut dns_config = + dns_builder.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); // Look up Cockroach @@ -687,7 +688,7 @@ mod test { let zone = dns_builder.host_zone(Uuid::new_v4(), ip1).unwrap(); let srv_crdb = ServiceName::Cockroach; dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); - let dns_config = dns_builder.build(); + let dns_config = dns_builder.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); let found_ip = resolver .lookup_ipv6(ServiceName::Cockroach) @@ -702,7 +703,8 @@ mod test { let zone = dns_builder.host_zone(Uuid::new_v4(), ip2).unwrap(); let srv_crdb = ServiceName::Cockroach; dns_builder.service_backend_zone(srv_crdb, &zone, 54321).unwrap(); - let mut dns_config = dns_builder.build(); + let mut dns_config = + dns_builder.build_full_config_for_initial_generation(); dns_config.generation += 1; dns_server.update(&dns_config).await.unwrap(); let found_ip = resolver @@ -836,7 +838,7 @@ mod test { dns_config .service_backend_zone(ServiceName::Nexus, &zone, port) .unwrap(); - let dns_config = dns_config.build(); + let dns_config = dns_config.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); // Confirm that we can access this record manually. 
@@ -918,7 +920,7 @@ mod test { dns_config .service_backend_zone(ServiceName::Nexus, &zone, port) .unwrap(); - let dns_config = dns_config.build(); + let dns_config = dns_config.build_full_config_for_initial_generation(); dns_server1.cleanup_successful(); dns_server2.update(&dns_config).await.unwrap(); @@ -967,7 +969,8 @@ mod test { .unwrap(); // Plumb records onto DNS server - let mut dns_config = dns_config.build(); + let mut dns_config = + dns_config.build_full_config_for_initial_generation(); dns_server.update(&dns_config).await.unwrap(); // Using the resolver we should get back both addresses diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index b12df1875f..6fe524686d 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -6,6 +6,7 @@ use super::DataStore; use crate::authz; use crate::context::OpContext; use crate::db; +use crate::db::datastore::SQL_BATCH_SIZE; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::DnsGroup; @@ -21,6 +22,8 @@ use crate::db::TransactionError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; +use futures::future::BoxFuture; +use futures::FutureExt; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::internal_api::params::DnsConfigZone; use nexus_types::internal_api::params::DnsRecord; @@ -686,6 +689,52 @@ impl DnsVersionUpdateBuilder { } } +/// Extra interfaces that are not intended for use in Nexus, but useful for +/// testing and `omdb` +pub trait DataStoreDnsTest: Send + Sync { + /// Fetch the DNS configuration for a specific group and version + fn dns_config_read_version<'a>( + &'a self, + opctx: &'a OpContext, + dns_group: DnsGroup, + version: omicron_common::api::external::Generation, + ) -> BoxFuture<'_, Result>; +} + +impl DataStoreDnsTest for DataStore { + fn dns_config_read_version<'a>( + &'a self, + opctx: &'a OpContext, + dns_group: DnsGroup, + version: omicron_common::api::external::Generation, + ) -> BoxFuture<'_, Result> { + async move { + use db::schema::dns_version::dsl; + let dns_version = dsl::dns_version + .filter(dsl::dns_group.eq(dns_group)) + .filter(dsl::version.eq(Generation::from(version))) + .select(DnsVersion::as_select()) + .first_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| { + // Technically, we could produce a `NotFound` error here. + // But since this is only for testing, it's okay to produce + // an InternalError. 
+ public_error_from_diesel(e, ErrorHandler::Server) + })?; + + self.dns_config_read_version( + opctx, + &opctx.log, + SQL_BATCH_SIZE, + &dns_version, + ) + .await + } + .boxed() + } +} + #[cfg(test)] mod test { use crate::db::datastore::test_utils::datastore_test; diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 6db65f7173..13d6bfcc8d 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -101,6 +101,7 @@ mod vpc; mod zpool; pub use address_lot::AddressLotCreateResult; +pub use dns::DataStoreDnsTest; pub use dns::DnsVersionUpdateBuilder; pub use instance::InstanceAndActiveVmm; pub use inventory::DataStoreInventoryTest; diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 94e033ec3c..e753a0cf09 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -34,8 +34,6 @@ use diesel::prelude::*; use diesel::result::Error as DieselError; use diesel::upsert::excluded; use ipnetwork::IpNetwork; -use nexus_db_model::DnsGroup; -use nexus_db_model::DnsZone; use nexus_db_model::ExternalIp; use nexus_db_model::IncompleteNetworkInterface; use nexus_db_model::InitialDnsGroup; @@ -45,7 +43,6 @@ use nexus_db_model::SiloUserPasswordHash; use nexus_db_model::SledUnderlaySubnetAllocation; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintTarget; -use nexus_types::deployment::OmicronZoneType; use nexus_types::external_api::params as external_params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::IdentityType; @@ -56,7 +53,6 @@ use nexus_types::internal_api::params as internal_params; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; -use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; @@ -871,52 +867,36 @@ impl DataStore { Ok(()) } - pub async fn nexus_external_addresses( + // TODO once we eliminate the service table, we can eliminate this function + // and the branch in the sole caller + pub async fn nexus_external_addresses_from_service_table( &self, opctx: &OpContext, - blueprint: Option<&Blueprint>, - ) -> Result<(Vec, Vec), Error> { + ) -> Result, Error> { opctx.authorize(authz::Action::Read, &authz::DNS_CONFIG).await?; - let dns_zones = self - .dns_zones_list_all(opctx, DnsGroup::External) - .await - .internal_context("listing DNS zones to list external addresses")?; - - let nexus_external_ips = if let Some(blueprint) = blueprint { - blueprint - .all_omicron_zones() - .filter_map(|(_, z)| match z.zone_type { - OmicronZoneType::Nexus { external_ip, .. 
} => { - Some(external_ip) - } - _ => None, - }) - .collect() - } else { - use crate::db::schema::external_ip::dsl as extip_dsl; - use crate::db::schema::service::dsl as service_dsl; - - let conn = self.pool_connection_authorized(opctx).await?; - - extip_dsl::external_ip - .inner_join(service_dsl::service.on( - service_dsl::id.eq(extip_dsl::parent_id.assume_not_null()), - )) - .filter(extip_dsl::parent_id.is_not_null()) - .filter(extip_dsl::time_deleted.is_null()) - .filter(extip_dsl::is_service) - .filter(service_dsl::kind.eq(db::model::ServiceKind::Nexus)) - .select(ExternalIp::as_select()) - .get_results_async(&*conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? - .into_iter() - .map(|external_ip| external_ip.ip.ip()) - .collect() - }; + use crate::db::schema::external_ip::dsl as extip_dsl; + use crate::db::schema::service::dsl as service_dsl; - Ok((nexus_external_ips, dns_zones)) + let conn = self.pool_connection_authorized(opctx).await?; + + Ok(extip_dsl::external_ip + .inner_join( + service_dsl::service + .on(service_dsl::id + .eq(extip_dsl::parent_id.assume_not_null())), + ) + .filter(extip_dsl::parent_id.is_not_null()) + .filter(extip_dsl::time_deleted.is_null()) + .filter(extip_dsl::is_service) + .filter(service_dsl::kind.eq(db::model::ServiceKind::Nexus)) + .select(ExternalIp::as_select()) + .get_results_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? + .into_iter() + .map(|external_ip| external_ip.ip.ip()) + .collect()) } } diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 7d7e24b6cf..782e673a17 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -11,11 +11,10 @@ use dns_service_client::DnsDiff; use internal_dns::DnsConfigBuilder; use internal_dns::ServiceName; use nexus_db_model::DnsGroup; -use nexus_db_model::Silo; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::Discoverability; use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; -use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO_ID; +use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; @@ -27,6 +26,8 @@ use nexus_types::internal_api::params::DnsRecord; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; use omicron_common::api::external::InternalContext; +use omicron_common::api::external::Name; +use omicron_common::bail_unless; use slog::{debug, info, o}; use std::collections::BTreeMap; use std::collections::HashMap; @@ -59,7 +60,7 @@ pub(crate) async fn deploy_dns( // we know it's being hit when we exercise this condition. // Next, construct the DNS config represented by the blueprint. - let internal_dns_config_blueprint = + let internal_dns_zone_blueprint = blueprint_internal_dns_config(blueprint, sleds_by_id, overrides) .map_err(|e| { Error::internal_error(&format!( @@ -72,21 +73,30 @@ pub(crate) async fn deploy_dns( .await .internal_context("listing Silos (for configuring external DNS)")? .into_iter() - // We do not generate a DNS name for the "default" Silo. 
- .filter(|silo| silo.id() != *DEFAULT_SILO_ID) + .map(|silo| silo.name().clone()) .collect::>(); - let (nexus_external_ips, nexus_external_dns_zones) = - datastore.nexus_external_addresses(opctx, Some(blueprint)).await?; - let nexus_external_dns_zone_names = nexus_external_dns_zones + let nexus_external_dns_zone_names = datastore + .dns_zones_list_all(opctx, DnsGroup::External) + .await + .internal_context("listing DNS zones")? .into_iter() .map(|z| z.zone_name) .collect::>(); - let external_dns_config_blueprint = blueprint_external_dns_config( + // Other parts of the system support multiple external DNS zone names. We + // do not here. If we decide to support this in the future, this mechanism + // will need to be updated. + bail_unless!( + nexus_external_dns_zone_names.len() == 1, + "expected exactly one external DNS zone" + ); + // unwrap: we just checked the length. + let external_dns_zone_name = + nexus_external_dns_zone_names.into_iter().next().unwrap(); + let external_dns_zone_blueprint = blueprint_external_dns_config( blueprint, - &nexus_external_ips, &silos, - &nexus_external_dns_zone_names, + external_dns_zone_name, ); // Deploy the changes. @@ -96,7 +106,7 @@ pub(crate) async fn deploy_dns( creator.clone(), blueprint, &internal_dns_config_current, - &internal_dns_config_blueprint, + internal_dns_zone_blueprint, DnsGroup::Internal, ) .await?; @@ -106,7 +116,7 @@ pub(crate) async fn deploy_dns( creator, blueprint, &external_dns_config_current, - &external_dns_config_blueprint, + external_dns_zone_blueprint, DnsGroup::External, ) .await?; @@ -119,13 +129,19 @@ pub(crate) async fn deploy_dns_one( creator: String, blueprint: &Blueprint, dns_config_current: &DnsConfigParams, - dns_config_blueprint: &DnsConfigParams, + dns_zone_blueprint: DnsConfigZone, dns_group: DnsGroup, ) -> Result<(), Error> { let log = opctx .log .new(o!("blueprint_execution" => format!("dns {:?}", dns_group))); + // Other parts of the system support multiple external DNS zones. We do not + // do so here. + let dns_zone_current = dns_config_current + .sole_zone() + .map_err(|e| Error::internal_error(&format!("{:#}", e)))?; + // Looking at the current contents of DNS, prepare an update that will make // it match what it should be. let comment = format!("blueprint {} ({})", blueprint.id, blueprint.comment); @@ -134,8 +150,8 @@ pub(crate) async fn deploy_dns_one( dns_group, comment, creator, - dns_config_current, - dns_config_blueprint, + dns_zone_current, + &dns_zone_blueprint, )?; let Some(update) = maybe_update else { // Nothing to do. @@ -208,6 +224,16 @@ pub(crate) async fn deploy_dns_one( // In both cases, the system will (1) converge to having successfully // executed the target blueprint, and (2) never have rolled any changes back // -- DNS only ever moves forward, closer to the latest desired state. + let blueprint_generation = match dns_group { + DnsGroup::Internal => blueprint.internal_dns_version, + DnsGroup::External => blueprint.external_dns_version, + }; + let dns_config_blueprint = DnsConfigParams { + zones: vec![dns_zone_blueprint], + time_created: chrono::Utc::now(), + generation: u64::from(blueprint_generation.next()), + }; + info!( log, "attempting to update from generation {} to generation {}", @@ -231,7 +257,7 @@ pub fn blueprint_internal_dns_config( blueprint: &Blueprint, sleds_by_id: &BTreeMap, overrides: &Overridables, -) -> Result { +) -> Result { // The DNS names configured here should match what RSS configures for the // same zones. 
It's tricky to have RSS share the same code because it uses // Sled Agent's _internal_ `OmicronZoneConfig` (and friends), whereas we're @@ -336,44 +362,43 @@ pub fn blueprint_internal_dns_config( .unwrap(); } - // We set the generation number for the internal DNS to be newer than - // whatever it was when this blueprint was generated. This will only be - // used if the generated DNS contents are different from what's current. - dns_builder.generation(blueprint.internal_dns_version.next()); - Ok(dns_builder.build()) + Ok(dns_builder.build_zone()) } pub fn blueprint_external_dns_config( blueprint: &Blueprint, - nexus_external_ips: &[IpAddr], - silos: &[Silo], - external_dns_zone_names: &[String], -) -> DnsConfigParams { + silos: &[Name], + external_dns_zone_name: String, +) -> DnsConfigZone { + let nexus_external_ips = blueprint_nexus_external_ips(blueprint); + let dns_records: Vec = nexus_external_ips .into_iter() .map(|addr| match addr { - IpAddr::V4(addr) => DnsRecord::A(*addr), - IpAddr::V6(addr) => DnsRecord::Aaaa(*addr), + IpAddr::V4(addr) => DnsRecord::A(addr), + IpAddr::V6(addr) => DnsRecord::Aaaa(addr), }) .collect(); let records = silos .into_iter() - .map(|silo| (silo_dns_name(&silo.name()), dns_records.clone())) - .collect::>>(); - - let zones = external_dns_zone_names - .into_iter() - .map(|zone_name| DnsConfigZone { - zone_name: zone_name.to_owned(), - records: records.clone(), + // We do not generate a DNS name for the "default" Silo. + // + // We use the name here rather than the id. It shouldn't really matter + // since every system will have this silo and so no other Silo could + // have this name. But callers (particularly the test suite and + // reconfigurator-cli) specify silos by name, not id, so if we used the + // id here then they'd have to apply this filter themselves (and this + // abstraction, such as it is, would be leakier). + .filter_map(|silo_name| { + (silo_name != DEFAULT_SILO.name()) + .then(|| (silo_dns_name(&silo_name), dns_records.clone())) }) - .collect(); + .collect::>>(); - DnsConfigParams { - generation: u64::from(blueprint.external_dns_version.next()), - time_created: chrono::Utc::now(), - zones, + DnsConfigZone { + zone_name: external_dns_zone_name, + records: records.clone(), } } @@ -382,12 +407,12 @@ fn dns_compute_update( dns_group: DnsGroup, comment: String, creator: String, - current_config: &DnsConfigParams, - new_config: &DnsConfigParams, + current_zone: &DnsConfigZone, + new_zone: &DnsConfigZone, ) -> Result, Error> { let mut update = DnsVersionUpdateBuilder::new(dns_group, comment, creator); - let diff = DnsDiff::new(¤t_config, &new_config) + let diff = DnsDiff::new(¤t_zone, &new_zone) .map_err(|e| Error::internal_error(&format!("{:#}", e)))?; if diff.is_empty() { info!(log, "no changes"); @@ -446,6 +471,17 @@ pub fn silo_dns_name(name: &omicron_common::api::external::Name) -> String { format!("{}.sys", name) } +/// Return the Nexus external addresses according to the given blueprint +pub fn blueprint_nexus_external_ips(blueprint: &Blueprint) -> Vec { + blueprint + .all_omicron_zones() + .filter_map(|(_, z)| match z.zone_type { + OmicronZoneType::Nexus { external_ip, .. 
} => Some(external_ip), + _ => None, + }) + .collect() +} + #[cfg(test)] mod test { use super::*; @@ -548,7 +584,7 @@ mod test { &Default::default(), ) .unwrap(); - assert!(blueprint_dns.sole_zone().unwrap().records.is_empty()); + assert!(blueprint_dns.records.is_empty()); } /// test blueprint_dns_config(): exercise various different conditions @@ -647,17 +683,12 @@ mod test { }) .collect(); - let dns_config_blueprint = blueprint_internal_dns_config( + let blueprint_dns_zone = blueprint_internal_dns_config( &blueprint, &sleds_by_id, &Default::default(), ) .unwrap(); - assert_eq!( - dns_config_blueprint.generation, - u64::from(initial_dns_generation.next()) - ); - let blueprint_dns_zone = dns_config_blueprint.sole_zone().unwrap(); assert_eq!(blueprint_dns_zone.zone_name, DNS_ZONE); // Now, verify a few different properties about the generated DNS @@ -858,84 +889,24 @@ mod test { }) .unwrap(); - let nexus_external_ips: Vec<_> = blueprint - .all_omicron_zones() - .filter_map(|(_, z)| match &z.zone_type { - OmicronZoneType::Nexus { external_ip, .. } => { - Some(*external_ip) - } - _ => None, - }) - .collect(); - // It shouldn't ever be possible to have no Silos at all, but at least // make sure we don't panic. - let external_dns_config = blueprint_external_dns_config( - &blueprint, - &nexus_external_ips, - &[], - &[String::from("oxide.test")], - ); - assert_eq!( - external_dns_config.generation, - u64::from(initial_external_dns_generation.next()) - ); - assert_eq!(external_dns_config.zones.len(), 1); - assert_eq!(external_dns_config.zones[0].zone_name, "oxide.test"); - assert!(external_dns_config.zones[0].records.is_empty()); - - // Same with external DNS zones. - let external_dns_config = blueprint_external_dns_config( + let external_dns_zone = blueprint_external_dns_config( &blueprint, - &nexus_external_ips, - std::slice::from_ref(&my_silo), &[], + String::from("oxide.test"), ); - assert_eq!( - external_dns_config.generation, - u64::from(initial_external_dns_generation.next()) - ); - assert!(external_dns_config.zones.is_empty()); - - // Same with external IPs. - let external_dns_config = blueprint_external_dns_config( - &blueprint, - &[], - std::slice::from_ref(&my_silo), - &[String::from("oxide.test")], - ); - assert_eq!( - external_dns_config.generation, - u64::from(initial_external_dns_generation.next()) - ); + assert_eq!(external_dns_zone.zone_name, "oxide.test"); + assert!(external_dns_zone.records.is_empty()); - // Now check a more typical case. (Although we wouldn't normally have - // more than one external DNS zone, it's a more general case and pretty - // easy to test.) - let external_dns_config = blueprint_external_dns_config( + // Now check a more typical case. 
+ let external_dns_zone = blueprint_external_dns_config( &blueprint, - &nexus_external_ips, - std::slice::from_ref(&my_silo), - &[String::from("oxide1.test"), String::from("oxide2.test")], - ); - assert_eq!( - external_dns_config.generation, - u64::from(initial_external_dns_generation.next()) - ); - assert_eq!(external_dns_config.zones.len(), 2); - assert_eq!( - external_dns_config.zones[0].records, - external_dns_config.zones[1].records - ); - assert_eq!( - external_dns_config.zones[0].zone_name, - String::from("oxide1.test"), - ); - assert_eq!( - external_dns_config.zones[1].zone_name, - String::from("oxide2.test"), + std::slice::from_ref(my_silo.name()), + String::from("oxide.test"), ); - let records = &external_dns_config.zones[0].records; + assert_eq!(external_dns_zone.zone_name, String::from("oxide.test")); + let records = &external_dns_zone.records; assert_eq!(records.len(), 1); let silo_records = records .get(&silo_dns_name(my_silo.name())) @@ -972,14 +943,14 @@ mod test { // Start with an empty DNS config. There's no database update needed // when updating the DNS config to itself. - let dns_empty = dns_config_empty(); + let dns_empty = &dns_config_empty().zones[0]; match dns_compute_update( &logctx.log, DnsGroup::Internal, "test-suite".to_string(), "test-suite".to_string(), - &dns_empty, - &dns_empty, + dns_empty, + dns_empty, ) { Ok(None) => (), Err(error) => { @@ -991,40 +962,26 @@ mod test { // Now let's do something a little less trivial. Set up two slightly // different DNS configurations, compute the database update, and make // sure it matches what we expect. - let dns_config1 = DnsConfigParams { - generation: 4, - time_created: chrono::Utc::now(), - zones: vec![DnsConfigZone { - zone_name: "my-zone".to_string(), - records: HashMap::from([ - ( - "ex1".to_string(), - vec![DnsRecord::A(Ipv4Addr::LOCALHOST)], - ), - ( - "ex2".to_string(), - vec![DnsRecord::A("192.168.1.3".parse().unwrap())], - ), - ]), - }], + let dns_zone1 = DnsConfigZone { + zone_name: "my-zone".to_string(), + records: HashMap::from([ + ("ex1".to_string(), vec![DnsRecord::A(Ipv4Addr::LOCALHOST)]), + ( + "ex2".to_string(), + vec![DnsRecord::A("192.168.1.3".parse().unwrap())], + ), + ]), }; - let dns_config2 = DnsConfigParams { - generation: 4, - time_created: chrono::Utc::now(), - zones: vec![DnsConfigZone { - zone_name: "my-zone".to_string(), - records: HashMap::from([ - ( - "ex2".to_string(), - vec![DnsRecord::A("192.168.1.4".parse().unwrap())], - ), - ( - "ex3".to_string(), - vec![DnsRecord::A(Ipv4Addr::LOCALHOST)], - ), - ]), - }], + let dns_zone2 = DnsConfigZone { + zone_name: "my-zone".to_string(), + records: HashMap::from([ + ( + "ex2".to_string(), + vec![DnsRecord::A("192.168.1.4".parse().unwrap())], + ), + ("ex3".to_string(), vec![DnsRecord::A(Ipv4Addr::LOCALHOST)]), + ]), }; let update = dns_compute_update( @@ -1032,8 +989,8 @@ mod test { DnsGroup::Internal, "test-suite".to_string(), "test-suite".to_string(), - &dns_config1, - &dns_config2, + &dns_zone1, + &dns_zone2, ) .expect("failed to compute update") .expect("unexpectedly produced no update"); @@ -1056,8 +1013,8 @@ mod test { ); // Test the difference between two configs whose SRV records differ. - let mut dns_config1 = dns_config1.clone(); - dns_config1.zones[0].records.insert( + let mut dns_zone1 = dns_zone1.clone(); + dns_zone1.records.insert( String::from("_nexus._tcp"), vec![ DnsRecord::Srv(Srv { @@ -1075,40 +1032,36 @@ mod test { ], ); // A clone of the same one should of course be the same as the original. 
- let mut dns_config2 = dns_config1.clone(); + let mut dns_zone2 = dns_zone1.clone(); let update = dns_compute_update( &logctx.log, DnsGroup::Internal, "test-suite".to_string(), "test-suite".to_string(), - &dns_config1, - &dns_config2, + &dns_zone1, + &dns_zone2, ) .expect("failed to compute update"); assert!(update.is_none()); // If we shift the order of the items, it should still reflect no // changes. - let records = - dns_config2.zones[0].records.get_mut("_nexus._tcp").unwrap(); + let records = dns_zone2.records.get_mut("_nexus._tcp").unwrap(); records.rotate_left(1); - assert!( - records != dns_config1.zones[0].records.get("_nexus._tcp").unwrap() - ); + assert!(records != dns_zone1.records.get("_nexus._tcp").unwrap()); let update = dns_compute_update( &logctx.log, DnsGroup::Internal, "test-suite".to_string(), "test-suite".to_string(), - &dns_config1, - &dns_config2, + &dns_zone1, + &dns_zone2, ) .expect("failed to compute update"); assert!(update.is_none()); // If we add another record, there should indeed be a new update. - let records = - dns_config2.zones[0].records.get_mut("_nexus._tcp").unwrap(); + let records = dns_zone2.records.get_mut("_nexus._tcp").unwrap(); records.push(DnsRecord::Srv(Srv { port: 123, prio: 1, @@ -1122,8 +1075,8 @@ mod test { DnsGroup::Internal, "test-suite".to_string(), "test-suite".to_string(), - &dns_config1, - &dns_config2, + &dns_zone1, + &dns_zone2, ) .expect("failed to compute update") .expect("expected an update"); @@ -1140,6 +1093,15 @@ mod test { logctx.cleanup_successful(); } + fn diff_sole_zones<'a>( + left: &'a DnsConfigParams, + right: &'a DnsConfigParams, + ) -> DnsDiff<'a> { + let left_zone = left.sole_zone().unwrap(); + let right_zone = right.sole_zone().unwrap(); + DnsDiff::new(left_zone, right_zone).unwrap() + } + // Tests end-to-end DNS behavior: // // - If we create a blueprint matching the current system, and then apply @@ -1328,8 +1290,7 @@ mod test { dns_initial_internal.generation + 1, ); - let diff = - DnsDiff::new(&dns_initial_internal, &dns_latest_internal).unwrap(); + let diff = diff_sole_zones(&dns_initial_internal, &dns_latest_internal); // There should be one new AAAA record for the zone itself. let new_records: Vec<_> = diff.names_added().collect(); let (new_name, &[DnsRecord::Aaaa(_)]) = new_records[0] else { @@ -1368,7 +1329,7 @@ mod test { dns_previous_external.generation + 1, ); let diff = - DnsDiff::new(&dns_previous_external, &dns_latest_external).unwrap(); + diff_sole_zones(&dns_previous_external, &dns_latest_external); assert!(diff.names_added().next().is_none()); assert!(diff.names_removed().next().is_none()); let changed: Vec<_> = diff.names_changed().collect(); @@ -1494,7 +1455,7 @@ mod test { assert_eq!(old_external.generation + 1, dns_latest_external.generation); // Specifically, there should be one new name (for the new Silo). 
- let diff = DnsDiff::new(&old_external, &dns_latest_external).unwrap(); + let diff = diff_sole_zones(&old_external, &dns_latest_external); assert!(diff.names_removed().next().is_none()); assert!(diff.names_changed().next().is_none()); let added = diff.names_added().collect::>(); diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 1373c9a31f..30b1ab0ce6 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -20,22 +20,33 @@ use std::collections::BTreeMap; use std::net::SocketAddrV6; use uuid::Uuid; -pub use dns::silo_dns_name; - mod datasets; mod dns; mod omicron_zones; mod overridables; mod resource_allocation; -struct Sled { +pub use dns::blueprint_external_dns_config; +pub use dns::blueprint_internal_dns_config; +pub use dns::blueprint_nexus_external_ips; +pub use dns::silo_dns_name; + +pub struct Sled { id: Uuid, sled_agent_address: SocketAddrV6, is_scrimlet: bool, } impl Sled { - pub fn subnet(&self) -> Ipv6Subnet { + pub fn new( + id: Uuid, + sled_agent_address: SocketAddrV6, + is_scrimlet: bool, + ) -> Sled { + Sled { id, sled_agent_address, is_scrimlet } + } + + pub(crate) fn subnet(&self) -> Ipv6Subnet { Ipv6Subnet::::new(*self.sled_agent_address.ip()) } } diff --git a/nexus/src/app/silo.rs b/nexus/src/app/silo.rs index 487af96aab..d07dc7013a 100644 --- a/nexus/src/app/silo.rs +++ b/nexus/src/app/silo.rs @@ -16,6 +16,7 @@ use nexus_db_queries::db::identity::{Asset, Resource}; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::{self, lookup}; use nexus_db_queries::{authn, authz}; +use nexus_reconfigurator_execution::blueprint_nexus_external_ips; use nexus_reconfigurator_execution::silo_dns_name; use nexus_types::internal_api::params::DnsRecord; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -96,13 +97,22 @@ impl super::Nexus { // Set up an external DNS name for this Silo's API and console // endpoints (which are the same endpoint). + let nexus_external_dns_zones = datastore + .dns_zones_list_all(nexus_opctx, DnsGroup::External) + .await + .internal_context("listing external DNS zones")?; let target_blueprint = datastore .blueprint_target_get_current_full(opctx) .await .internal_context("loading target blueprint")?; - let target = target_blueprint.as_ref().map(|(_, blueprint)| blueprint); - let (nexus_external_ips, nexus_external_dns_zones) = - datastore.nexus_external_addresses(nexus_opctx, target).await?; + let nexus_external_ips = match target_blueprint { + Some((_, blueprint)) => blueprint_nexus_external_ips(&blueprint), + None => { + datastore + .nexus_external_addresses_from_service_table(nexus_opctx) + .await? 
+ } + }; let dns_records: Vec = nexus_external_ips .into_iter() .map(|addr| match addr { diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index e5616a4641..76ef600fbb 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -749,8 +749,11 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { log.clone(), ); - let dns_config = - self.rack_init_builder.internal_dns_config.clone().build(); + let dns_config = self + .rack_init_builder + .internal_dns_config + .clone() + .build_full_config_for_initial_generation(); slog::info!(log, "DNS population: {:#?}", dns_config); dns_config_client.dns_config_put(&dns_config).await.expect( diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index b435964b53..22eb6b7dbc 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -14,6 +14,7 @@ use crate::external_api::views::SledPolicy; use crate::external_api::views::SledState; +use crate::internal_api::params::DnsConfigParams; use crate::inventory::Collection; pub use crate::inventory::OmicronZoneConfig; pub use crate::inventory::OmicronZoneDataset; @@ -926,4 +927,8 @@ pub struct UnstableReconfiguratorState { pub policy: Policy, pub collections: Vec, pub blueprints: Vec, + pub internal_dns: BTreeMap, + pub external_dns: BTreeMap, + pub silo_names: Vec, + pub external_dns_zone_names: Vec, } diff --git a/sled-agent/src/fakes/nexus.rs b/sled-agent/src/fakes/nexus.rs index de37b77bcd..719f08888a 100644 --- a/sled-agent/src/fakes/nexus.rs +++ b/sled-agent/src/fakes/nexus.rs @@ -187,7 +187,8 @@ pub async fn start_dns_server( nexus_addr.port(), ) .expect("failed to set up DNS"); - let dns_config = dns_config_builder.build(); + let dns_config = + dns_config_builder.build_full_config_for_initial_generation(); dns.initialize_with_config(log, &dns_config).await.unwrap(); dns } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 7b76466964..b859c08a94 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -1685,7 +1685,7 @@ mod tests { nexus_server.local_addr().port(), ) .unwrap(); - let dns_config = dns_config.build(); + let dns_config = dns_config.build_full_config_for_initial_generation(); dns_dropshot_client.dns_config_put(&dns_config).await.unwrap(); let resolver = Arc::new( diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index dd6936132b..153031a545 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -690,7 +690,7 @@ impl Plan { .map(|sled_info| (sled_info.sled_address, sled_info.request)) .collect(); - let dns_config = dns_builder.build(); + let dns_config = dns_builder.build_full_config_for_initial_generation(); Ok(Self { services, dns_config }) } diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index fea6b738a6..dc770d179d 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -341,7 +341,8 @@ pub async fn run_standalone_server( .expect("failed to set up DNS"); // Initialize the internal DNS entries - let dns_config = dns_config_builder.build(); + let dns_config = + dns_config_builder.build_full_config_for_initial_generation(); dns.initialize_with_config(&log, &dns_config).await?; let internal_dns_version = Generation::try_from(dns_config.generation) .expect("invalid internal dns version"); From 7484017b49024ef9a3ccaacfd3f31ac451866e8e Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 28 Mar 2024 15:24:47 -0700 
Subject: [PATCH 010/334] [nexus] move UuidRng code out to a common crate, make CollectionBuilder use it (#5341) In #5270, we need determinism not just from blueprints but also collections. So move the UuidRng into a common place. As part of that, I also decided to make it its own crate and write some documentation about it, making it more generic along the way. I think this should be a pretty clean representation of what this is trying to do. --- Cargo.lock | 14 +- Cargo.toml | 4 + nexus/inventory/Cargo.toml | 1 + nexus/inventory/src/builder.rs | 19 +- nexus/reconfigurator/planning/Cargo.toml | 2 +- .../planning/src/blueprint_builder.rs | 61 +--- nexus/reconfigurator/planning/src/example.rs | 6 +- nexus/reconfigurator/planning/src/planner.rs | 2 +- typed-rng/Cargo.toml | 11 + typed-rng/src/lib.rs | 260 ++++++++++++++++++ 10 files changed, 326 insertions(+), 54 deletions(-) create mode 100644 typed-rng/Cargo.toml create mode 100644 typed-rng/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index eca65905da..63dc1cc735 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4717,6 +4717,7 @@ dependencies = [ "strum 0.26.1", "thiserror", "tokio", + "typed-rng", "uuid 1.7.0", ] @@ -4827,10 +4828,10 @@ dependencies = [ "omicron-test-utils", "omicron-workspace-hack", "rand 0.8.5", - "rand_seeder", "sled-agent-client", "slog", "thiserror", + "typed-rng", "uuid 1.7.0", ] @@ -10297,6 +10298,17 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a90726108dab678edab76459751e1cc7c597c3484a6384d6423191255fa641b" +[[package]] +name = "typed-rng" +version = "0.1.0" +dependencies = [ + "omicron-workspace-hack", + "rand 0.8.5", + "rand_core 0.6.4", + "rand_seeder", + "uuid 1.7.0", +] + [[package]] name = "typenum" version = "1.16.0" diff --git a/Cargo.toml b/Cargo.toml index 9cfb3ed283..0d91aa076b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -70,6 +70,7 @@ members = [ "test-utils", "tufaceous-lib", "tufaceous", + "typed-rng", "update-common", "update-engine", "uuid-kinds", @@ -149,6 +150,7 @@ default-members = [ "test-utils", "tufaceous-lib", "tufaceous", + "typed-rng", "update-common", "update-engine", "uuid-kinds", @@ -343,6 +345,7 @@ propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev proptest = "1.4.0" quote = "1.0" rand = "0.8.5" +rand_core = "0.6.4" rand_seeder = "0.2.3" ratatui = "0.26.1" rayon = "1.9" @@ -434,6 +437,7 @@ trybuild = "1.0.89" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } tui-tree-widget = "0.17.0" +typed-rng = { path = "typed-rng" } unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 1c20e8f8b6..43041ab146 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -19,6 +19,7 @@ sled-agent-client.workspace = true slog.workspace = true strum.workspace = true thiserror.workspace = true +typed-rng.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 2e482fcebf..0506e8286a 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -29,8 +29,10 @@ use nexus_types::inventory::SledAgent; use nexus_types::inventory::Zpool; use std::collections::BTreeMap; use std::collections::BTreeSet; +use std::hash::Hash; use std::sync::Arc; use thiserror::Error; +use typed_rng::UuidRng; use uuid::Uuid; /// Describes an operational 
error encountered during the collection process @@ -86,6 +88,8 @@ pub struct CollectionBuilder { BTreeMap, RotPageFound>>, sleds: BTreeMap, omicron_zones: BTreeMap, + // We just generate one UUID for each collection. + id_rng: UuidRng, } impl CollectionBuilder { @@ -111,6 +115,7 @@ impl CollectionBuilder { rot_pages_found: BTreeMap::new(), sleds: BTreeMap::new(), omicron_zones: BTreeMap::new(), + id_rng: UuidRng::from_entropy(), } } @@ -123,7 +128,7 @@ impl CollectionBuilder { } Collection { - id: Uuid::new_v4(), + id: self.id_rng.next(), errors: self.errors.into_iter().map(|e| e.to_string()).collect(), time_started: self.time_started, time_done: now_db_precision(), @@ -140,6 +145,18 @@ impl CollectionBuilder { } } + /// Within tests, set a seeded RNG for deterministic results. + /// + /// This will ensure that tests that use this builder will produce the same + /// results each time they are run. + pub fn set_rng_seed(&mut self, seed: H) -> &mut Self { + // Important to add some more bytes here, so that builders with the + // same seed but different purposes don't end up with the same UUIDs. + const SEED_EXTRA: &str = "collection-builder"; + self.id_rng.set_seed(seed, SEED_EXTRA); + self + } + /// Record service processor state `sp_state` reported by MGS /// /// `sp_type` and `slot` identify which SP this was. diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 3c4ba12ee4..cb55d9aa7c 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -16,10 +16,10 @@ nexus-inventory.workspace = true nexus-types.workspace = true omicron-common.workspace = true rand.workspace = true -rand_seeder.workspace = true sled-agent-client.workspace = true slog.workspace = true thiserror.workspace = true +typed-rng.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index 0b0d422916..ab40f3bbb7 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -38,7 +38,6 @@ use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use rand::rngs::StdRng; -use rand::RngCore; use rand::SeedableRng; use slog::o; use slog::Logger; @@ -50,6 +49,7 @@ use std::net::Ipv4Addr; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use thiserror::Error; +use typed_rng::UuidRng; use uuid::Uuid; /// Errors encountered while assembling blueprints @@ -223,7 +223,7 @@ impl<'a> BlueprintBuilder<'a> { }) .collect::>()?; Ok(Blueprint { - id: rng.blueprint_rng.next_uuid(), + id: rng.blueprint_rng.next(), blueprint_zones, parent_blueprint_id: None, internal_dns_version, @@ -375,7 +375,7 @@ impl<'a> BlueprintBuilder<'a> { let blueprint_zones = self.zones.into_zones_map(self.policy.sleds.keys().copied()); Blueprint { - id: self.rng.blueprint_rng.next_uuid(), + id: self.rng.blueprint_rng.next(), blueprint_zones, parent_blueprint_id: Some(self.parent_blueprint.id), internal_dns_version: self.internal_dns_version, @@ -452,7 +452,7 @@ impl<'a> BlueprintBuilder<'a> { .collect(); let zone = OmicronZoneConfig { - id: self.rng.zone_rng.next_uuid(), + id: self.rng.zone_rng.next(), underlay_address: ip, zone_type: OmicronZoneType::InternalNtp { address: ntp_address.to_string(), @@ -502,7 +502,7 @@ impl<'a> BlueprintBuilder<'a> { 
let port = omicron_common::address::CRUCIBLE_PORT; let address = SocketAddrV6::new(ip, port, 0, 0).to_string(); let zone = OmicronZoneConfig { - id: self.rng.zone_rng.next_uuid(), + id: self.rng.zone_rng.next(), underlay_address: ip, zone_type: OmicronZoneType::Crucible { address, @@ -589,7 +589,7 @@ impl<'a> BlueprintBuilder<'a> { }; for _ in 0..num_nexus_to_add { - let nexus_id = self.rng.zone_rng.next_uuid(); + let nexus_id = self.rng.zone_rng.next(); let external_ip = self .available_external_ips .next() @@ -617,7 +617,7 @@ impl<'a> BlueprintBuilder<'a> { .next() .ok_or(Error::NoSystemMacAddressAvailable)?; NetworkInterface { - id: self.rng.network_interface_rng.next_uuid(), + id: self.rng.network_interface_rng.next(), kind: NetworkInterfaceKind::Service { id: nexus_id }, name: format!("nexus-{nexus_id}").parse().unwrap(), ip, @@ -739,14 +739,14 @@ struct BlueprintBuilderRng { impl BlueprintBuilderRng { fn new() -> Self { - Self::new_from_rng(StdRng::from_entropy()) + Self::new_from_parent(StdRng::from_entropy()) } - fn new_from_rng(mut root_rng: StdRng) -> Self { - let blueprint_rng = UuidRng::from_root_rng(&mut root_rng, "blueprint"); - let zone_rng = UuidRng::from_root_rng(&mut root_rng, "zone"); + fn new_from_parent(mut parent: StdRng) -> Self { + let blueprint_rng = UuidRng::from_parent_rng(&mut parent, "blueprint"); + let zone_rng = UuidRng::from_parent_rng(&mut parent, "zone"); let network_interface_rng = - UuidRng::from_root_rng(&mut root_rng, "network_interface"); + UuidRng::from_parent_rng(&mut parent, "network_interface"); BlueprintBuilderRng { blueprint_rng, zone_rng, network_interface_rng } } @@ -755,40 +755,7 @@ impl BlueprintBuilderRng { // Important to add some more bytes here, so that builders with the // same seed but different purposes don't end up with the same UUIDs. const SEED_EXTRA: &str = "blueprint-builder"; - let mut seeder = rand_seeder::Seeder::from((seed, SEED_EXTRA)); - *self = Self::new_from_rng(seeder.make_rng::()); - } -} - -#[derive(Debug)] -pub(crate) struct UuidRng { - rng: StdRng, -} - -impl UuidRng { - /// Returns a new `UuidRng` generated from the root RNG. - /// - /// `extra` is a string that should be unique to the purpose of the UUIDs. - fn from_root_rng(root_rng: &mut StdRng, extra: &'static str) -> Self { - let seed = root_rng.next_u64(); - let mut seeder = rand_seeder::Seeder::from((seed, extra)); - Self { rng: seeder.make_rng::() } - } - - /// `extra` is a string that should be unique to the purpose of the UUIDs. - pub(crate) fn from_seed(seed: H, extra: &'static str) -> Self { - let mut seeder = rand_seeder::Seeder::from((seed, extra)); - Self { rng: seeder.make_rng::() } - } - - /// Returns a new UUIDv4 generated from the RNG. - pub(crate) fn next_uuid(&mut self) -> Uuid { - let mut bytes = [0; 16]; - self.rng.fill_bytes(&mut bytes); - // Builder::from_random_bytes will turn the random bytes into a valid - // UUIDv4. (Parts of the system depend on the UUID actually being valid - // v4, so it's important that we don't just use `uuid::from_bytes`.) - uuid::Builder::from_random_bytes(bytes).into_uuid() + *self = Self::new_from_parent(typed_rng::from_seed(seed, SEED_EXTRA)); } } @@ -1013,7 +980,7 @@ pub mod test { assert_eq!(diff.sleds_changed().count(), 0); // The next step is adding these zones to a new sled. 
- let new_sled_id = example.sled_rng.next_uuid(); + let new_sled_id = example.sled_rng.next(); let _ = example.system.sled(SledBuilder::new().id(new_sled_id)).unwrap(); let policy = example.system.to_policy().unwrap(); diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 23df35e9ae..a18e3b71cf 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -5,7 +5,6 @@ //! Example blueprints use crate::blueprint_builder::BlueprintBuilder; -use crate::blueprint_builder::UuidRng; use crate::system::SledBuilder; use crate::system::SystemDescription; use nexus_types::deployment::Blueprint; @@ -14,6 +13,7 @@ use nexus_types::deployment::Policy; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; use sled_agent_client::types::OmicronZonesConfig; +use typed_rng::UuidRng; pub struct ExampleSystem { pub system: SystemDescription, @@ -38,8 +38,7 @@ impl ExampleSystem { ) -> ExampleSystem { let mut system = SystemDescription::new(); let mut sled_rng = UuidRng::from_seed(test_name, "ExampleSystem"); - let sled_ids: Vec<_> = - (0..nsleds).map(|_| sled_rng.next_uuid()).collect(); + let sled_ids: Vec<_> = (0..nsleds).map(|_| sled_rng.next()).collect(); for sled_id in &sled_ids { let _ = system.sled(SledBuilder::new().id(*sled_id)).unwrap(); } @@ -107,6 +106,7 @@ impl ExampleSystem { let blueprint = builder.build(); let mut builder = system.to_collection_builder().expect("failed to build collection"); + builder.set_rng_seed((test_name, "ExampleSystem collection")); for sled_id in blueprint.sleds() { let Some(zones) = blueprint.blueprint_zones.get(&sled_id) else { diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 60eef225d3..84360aded9 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -402,7 +402,7 @@ mod test { verify_blueprint(&blueprint2); // Now add a new sled. - let new_sled_id = example.sled_rng.next_uuid(); + let new_sled_id = example.sled_rng.next(); let _ = example.system.sled(SledBuilder::new().id(new_sled_id)).unwrap(); let policy = example.system.to_policy().unwrap(); diff --git a/typed-rng/Cargo.toml b/typed-rng/Cargo.toml new file mode 100644 index 0000000000..b02a6b974a --- /dev/null +++ b/typed-rng/Cargo.toml @@ -0,0 +1,11 @@ +[package] +name = "typed-rng" +version = "0.1.0" +edition = "2021" + +[dependencies] +omicron-workspace-hack.workspace = true +rand.workspace = true +rand_core.workspace = true +rand_seeder.workspace = true +uuid.workspace = true diff --git a/typed-rng/src/lib.rs b/typed-rng/src/lib.rs new file mode 100644 index 0000000000..5d5e4b1665 --- /dev/null +++ b/typed-rng/src/lib.rs @@ -0,0 +1,260 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Typed RNGs with support for tree-based RNGs. +//! +//! ## [`TypedRng`] +//! +//! This library contains [`TypedRng`], a simple wrapper around a random number +//! generator that generates values of a particular type. +//! +//! At the moment, it only supports stateless value creation, where the +//! `Generatable::generate` method does not have access to anything other than +//! the RNG state. It may be extended in the future with the capability to pass +//! in persistent state. +//! +//! ### Tree-based RNGs +//! 
+//! Many RNG models are organized in a tree structure, where a parent RNG
+//! generates a child RNG. The main benefit to this kind of structure is
+//! stability of output: because the different child RNGs are all independent
+//! of each other, making more calls to one RNG will not affect any others.
+//!
+//! The `TypedRng` struct provides a method to generate a new RNG from a parent
+//! RNG and a seed. This is useful when you want to generate a new RNG that is
+//! independent of the parent RNG, but still deterministic.
+//!
+//! ### Comparison with property-based testing
+//!
+//! In a sense, this is a very simple version of how random values are
+//! generated with property-based testing, with e.g. `Arbitrary` or proptest's
+//! `Strategy`.
+//!
+//! But with property-based tests, the goal is for small changes in the random
+//! input to result in _small_ changes in the output. High-quality libraries
+//! like proptest also focus on shrinking.
+//!
+//! With `Generatable`, the goal is for small changes in the random input (the
+//! seed used to initialize the RNG) to result in _large_ changes in output.
+//! (However, it is possible to delegate the bulk of the operation to a value
+//! generator from a PBT framework). There is also no need for shrinking.
+//!
+//! Overall, this means that `Generatable` can be used in both testing and
+//! production.
+//!
+//! ## Other functionality
+//!
+//! This crate also provides two additional convenience functions:
+//!
+//! - [`from_seed`], which generates a new RNG from a seed.
+//! - [`from_parent_and_seed`], which generates a new RNG from a parent RNG and
+//!   a seed.
+//!
+//! Both these methods are short, but more ergonomic and less prone to misuse
+//! than using the underlying libraries directly.
+
+use std::{fmt, hash::Hash, marker::PhantomData};
+
+use rand::rngs::StdRng;
+use rand_core::{RngCore, SeedableRng};
+use uuid::Uuid;
+
+/// Returns a new RNG generated only from the given seeds `seed` and `extra`.
+/// `seed` may be passed down from another caller, e.g. a test, and `extra`
+/// should be a fixed value specific to the callsite.
+///
+/// This takes two hashable arguments rather than one, because when one is
+/// passing down a seed, it is all too easy to not include any extra
+/// information. That may result in multiple different RNGs generating the same
+/// random values. So we expect that callers will also provide bytes of their
+/// own, specific to the callsite, and gently guide them towards doing the
+/// right thing.
+pub fn from_seed<R, H, H2>(seed: H, extra: H2) -> R
+where
+    R: SeedableRng,
+    H: Hash,
+    H2: Hash,
+{
+    // XXX: is Hash really the right thing to use here? That's what
+    // rand_seeder uses, but something like https://docs.rs/stable-hash may
+    // be more correct.
+
+    let mut seeder = rand_seeder::Seeder::from((seed, extra));
+    seeder.make_rng::<R>()
+}
+
+/// Generates a new RNG from a parent RNG and a hashable seed.
+pub fn from_parent_and_seed<R, R2, H>(parent_rng: &mut R2, seed: H) -> R
+where
+    R: SeedableRng,
+    R2: RngCore,
+    H: Hash,
+{
+    let rng_seed = parent_rng.next_u64();
+
+    let mut seeder = rand_seeder::Seeder::from((rng_seed, seed));
+    seeder.make_rng::<R>()
+}
+
+/// An RNG that can be used to generate values of a single type.
+///
+/// This is a convenience wrapper around a random number generator that
+/// generates values of a particular type. It works against any type that
+/// implements [`Generatable`], and any RNG that implements [`RngCore`].
+pub struct TypedRng<T, R> {
+    rng: R,
+    // PhantomData<fn() -> T> is like PhantomData<T>, but it doesn't inherit
+    // Send/Sync from T. See
+    // https://doc.rust-lang.org/nomicon/phantom-data.html#table-of-phantomdata-patterns.
+    _marker: PhantomData<fn() -> T>,
+}
+
+impl<T: Generatable> TypedRng<T, StdRng> {
+    /// Returns a new typed RNG from entropy.
+    pub fn from_entropy() -> Self {
+        Self::new(StdRng::from_entropy())
+    }
+}
+
+impl<T, R> TypedRng<T, R>
+where
+    T: Generatable,
+    R: RngCore,
+{
+    /// Returns a new typed RNG from the given RNG.
+    pub fn new(rng: R) -> Self {
+        Self { rng, _marker: PhantomData }
+    }
+
+    /// Returns a new typed RNG generated from the parent RNG, along with a
+    /// seed.
+    ///
+    /// Many RNG models are organized in a tree structure, where a parent RNG
+    /// generates a child RNG. The main benefit to this kind of structure is
+    /// stability of output: because the different child RNGs are all
+    /// independent of each other, making more calls to one RNG will not affect
+    /// any others.
+    pub fn from_parent_rng<R2: RngCore, H: Hash>(
+        parent_rng: &mut R2,
+        seed: H,
+    ) -> Self
+    where
+        R: SeedableRng,
+    {
+        Self::new(from_parent_and_seed(parent_rng, seed))
+    }
+
+    /// Returns a new typed RNG generated only from the given seeds `seed` and
+    /// `extra`.
+    ///
+    /// This takes two hashable arguments rather than one, because when one is
+    /// passing down a seed set by e.g. a test, it is all too easy to just pass
+    /// down that seed here. That may result in multiple different RNGs
+    /// generating the same random values. So we expect that callers will also
+    /// provide bytes of their own, specific to the call-site, and gently guide
+    /// them towards doing the right thing.
+    pub fn from_seed<H, H2>(seed: H, extra: H2) -> Self
+    where
+        R: SeedableRng,
+        H: Hash,
+        H2: Hash,
+    {
+        let mut seeder = rand_seeder::Seeder::from((seed, extra));
+        Self::new(seeder.make_rng::<R>())
+    }
+
+    /// Sets the seed for this RNG to the given value.
+    ///
+    /// This takes two hashable arguments rather than one, for much the same
+    /// reason as [`Self::from_seed`].
+    pub fn set_seed<H, H2>(&mut self, seed: H, extra: H2)
+    where
+        R: SeedableRng,
+        H: Hash,
+        H2: Hash,
+    {
+        let mut seeder = rand_seeder::Seeder::from((seed, extra));
+        self.rng = seeder.make_rng::<R>();
+    }
+
+    /// Returns a mutable reference to the RNG inside.
+    pub fn inner_mut(&mut self) -> &mut R {
+        &mut self.rng
+    }
+
+    /// Consumes self, returning the RNG inside.
+    pub fn into_inner(self) -> R {
+        self.rng
+    }
+
+    /// Returns the next value.
+    pub fn next(&mut self) -> T {
+        T::generate(&mut self.rng)
+    }
+}
+
+// --- Trait impls ---
+//
+// These have to be done by hand to avoid a dependency on T.
+
+impl<T, R: Clone> Clone for TypedRng<T, R> {
+    fn clone(&self) -> Self {
+        Self { rng: self.rng.clone(), _marker: PhantomData }
+    }
+}
+
+impl<T, R: Copy> Copy for TypedRng<T, R> {}
+
+impl<T, R: Default> Default for TypedRng<T, R> {
+    fn default() -> Self {
+        Self { rng: R::default(), _marker: PhantomData }
+    }
+}
+
+impl<T, R: fmt::Debug> fmt::Debug for TypedRng<T, R> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("TypedRng").field("rng", &self.rng).finish()
+    }
+}
+
+impl<T, R: PartialEq> PartialEq for TypedRng<T, R> {
+    fn eq(&self, other: &Self) -> bool {
+        self.rng == other.rng
+    }
+}
+
+impl<T, R: Eq> Eq for TypedRng<T, R> {}
+
+/// Represents a value that can be generated.
+///
+/// This is used to generate random values of a type in a deterministic manner,
+/// given a random number generator.
+pub trait Generatable {
+    fn generate<R: RngCore>(rng: &mut R) -> Self;
+}
+
+impl Generatable for Uuid {
+    fn generate<R: RngCore>(rng: &mut R) -> Self {
+        let mut bytes = [0; 16];
+        rng.fill_bytes(&mut bytes);
+        // Builder::from_random_bytes will turn the random bytes into a valid
+        // UUIDv4. (Parts of the system depend on the UUID actually being valid
+        // v4, so it's important that we don't just use `uuid::from_bytes`.)
+        uuid::Builder::from_random_bytes(bytes).into_uuid()
+    }
+}
+
+pub type UuidRng = TypedRng<Uuid, StdRng>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Test that TypedRng is Send and Sync even if T isn't.
+    const _: fn() = || {
+        fn assert_send_sync<T: Send + Sync>() {}
+        struct NotSendSync(*mut u8);
+        assert_send_sync::<TypedRng<NotSendSync, StdRng>>();
+    };
+}

From e5094dceedd7bb00df307650de50e365f128d041 Mon Sep 17 00:00:00 2001
From: Rain
Date: Thu, 28 Mar 2024 18:59:14 -0700
Subject: [PATCH 011/334] [reconfigurator] use tabled to display blueprints
 and diffs (#5270)

While developing #5238, I noticed that the output was getting
significantly busier and less aligned. I decided to prototype out using
`tabled` to display outputs, and I really liked the results. Examples
that cover all of the cases are included in the PR.

In the future I'd also like to add color support on the CLI, and expand
it to inventory and `omdb` (it's similar except it doesn't have the
zone policy table).

Some other changes that are bundled into this PR:

* Sort by (zone type, zone ID) rather than zone ID, to keep zones of
  the same type grouped together.
* Moved unchanged data to the top to allow users to see less
  scrollback.
* Moved metadata to the bottom for the same reason.
* Add information about the zone config being changed.
* Change `Blueprint::diff_sleds` and
  `Blueprint::diff_sleds_from_collection` to
  `Blueprint::diff_since_blueprint` and `diff_since_collection`,
  respectively.
* Reordered `diff_since_blueprint`'s arguments so that `self` is the
  "after" blueprint and the argument is the "before" blueprint, to
  align with `diff_since_collection`. (I found that surprising!)
* Renamed the diff type from `OmicronZonesDiff` to `BlueprintDiff`,
  since it's going to contain a lot more than zones.
* Return an error from the diff methods, specifically if the before and
  after have the same zone ID but different types.

Depends on #5238 and #5341.
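For illustration, the new call pattern looks roughly like this (a sketch
only: `new`, `old`, and `collection` are placeholder bindings for a newer
blueprint, an older blueprint, and an inventory collection already in
scope; the method names and `display()` come from the diff below):

    // `self` is the "after" state; the argument is the "before" state.
    let diff = new
        .diff_since_blueprint(&old)
        .expect("failed to diff blueprints");
    println!("{}", diff.display());

    // Diffing against an inventory collection follows the same pattern.
    let diff = new
        .diff_since_collection(&collection)
        .expect("failed to diff blueprint against collection");
    println!("{}", diff.display());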
--- Cargo.lock | 1 + clients/sled-agent-client/src/lib.rs | 71 +- dev-tools/omdb/src/bin/omdb/db.rs | 2 +- dev-tools/omdb/src/bin/omdb/nexus.rs | 3 +- dev-tools/reconfigurator-cli/src/main.rs | 10 +- .../db-queries/src/db/datastore/deployment.rs | 11 +- nexus/inventory/src/collector.rs | 2 +- nexus/reconfigurator/execution/src/dns.rs | 2 +- .../planning/src/blueprint_builder.rs | 42 +- nexus/reconfigurator/planning/src/planner.rs | 183 ++- .../output/blueprint_builder_initial_diff.txt | 54 + .../output/planner_basic_add_sled_2_3.txt | 107 +- .../output/planner_basic_add_sled_3_5.txt | 128 +- .../output/planner_nonprovisionable_1_2.txt | 181 ++- .../output/planner_nonprovisionable_2_2a.txt | 104 ++ .../output/planner_nonprovisionable_bp2.txt | 94 ++ nexus/types/Cargo.toml | 1 + nexus/types/src/deployment.rs | 1381 +++++++++++++---- nexus/types/src/lib.rs | 1 + nexus/types/src/sectioned_table.rs | 357 +++++ 20 files changed, 2123 insertions(+), 612 deletions(-) create mode 100644 nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt create mode 100644 nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt create mode 100644 nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt create mode 100644 nexus/types/src/sectioned_table.rs diff --git a/Cargo.lock b/Cargo.lock index 63dc1cc735..e1d684da52 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4937,6 +4937,7 @@ dependencies = [ "sled-agent-client", "steno", "strum 0.26.1", + "tabled", "thiserror", "uuid 1.7.0", ] diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 0426982d3e..2901226d16 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -8,6 +8,7 @@ use anyhow::Context; use async_trait::async_trait; use omicron_common::api::internal::shared::NetworkInterface; use std::convert::TryFrom; +use std::fmt; use std::hash::Hash; use std::net::IpAddr; use std::net::SocketAddr; @@ -56,25 +57,65 @@ impl Eq for types::OmicronZoneConfig {} impl Eq for types::OmicronZoneType {} impl Eq for types::OmicronZoneDataset {} +/// Like [`types::OmicronZoneType`], but without any associated data. +/// +/// We have a few enums of this form floating around. This particular one is +/// meant to correspond exactly 1:1 with `OmicronZoneType`. +/// +/// The [`fmt::Display`] impl for this type is a human-readable label, meant +/// for testing and reporting. 
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub enum ZoneKind { + BoundaryNtp, + Clickhouse, + ClickhouseKeeper, + CockroachDb, + Crucible, + CruciblePantry, + ExternalDns, + InternalDns, + InternalNtp, + Nexus, + Oximeter, +} + +impl fmt::Display for ZoneKind { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + ZoneKind::BoundaryNtp => write!(f, "boundary_ntp"), + ZoneKind::Clickhouse => write!(f, "clickhouse"), + ZoneKind::ClickhouseKeeper => write!(f, "clickhouse_keeper"), + ZoneKind::CockroachDb => write!(f, "cockroach_db"), + ZoneKind::Crucible => write!(f, "crucible"), + ZoneKind::CruciblePantry => write!(f, "crucible_pantry"), + ZoneKind::ExternalDns => write!(f, "external_dns"), + ZoneKind::InternalDns => write!(f, "internal_dns"), + ZoneKind::InternalNtp => write!(f, "internal_ntp"), + ZoneKind::Nexus => write!(f, "nexus"), + ZoneKind::Oximeter => write!(f, "oximeter"), + } + } +} + impl types::OmicronZoneType { - /// Human-readable label describing what kind of zone this is - /// - /// This is just use for testing and reporting. - pub fn label(&self) -> impl std::fmt::Display { + /// Returns the [`ZoneKind`] corresponding to this variant. + pub fn kind(&self) -> ZoneKind { match self { - types::OmicronZoneType::BoundaryNtp { .. } => "boundary_ntp", - types::OmicronZoneType::Clickhouse { .. } => "clickhouse", + types::OmicronZoneType::BoundaryNtp { .. } => ZoneKind::BoundaryNtp, + types::OmicronZoneType::Clickhouse { .. } => ZoneKind::Clickhouse, types::OmicronZoneType::ClickhouseKeeper { .. } => { - "clickhouse_keeper" + ZoneKind::ClickhouseKeeper + } + types::OmicronZoneType::CockroachDb { .. } => ZoneKind::CockroachDb, + types::OmicronZoneType::Crucible { .. } => ZoneKind::Crucible, + types::OmicronZoneType::CruciblePantry { .. } => { + ZoneKind::CruciblePantry } - types::OmicronZoneType::CockroachDb { .. } => "cockroach_db", - types::OmicronZoneType::Crucible { .. } => "crucible", - types::OmicronZoneType::CruciblePantry { .. } => "crucible_pantry", - types::OmicronZoneType::ExternalDns { .. } => "external_dns", - types::OmicronZoneType::InternalDns { .. } => "internal_dns", - types::OmicronZoneType::InternalNtp { .. } => "internal_ntp", - types::OmicronZoneType::Nexus { .. } => "nexus", - types::OmicronZoneType::Oximeter { .. } => "oximeter", + types::OmicronZoneType::ExternalDns { .. } => ZoneKind::ExternalDns, + types::OmicronZoneType::InternalDns { .. } => ZoneKind::InternalDns, + types::OmicronZoneType::InternalNtp { .. } => ZoneKind::InternalNtp, + types::OmicronZoneType::Nexus { .. } => ZoneKind::Nexus, + types::OmicronZoneType::Oximeter { .. 
} => ZoneKind::Oximeter, } } diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 855bbe063b..e1e71ff3d1 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -3244,7 +3244,7 @@ fn inv_collection_print_sleds(collection: &Collection) { println!(" ZONES FOUND"); for z in &zones.zones.zones { - println!(" zone {} (type {})", z.id, z.zone_type.label()); + println!(" zone {} (type {})", z.id, z.zone_type.kind()); } } else { println!(" warning: no zone information found"); diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 26f2e07a41..d3d539cb2c 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -972,7 +972,8 @@ async fn cmd_nexus_blueprints_diff( let b2 = client.blueprint_view(&args.blueprint2_id).await.with_context( || format!("fetching blueprint {}", args.blueprint2_id), )?; - println!("{}", b1.diff_sleds(&b2).display()); + let diff = b2.diff_since_blueprint(&b1).context("diffing blueprints")?; + println!("{}", diff.display()); Ok(()) } diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 8ba71d9819..358873db44 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -669,8 +669,10 @@ fn cmd_blueprint_diff( .get(&blueprint2_id) .ok_or_else(|| anyhow!("no such blueprint: {}", blueprint2_id))?; - let sled_diff = blueprint1.diff_sleds(&blueprint2).display().to_string(); - swriteln!(rv, "{}", sled_diff); + let sled_diff = blueprint2 + .diff_since_blueprint(&blueprint1) + .context("failed to diff blueprints")?; + swriteln!(rv, "{}", sled_diff.display()); // Diff'ing DNS is a little trickier. First, compute what DNS should be for // each blueprint. To do that we need to construct a list of sleds suitable @@ -795,7 +797,9 @@ fn cmd_blueprint_diff_inventory( .get(&blueprint_id) .ok_or_else(|| anyhow!("no such blueprint: {}", blueprint_id))?; - let diff = blueprint.diff_sleds_from_collection(&collection); + let diff = blueprint + .diff_since_collection(&collection) + .context("failed to diff blueprint from inventory collection")?; Ok(Some(diff.display().to_string())) } diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 02645ca4f6..8f6b9abf58 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -486,6 +486,11 @@ impl DataStore { } } + // Sort all zones to match what blueprint builders do. 
+ for (_, zones_config) in blueprint_zones.iter_mut() { + zones_config.sort(); + } + bail_unless!( omicron_zone_nics.is_empty(), "found extra Omicron zone NICs: {:?}", @@ -1185,6 +1190,7 @@ mod tests { use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::Generation; use omicron_test_utils::dev; + use pretty_assertions::assert_eq; use rand::thread_rng; use rand::Rng; use std::mem; @@ -1515,7 +1521,10 @@ mod tests { .blueprint_read(&opctx, &authz_blueprint2) .await .expect("failed to read collection back"); - println!("diff: {}", blueprint2.diff_sleds(&blueprint_read).display()); + let diff = blueprint_read + .diff_since_blueprint(&blueprint2) + .expect("failed to diff blueprints"); + println!("diff: {}", diff.display()); assert_eq!(blueprint2, blueprint_read); assert_eq!(blueprint2.internal_dns_version, new_internal_dns_version); assert_eq!(blueprint2.external_dns_version, new_external_dns_version); diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index ad5ae7d024..7dbffc396c 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -490,7 +490,7 @@ mod test { &mut s, " zone {} type {}\n", zone.id, - zone.zone_type.label(), + zone.zone_type.kind(), ) .unwrap(); } diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 782e673a17..fc95414103 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -281,7 +281,7 @@ pub fn blueprint_internal_dns_config( let context = || { format!( "parsing {} zone with id {}", - zone.config.zone_type.label(), + zone.config.zone_type.kind(), zone.config.id ) }; diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index ab40f3bbb7..dc0f1e501c 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -860,6 +860,7 @@ pub mod test { use crate::example::example; use crate::example::ExampleSystem; use crate::system::SledBuilder; + use expectorate::assert_contents; use omicron_common::address::IpRange; use omicron_test_utils::dev::test_setup_log; use sled_agent_client::types::{OmicronZoneConfig, OmicronZoneType}; @@ -904,14 +905,23 @@ pub mod test { .expect("failed to create initial blueprint"); verify_blueprint(&blueprint_initial); - let diff = blueprint_initial.diff_sleds_from_collection(&collection); + let diff = + blueprint_initial.diff_since_collection(&collection).unwrap(); + // There are some differences with even a no-op diff between a + // collection and a blueprint, such as new data being added to + // blueprints like DNS generation numbers. println!( - "collection -> initial blueprint (expected no changes):\n{}", + "collection -> initial blueprint \ + (expected no non-trivial changes):\n{}", diff.display() ); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - assert_eq!(diff.sleds_changed().count(), 0); + assert_contents( + "tests/output/blueprint_builder_initial_diff.txt", + &diff.display().to_string(), + ); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + assert_eq!(diff.sleds_modified().count(), 0); // Test a no-op blueprint. 
let builder = BlueprintBuilder::new_based_on( @@ -925,14 +935,14 @@ pub mod test { .expect("failed to create builder"); let blueprint = builder.build(); verify_blueprint(&blueprint); - let diff = blueprint_initial.diff_sleds(&blueprint); + let diff = blueprint.diff_since_blueprint(&blueprint_initial).unwrap(); println!( "initial blueprint -> next blueprint (expected no changes):\n{}", diff.display() ); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - assert_eq!(diff.sleds_changed().count(), 0); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + assert_eq!(diff.sleds_modified().count(), 0); logctx.cleanup_successful(); } @@ -970,14 +980,14 @@ pub mod test { let blueprint2 = builder.build(); verify_blueprint(&blueprint2); - let diff = blueprint1.diff_sleds(&blueprint2); + let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); println!( "initial blueprint -> next blueprint (expected no changes):\n{}", diff.display() ); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - assert_eq!(diff.sleds_changed().count(), 0); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + assert_eq!(diff.sleds_modified().count(), 0); // The next step is adding these zones to a new sled. let new_sled_id = example.sled_rng.next(); @@ -1003,12 +1013,12 @@ pub mod test { let blueprint3 = builder.build(); verify_blueprint(&blueprint3); - let diff = blueprint2.diff_sleds(&blueprint3); + let diff = blueprint3.diff_since_blueprint(&blueprint2).unwrap(); println!("expecting new NTP and Crucible zones:\n{}", diff.display()); // No sleds were changed or removed. - assert_eq!(diff.sleds_changed().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); + assert_eq!(diff.sleds_modified().count(), 0); + assert_eq!(diff.sleds_removed().len(), 0); // One sled was added. let sleds: Vec<_> = diff.sleds_added().collect(); diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 84360aded9..ce5660e7f6 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -338,8 +338,12 @@ mod test { use crate::example::example; use crate::example::ExampleSystem; use crate::system::SledBuilder; + use chrono::NaiveDateTime; + use chrono::TimeZone; + use chrono::Utc; use expectorate::assert_contents; use nexus_inventory::now_db_precision; + use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; @@ -394,11 +398,11 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff_sleds(&blueprint2); + let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); println!("1 -> 2 (expected no changes):\n{}", diff.display()); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - assert_eq!(diff.sleds_changed().count(), 0); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + assert_eq!(diff.sleds_modified().count(), 0); verify_blueprint(&blueprint2); // Now add a new sled. 
@@ -422,7 +426,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint2.diff_sleds(&blueprint3); + let diff = blueprint3.diff_since_blueprint(&blueprint2).unwrap(); println!( "2 -> 3 (expect new NTP zone on new sled):\n{}", diff.display() @@ -443,8 +447,8 @@ mod test { sled_zones.zones[0].config.zone_type, OmicronZoneType::InternalNtp { .. } )); - assert_eq!(diff.sleds_removed().count(), 0); - assert_eq!(diff.sleds_changed().count(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + assert_eq!(diff.sleds_modified().count(), 0); verify_blueprint(&blueprint3); // Check that with no change in inventory, the planner makes no changes. @@ -463,11 +467,11 @@ mod test { .with_rng_seed((TEST_NAME, "bp4")) .plan() .expect("failed to plan"); - let diff = blueprint3.diff_sleds(&blueprint4); + let diff = blueprint4.diff_since_blueprint(&blueprint3).unwrap(); println!("3 -> 4 (expected no changes):\n{}", diff.display()); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - assert_eq!(diff.sleds_changed().count(), 0); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + assert_eq!(diff.sleds_modified().count(), 0); verify_blueprint(&blueprint4); // Now update the inventory to have the requested NTP zone. @@ -506,15 +510,15 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint3.diff_sleds(&blueprint5); + let diff = blueprint5.diff_since_blueprint(&blueprint3).unwrap(); println!("3 -> 5 (expect Crucible zones):\n{}", diff.display()); assert_contents( "tests/output/planner_basic_add_sled_3_5.txt", &diff.display().to_string(), ); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - let sleds = diff.sleds_changed().collect::>(); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + let sleds = diff.sleds_modified().collect::>(); assert_eq!(sleds.len(), 1); let (sled_id, sled_changes) = &sleds[0]; assert_eq!( @@ -522,8 +526,8 @@ mod test { sled_changes.generation_before.next() ); assert_eq!(*sled_id, new_sled_id); - assert_eq!(sled_changes.zones_removed().count(), 0); - assert_eq!(sled_changes.zones_changed().count(), 0); + assert_eq!(sled_changes.zones_removed().len(), 0); + assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); assert_eq!(zones.len(), 10); for zone in &zones { @@ -548,11 +552,11 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint5.diff_sleds(&blueprint6); + let diff = blueprint6.diff_since_blueprint(&blueprint5).unwrap(); println!("5 -> 6 (expect no changes):\n{}", diff.display()); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - assert_eq!(diff.sleds_changed().count(), 0); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + assert_eq!(diff.sleds_modified().count(), 0); verify_blueprint(&blueprint6); logctx.cleanup_successful(); @@ -624,7 +628,7 @@ mod test { internal_dns_version, external_dns_version, &policy, - "add more Nexus", + "test_blueprint2", &collection, ) .expect("failed to create planner") @@ -632,16 +636,16 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff_sleds(&blueprint2); + let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); println!("1 -> 2 (added additional Nexus zones):\n{}", diff.display()); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - let 
mut sleds = diff.sleds_changed().collect::>(); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + let mut sleds = diff.sleds_modified().collect::>(); assert_eq!(sleds.len(), 1); let (changed_sled_id, sled_changes) = sleds.pop().unwrap(); assert_eq!(changed_sled_id, sled_id); - assert_eq!(sled_changes.zones_removed().count(), 0); - assert_eq!(sled_changes.zones_changed().count(), 0); + assert_eq!(sled_changes.zones_removed().len(), 0); + assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); assert_eq!(zones.len(), policy.target_nexus_zone_count - 1); for zone in &zones { @@ -698,7 +702,7 @@ mod test { Generation::new(), Generation::new(), &policy, - "add more Nexus", + "test_blueprint2", &collection, ) .expect("failed to create planner") @@ -706,11 +710,11 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff_sleds(&blueprint2); + let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); println!("1 -> 2 (added additional Nexus zones):\n{}", diff.display()); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - let sleds = diff.sleds_changed().collect::>(); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + let sleds = diff.sleds_modified().collect::>(); // All 3 sleds should get additional Nexus zones. We expect a total of // 11 new Nexus zones, which should be spread evenly across the three @@ -718,8 +722,8 @@ mod test { assert_eq!(sleds.len(), 3); let mut total_new_nexus_zones = 0; for (sled_id, sled_changes) in sleds { - assert_eq!(sled_changes.zones_removed().count(), 0); - assert_eq!(sled_changes.zones_changed().count(), 0); + assert_eq!(sled_changes.zones_removed().len(), 0); + assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); match zones.len() { n @ (3 | 4) => { @@ -814,13 +818,13 @@ mod test { // When the planner gets smarter about removing zones from expunged // and/or removed sleds, we'll have to adjust this number. policy.target_nexus_zone_count = 16; - let blueprint2 = Planner::new_based_on( + let mut blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, Generation::new(), Generation::new(), &policy, - "add more Nexus", + "test_blueprint2", &collection, ) .expect("failed to create planner") @@ -828,15 +832,24 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint1.diff_sleds(&blueprint2); + // Define a time_created for consistent output across runs. + blueprint2.time_created = + Utc.from_utc_datetime(&NaiveDateTime::UNIX_EPOCH); + + assert_contents( + "tests/output/planner_nonprovisionable_bp2.txt", + &blueprint2.display().to_string(), + ); + + let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); println!("1 -> 2 (added additional Nexus zones):\n{}", diff.display()); assert_contents( "tests/output/planner_nonprovisionable_1_2.txt", &diff.display().to_string(), ); - assert_eq!(diff.sleds_added().count(), 0); - assert_eq!(diff.sleds_removed().count(), 0); - let sleds = diff.sleds_changed().collect::>(); + assert_eq!(diff.sleds_added().len(), 0); + assert_eq!(diff.sleds_removed().len(), 0); + let sleds = diff.sleds_modified().collect::>(); // Only 2 of the 3 sleds should get additional Nexus zones. 
We expect a // total of 12 new Nexus zones, which should be spread evenly across the @@ -848,8 +861,8 @@ mod test { assert!(sled_id != nonprovisionable_sled_id); assert!(sled_id != expunged_sled_id); assert!(sled_id != decommissioned_sled_id); - assert_eq!(sled_changes.zones_removed().count(), 0); - assert_eq!(sled_changes.zones_changed().count(), 0); + assert_eq!(sled_changes.zones_removed().len(), 0); + assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); match zones.len() { n @ (5 | 6) => { @@ -868,6 +881,90 @@ mod test { } assert_eq!(total_new_nexus_zones, 11); + // --- + + // Also poke at some of the config by hand; we'll use this to test out + // diff output. This isn't a real blueprint, just one that we're + // creating to test diff output. + // + // Some of the things we're testing here: + // + // * modifying zones + // * removing zones + // * removing sleds + // * for modified sleds' zone config generation, both a bump and the + // generation staying the same (the latter should produce a warning) + let mut blueprint2a = blueprint2.clone(); + + enum NextCrucibleMutate { + Modify, + Remove, + Done, + } + let mut next = NextCrucibleMutate::Modify; + + // Leave the non-provisionable sled's generation alone. + let zones = &mut blueprint2a + .blueprint_zones + .get_mut(&nonprovisionable_sled_id) + .unwrap() + .zones; + + zones.retain_mut(|zone| { + if let OmicronZoneType::Nexus { internal_address, .. } = + &mut zone.config.zone_type + { + // Change one of these params to ensure that the diff output + // makes sense. + *internal_address = format!("{internal_address}foo"); + true + } else if let OmicronZoneType::Crucible { .. } = + zone.config.zone_type + { + match next { + NextCrucibleMutate::Modify => { + zone.disposition = BlueprintZoneDisposition::Quiesced; + next = NextCrucibleMutate::Remove; + true + } + NextCrucibleMutate::Remove => { + next = NextCrucibleMutate::Done; + false + } + NextCrucibleMutate::Done => true, + } + } else if let OmicronZoneType::InternalNtp { .. } = + &mut zone.config.zone_type + { + // Change the underlay IP. 
+ let mut segments = zone.config.underlay_address.segments(); + segments[0] += 1; + zone.config.underlay_address = segments.into(); + true + } else { + true + } + }); + + let expunged_zones = + blueprint2a.blueprint_zones.get_mut(&expunged_sled_id).unwrap(); + expunged_zones.zones.clear(); + expunged_zones.generation = expunged_zones.generation.next(); + + blueprint2a.blueprint_zones.remove(&decommissioned_sled_id); + + blueprint2a.external_dns_version = + blueprint2a.external_dns_version.next(); + + let diff = blueprint2a.diff_since_blueprint(&blueprint2).unwrap(); + println!("2 -> 2a (manually modified zones):\n{}", diff.display()); + assert_contents( + "tests/output/planner_nonprovisionable_2_2a.txt", + &diff.display().to_string(), + ); + + // --- + logctx.cleanup_successful(); } } diff --git a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt new file mode 100644 index 0000000000..7323008ad1 --- /dev/null +++ b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt @@ -0,0 +1,54 @@ +from: collection 094d362b-7d79-49e7-a244-134276cca8fe +to: blueprint 9d2c007b-46f1-4ff2-8b4c-8a5767030f76 + + ------------------------------------------------------------------------------------------------------ + zone type zone ID disposition underlay IP status + ------------------------------------------------------------------------------------------------------ + + UNCHANGED SLEDS: + + sled 08c7046b-c9c4-4368-881f-19a72df22143: zones at generation 2 + crucible 44afce85-3377-4b20-a398-517c1579df4d in service fd00:1122:3344:103::23 + crucible 4644ea0c-0ec3-41be-a356-660308e1c3fc in service fd00:1122:3344:103::2c + crucible 55f4d117-0b9d-4256-a2c0-f46d3ed5fff9 in service fd00:1122:3344:103::25 + crucible 5c6a4628-8831-483b-995f-79b9126c4d04 in service fd00:1122:3344:103::28 + crucible 6a01210c-45ed-41a5-9230-8e05ecf5dd8f in service fd00:1122:3344:103::29 + crucible 7004cab9-dfc0-43ba-92d3-58d4ced66025 in service fd00:1122:3344:103::24 + crucible 79552859-fbd3-43bb-a9d3-6baba25558f8 in service fd00:1122:3344:103::26 + crucible 90696819-9b53-485a-9c65-ca63602e843e in service fd00:1122:3344:103::27 + crucible c99525b3-3680-4df6-9214-2ee3e1020e8b in service fd00:1122:3344:103::2a + crucible f42959d3-9eef-4e3b-b404-6177ce3ec7a1 in service fd00:1122:3344:103::2b + internal_ntp c81c9d4a-36d7-4796-9151-f564d3735152 in service fd00:1122:3344:103::21 + nexus b2573120-9c91-4ed7-8b4f-a7bfe8dbc807 in service fd00:1122:3344:103::22 + + sled 84ac367e-9b03-4e9d-a846-df1a08deee6c: zones at generation 2 + crucible 0faa9350-2c02-47c7-a0a6-9f4afd69152c in service fd00:1122:3344:101::2c + crucible 5b44003e-1a3d-4152-b606-872c72efce0e in service fd00:1122:3344:101::25 + crucible 943fea7a-9458-4935-9dc7-01ee5cfe5a02 in service fd00:1122:3344:101::29 + crucible 95c3b6d1-2592-4252-b5c1-5d0faf3ce9c9 in service fd00:1122:3344:101::24 + crucible a5a0b7a9-37c9-4dbd-8393-ec7748ada3b0 in service fd00:1122:3344:101::2b + crucible a9a6a974-8953-4783-b815-da46884f2c02 in service fd00:1122:3344:101::23 + crucible aa25add8-60b0-4ace-ac60-15adcdd32d50 in service fd00:1122:3344:101::2a + crucible b6f2dd1e-7f98-4a68-9df2-b33c69d1f7ea in service fd00:1122:3344:101::27 + crucible dc22d470-dc46-436b-9750-25c8d7d369e2 in service fd00:1122:3344:101::26 + crucible f7e434f9-6d4a-476b-a9e2-48d6ee28a08e in service fd00:1122:3344:101::28 + internal_ntp 38b047ea-e3de-4859-b8e0-70cac5871446 in service fd00:1122:3344:101::21 + 
nexus fb36b9dc-273a-4bc3-aaa9-19ee4d0ef552 in service fd00:1122:3344:101::22 + + sled be7f4375-2a6b-457f-b1a4-3074a715e5fe: zones at generation 2 + crucible 248db330-56e6-4c7e-b5ff-9cd6cbcb210a in service fd00:1122:3344:102::2c + crucible 353b0aff-4c71-4fae-a6bd-adcb1d2a1a1d in service fd00:1122:3344:102::29 + crucible 4330134c-41b9-4097-aa0b-3eaefa06d473 in service fd00:1122:3344:102::24 + crucible 65d03287-e43f-45f4-902e-0a5e4638f31a in service fd00:1122:3344:102::25 + crucible 6a5901b1-f9d7-425c-8ecb-a786c900f217 in service fd00:1122:3344:102::27 + crucible 9b722fea-a186-4bc3-bc37-ce7f6de6a796 in service fd00:1122:3344:102::23 + crucible b3583b5f-4a62-4471-9be7-41e61578de4c in service fd00:1122:3344:102::2a + crucible bac92034-b9e6-4e8b-9ffb-dbba9caec88d in service fd00:1122:3344:102::28 + crucible d9653001-f671-4905-a410-6a7abc358318 in service fd00:1122:3344:102::2b + crucible edaca77e-5806-446a-b00c-125962cd551d in service fd00:1122:3344:102::26 + internal_ntp aac3ab51-9e2b-4605-9bf6-e3eb3681c2b5 in service fd00:1122:3344:102::21 + nexus 29278a22-1ba1-4117-bfdb-39fcb9ae7fd1 in service fd00:1122:3344:102::22 + + METADATA: ++ internal DNS version: (not present in collection) -> 1 ++ external DNS version: (not present in collection) -> 1 diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt index 9f7cab737f..3aad697aa0 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt @@ -1,48 +1,59 @@ -diff blueprint 979ef428-0bdd-4622-8a72-0719e942b415 blueprint 4171ad05-89dd-474b-846b-b007e4346366 ---- blueprint 979ef428-0bdd-4622-8a72-0719e942b415 -+++ blueprint 4171ad05-89dd-474b-846b-b007e4346366 - sled 41f45d9f-766e-4ca6-a881-61ee45c80f57 - zone config generation 2 - 267ed614-92af-4b9d-bdba-c2881c2e43a2 in service internal_ntp [underlay IP fd00:1122:3344:103::21] (unchanged) - 322ee9f1-8903-4542-a0a8-a54cefabdeca in service crucible [underlay IP fd00:1122:3344:103::24] (unchanged) - 4ab1650f-32c5-447f-939d-64b8103a7645 in service crucible [underlay IP fd00:1122:3344:103::2a] (unchanged) - 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service crucible [underlay IP fd00:1122:3344:103::27] (unchanged) - 6e811d86-8aa7-4660-935b-84b4b7721b10 in service crucible [underlay IP fd00:1122:3344:103::2b] (unchanged) - 747d2426-68bf-4c22-8806-41d290b5d5f5 in service crucible [underlay IP fd00:1122:3344:103::25] (unchanged) - 7fbd2c38-5dc3-48c4-b061-558a2041d70f in service crucible [underlay IP fd00:1122:3344:103::2c] (unchanged) - 8e9e923e-62b1-4cbc-9f59-d6397e338b6b in service crucible [underlay IP fd00:1122:3344:103::29] (unchanged) - b14d5478-1a0e-4b90-b526-36b06339dfc4 in service crucible [underlay IP fd00:1122:3344:103::28] (unchanged) - b40f7c7b-526c-46c8-ae33-67280c280eb7 in service crucible [underlay IP fd00:1122:3344:103::23] (unchanged) - be97b92b-38d6-422a-8c76-d37060f75bd2 in service crucible [underlay IP fd00:1122:3344:103::26] (unchanged) - cc816cfe-3869-4dde-b596-397d41198628 in service nexus [underlay IP fd00:1122:3344:103::22] (unchanged) - sled 43677374-8d2f-4deb-8a41-eeea506db8e0 - zone config generation 2 - 02acbe6a-1c88-47e3-94c3-94084cbde098 in service crucible [underlay IP fd00:1122:3344:101::27] (unchanged) - 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service crucible [underlay IP fd00:1122:3344:101::26] (unchanged) - 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service 
internal_ntp [underlay IP fd00:1122:3344:101::21] (unchanged) - 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service crucible [underlay IP fd00:1122:3344:101::24] (unchanged) - 2a455c35-eb3c-4c73-ab6c-d0a706e25316 in service crucible [underlay IP fd00:1122:3344:101::29] (unchanged) - 3eda924f-22a9-4f3e-9a1b-91d1c47601ab in service crucible [underlay IP fd00:1122:3344:101::23] (unchanged) - 587be699-a320-4c79-b320-128d9ecddc0b in service crucible [underlay IP fd00:1122:3344:101::2b] (unchanged) - 6fa06115-4959-4913-8e7b-dd70d7651f07 in service crucible [underlay IP fd00:1122:3344:101::2c] (unchanged) - 8f3a1cc5-9195-4a30-ad02-b804278fe639 in service crucible [underlay IP fd00:1122:3344:101::28] (unchanged) - a1696cd4-588c-484a-b95b-66e824c0ce05 in service crucible [underlay IP fd00:1122:3344:101::25] (unchanged) - a2079cbc-a69e-41a1-b1e0-fbcb972d03f6 in service crucible [underlay IP fd00:1122:3344:101::2a] (unchanged) - c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service nexus [underlay IP fd00:1122:3344:101::22] (unchanged) - sled 590e3034-d946-4166-b0e5-2d0034197a07 - zone config generation 2 - 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service crucible [underlay IP fd00:1122:3344:102::2a] (unchanged) - 47199d48-534c-4267-a654-d2d90e64b498 in service internal_ntp [underlay IP fd00:1122:3344:102::21] (unchanged) - 56d5d7cf-db2c-40a3-a775-003241ad4820 in service crucible [underlay IP fd00:1122:3344:102::29] (unchanged) - 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service crucible [underlay IP fd00:1122:3344:102::2b] (unchanged) - 704e1fed-f8d6-4cfa-a470-bad27fdc06d1 in service nexus [underlay IP fd00:1122:3344:102::22] (unchanged) - 7a9f60d3-2b66-4547-9b63-7d4f7a8b6382 in service crucible [underlay IP fd00:1122:3344:102::26] (unchanged) - 93f2f40c-5616-4d8d-8519-ec6debdcede0 in service crucible [underlay IP fd00:1122:3344:102::2c] (unchanged) - ab7ba6df-d401-40bd-940e-faf57c57aa2a in service crucible [underlay IP fd00:1122:3344:102::28] (unchanged) - af322036-371f-437c-8c08-7f40f3f1403b in service crucible [underlay IP fd00:1122:3344:102::23] (unchanged) - d637264f-6f40-44c2-8b7e-a179430210d2 in service crucible [underlay IP fd00:1122:3344:102::25] (unchanged) - dce226c9-7373-4bfa-8a94-79dc472857a6 in service crucible [underlay IP fd00:1122:3344:102::27] (unchanged) - edabedf3-839c-488d-ad6f-508ffa864674 in service crucible [underlay IP fd00:1122:3344:102::24] (unchanged) -+ sled b59ec570-2abb-4017-80ce-129d94e7a025 (added) -+ zone config generation 2 -+ 2d73d30e-ca47-46a8-9c12-917d4ab824b6 in service internal_ntp [underlay IP fd00:1122:3344:104::21] (added) +from: blueprint 979ef428-0bdd-4622-8a72-0719e942b415 +to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 + + ------------------------------------------------------------------------------------------------------ + zone type zone ID disposition underlay IP status + ------------------------------------------------------------------------------------------------------ + + UNCHANGED SLEDS: + + sled 41f45d9f-766e-4ca6-a881-61ee45c80f57: zones at generation 2 + crucible 322ee9f1-8903-4542-a0a8-a54cefabdeca in service fd00:1122:3344:103::24 + crucible 4ab1650f-32c5-447f-939d-64b8103a7645 in service fd00:1122:3344:103::2a + crucible 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service fd00:1122:3344:103::27 + crucible 6e811d86-8aa7-4660-935b-84b4b7721b10 in service fd00:1122:3344:103::2b + crucible 747d2426-68bf-4c22-8806-41d290b5d5f5 in service fd00:1122:3344:103::25 + crucible 7fbd2c38-5dc3-48c4-b061-558a2041d70f in service fd00:1122:3344:103::2c + crucible 
8e9e923e-62b1-4cbc-9f59-d6397e338b6b in service fd00:1122:3344:103::29 + crucible b14d5478-1a0e-4b90-b526-36b06339dfc4 in service fd00:1122:3344:103::28 + crucible b40f7c7b-526c-46c8-ae33-67280c280eb7 in service fd00:1122:3344:103::23 + crucible be97b92b-38d6-422a-8c76-d37060f75bd2 in service fd00:1122:3344:103::26 + internal_ntp 267ed614-92af-4b9d-bdba-c2881c2e43a2 in service fd00:1122:3344:103::21 + nexus cc816cfe-3869-4dde-b596-397d41198628 in service fd00:1122:3344:103::22 + + sled 43677374-8d2f-4deb-8a41-eeea506db8e0: zones at generation 2 + crucible 02acbe6a-1c88-47e3-94c3-94084cbde098 in service fd00:1122:3344:101::27 + crucible 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service fd00:1122:3344:101::26 + crucible 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service fd00:1122:3344:101::24 + crucible 2a455c35-eb3c-4c73-ab6c-d0a706e25316 in service fd00:1122:3344:101::29 + crucible 3eda924f-22a9-4f3e-9a1b-91d1c47601ab in service fd00:1122:3344:101::23 + crucible 587be699-a320-4c79-b320-128d9ecddc0b in service fd00:1122:3344:101::2b + crucible 6fa06115-4959-4913-8e7b-dd70d7651f07 in service fd00:1122:3344:101::2c + crucible 8f3a1cc5-9195-4a30-ad02-b804278fe639 in service fd00:1122:3344:101::28 + crucible a1696cd4-588c-484a-b95b-66e824c0ce05 in service fd00:1122:3344:101::25 + crucible a2079cbc-a69e-41a1-b1e0-fbcb972d03f6 in service fd00:1122:3344:101::2a + internal_ntp 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service fd00:1122:3344:101::21 + nexus c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service fd00:1122:3344:101::22 + + sled 590e3034-d946-4166-b0e5-2d0034197a07: zones at generation 2 + crucible 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service fd00:1122:3344:102::2a + crucible 56d5d7cf-db2c-40a3-a775-003241ad4820 in service fd00:1122:3344:102::29 + crucible 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service fd00:1122:3344:102::2b + crucible 7a9f60d3-2b66-4547-9b63-7d4f7a8b6382 in service fd00:1122:3344:102::26 + crucible 93f2f40c-5616-4d8d-8519-ec6debdcede0 in service fd00:1122:3344:102::2c + crucible ab7ba6df-d401-40bd-940e-faf57c57aa2a in service fd00:1122:3344:102::28 + crucible af322036-371f-437c-8c08-7f40f3f1403b in service fd00:1122:3344:102::23 + crucible d637264f-6f40-44c2-8b7e-a179430210d2 in service fd00:1122:3344:102::25 + crucible dce226c9-7373-4bfa-8a94-79dc472857a6 in service fd00:1122:3344:102::27 + crucible edabedf3-839c-488d-ad6f-508ffa864674 in service fd00:1122:3344:102::24 + internal_ntp 47199d48-534c-4267-a654-d2d90e64b498 in service fd00:1122:3344:102::21 + nexus 704e1fed-f8d6-4cfa-a470-bad27fdc06d1 in service fd00:1122:3344:102::22 + + ADDED SLEDS: + ++ sled b59ec570-2abb-4017-80ce-129d94e7a025: zones at generation 2 ++ internal_ntp 2d73d30e-ca47-46a8-9c12-917d4ab824b6 in service fd00:1122:3344:104::21 added + + METADATA: + internal DNS version: 1 (unchanged) + external DNS version: 1 (unchanged) diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt index 9d98daac36..233821412f 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt @@ -1,59 +1,69 @@ -diff blueprint 4171ad05-89dd-474b-846b-b007e4346366 blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 ---- blueprint 4171ad05-89dd-474b-846b-b007e4346366 -+++ blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 - sled 41f45d9f-766e-4ca6-a881-61ee45c80f57 - zone config generation 2 - 267ed614-92af-4b9d-bdba-c2881c2e43a2 in 
service internal_ntp [underlay IP fd00:1122:3344:103::21] (unchanged) - 322ee9f1-8903-4542-a0a8-a54cefabdeca in service crucible [underlay IP fd00:1122:3344:103::24] (unchanged) - 4ab1650f-32c5-447f-939d-64b8103a7645 in service crucible [underlay IP fd00:1122:3344:103::2a] (unchanged) - 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service crucible [underlay IP fd00:1122:3344:103::27] (unchanged) - 6e811d86-8aa7-4660-935b-84b4b7721b10 in service crucible [underlay IP fd00:1122:3344:103::2b] (unchanged) - 747d2426-68bf-4c22-8806-41d290b5d5f5 in service crucible [underlay IP fd00:1122:3344:103::25] (unchanged) - 7fbd2c38-5dc3-48c4-b061-558a2041d70f in service crucible [underlay IP fd00:1122:3344:103::2c] (unchanged) - 8e9e923e-62b1-4cbc-9f59-d6397e338b6b in service crucible [underlay IP fd00:1122:3344:103::29] (unchanged) - b14d5478-1a0e-4b90-b526-36b06339dfc4 in service crucible [underlay IP fd00:1122:3344:103::28] (unchanged) - b40f7c7b-526c-46c8-ae33-67280c280eb7 in service crucible [underlay IP fd00:1122:3344:103::23] (unchanged) - be97b92b-38d6-422a-8c76-d37060f75bd2 in service crucible [underlay IP fd00:1122:3344:103::26] (unchanged) - cc816cfe-3869-4dde-b596-397d41198628 in service nexus [underlay IP fd00:1122:3344:103::22] (unchanged) - sled 43677374-8d2f-4deb-8a41-eeea506db8e0 - zone config generation 2 - 02acbe6a-1c88-47e3-94c3-94084cbde098 in service crucible [underlay IP fd00:1122:3344:101::27] (unchanged) - 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service crucible [underlay IP fd00:1122:3344:101::26] (unchanged) - 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service internal_ntp [underlay IP fd00:1122:3344:101::21] (unchanged) - 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service crucible [underlay IP fd00:1122:3344:101::24] (unchanged) - 2a455c35-eb3c-4c73-ab6c-d0a706e25316 in service crucible [underlay IP fd00:1122:3344:101::29] (unchanged) - 3eda924f-22a9-4f3e-9a1b-91d1c47601ab in service crucible [underlay IP fd00:1122:3344:101::23] (unchanged) - 587be699-a320-4c79-b320-128d9ecddc0b in service crucible [underlay IP fd00:1122:3344:101::2b] (unchanged) - 6fa06115-4959-4913-8e7b-dd70d7651f07 in service crucible [underlay IP fd00:1122:3344:101::2c] (unchanged) - 8f3a1cc5-9195-4a30-ad02-b804278fe639 in service crucible [underlay IP fd00:1122:3344:101::28] (unchanged) - a1696cd4-588c-484a-b95b-66e824c0ce05 in service crucible [underlay IP fd00:1122:3344:101::25] (unchanged) - a2079cbc-a69e-41a1-b1e0-fbcb972d03f6 in service crucible [underlay IP fd00:1122:3344:101::2a] (unchanged) - c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service nexus [underlay IP fd00:1122:3344:101::22] (unchanged) - sled 590e3034-d946-4166-b0e5-2d0034197a07 - zone config generation 2 - 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service crucible [underlay IP fd00:1122:3344:102::2a] (unchanged) - 47199d48-534c-4267-a654-d2d90e64b498 in service internal_ntp [underlay IP fd00:1122:3344:102::21] (unchanged) - 56d5d7cf-db2c-40a3-a775-003241ad4820 in service crucible [underlay IP fd00:1122:3344:102::29] (unchanged) - 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service crucible [underlay IP fd00:1122:3344:102::2b] (unchanged) - 704e1fed-f8d6-4cfa-a470-bad27fdc06d1 in service nexus [underlay IP fd00:1122:3344:102::22] (unchanged) - 7a9f60d3-2b66-4547-9b63-7d4f7a8b6382 in service crucible [underlay IP fd00:1122:3344:102::26] (unchanged) - 93f2f40c-5616-4d8d-8519-ec6debdcede0 in service crucible [underlay IP fd00:1122:3344:102::2c] (unchanged) - ab7ba6df-d401-40bd-940e-faf57c57aa2a in service crucible [underlay IP fd00:1122:3344:102::28] 
(unchanged) - af322036-371f-437c-8c08-7f40f3f1403b in service crucible [underlay IP fd00:1122:3344:102::23] (unchanged) - d637264f-6f40-44c2-8b7e-a179430210d2 in service crucible [underlay IP fd00:1122:3344:102::25] (unchanged) - dce226c9-7373-4bfa-8a94-79dc472857a6 in service crucible [underlay IP fd00:1122:3344:102::27] (unchanged) - edabedf3-839c-488d-ad6f-508ffa864674 in service crucible [underlay IP fd00:1122:3344:102::24] (unchanged) - sled b59ec570-2abb-4017-80ce-129d94e7a025 -- zone config generation 2 -+ zone config generation 3 - 2d73d30e-ca47-46a8-9c12-917d4ab824b6 in service internal_ntp [underlay IP fd00:1122:3344:104::21] (unchanged) -+ 1a20ee3c-f66e-4fca-ab85-2a248aa3d79d in service crucible [underlay IP fd00:1122:3344:104::2b] (added) -+ 28852beb-d0e5-4cba-9adb-e7f0cd4bb864 in service crucible [underlay IP fd00:1122:3344:104::29] (added) -+ 45556184-7092-4a3d-873f-637976bb133b in service crucible [underlay IP fd00:1122:3344:104::22] (added) -+ 8215bf7a-10d6-4f40-aeb7-27a196307c37 in service crucible [underlay IP fd00:1122:3344:104::25] (added) -+ 9d75abfe-47ab-434a-93dd-af50dc0dddde in service crucible [underlay IP fd00:1122:3344:104::23] (added) -+ a36d291c-7f68-462f-830e-bc29e5841ce2 in service crucible [underlay IP fd00:1122:3344:104::27] (added) -+ b3a4d434-aaee-4752-8c99-69d88fbcb8c5 in service crucible [underlay IP fd00:1122:3344:104::2a] (added) -+ cf5b636b-a505-4db6-bc32-baf9f53f4371 in service crucible [underlay IP fd00:1122:3344:104::28] (added) -+ f6125d45-b9cc-4721-ba60-ed4dbb177e41 in service crucible [underlay IP fd00:1122:3344:104::26] (added) -+ f86e19d2-9145-41cf-be89-6aaa34a73873 in service crucible [underlay IP fd00:1122:3344:104::24] (added) +from: blueprint 4171ad05-89dd-474b-846b-b007e4346366 +to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 + + ------------------------------------------------------------------------------------------------------ + zone type zone ID disposition underlay IP status + ------------------------------------------------------------------------------------------------------ + + UNCHANGED SLEDS: + + sled 41f45d9f-766e-4ca6-a881-61ee45c80f57: zones at generation 2 + crucible 322ee9f1-8903-4542-a0a8-a54cefabdeca in service fd00:1122:3344:103::24 + crucible 4ab1650f-32c5-447f-939d-64b8103a7645 in service fd00:1122:3344:103::2a + crucible 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service fd00:1122:3344:103::27 + crucible 6e811d86-8aa7-4660-935b-84b4b7721b10 in service fd00:1122:3344:103::2b + crucible 747d2426-68bf-4c22-8806-41d290b5d5f5 in service fd00:1122:3344:103::25 + crucible 7fbd2c38-5dc3-48c4-b061-558a2041d70f in service fd00:1122:3344:103::2c + crucible 8e9e923e-62b1-4cbc-9f59-d6397e338b6b in service fd00:1122:3344:103::29 + crucible b14d5478-1a0e-4b90-b526-36b06339dfc4 in service fd00:1122:3344:103::28 + crucible b40f7c7b-526c-46c8-ae33-67280c280eb7 in service fd00:1122:3344:103::23 + crucible be97b92b-38d6-422a-8c76-d37060f75bd2 in service fd00:1122:3344:103::26 + internal_ntp 267ed614-92af-4b9d-bdba-c2881c2e43a2 in service fd00:1122:3344:103::21 + nexus cc816cfe-3869-4dde-b596-397d41198628 in service fd00:1122:3344:103::22 + + sled 43677374-8d2f-4deb-8a41-eeea506db8e0: zones at generation 2 + crucible 02acbe6a-1c88-47e3-94c3-94084cbde098 in service fd00:1122:3344:101::27 + crucible 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service fd00:1122:3344:101::26 + crucible 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service fd00:1122:3344:101::24 + crucible 2a455c35-eb3c-4c73-ab6c-d0a706e25316 in service fd00:1122:3344:101::29 + 
crucible 3eda924f-22a9-4f3e-9a1b-91d1c47601ab in service fd00:1122:3344:101::23 + crucible 587be699-a320-4c79-b320-128d9ecddc0b in service fd00:1122:3344:101::2b + crucible 6fa06115-4959-4913-8e7b-dd70d7651f07 in service fd00:1122:3344:101::2c + crucible 8f3a1cc5-9195-4a30-ad02-b804278fe639 in service fd00:1122:3344:101::28 + crucible a1696cd4-588c-484a-b95b-66e824c0ce05 in service fd00:1122:3344:101::25 + crucible a2079cbc-a69e-41a1-b1e0-fbcb972d03f6 in service fd00:1122:3344:101::2a + internal_ntp 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service fd00:1122:3344:101::21 + nexus c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service fd00:1122:3344:101::22 + + sled 590e3034-d946-4166-b0e5-2d0034197a07: zones at generation 2 + crucible 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service fd00:1122:3344:102::2a + crucible 56d5d7cf-db2c-40a3-a775-003241ad4820 in service fd00:1122:3344:102::29 + crucible 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service fd00:1122:3344:102::2b + crucible 7a9f60d3-2b66-4547-9b63-7d4f7a8b6382 in service fd00:1122:3344:102::26 + crucible 93f2f40c-5616-4d8d-8519-ec6debdcede0 in service fd00:1122:3344:102::2c + crucible ab7ba6df-d401-40bd-940e-faf57c57aa2a in service fd00:1122:3344:102::28 + crucible af322036-371f-437c-8c08-7f40f3f1403b in service fd00:1122:3344:102::23 + crucible d637264f-6f40-44c2-8b7e-a179430210d2 in service fd00:1122:3344:102::25 + crucible dce226c9-7373-4bfa-8a94-79dc472857a6 in service fd00:1122:3344:102::27 + crucible edabedf3-839c-488d-ad6f-508ffa864674 in service fd00:1122:3344:102::24 + internal_ntp 47199d48-534c-4267-a654-d2d90e64b498 in service fd00:1122:3344:102::21 + nexus 704e1fed-f8d6-4cfa-a470-bad27fdc06d1 in service fd00:1122:3344:102::22 + + MODIFIED SLEDS: + +* sled b59ec570-2abb-4017-80ce-129d94e7a025: zones at generation: 2 -> 3 + internal_ntp 2d73d30e-ca47-46a8-9c12-917d4ab824b6 in service fd00:1122:3344:104::21 ++ crucible 1a20ee3c-f66e-4fca-ab85-2a248aa3d79d in service fd00:1122:3344:104::2b added ++ crucible 28852beb-d0e5-4cba-9adb-e7f0cd4bb864 in service fd00:1122:3344:104::29 added ++ crucible 45556184-7092-4a3d-873f-637976bb133b in service fd00:1122:3344:104::22 added ++ crucible 8215bf7a-10d6-4f40-aeb7-27a196307c37 in service fd00:1122:3344:104::25 added ++ crucible 9d75abfe-47ab-434a-93dd-af50dc0dddde in service fd00:1122:3344:104::23 added ++ crucible a36d291c-7f68-462f-830e-bc29e5841ce2 in service fd00:1122:3344:104::27 added ++ crucible b3a4d434-aaee-4752-8c99-69d88fbcb8c5 in service fd00:1122:3344:104::2a added ++ crucible cf5b636b-a505-4db6-bc32-baf9f53f4371 in service fd00:1122:3344:104::28 added ++ crucible f6125d45-b9cc-4721-ba60-ed4dbb177e41 in service fd00:1122:3344:104::26 added ++ crucible f86e19d2-9145-41cf-be89-6aaa34a73873 in service fd00:1122:3344:104::24 added + + METADATA: + internal DNS version: 1 (unchanged) + external DNS version: 1 (unchanged) diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index 17d3db6228..380beaecf5 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -1,86 +1,95 @@ -diff blueprint 55502b1b-e255-438b-a16a-2680a4b5f962 blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 ---- blueprint 55502b1b-e255-438b-a16a-2680a4b5f962 -+++ blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 - sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9 - zone config generation 2 - 
19fbc4f8-a683-4f22-8f5a-e74782b935be in service crucible [underlay IP fd00:1122:3344:105::26] (unchanged) - 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service crucible [underlay IP fd00:1122:3344:105::2c] (unchanged) - 6b53ab2e-d98c-485f-87a3-4d5df595390f in service crucible [underlay IP fd00:1122:3344:105::27] (unchanged) - 6dff7633-66bb-4924-a6ff-2c896e66964b in service nexus [underlay IP fd00:1122:3344:105::22] (unchanged) - 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service internal_ntp [underlay IP fd00:1122:3344:105::21] (unchanged) - 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service crucible [underlay IP fd00:1122:3344:105::23] (unchanged) - 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service crucible [underlay IP fd00:1122:3344:105::25] (unchanged) - b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service crucible [underlay IP fd00:1122:3344:105::28] (unchanged) - c406da50-34b9-4bb4-a460-8f49875d2a6a in service crucible [underlay IP fd00:1122:3344:105::24] (unchanged) - d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service crucible [underlay IP fd00:1122:3344:105::2a] (unchanged) - e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service crucible [underlay IP fd00:1122:3344:105::2b] (unchanged) - f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service crucible [underlay IP fd00:1122:3344:105::29] (unchanged) - sled 48d95fef-bc9f-4f50-9a53-1e075836291d - zone config generation 2 - 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 in service crucible [underlay IP fd00:1122:3344:103::2c] (unchanged) - 0dcfdfc5-481e-4153-b97c-11cf02b648ea in service crucible [underlay IP fd00:1122:3344:103::25] (unchanged) - 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb in service nexus [underlay IP fd00:1122:3344:103::22] (unchanged) - 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f in service crucible [underlay IP fd00:1122:3344:103::27] (unchanged) - 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 in service crucible [underlay IP fd00:1122:3344:103::28] (unchanged) - 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb in service crucible [underlay IP fd00:1122:3344:103::24] (unchanged) - 67622d61-2df4-414d-aa0e-d1277265f405 in service crucible [underlay IP fd00:1122:3344:103::23] (unchanged) - 67d913e0-0005-4599-9b28-0abbf6cc2916 in service internal_ntp [underlay IP fd00:1122:3344:103::21] (unchanged) - b91b271d-8d80-4f49-99a0-34006ae86063 in service crucible [underlay IP fd00:1122:3344:103::2a] (unchanged) - d6ee1338-3127-43ec-9aaa-b973ccf05496 in service crucible [underlay IP fd00:1122:3344:103::26] (unchanged) - e39d7c9e-182b-48af-af87-58079d723583 in service crucible [underlay IP fd00:1122:3344:103::29] (unchanged) - f69f92a1-5007-4bb0-a85b-604dc217154b in service crucible [underlay IP fd00:1122:3344:103::2b] (unchanged) - sled 68d24ac5-f341-49ea-a92a-0381b52ab387 - zone config generation 2 - 01d58626-e1b0-480f-96be-ac784863c7dc in service nexus [underlay IP fd00:1122:3344:102::22] (unchanged) - 3b3c14b6-a8e2-4054-a577-8d96cb576230 in service crucible [underlay IP fd00:1122:3344:102::2c] (unchanged) - 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in service crucible [underlay IP fd00:1122:3344:102::23] (unchanged) - 6464d025-4652-4948-919e-740bec5699b1 in service crucible [underlay IP fd00:1122:3344:102::24] (unchanged) - 6939ce48-b17c-4616-b176-8a419a7697be in service crucible [underlay IP fd00:1122:3344:102::29] (unchanged) - 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service crucible [underlay IP fd00:1122:3344:102::25] (unchanged) - 8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service crucible [underlay IP fd00:1122:3344:102::2b] (unchanged) - 9fd52961-426f-4e62-a644-b70871103fca in service crucible 
[underlay IP fd00:1122:3344:102::26] (unchanged) - b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service crucible [underlay IP fd00:1122:3344:102::27] (unchanged) - b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service crucible [underlay IP fd00:1122:3344:102::28] (unchanged) - c407795c-6c8b-428e-8ab8-b962913c447f in service crucible [underlay IP fd00:1122:3344:102::2a] (unchanged) - f3f2e4f3-0985-4ef6-8336-ce479382d05d in service internal_ntp [underlay IP fd00:1122:3344:102::21] (unchanged) - sled 75bc286f-2b4b-482c-9431-59272af529da -- zone config generation 2 -+ zone config generation 3 - 15bb9def-69b8-4d2e-b04f-9fee1143387c in service crucible [underlay IP fd00:1122:3344:104::25] (unchanged) - 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service crucible [underlay IP fd00:1122:3344:104::2c] (unchanged) - 57b96d5c-b71e-43e4-8869-7d514003d00d in service internal_ntp [underlay IP fd00:1122:3344:104::21] (unchanged) - 621509d6-3772-4009-aca1-35eefd1098fb in service crucible [underlay IP fd00:1122:3344:104::28] (unchanged) - 85b8c68a-160d-461d-94dd-1baf175fa75c in service crucible [underlay IP fd00:1122:3344:104::2a] (unchanged) - 996d7570-b0df-46d5-aaa4-0c97697cf484 in service crucible [underlay IP fd00:1122:3344:104::26] (unchanged) - a732c489-d29a-4f75-b900-5966385943af in service crucible [underlay IP fd00:1122:3344:104::29] (unchanged) - b1783e95-9598-451d-b6ba-c50b52b428c3 in service crucible [underlay IP fd00:1122:3344:104::24] (unchanged) - b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service nexus [underlay IP fd00:1122:3344:104::22] (unchanged) - c6dd531e-2d1d-423b-acc8-358533dab78c in service crucible [underlay IP fd00:1122:3344:104::27] (unchanged) - e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service crucible [underlay IP fd00:1122:3344:104::23] (unchanged) - f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service crucible [underlay IP fd00:1122:3344:104::2b] (unchanged) -+ 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service nexus [underlay IP fd00:1122:3344:104::2d] (added) -+ 3ca5292f-8a59-4475-bb72-0f43714d0fff in service nexus [underlay IP fd00:1122:3344:104::31] (added) -+ 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service nexus [underlay IP fd00:1122:3344:104::2e] (added) -+ 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service nexus [underlay IP fd00:1122:3344:104::2f] (added) -+ 99f6d544-8599-4e2b-a55a-82d9e0034662 in service nexus [underlay IP fd00:1122:3344:104::30] (added) -+ c26b3bda-5561-44a1-a69f-22103fe209a1 in service nexus [underlay IP fd00:1122:3344:104::32] (added) - sled affab35f-600a-4109-8ea0-34a067a4e0bc -- zone config generation 2 -+ zone config generation 3 - 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service crucible [underlay IP fd00:1122:3344:101::27] (unchanged) - 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service nexus [underlay IP fd00:1122:3344:101::22] (unchanged) - 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service crucible [underlay IP fd00:1122:3344:101::24] (unchanged) - 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service crucible [underlay IP fd00:1122:3344:101::29] (unchanged) - 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service crucible [underlay IP fd00:1122:3344:101::26] (unchanged) - 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service crucible [underlay IP fd00:1122:3344:101::23] (unchanged) - a1c03689-fc62-4ea5-bb72-4d01f5138614 in service crucible [underlay IP fd00:1122:3344:101::2a] (unchanged) - a568e92e-4fbd-4b69-acd8-f16277073031 in service crucible [underlay IP fd00:1122:3344:101::2c] (unchanged) - bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service crucible [underlay IP fd00:1122:3344:101::28] 
(unchanged) - c60379ba-4e30-4628-a79a-0ae509aef4c5 in service crucible [underlay IP fd00:1122:3344:101::25] (unchanged) - d47f4996-fac0-4657-bcea-01b1fee6404d in service crucible [underlay IP fd00:1122:3344:101::2b] (unchanged) - f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service internal_ntp [underlay IP fd00:1122:3344:101::21] (unchanged) -+ 6f86d5cb-17d7-424b-9d4c-39f670532cbe in service nexus [underlay IP fd00:1122:3344:101::2e] (added) -+ 87c299eb-470e-4b6d-b8c7-6759694e66b6 in service nexus [underlay IP fd00:1122:3344:101::30] (added) -+ c72b7930-0580-4f00-93b9-8cba2c8d344e in service nexus [underlay IP fd00:1122:3344:101::2d] (added) -+ d0095508-bdb8-4faf-b091-964276a20b15 in service nexus [underlay IP fd00:1122:3344:101::31] (added) -+ ff422442-4b31-4ade-a11a-9e5a25f0404c in service nexus [underlay IP fd00:1122:3344:101::2f] (added) +from: blueprint 55502b1b-e255-438b-a16a-2680a4b5f962 +to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 + + ------------------------------------------------------------------------------------------------------ + zone type zone ID disposition underlay IP status + ------------------------------------------------------------------------------------------------------ + + UNCHANGED SLEDS: + + sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: zones at generation 2 + crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::26 + crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c + crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 + crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:105::23 + crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::25 + crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::28 + crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::24 + crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::2a + crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2b + crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::29 + internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 + nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 + + sled 48d95fef-bc9f-4f50-9a53-1e075836291d: zones at generation 2 + crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 in service fd00:1122:3344:103::2c + crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea in service fd00:1122:3344:103::25 + crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f in service fd00:1122:3344:103::27 + crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 in service fd00:1122:3344:103::28 + crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb in service fd00:1122:3344:103::24 + crucible 67622d61-2df4-414d-aa0e-d1277265f405 in service fd00:1122:3344:103::23 + crucible b91b271d-8d80-4f49-99a0-34006ae86063 in service fd00:1122:3344:103::2a + crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 in service fd00:1122:3344:103::26 + crucible e39d7c9e-182b-48af-af87-58079d723583 in service fd00:1122:3344:103::29 + crucible f69f92a1-5007-4bb0-a85b-604dc217154b in service fd00:1122:3344:103::2b + internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:103::21 + nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb in service fd00:1122:3344:103::22 + + sled 68d24ac5-f341-49ea-a92a-0381b52ab387: zones at generation 2 + crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 in service fd00:1122:3344:102::2c + crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in 
service fd00:1122:3344:102::23 + crucible 6464d025-4652-4948-919e-740bec5699b1 in service fd00:1122:3344:102::24 + crucible 6939ce48-b17c-4616-b176-8a419a7697be in service fd00:1122:3344:102::29 + crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service fd00:1122:3344:102::25 + crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service fd00:1122:3344:102::2b + crucible 9fd52961-426f-4e62-a644-b70871103fca in service fd00:1122:3344:102::26 + crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service fd00:1122:3344:102::27 + crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service fd00:1122:3344:102::28 + crucible c407795c-6c8b-428e-8ab8-b962913c447f in service fd00:1122:3344:102::2a + internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d in service fd00:1122:3344:102::21 + nexus 01d58626-e1b0-480f-96be-ac784863c7dc in service fd00:1122:3344:102::22 + + MODIFIED SLEDS: + +* sled 75bc286f-2b4b-482c-9431-59272af529da: zones at generation: 2 -> 3 + crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 + crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c + crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 + crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::2a + crucible 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:104::26 + crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::29 + crucible b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::24 + crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::27 + crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service fd00:1122:3344:104::23 + crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b + internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 + nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 ++ nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d added ++ nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:104::31 added ++ nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e added ++ nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f added ++ nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:104::30 added ++ nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:104::32 added + +* sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation: 2 -> 3 + crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 + crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 + crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 + crucible 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::26 + crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:101::23 + crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::2a + crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::2c + crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::28 + crucible c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::25 + crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b + internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 + nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 ++ nexus 
6f86d5cb-17d7-424b-9d4c-39f670532cbe in service fd00:1122:3344:101::2e added ++ nexus 87c299eb-470e-4b6d-b8c7-6759694e66b6 in service fd00:1122:3344:101::30 added ++ nexus c72b7930-0580-4f00-93b9-8cba2c8d344e in service fd00:1122:3344:101::2d added ++ nexus d0095508-bdb8-4faf-b091-964276a20b15 in service fd00:1122:3344:101::31 added ++ nexus ff422442-4b31-4ade-a11a-9e5a25f0404c in service fd00:1122:3344:101::2f added + + METADATA: + internal DNS version: 1 (unchanged) + external DNS version: 1 (unchanged) diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt new file mode 100644 index 0000000000..58fbbd26be --- /dev/null +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -0,0 +1,104 @@ +from: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 +to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 + + -------------------------------------------------------------------------------------------------------- + zone type zone ID disposition underlay IP status + -------------------------------------------------------------------------------------------------------- + + UNCHANGED SLEDS: + + sled 75bc286f-2b4b-482c-9431-59272af529da: zones at generation 3 + crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 + crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c + crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 + crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::2a + crucible 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:104::26 + crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::29 + crucible b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::24 + crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::27 + crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service fd00:1122:3344:104::23 + crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b + internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 + nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d + nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:104::31 + nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e + nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f + nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:104::30 + nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 + nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:104::32 + + sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation 3 + crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 + crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 + crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 + crucible 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::26 + crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:101::23 + crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::2a + crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::2c + crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::28 + crucible c60379ba-4e30-4628-a79a-0ae509aef4c5 in service 
fd00:1122:3344:101::25 + crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b + internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 + nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 + nexus 6f86d5cb-17d7-424b-9d4c-39f670532cbe in service fd00:1122:3344:101::2e + nexus 87c299eb-470e-4b6d-b8c7-6759694e66b6 in service fd00:1122:3344:101::30 + nexus c72b7930-0580-4f00-93b9-8cba2c8d344e in service fd00:1122:3344:101::2d + nexus d0095508-bdb8-4faf-b091-964276a20b15 in service fd00:1122:3344:101::31 + nexus ff422442-4b31-4ade-a11a-9e5a25f0404c in service fd00:1122:3344:101::2f + + REMOVED SLEDS: + +- sled 68d24ac5-f341-49ea-a92a-0381b52ab387: zones at generation 2 +- crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 in service fd00:1122:3344:102::2c removed +- crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in service fd00:1122:3344:102::23 removed +- crucible 6464d025-4652-4948-919e-740bec5699b1 in service fd00:1122:3344:102::24 removed +- crucible 6939ce48-b17c-4616-b176-8a419a7697be in service fd00:1122:3344:102::29 removed +- crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service fd00:1122:3344:102::25 removed +- crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service fd00:1122:3344:102::2b removed +- crucible 9fd52961-426f-4e62-a644-b70871103fca in service fd00:1122:3344:102::26 removed +- crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service fd00:1122:3344:102::27 removed +- crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service fd00:1122:3344:102::28 removed +- crucible c407795c-6c8b-428e-8ab8-b962913c447f in service fd00:1122:3344:102::2a removed +- internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d in service fd00:1122:3344:102::21 removed +- nexus 01d58626-e1b0-480f-96be-ac784863c7dc in service fd00:1122:3344:102::22 removed + + MODIFIED SLEDS: + +* sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: zones at generation: 2 +! 
warning: generation should have changed + crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 + crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:105::23 + crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::25 + crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::28 + crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::24 + crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::2a + crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2b + crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::29 +- crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c removed +- crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::26 modified ++ ├─ quiesced fd00:1122:3344:105::26 +* └─ changed: disposition +- internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 modified ++ ├─ in service fd01:1122:3344:105::21 +* └─ changed: underlay IP +- nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 modified ++ ├─ in service fd00:1122:3344:105::22 +* └─ changed: zone type config + +* sled 48d95fef-bc9f-4f50-9a53-1e075836291d: zones at generation: 2 -> 3 +- crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 in service fd00:1122:3344:103::2c removed +- crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea in service fd00:1122:3344:103::25 removed +- crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f in service fd00:1122:3344:103::27 removed +- crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 in service fd00:1122:3344:103::28 removed +- crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb in service fd00:1122:3344:103::24 removed +- crucible 67622d61-2df4-414d-aa0e-d1277265f405 in service fd00:1122:3344:103::23 removed +- crucible b91b271d-8d80-4f49-99a0-34006ae86063 in service fd00:1122:3344:103::2a removed +- crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 in service fd00:1122:3344:103::26 removed +- crucible e39d7c9e-182b-48af-af87-58079d723583 in service fd00:1122:3344:103::29 removed +- crucible f69f92a1-5007-4bb0-a85b-604dc217154b in service fd00:1122:3344:103::2b removed +- internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:103::21 removed +- nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb in service fd00:1122:3344:103::22 removed + + METADATA: + internal DNS version: 1 (unchanged) +* external DNS version: 1 -> 2 diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt new file mode 100644 index 0000000000..46920c47f3 --- /dev/null +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt @@ -0,0 +1,94 @@ +blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 +parent: 55502b1b-e255-438b-a16a-2680a4b5f962 + + -------------------------------------------------------------------------------------------- + zone type zone ID disposition underlay IP + -------------------------------------------------------------------------------------------- + + sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: zones at generation 2 + crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::26 + crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c + crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 + crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service 
fd00:1122:3344:105::23 + crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::25 + crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::28 + crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::24 + crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::2a + crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2b + crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::29 + internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 + nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 + + sled 48d95fef-bc9f-4f50-9a53-1e075836291d: zones at generation 2 + crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 in service fd00:1122:3344:103::2c + crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea in service fd00:1122:3344:103::25 + crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f in service fd00:1122:3344:103::27 + crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 in service fd00:1122:3344:103::28 + crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb in service fd00:1122:3344:103::24 + crucible 67622d61-2df4-414d-aa0e-d1277265f405 in service fd00:1122:3344:103::23 + crucible b91b271d-8d80-4f49-99a0-34006ae86063 in service fd00:1122:3344:103::2a + crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 in service fd00:1122:3344:103::26 + crucible e39d7c9e-182b-48af-af87-58079d723583 in service fd00:1122:3344:103::29 + crucible f69f92a1-5007-4bb0-a85b-604dc217154b in service fd00:1122:3344:103::2b + internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:103::21 + nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb in service fd00:1122:3344:103::22 + + sled 68d24ac5-f341-49ea-a92a-0381b52ab387: zones at generation 2 + crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 in service fd00:1122:3344:102::2c + crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in service fd00:1122:3344:102::23 + crucible 6464d025-4652-4948-919e-740bec5699b1 in service fd00:1122:3344:102::24 + crucible 6939ce48-b17c-4616-b176-8a419a7697be in service fd00:1122:3344:102::29 + crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service fd00:1122:3344:102::25 + crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service fd00:1122:3344:102::2b + crucible 9fd52961-426f-4e62-a644-b70871103fca in service fd00:1122:3344:102::26 + crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service fd00:1122:3344:102::27 + crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service fd00:1122:3344:102::28 + crucible c407795c-6c8b-428e-8ab8-b962913c447f in service fd00:1122:3344:102::2a + internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d in service fd00:1122:3344:102::21 + nexus 01d58626-e1b0-480f-96be-ac784863c7dc in service fd00:1122:3344:102::22 + + sled 75bc286f-2b4b-482c-9431-59272af529da: zones at generation 3 + crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 + crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c + crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 + crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::2a + crucible 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:104::26 + crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::29 + crucible b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::24 + crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::27 + crucible 
e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service fd00:1122:3344:104::23 + crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b + internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 + nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d + nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:104::31 + nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e + nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f + nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:104::30 + nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 + nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:104::32 + + sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation 3 + crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 + crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 + crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 + crucible 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::26 + crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:101::23 + crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::2a + crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::2c + crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::28 + crucible c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::25 + crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b + internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 + nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 + nexus 6f86d5cb-17d7-424b-9d4c-39f670532cbe in service fd00:1122:3344:101::2e + nexus 87c299eb-470e-4b6d-b8c7-6759694e66b6 in service fd00:1122:3344:101::30 + nexus c72b7930-0580-4f00-93b9-8cba2c8d344e in service fd00:1122:3344:101::2d + nexus d0095508-bdb8-4faf-b091-964276a20b15 in service fd00:1122:3344:101::31 + nexus ff422442-4b31-4ade-a11a-9e5a25f0404c in service fd00:1122:3344:101::2f + +METADATA: + created by: test_blueprint2 + created at: 1970-01-01T00:00:00.000Z + comment: (none) + internal DNS version: 1 + external DNS version: 1 diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index ecc180b6db..aff45d07de 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -19,6 +19,7 @@ serde_json.workspace = true serde_with.workspace = true steno.workspace = true strum.workspace = true +tabled.workspace = true thiserror.workspace = true uuid.workspace = true diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 22eb6b7dbc..4c4f3823c6 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -29,11 +29,14 @@ use omicron_common::api::external::Generation; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; +use sled_agent_client::ZoneKind; use std::collections::BTreeMap; use std::collections::BTreeSet; +use std::collections::HashMap; use std::fmt; use strum::EnumIter; use strum::IntoEnumIterator; +use thiserror::Error; use uuid::Uuid; /// Fleet-wide deployment policy @@ -170,6 +173,19 @@ pub struct Blueprint { } impl Blueprint { + /// Return metadata for this blueprint. 
+    pub fn metadata(&self) -> BlueprintMetadata {
+        BlueprintMetadata {
+            id: self.id,
+            parent_blueprint_id: self.parent_blueprint_id,
+            internal_dns_version: self.internal_dns_version,
+            external_dns_version: self.external_dns_version,
+            time_created: self.time_created,
+            creator: self.creator.clone(),
+            comment: self.comment.clone(),
+        }
+    }
+
     /// Iterate over the [`BlueprintZoneConfig`] instances in the blueprint
     /// that match the provided filter, along with the associated sled id.
     pub fn all_blueprint_zones(
@@ -198,36 +214,42 @@ impl Blueprint {
         self.blueprint_zones.keys().copied()
     }

-    /// Summarize the difference between sleds and zones between two blueprints
-    pub fn diff_sleds<'a>(
-        &'a self,
-        other: &'a Blueprint,
-    ) -> OmicronZonesDiff<'a> {
-        OmicronZonesDiff {
-            before_label: format!("blueprint {}", self.id),
-            before_zones: self.blueprint_zones.clone(),
-            after_label: format!("blueprint {}", other.id),
-            after_zones: &other.blueprint_zones,
-        }
+    /// Summarize the difference between sleds and zones between two
+    /// blueprints.
+    ///
+    /// The argument provided is the "before" side, and `self` is the "after"
+    /// side. This matches the order of arguments to
+    /// [`Blueprint::diff_since_collection`].
+    pub fn diff_since_blueprint(
+        &self,
+        before: &Blueprint,
+    ) -> Result<BlueprintDiff, BlueprintDiffError> {
+        BlueprintDiff::new(
+            DiffBeforeMetadata::Blueprint(Box::new(before.metadata())),
+            before.blueprint_zones.clone(),
+            self.metadata(),
+            self.blueprint_zones.clone(),
+        )
     }

     /// Summarize the differences in sleds and zones between a collection and a
-    /// blueprint
+    /// blueprint.
     ///
     /// This gives an idea about what would change about a running system if
     /// one were to execute the blueprint.
     ///
-    /// Note that collections do not currently include information about what
-    /// zones are in-service, so it is assumed that all zones in the collection
-    /// are in-service. (This is the same assumption made by
+    /// Note that collections do not include information about zone
+    /// disposition, so it is assumed that all zones in the collection have the
+    /// [`InService`](BlueprintZoneDisposition::InService) disposition. (This
+    /// is the same assumption made by
     /// [`BlueprintZonesConfig::initial_from_collection`]. The logic here may
     /// also be expanded to handle cases where not all zones in the collection
     /// are in-service.)
- pub fn diff_sleds_from_collection( + pub fn diff_since_collection( &self, - collection: &Collection, - ) -> OmicronZonesDiff<'_> { - let before_zones = collection + before: &Collection, + ) -> Result { + let before_zones = before .omicron_zones .iter() .map(|(sled_id, zones_found)| { @@ -247,12 +269,13 @@ impl Blueprint { (*sled_id, zones) }) .collect(); - OmicronZonesDiff { - before_label: format!("collection {}", collection.id), + + BlueprintDiff::new( + DiffBeforeMetadata::Collection { id: before.id }, before_zones, - after_label: format!("blueprint {}", self.id), - after_zones: &self.blueprint_zones, - } + self.metadata(), + self.blueprint_zones.clone(), + ) } /// Return a struct that can be displayed to present information about the @@ -283,35 +306,11 @@ impl<'a> fmt::Display for BlueprintDisplay<'a> { .map(|u| u.to_string()) .unwrap_or_else(|| String::from("")) )?; - writeln!( - f, - "created by {}{}", - b.creator, - if b.creator.parse::().is_ok() { - " (likely a Nexus instance)" - } else { - "" - } - )?; - writeln!( - f, - "created at {}", - humantime::format_rfc3339_millis(b.time_created.into(),) - )?; - writeln!(f, "internal DNS version: {}", b.internal_dns_version)?; - writeln!(f, "comment: {}", b.comment)?; - writeln!(f, "zones:\n")?; - for (sled_id, sled_zones) in &b.blueprint_zones { - writeln!( - f, - " sled {}: Omicron zones at generation {}", - sled_id, sled_zones.generation - )?; - for z in &sled_zones.zones { - writeln!(f, " {}", z.display())?; - } - } + writeln!(f, "\n{}", self.make_zone_table())?; + + writeln!(f, "\n{}", table_display::metadata_heading())?; + writeln!(f, "{}", self.make_metadata_table())?; Ok(()) } @@ -339,7 +338,8 @@ impl BlueprintZonesConfig { /// Constructs a new [`BlueprintZonesConfig`] from a collection's zones. /// /// For the initial blueprint, all zones within a collection are assumed to - /// be in-service. + /// have the [`InService`](BlueprintZoneDisposition::InService) + /// disposition. pub fn initial_from_collection(collection: &OmicronZonesConfig) -> Self { let zones = collection .zones @@ -364,10 +364,10 @@ impl BlueprintZonesConfig { /// Sorts the list of zones stored in this configuration. /// - /// This is not strictly necessary. But for testing, it's helpful for - /// zones to be in sorted order. + /// This is not strictly necessary. But for testing (particularly snapshot + /// testing), it's helpful for zones to be in sorted order. pub fn sort(&mut self) { - self.zones.sort_unstable_by_key(|z| z.config.id); + self.zones.sort_unstable_by_key(zone_sort_key); } /// Converts self to an [`OmicronZonesConfig`], applying the provided @@ -392,6 +392,12 @@ impl BlueprintZonesConfig { } } +fn zone_sort_key(z: &BlueprintZoneConfig) -> impl Ord { + // First sort by kind, then by ID. This makes it so that zones of the same + // kind (e.g. Crucible zones) are grouped together. + (z.config.zone_type.kind(), z.config.id) +} + /// Describes one Omicron-managed zone in a blueprint. /// /// This is a wrapper around an [`OmicronZoneConfig`] that also includes a @@ -407,39 +413,6 @@ pub struct BlueprintZoneConfig { pub disposition: BlueprintZoneDisposition, } -impl BlueprintZoneConfig { - /// Return a struct that can be displayed to present information about the - /// zone. - pub fn display(&self) -> BlueprintZoneConfigDisplay<'_> { - BlueprintZoneConfigDisplay { zone: self } - } -} - -/// A wrapper to allow a [`BlueprintZoneConfig`] to be displayed with -/// information. -/// -/// Returned by [`BlueprintZoneConfig::display()`]. 
-#[derive(Clone, Debug)] -#[must_use = "this struct does nothing unless displayed"] -pub struct BlueprintZoneConfigDisplay<'a> { - zone: &'a BlueprintZoneConfig, -} - -impl<'a> fmt::Display for BlueprintZoneConfigDisplay<'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let z = self.zone; - write!( - f, - "{} {: bool { // This code could be written in three ways: @@ -573,6 +543,12 @@ pub struct BlueprintMetadata { pub comment: String, } +impl BlueprintMetadata { + pub fn display_id(&self) -> String { + format!("blueprint {}", self.id) + } +} + /// Describes what blueprint, if any, the system is currently working toward #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, JsonSchema)] pub struct BlueprintTarget { @@ -595,75 +571,386 @@ pub struct BlueprintTargetSet { /// Summarizes the differences between two blueprints #[derive(Debug)] -pub struct OmicronZonesDiff<'a> { - before_label: String, - // We store an owned copy of "before_zones" to make it easier to support - // collections here, where we need to assemble this map ourselves. - before_zones: BTreeMap, - after_label: String, - after_zones: &'a BTreeMap, +pub struct BlueprintDiff { + before_meta: DiffBeforeMetadata, + after_meta: BlueprintMetadata, + sleds: DiffSleds, +} + +impl BlueprintDiff { + /// Build a diff with the provided contents, verifying that the provided + /// data is valid. + fn new( + before_meta: DiffBeforeMetadata, + before_zones: BTreeMap, + after_meta: BlueprintMetadata, + after_zones: BTreeMap, + ) -> Result { + let mut errors = Vec::new(); + + let sleds = DiffSleds::new(before_zones, after_zones, &mut errors); + + if errors.is_empty() { + Ok(Self { before_meta, after_meta, sleds }) + } else { + Err(BlueprintDiffError { + before_meta, + after_meta: Box::new(after_meta), + errors, + }) + } + } + + /// Returns metadata about the source of the "before" data. + pub fn before_meta(&self) -> &DiffBeforeMetadata { + &self.before_meta + } + + /// Returns metadata about the source of the "after" data. + pub fn after_meta(&self) -> &BlueprintMetadata { + &self.after_meta + } + + /// Iterate over sleds only present in the second blueprint of a diff + pub fn sleds_added( + &self, + ) -> impl ExactSizeIterator + '_ { + self.sleds.added.iter().map(|(sled_id, zones)| (*sled_id, zones)) + } + + /// Iterate over sleds only present in the first blueprint of a diff + pub fn sleds_removed( + &self, + ) -> impl ExactSizeIterator + '_ { + self.sleds.removed.iter().map(|(sled_id, zones)| (*sled_id, zones)) + } + + /// Iterate over sleds present in both blueprints in a diff that have + /// changes. + pub fn sleds_modified( + &self, + ) -> impl ExactSizeIterator + '_ { + self.sleds.modified.iter().map(|(sled_id, sled)| (*sled_id, sled)) + } + + /// Iterate over sleds present in both blueprints in a diff that have no + /// changes. + pub fn sleds_unchanged( + &self, + ) -> impl Iterator + '_ { + self.sleds.unchanged.iter().map(|(sled_id, zones)| (*sled_id, zones)) + } + + /// Return a struct that can be used to display the diff. + pub fn display(&self) -> BlueprintDiffDisplay<'_> { + BlueprintDiffDisplay::new(self) + } } -/// Describes a sled that appeared on both sides of a diff (possibly changed) #[derive(Debug)] -pub struct DiffSledCommon<'a> { +struct DiffSleds { + added: BTreeMap, + removed: BTreeMap, + modified: BTreeMap, + unchanged: BTreeMap, +} + +impl DiffSleds { + /// Builds added, removed and common maps, verifying that the provided data + /// is valid. 
+ /// + /// The return value only contains the sleds that are present in both + /// blueprints. + fn new( + before: BTreeMap, + mut after: BTreeMap, + errors: &mut Vec, + ) -> Self { + let mut removed = BTreeMap::new(); + let mut modified = BTreeMap::new(); + let mut unchanged = BTreeMap::new(); + + for (sled_id, mut before_z) in before { + if let Some(mut after_z) = after.remove(&sled_id) { + // Sort before_z and after_z so they can be compared directly. + before_z.sort(); + after_z.sort(); + + if before_z == after_z { + unchanged.insert(sled_id, before_z); + } else { + let sled_modified = DiffSledModified::new( + sled_id, before_z, after_z, errors, + ); + modified.insert(sled_id, sled_modified); + } + } else { + removed.insert(sled_id, before_z); + } + } + + // We removed everything common from `after` above, so anything left is + // an added sled. + Self { added: after, removed, modified, unchanged } + } +} + +/// Wrapper to allow a [`BlueprintDiff`] to be displayed. +/// +/// Returned by [`BlueprintDiff::display()`]. +#[derive(Clone, Debug)] +#[must_use = "this struct does nothing unless displayed"] +pub struct BlueprintDiffDisplay<'diff> { + diff: &'diff BlueprintDiff, + // TODO: add colorization with a stylesheet +} + +impl<'diff> BlueprintDiffDisplay<'diff> { + #[inline] + fn new(diff: &'diff BlueprintDiff) -> Self { + Self { diff } + } +} + +impl<'diff> fmt::Display for BlueprintDiffDisplay<'diff> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let diff = self.diff; + + // Print things differently based on whether the diff is between a + // collection and a blueprint, or a blueprint and a blueprint. + match &diff.before_meta { + DiffBeforeMetadata::Collection { id } => { + writeln!( + f, + "from: collection {}\n\ + to: blueprint {}", + id, diff.after_meta.id, + )?; + } + DiffBeforeMetadata::Blueprint(before) => { + writeln!( + f, + "from: blueprint {}\n\ + to: blueprint {}", + before.id, diff.after_meta.id + )?; + } + } + + writeln!(f, "\n{}", self.make_zone_diff_table())?; + + writeln!(f, "\n{}", table_display::metadata_diff_heading())?; + writeln!(f, "{}", self.make_metadata_diff_table())?; + + Ok(()) + } +} + +#[derive(Clone, Debug, Error)] +pub struct BlueprintDiffError { + pub before_meta: DiffBeforeMetadata, + pub after_meta: Box, + pub errors: Vec, +} + +impl fmt::Display for BlueprintDiffError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + writeln!( + f, + "errors in diff between {} and {}:", + self.before_meta.display_id(), + self.after_meta.display_id() + )?; + for e in &self.errors { + writeln!(f, " - {}", e)?; + } + Ok(()) + } +} + +/// An individual error within a [`BlueprintDiffError`]. +#[derive(Clone, Debug)] +pub enum BlueprintDiffSingleError { + /// The [`OmicronZoneType`] of a particular zone changed between the before + /// and after blueprints. + /// + /// For a particular zone, the type should never change. + ZoneTypeChanged { + sled_id: Uuid, + zone_id: Uuid, + before: ZoneKind, + after: ZoneKind, + }, +} + +impl fmt::Display for BlueprintDiffSingleError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + BlueprintDiffSingleError::ZoneTypeChanged { + sled_id, + zone_id, + before, + after, + } => write!( + f, + "on sled {}, zone {} changed type from {} to {}", + zone_id, sled_id, before, after + ), + } + } +} + +/// Data about the "before" version within a [`BlueprintDiff`]. +#[derive(Clone, Debug)] +pub enum DiffBeforeMetadata { + /// The diff was made from a collection. 
+ Collection { id: Uuid }, + /// The diff was made from a blueprint. + Blueprint(Box), +} + +impl DiffBeforeMetadata { + pub fn display_id(&self) -> String { + match self { + DiffBeforeMetadata::Collection { id } => format!("collection {id}"), + DiffBeforeMetadata::Blueprint(b) => b.display_id(), + } + } +} + +/// Describes a sled that appeared on both sides of a diff and is changed. +#[derive(Clone, Debug)] +pub struct DiffSledModified { /// id of the sled pub sled_id: Uuid, /// generation of the "zones" configuration on the left side pub generation_before: Generation, /// generation of the "zones" configuration on the right side pub generation_after: Generation, - zones_added: Vec<&'a BlueprintZoneConfig>, - zones_removed: Vec<&'a BlueprintZoneConfig>, - zones_common: Vec>, + zones_added: Vec, + zones_removed: Vec, + zones_common: Vec, } -impl<'a> DiffSledCommon<'a> { +impl DiffSledModified { + fn new( + sled_id: Uuid, + before: BlueprintZonesConfig, + after: BlueprintZonesConfig, + errors: &mut Vec, + ) -> Self { + // Assemble separate summaries of the zones, indexed by zone id. + let before_by_id: HashMap<_, _> = before + .zones + .into_iter() + .map(|zone| (zone.config.id, zone)) + .collect(); + let mut after_by_id: HashMap<_, _> = after + .zones + .into_iter() + .map(|zone| (zone.config.id, zone)) + .collect(); + + let mut zones_removed = Vec::new(); + let mut zones_common = Vec::new(); + + // Now go through each zone and compare them. + for (zone_id, zone_before) in before_by_id { + if let Some(zone_after) = after_by_id.remove(&zone_id) { + let before_kind = zone_before.config.zone_type.kind(); + let after_kind = zone_after.config.zone_type.kind(); + + if before_kind != after_kind { + errors.push(BlueprintDiffSingleError::ZoneTypeChanged { + sled_id, + zone_id, + before: before_kind, + after: after_kind, + }); + } else { + let common = DiffZoneCommon { zone_before, zone_after }; + zones_common.push(common); + } + } else { + zones_removed.push(zone_before); + } + } + + // Since we removed common zones above, anything else exists only in + // before and was therefore added. + let mut zones_added: Vec<_> = after_by_id.into_values().collect(); + + // Sort for test reproducibility. + zones_added.sort_unstable_by_key(zone_sort_key); + zones_removed.sort_unstable_by_key(zone_sort_key); + zones_common.sort_unstable_by_key(|common| { + // The ID is common by definition, and the zone type was already + // verified to be the same above. So just sort by the sort key for + // the before zone. (In case of errors, the result will be thrown + // away anyway, so this is harmless.) 
+ zone_sort_key(&common.zone_before) + }); + + Self { + sled_id, + generation_before: before.generation, + generation_after: after.generation, + zones_added, + zones_removed, + zones_common, + } + } + /// Iterate over zones added between the blueprints pub fn zones_added( &self, - ) -> impl Iterator + '_ { - self.zones_added.iter().copied() + ) -> impl ExactSizeIterator + '_ { + self.zones_added.iter() } /// Iterate over zones removed between the blueprints pub fn zones_removed( &self, - ) -> impl Iterator + '_ { - self.zones_removed.iter().copied() + ) -> impl ExactSizeIterator + '_ { + self.zones_removed.iter() } /// Iterate over zones that are common to both blueprints pub fn zones_in_common( &self, - ) -> impl Iterator> + '_ { - self.zones_common.iter().copied() + ) -> impl ExactSizeIterator + '_ { + self.zones_common.iter() } - /// Iterate over zones that changed between the blue prints - pub fn zones_changed( + /// Iterate over zones that changed between the blueprints + pub fn zones_modified(&self) -> impl Iterator + '_ { + self.zones_in_common().filter(|z| z.is_modified()) + } + + /// Iterate over zones that did not change between the blueprints + pub fn zones_unchanged( &self, - ) -> impl Iterator> + '_ { - self.zones_in_common().filter(|z| z.is_changed()) + ) -> impl Iterator + '_ { + self.zones_in_common().filter(|z| !z.is_modified()) } } /// Describes a zone that was common to both sides of a diff -#[derive(Debug, Copy, Clone)] -pub struct DiffZoneCommon<'a> { +#[derive(Debug, Clone)] +pub struct DiffZoneCommon { /// full zone configuration before - pub zone_before: &'a BlueprintZoneConfig, + pub zone_before: BlueprintZoneConfig, /// full zone configuration after - pub zone_after: &'a BlueprintZoneConfig, + pub zone_after: BlueprintZoneConfig, } -impl<'a> DiffZoneCommon<'a> { +impl DiffZoneCommon { /// Returns true if there are any differences between `zone_before` and /// `zone_after`. /// /// This is equivalent to `config_changed() || disposition_changed()`. #[inline] - pub fn is_changed(&self) -> bool { + pub fn is_modified(&self) -> bool { // state is smaller and easier to compare than config. self.disposition_changed() || self.config_changed() } @@ -682,253 +969,673 @@ impl<'a> DiffZoneCommon<'a> { } } -impl<'a> OmicronZonesDiff<'a> { - fn sleds_before(&self) -> BTreeSet { - self.before_zones.keys().copied().collect() - } +/// Encapsulates Reconfigurator state +/// +/// This serialized from is intended for saving state from hand-constructed or +/// real, deployed systems and loading it back into a simulator or test suite +/// +/// **This format is not stable. It may change at any time without +/// backwards-compatibility guarantees.** +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct UnstableReconfiguratorState { + pub policy: Policy, + pub collections: Vec, + pub blueprints: Vec, + pub internal_dns: BTreeMap, + pub external_dns: BTreeMap, + pub silo_names: Vec, + pub external_dns_zone_names: Vec, +} - fn sleds_after(&self) -> BTreeSet { - self.after_zones.keys().copied().collect() - } +/// Code to generate tables. +/// +/// This is here because `tabled` has a number of generically-named types, and +/// we'd like to avoid name collisions with other types. 
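// A small sketch (an editor's illustration, not part of this change) of the
// save/reload round-trip that the Serialize/Deserialize derives on
// `UnstableReconfiguratorState` are meant to support, e.g. from a test suite
// or simulator. The use of `serde_json` and the boxed error type are
// assumptions made for the example only.
fn save_and_reload(
    state: &UnstableReconfiguratorState,
) -> Result<UnstableReconfiguratorState, Box<dyn std::error::Error>> {
    let json = serde_json::to_string_pretty(state)?;
    // Per the warning above, this format is unstable, so a saved file may
    // not load across versions.
    Ok(serde_json::from_str(&json)?)
}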
+mod table_display { + use super::*; + use crate::sectioned_table::SectionSpacing; + use crate::sectioned_table::StBuilder; + use crate::sectioned_table::StSectionBuilder; + use tabled::builder::Builder; + use tabled::settings::object::Columns; + use tabled::settings::Modify; + use tabled::settings::Padding; + use tabled::settings::Style; + use tabled::Table; + + impl<'a> super::BlueprintDisplay<'a> { + pub(super) fn make_zone_table(&self) -> Table { + let blueprint_zones = &self.blueprint.blueprint_zones; + let mut builder = StBuilder::new(); + builder.push_header_row(header_row()); + + for (sled_id, sled_zones) in blueprint_zones { + let heading = format!( + "{SLED_INDENT}sled {sled_id}: zones at generation {}", + sled_zones.generation + ); + builder.make_section( + SectionSpacing::Always, + heading, + |section| { + for zone in &sled_zones.zones { + add_zone_record( + ZONE_INDENT.to_string(), + zone, + section, + ); + } + + if section.is_empty() { + section.push_nested_heading( + SectionSpacing::IfNotFirst, + format!("{ZONE_HEAD_INDENT}{NO_ZONES_PARENS}"), + ); + } + }, + ); + } - /// Iterate over sleds only present in the second blueprint of a diff - pub fn sleds_added( - &self, - ) -> impl Iterator + '_ { - let sled_ids = self - .sleds_after() - .difference(&self.sleds_before()) - .copied() - .collect::>(); + builder.build() + } - sled_ids - .into_iter() - .map(|sled_id| (sled_id, self.after_zones.get(&sled_id).unwrap())) + pub(super) fn make_metadata_table(&self) -> Table { + let mut builder = Builder::new(); + + // Metadata is presented as a linear (top-to-bottom) table with a + // small indent. + + builder.push_record(vec![ + METADATA_INDENT.to_string(), + linear_table_label(&CREATED_BY), + self.blueprint.creator.clone(), + ]); + + builder.push_record(vec![ + METADATA_INDENT.to_string(), + linear_table_label(&CREATED_AT), + humantime::format_rfc3339_millis( + self.blueprint.time_created.into(), + ) + .to_string(), + ]); + + let comment = if self.blueprint.comment.is_empty() { + NONE_PARENS.to_string() + } else { + self.blueprint.comment.clone() + }; + + builder.push_record(vec![ + METADATA_INDENT.to_string(), + linear_table_label(&COMMENT), + comment, + ]); + + builder.push_record(vec![ + METADATA_INDENT.to_string(), + linear_table_label(&INTERNAL_DNS_VERSION), + self.blueprint.internal_dns_version.to_string(), + ]); + + builder.push_record(vec![ + METADATA_INDENT.to_string(), + linear_table_label(&EXTERNAL_DNS_VERSION), + self.blueprint.external_dns_version.to_string(), + ]); + + let mut table = builder.build(); + apply_linear_table_settings(&mut table); + table + } } - /// Iterate over sleds only present in the first blueprint of a diff - pub fn sleds_removed( - &self, - ) -> impl Iterator + '_ { - let sled_ids = self - .sleds_before() - .difference(&self.sleds_after()) - .copied() - .collect::>(); - sled_ids - .into_iter() - .map(|sled_id| (sled_id, self.before_zones.get(&sled_id).unwrap())) - } - - /// Iterate over sleds present in both blueprints in a diff - pub fn sleds_in_common( - &'a self, - ) -> impl Iterator)> + '_ { - let sled_ids = self - .sleds_before() - .intersection(&self.sleds_after()) - .copied() - .collect::>(); - sled_ids.into_iter().map(|sled_id| { - let b1sledzones = self.before_zones.get(&sled_id).unwrap(); - let b2sledzones = self.after_zones.get(&sled_id).unwrap(); - - // Assemble separate summaries of the zones, indexed by zone id. 
- let b1_zones: BTreeMap = b1sledzones - .zones - .iter() - .map(|zone| (zone.config.id, zone)) - .collect(); - let mut b2_zones: BTreeMap = - b2sledzones - .zones - .iter() - .map(|zone| (zone.config.id, zone)) - .collect(); - let mut zones_removed = vec![]; - let mut zones_common = vec![]; - - // Now go through each zone and compare them. - for (zone_id, zone_before) in &b1_zones { - if let Some(zone_after) = b2_zones.remove(zone_id) { - zones_common - .push(DiffZoneCommon { zone_before, zone_after }); - } else { - zones_removed.push(*zone_before); + impl<'diff> BlueprintDiffDisplay<'diff> { + pub(super) fn make_zone_diff_table(&self) -> Table { + let diff = self.diff; + + // Add the unchanged prefix to the zone indent since the first + // column will be used as the prefix. + let mut builder = StBuilder::new(); + builder.push_header_row(diff_header_row()); + + // The order is: + // + // 1. Unchanged + // 2. Removed + // 3. Modified + // 4. Added + // + // The idea behind the order is to (a) group all changes together + // and (b) put changes towards the bottom, so people have to scroll + // back less. + // + // Zones within a modified sled follow the same order. If you're + // changing the order here, make sure to keep that in sync. + + // First, unchanged sleds. + builder.make_section( + SectionSpacing::Always, + unchanged_sleds_heading(), + |section| { + for (sled_id, sled_zones) in diff.sleds_unchanged() { + add_whole_sled_records( + sled_id, + sled_zones, + WholeSledKind::Unchanged, + section, + ); + } + }, + ); + + // Then, removed sleds. + builder.make_section( + SectionSpacing::Always, + removed_sleds_heading(), + |section| { + for (sled_id, sled_zones) in diff.sleds_removed() { + add_whole_sled_records( + sled_id, + sled_zones, + WholeSledKind::Removed, + section, + ); + } + }, + ); + + // Then, modified sleds. + builder.make_section( + SectionSpacing::Always, + modified_sleds_heading(), + |section| { + // For sleds that are in common: + for (sled_id, modified) in diff.sleds_modified() { + add_modified_sled_records(sled_id, modified, section); + } + }, + ); + + // Finally, added sleds. + builder.make_section( + SectionSpacing::Always, + added_sleds_heading(), + |section| { + for (sled_id, sled_zones) in diff.sleds_added() { + add_whole_sled_records( + sled_id, + sled_zones, + WholeSledKind::Added, + section, + ); + } + }, + ); + + builder.build() + } + + pub(super) fn make_metadata_diff_table(&self) -> Table { + let diff = self.diff; + let mut builder = Builder::new(); + + // Metadata is presented as a linear (top-to-bottom) table with a + // small indent. + + match &diff.before_meta { + DiffBeforeMetadata::Collection { .. } => { + // Collections don't have DNS versions, so this is new. 
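                    // As an illustration only (not from this patch): with the
                    // helpers defined at the bottom of this module, these rows
                    // come out roughly as
                    //
                    //   +   internal DNS version: (not present in collection) -> 1
                    //   +   external DNS version: (not present in collection) -> 2
                    //
                    // while the blueprint-to-blueprint arm below renders either
                    // "<old> -> <new>" or "<value> (unchanged)". Exact spacing
                    // depends on the settings applied by
                    // `apply_linear_table_settings`.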
+ builder.push_record(vec![ + format!("{ADDED_PREFIX}{METADATA_DIFF_INDENT}"), + metadata_table_internal_dns(), + linear_table_modified( + &NOT_PRESENT_IN_COLLECTION_PARENS, + &diff.after_meta.internal_dns_version, + ), + ]); + + builder.push_record(vec![ + format!("{ADDED_PREFIX}{METADATA_DIFF_INDENT}"), + metadata_table_external_dns(), + linear_table_modified( + &NOT_PRESENT_IN_COLLECTION_PARENS, + &diff.after_meta.external_dns_version, + ), + ]); + } + DiffBeforeMetadata::Blueprint(before) => { + if before.internal_dns_version + != diff.after_meta.internal_dns_version + { + builder.push_record(vec![ + format!("{MODIFIED_PREFIX}{METADATA_DIFF_INDENT}"), + metadata_table_internal_dns(), + linear_table_modified( + &before.internal_dns_version, + &diff.after_meta.internal_dns_version, + ), + ]); + } else { + builder.push_record(vec![ + format!("{UNCHANGED_PREFIX}{METADATA_DIFF_INDENT}"), + metadata_table_internal_dns(), + linear_table_unchanged( + &before.internal_dns_version, + ), + ]); + }; + + if before.external_dns_version + != diff.after_meta.external_dns_version + { + builder.push_record(vec![ + format!("{MODIFIED_PREFIX}{METADATA_DIFF_INDENT}"), + metadata_table_external_dns(), + linear_table_modified( + &before.external_dns_version, + &diff.after_meta.external_dns_version, + ), + ]); + } else { + builder.push_record(vec![ + format!("{UNCHANGED_PREFIX}{METADATA_DIFF_INDENT}"), + metadata_table_external_dns(), + linear_table_unchanged( + &before.external_dns_version, + ), + ]); + }; } } - // Since we removed common zones above, anything else exists only in - // b2 and was therefore added. - let zones_added = b2_zones.into_values().collect(); + let mut table = builder.build(); + apply_linear_table_settings(&mut table); + table + } + } + fn add_whole_sled_records( + sled_id: Uuid, + sled_zones: &BlueprintZonesConfig, + kind: WholeSledKind, + section: &mut StSectionBuilder, + ) { + let heading = format!( + "{}{SLED_INDENT}sled {sled_id}: zones at generation {}", + kind.prefix(), + sled_zones.generation, + ); + let prefix = kind.prefix(); + let status = kind.status(); + section.make_subsection(SectionSpacing::Always, heading, |s2| { + // Also add another section for zones. + for zone in &sled_zones.zones { + match status { + Some(status) => { + add_zone_record_with_status( + format!("{prefix}{ZONE_INDENT}"), + zone, + status, + s2, + ); + } + None => { + add_zone_record( + format!("{prefix}{ZONE_INDENT}"), + zone, + s2, + ); + } + } + } + }); + } + + fn add_modified_sled_records( + sled_id: Uuid, + modified: &DiffSledModified, + section: &mut StSectionBuilder, + ) { + let (generation_heading, warning) = if modified.generation_before + != modified.generation_after + { ( - sled_id, - DiffSledCommon { - sled_id, - generation_before: b1sledzones.generation, - generation_after: b2sledzones.generation, - zones_added, - zones_removed, - zones_common, - }, + format!( + "zones at generation: {} -> {}", + modified.generation_before, modified.generation_after, + ), + None, ) - }) + } else { + // Modified sleds should always see a generation bump. 
+ ( + format!("zones at generation: {}", modified.generation_before), + Some(format!( + "{WARNING_PREFIX}{ZONE_HEAD_INDENT}\ + warning: generation should have changed" + )), + ) + }; + + let sled_heading = + format!("{MODIFIED_PREFIX}{SLED_INDENT}sled {sled_id}: {generation_heading}"); + + section.make_subsection(SectionSpacing::Always, sled_heading, |s2| { + if let Some(warning) = warning { + s2.push_nested_heading(SectionSpacing::Never, warning); + } + + // The order is: + // + // 1. Unchanged + // 2. Removed + // 3. Modified + // 4. Added + // + // The idea behind the order is to (a) group all changes together + // and (b) put changes towards the bottom, so people have to scroll + // back less. + // + // Sleds follow the same order. If you're changing the order here, + // make sure to keep that in sync. + + // First, unchanged zones. + for zone_unchanged in modified.zones_unchanged() { + add_zone_record( + format!("{UNCHANGED_PREFIX}{ZONE_INDENT}"), + &zone_unchanged.zone_before, + s2, + ); + } + + // Then, removed zones. + for zone in modified.zones_removed() { + add_zone_record_with_status( + format!("{REMOVED_PREFIX}{ZONE_INDENT}"), + zone, + REMOVED, + s2, + ); + } + + // Then, modified zones. + for zone_modified in modified.zones_modified() { + add_modified_zone_records(zone_modified, s2); + } + + // Finally, added zones. + for zone in modified.zones_added() { + add_zone_record_with_status( + format!("{ADDED_PREFIX}{ZONE_INDENT}"), + zone, + ADDED, + s2, + ); + } + + // If no rows were pushed, add a row indicating that for this sled. + if s2.is_empty() { + s2.push_nested_heading( + SectionSpacing::Never, + format!( + "{UNCHANGED_PREFIX}{ZONE_HEAD_INDENT}\ + {NO_ZONES_PARENS}" + ), + ); + } + }); } - pub fn sleds_changed( - &'a self, - ) -> impl Iterator)> + '_ { - self.sleds_in_common().filter(|(_, sled_changes)| { - sled_changes.zones_added().next().is_some() - || sled_changes.zones_removed().next().is_some() - || sled_changes.zones_changed().next().is_some() - }) + /// Add a zone record to this section. + /// + /// This is the meat-and-potatoes of the diff display. + fn add_zone_record( + first_column: String, + zone: &BlueprintZoneConfig, + section: &mut StSectionBuilder, + ) { + section.push_record(vec![ + first_column, + zone.config.zone_type.kind().to_string(), + zone.config.id.to_string(), + zone.disposition.to_string(), + zone.config.underlay_address.to_string(), + ]); } - /// Return a struct that can be used to display the diff in a - /// unified `diff(1)`-like format. - pub fn display(&self) -> OmicronZonesDiffDisplay<'_, 'a> { - OmicronZonesDiffDisplay::new(self) + fn add_zone_record_with_status( + first_column: String, + zone: &BlueprintZoneConfig, + status: &str, + section: &mut StSectionBuilder, + ) { + section.push_record(vec![ + first_column, + zone.config.zone_type.kind().to_string(), + zone.config.id.to_string(), + zone.disposition.to_string(), + zone.config.underlay_address.to_string(), + status.to_string(), + ]); } -} -/// Wrapper to allow a [`OmicronZonesDiff`] to be displayed in a unified -/// `diff(1)`-like format. -/// -/// Returned by [`OmicronZonesDiff::display()`]. -#[derive(Clone, Debug)] -#[must_use = "this struct does nothing unless displayed"] -pub struct OmicronZonesDiffDisplay<'diff, 'a> { - diff: &'diff OmicronZonesDiff<'a>, - // TODO: add colorization with a stylesheet -} + /// Add a change table for the zone to the section. + /// + /// For diffs, this contains a table of changes between two zone + /// records. 
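    ///
    /// As a rough illustration (an editor's sketch, not output captured from
    /// this patch), a modified zone ends up as three rows along these lines:
    ///
    ///   -   crucible   <zone-id>   in service   fd00::1   modified
    ///   +              ├─                       expunged  fd00::2
    ///   *              └─ changed: disposition, underlay IP
    ///
    /// with the real columns and spacing coming from the zone table settings.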
+ fn add_modified_zone_records( + modified: &DiffZoneCommon, + section: &mut StSectionBuilder, + ) { + // Negative record for the before. + let before = &modified.zone_before; + let after = &modified.zone_after; + + // Before record. + add_zone_record_with_status( + format!("{REMOVED_PREFIX}{ZONE_INDENT}"), + &before, + MODIFIED, + section, + ); + + let mut what_changed = Vec::new(); + if before.config.zone_type != after.config.zone_type { + what_changed.push(ZONE_TYPE_CONFIG); + } + if before.disposition != after.disposition { + what_changed.push(DISPOSITION); + } + if before.config.underlay_address != after.config.underlay_address { + what_changed.push(UNDERLAY_IP); + } + debug_assert!( + !what_changed.is_empty(), + "at least something should have changed:\n\ + before = {before:#?}\n\ + after = {after:#?}" + ); + + let record = vec![ + format!("{ADDED_PREFIX}{ZONE_INDENT}"), + // First two columns of data are skipped over since they're + // always the same (verified at diff construction time). + format!(" {SUB_NOT_LAST}"), + "".to_string(), + after.disposition.to_string(), + after.config.underlay_address.to_string(), + ]; + section.push_record(record); + + section.push_spanned_row(format!( + "{MODIFIED_PREFIX}{ZONE_INDENT} \ + {SUB_LAST} changed: {}", + what_changed.join(", "), + )); + } -impl<'diff, 'a> OmicronZonesDiffDisplay<'diff, 'a> { - #[inline] - fn new(diff: &'diff OmicronZonesDiff<'a>) -> Self { - Self { diff } + #[derive(Copy, Clone, Debug)] + enum WholeSledKind { + Removed, + Added, + Unchanged, } - fn print_whole_sled( - &self, - f: &mut fmt::Formatter<'_>, - prefix: char, - label: &str, - bbsledzones: &BlueprintZonesConfig, - sled_id: Uuid, - ) -> fmt::Result { - writeln!(f, "{} sled {} ({})", prefix, sled_id, label)?; - writeln!( - f, - "{} zone config generation {}", - prefix, bbsledzones.generation - )?; - for z in &bbsledzones.zones { - writeln!(f, "{prefix} {} ({label})", z.display())?; + impl WholeSledKind { + fn prefix(self) -> char { + match self { + WholeSledKind::Removed => REMOVED_PREFIX, + WholeSledKind::Added => ADDED_PREFIX, + WholeSledKind::Unchanged => UNCHANGED_PREFIX, + } } - Ok(()) + fn status(self) -> Option<&'static str> { + match self { + WholeSledKind::Removed => Some(REMOVED), + WholeSledKind::Added => Some(ADDED), + WholeSledKind::Unchanged => None, + } + } } -} -impl<'diff, 'a> fmt::Display for OmicronZonesDiffDisplay<'diff, 'a> { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - let diff = self.diff; - writeln!(f, "diff {} {}", diff.before_label, diff.after_label)?; - writeln!(f, "--- {}", diff.before_label)?; - writeln!(f, "+++ {}", diff.after_label)?; + // Apply settings for a table which has top-to-bottom rows, and a first + // column with indents. + fn apply_linear_table_settings(table: &mut Table) { + table.with(Style::empty()).with(Padding::zero()).with( + Modify::new(Columns::single(1)) + // Add an padding on the right of the label column to make the + // table visually distinctive. + .with(Padding::new(0, 2, 0, 0)), + ); + } - for (sled_id, sled_zones) in diff.sleds_removed() { - self.print_whole_sled(f, '-', "removed", sled_zones, sled_id)?; - } + // --- + // Heading and other definitions + // --- - for (sled_id, sled_changes) in diff.sleds_in_common() { - // Print a line about the sled itself and zone config generation, - // regardless of whether anything has changed. 
- writeln!(f, " sled {}", sled_id)?; - if sled_changes.generation_before != sled_changes.generation_after { - writeln!( - f, - "- zone config generation {}", - sled_changes.generation_before - )?; - writeln!( - f, - "+ zone config generation {}", - sled_changes.generation_after - )?; - } else { - writeln!( - f, - " zone config generation {}", - sled_changes.generation_before - )?; - } + // This aligns the heading with the first column of actual text. + const H1_INDENT: &str = " "; + const SLED_HEAD_INDENT: &str = " "; + const SLED_INDENT: &str = " "; + const ZONE_HEAD_INDENT: &str = " "; + // Due to somewhat mysterious reasons with how padding works with tabled, + // this needs to be 3 columns wide rather than 4. + const ZONE_INDENT: &str = " "; + const METADATA_INDENT: &str = " "; + const METADATA_DIFF_INDENT: &str = " "; + + const ADDED_PREFIX: char = '+'; + const REMOVED_PREFIX: char = '-'; + const MODIFIED_PREFIX: char = '*'; + const UNCHANGED_PREFIX: char = ' '; + const WARNING_PREFIX: char = '!'; + + const ARROW: &str = "->"; + const SUB_NOT_LAST: &str = "├─"; + const SUB_LAST: &str = "└─"; + + const ZONE_TYPE: &str = "zone type"; + const ZONE_ID: &str = "zone ID"; + const DISPOSITION: &str = "disposition"; + const UNDERLAY_IP: &str = "underlay IP"; + const ZONE_TYPE_CONFIG: &str = "zone type config"; + const STATUS: &str = "status"; + const REMOVED_SLEDS_HEADING: &str = "REMOVED SLEDS"; + const MODIFIED_SLEDS_HEADING: &str = "MODIFIED SLEDS"; + const UNCHANGED_SLEDS_HEADING: &str = "UNCHANGED SLEDS"; + const ADDED_SLEDS_HEADING: &str = "ADDED SLEDS"; + const REMOVED: &str = "removed"; + const ADDED: &str = "added"; + const MODIFIED: &str = "modified"; + + const METADATA_HEADING: &str = "METADATA"; + const CREATED_BY: &str = "created by"; + const CREATED_AT: &str = "created at"; + const INTERNAL_DNS_VERSION: &str = "internal DNS version"; + const EXTERNAL_DNS_VERSION: &str = "external DNS version"; + const COMMENT: &str = "comment"; + + const UNCHANGED_PARENS: &str = "(unchanged)"; + const NO_ZONES_PARENS: &str = "(no zones)"; + const NONE_PARENS: &str = "(none)"; + const NOT_PRESENT_IN_COLLECTION_PARENS: &str = + "(not present in collection)"; + + fn header_row() -> Vec { + vec![ + // First column is so that the header border aligns with the ZONE + // TABLE section header. + SLED_INDENT.to_string(), + ZONE_TYPE.to_string(), + ZONE_ID.to_string(), + DISPOSITION.to_string(), + UNDERLAY_IP.to_string(), + ] + } - for zone in sled_changes.zones_removed() { - writeln!(f, "- {} (removed)", zone.display())?; - } + fn diff_header_row() -> Vec { + vec![ + // First column is so that the header border aligns with the ZONE + // TABLE section header. 
+ SLED_HEAD_INDENT.to_string(), + ZONE_TYPE.to_string(), + ZONE_ID.to_string(), + DISPOSITION.to_string(), + UNDERLAY_IP.to_string(), + STATUS.to_string(), + ] + } - for zone_changes in sled_changes.zones_in_common() { - if zone_changes.config_changed() { - writeln!( - f, - "- {} (changed)", - zone_changes.zone_before.display(), - )?; - writeln!( - f, - "+ {} (changed)", - zone_changes.zone_after.display(), - )?; - } else if zone_changes.disposition_changed() { - writeln!( - f, - "- {} (disposition changed)", - zone_changes.zone_before.display(), - )?; - writeln!( - f, - "+ {} (disposition changed)", - zone_changes.zone_after.display(), - )?; - } else { - writeln!( - f, - " {} (unchanged)", - zone_changes.zone_before.display(), - )?; - } - } + pub(super) fn metadata_heading() -> String { + format!("{METADATA_HEADING}:") + } - for zone in sled_changes.zones_added() { - writeln!(f, "+ {} (added)", zone.display())?; - } - } + pub(super) fn metadata_diff_heading() -> String { + format!("{H1_INDENT}{METADATA_HEADING}:") + } - for (sled_id, sled_zones) in diff.sleds_added() { - self.print_whole_sled(f, '+', "added", sled_zones, sled_id)?; - } + fn sleds_heading(prefix: char, heading: &'static str) -> String { + format!("{prefix}{SLED_HEAD_INDENT}{heading}:") + } - Ok(()) + fn removed_sleds_heading() -> String { + sleds_heading(UNCHANGED_PREFIX, REMOVED_SLEDS_HEADING) } -} -/// Encapsulates Reconfigurator state -/// -/// This serialized from is intended for saving state from hand-constructed or -/// real, deployed systems and loading it back into a simulator or test suite -/// -/// **This format is not stable. It may change at any time without -/// backwards-compatibility guarantees.** -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct UnstableReconfiguratorState { - pub policy: Policy, - pub collections: Vec, - pub blueprints: Vec, - pub internal_dns: BTreeMap, - pub external_dns: BTreeMap, - pub silo_names: Vec, - pub external_dns_zone_names: Vec, + fn added_sleds_heading() -> String { + sleds_heading(UNCHANGED_PREFIX, ADDED_SLEDS_HEADING) + } + + fn modified_sleds_heading() -> String { + sleds_heading(UNCHANGED_PREFIX, MODIFIED_SLEDS_HEADING) + } + + fn unchanged_sleds_heading() -> String { + sleds_heading(UNCHANGED_PREFIX, UNCHANGED_SLEDS_HEADING) + } + + fn metadata_table_internal_dns() -> String { + linear_table_label(&INTERNAL_DNS_VERSION) + } + + fn metadata_table_external_dns() -> String { + linear_table_label(&EXTERNAL_DNS_VERSION) + } + + fn linear_table_label(value: &dyn fmt::Display) -> String { + format!("{value}:") + } + + fn linear_table_modified( + before: &dyn fmt::Display, + after: &dyn fmt::Display, + ) -> String { + format!("{before} {ARROW} {after}") + } + + fn linear_table_unchanged(value: &dyn fmt::Display) -> String { + format!("{value} {UNCHANGED_PARENS}") + } } diff --git a/nexus/types/src/lib.rs b/nexus/types/src/lib.rs index 494573e834..b6286c3f64 100644 --- a/nexus/types/src/lib.rs +++ b/nexus/types/src/lib.rs @@ -34,3 +34,4 @@ pub mod external_api; pub mod identity; pub mod internal_api; pub mod inventory; +mod sectioned_table; diff --git a/nexus/types/src/sectioned_table.rs b/nexus/types/src/sectioned_table.rs new file mode 100644 index 0000000000..addb4c876e --- /dev/null +++ b/nexus/types/src/sectioned_table.rs @@ -0,0 +1,357 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Support for tables with builtin sections. +//! +//! This could live in its own crate (within omicron, or even on crates.io), +//! but is here for now. + +use std::collections::HashSet; +use std::iter; + +use tabled::builder::Builder; +use tabled::grid::config::Border; +use tabled::settings::object::Columns; +use tabled::settings::object::Object; +use tabled::settings::object::Rows; +use tabled::settings::span::ColumnSpan; +use tabled::settings::Modify; +use tabled::settings::Padding; +use tabled::settings::Style; +use tabled::Table; + +/// A sectioned table. +/// +/// A sectioned table allows sections and subsections to be defined, with each +/// section having a title and a list of rows in that section. The section +/// headers and other rows can break standard table conventions. +/// +/// There are two kinds of special rows: +/// +/// 1. Headings: rows that span all columns. +/// 2. Spanned rows: also rows that span all columns, but not as headings. +/// +/// This builder does not currently automatically indent sections or records -- +/// that can be done in the future, though it has to be done with some care. +#[derive(Debug)] +pub(crate) struct StBuilder { + builder: Builder, + // Rows that are marked off with ---- on both sides. + header_rows: Vec, + // Heading rows that span all columns. + headings: Vec<(HeadingSpacing, usize)>, + // Other rows that span all columns. + spanned_rows: Vec, +} + +impl StBuilder { + pub(crate) fn new() -> Self { + let builder = Builder::new(); + + Self { + builder, + header_rows: Vec::new(), + headings: Vec::new(), + spanned_rows: Vec::new(), + } + } + + /// Adds a header row to the table. + /// + /// This row contains column titles, along with *two* initial columns of + /// padding. The border will extend to the first column but not the second + /// one. + pub(crate) fn push_header_row(&mut self, row: Vec) { + self.header_rows.push(self.builder.count_records()); + self.push_record(row); + } + + /// Adds a record to the table. + pub(crate) fn push_record(&mut self, row: Vec) { + self.builder.push_record(row); + } + + /// Makes a new section of the table. + /// + /// This section will not be added to the table unless at least one row is + /// added to it, either directly or via nested sections. + pub(crate) fn make_section( + &mut self, + spacing: SectionSpacing, + heading: String, + cb: impl FnOnce(&mut StSectionBuilder), + ) { + let mut section = StSectionBuilder::from_builder( + self, + spacing.resolve(self.headings.is_empty()), + heading, + ); + cb(&mut section); + section.finish_with_root(self); + } + + /// Does the final build to produce a [`Table`]. + pub(crate) fn build(mut self) -> Table { + // Insert a column between 0 and 1 to enable header borders to be + // properly aligned with the rest of the text. + self.builder.insert_column( + 1, + iter::repeat("").take(self.builder.count_records()), + ); + + let mut table = self.builder.build(); + table + .with(Style::blank()) + .with( + // Columns 0 and 1 (indent/gutter) should not have any border + // and padding. + Modify::new(Columns::new(0..=1)) + .with(Border::empty()) + .with(Padding::zero()), + ) + .with( + Modify::new(Columns::single(2)) + // Column 2 (first column of actual data) should not have + // left padding. + .with(Padding::new(0, 1, 0, 0)), + ) + .with( + Modify::new(Columns::last()) + // Rightmost column should have no border and padding. 
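            // Editor's illustration (not part of this change): a caller drives
            // this builder roughly the way the blueprint tables do, e.g.
            //
            //     let mut builder = StBuilder::new();
            //     builder.push_header_row(vec![
            //         "  ".to_string(),          // indent column
            //         "zone type".to_string(),
            //         "zone ID".to_string(),
            //     ]);
            //     builder.make_section(
            //         SectionSpacing::Always,
            //         "sled <sled-id>: zones at generation 2".to_string(),
            //         |section| {
            //             section.push_record(vec![
            //                 "    ".to_string(),
            //                 "crucible".to_string(),
            //                 "<zone-id>".to_string(),
            //             ]);
            //         },
            //     );
            //     let table = builder.build();
            //
            // The headings and record values are made up; the call shapes
            // mirror `table_display` in deployment.rs.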
+ .with(Border::empty()) + .with(Padding::zero()), + ); + apply_normal_row_settings( + &mut table, + self.header_rows + .iter() + .copied() + .chain(self.headings.iter().map(|(_, i)| *i)) + .chain(self.spanned_rows.iter().copied()) + .collect(), + ); + apply_header_row_settings(&mut table, &self.header_rows); + apply_heading_settings(&mut table, &self.headings); + apply_spanned_row_settings(&mut table, &self.spanned_rows); + + table + } +} + +/// A part of a sectioned table. +/// +/// Created by [`StBuilder::make_section`] or +/// [`StNestedBuilder::make_subsection`]. +#[derive(Debug)] +pub(crate) struct StSectionBuilder { + start_index: usize, + spacing: HeadingSpacing, + heading: String, + rows: Vec>, + // Indexes for special rows, stored as absolute indexes wrt the overall + // zone table (i.e. start_index + 1 + index in rows). + nested_headings: Vec<(HeadingSpacing, usize)>, + spanned_rows: Vec, +} + +impl StSectionBuilder { + fn from_builder( + builder: &StBuilder, + spacing: HeadingSpacing, + heading: String, + ) -> Self { + let start_index = builder.builder.count_records(); + Self { + start_index, + spacing, + heading, + rows: Vec::new(), + nested_headings: Vec::new(), + spanned_rows: Vec::new(), + } + } + + pub(crate) fn is_empty(&self) -> bool { + self.rows.is_empty() + } + + pub(crate) fn push_record(&mut self, row: Vec) { + self.rows.push(row); + } + + pub(crate) fn push_spanned_row(&mut self, row: String) { + self.spanned_rows.push(self.next_row()); + self.rows.push(vec![row]); + } + + pub(crate) fn push_nested_heading( + &mut self, + spacing: SectionSpacing, + heading: String, + ) { + self.nested_headings.push(( + spacing.resolve(self.nested_headings.is_empty()), + self.next_row(), + )); + self.rows.push(vec![heading]); + } + + /// Makes a new subsection of this section. + /// + /// This subsection will not be added to the table unless at least one row + /// is added to it, either directly or via nested sections. + pub(crate) fn make_subsection( + &mut self, + spacing: SectionSpacing, + heading: String, + cb: impl FnOnce(&mut Self), + ) { + let mut subsection = Self { + start_index: self.next_row(), + spacing: spacing.resolve(self.nested_headings.is_empty()), + heading, + rows: Vec::new(), + nested_headings: Vec::new(), + spanned_rows: Vec::new(), + }; + cb(&mut subsection); + subsection.finish_with_parent(self); + } + + fn next_row(&self) -> usize { + // +1 to account for the heading row. + self.start_index + 1 + self.rows.len() + } + + fn finish_with_root(self, root: &mut StBuilder) { + if !self.rows.is_empty() { + // Push all the indexes. + root.headings.push((self.spacing, self.start_index)); + root.headings.extend(self.nested_headings); + root.spanned_rows.extend(self.spanned_rows); + + // Push all the rows. + root.push_record(vec![self.heading]); + for row in self.rows { + root.push_record(row); + } + } + } + + fn finish_with_parent(self, parent: &mut StSectionBuilder) { + if !self.rows.is_empty() { + // Push all the indexes. + parent.nested_headings.push((self.spacing, self.start_index)); + parent.nested_headings.extend(self.nested_headings); + parent.spanned_rows.extend(self.spanned_rows); + + // Push all the rows. + parent.rows.push(vec![self.heading]); + parent.rows.extend(self.rows); + } + } +} + +/// Spacing for sections. +#[derive(Copy, Clone, Debug)] +pub(crate) enum SectionSpacing { + /// Always add a line of spacing above the section heading. + /// + /// There will always be one row of padding above the heading. 
+ Always, + + /// Only add a line of spacing if this isn't the first heading in the + /// series. + IfNotFirst, + + /// Do not add a line of spacing above the heading. + Never, +} + +impl SectionSpacing { + fn resolve(self, is_empty: bool) -> HeadingSpacing { + match (self, is_empty) { + (SectionSpacing::Always, _) => HeadingSpacing::Yes, + (SectionSpacing::IfNotFirst, true) => HeadingSpacing::No, + (SectionSpacing::IfNotFirst, false) => HeadingSpacing::Yes, + (SectionSpacing::Never, _) => HeadingSpacing::No, + } + } +} + +/// Spacing for headings -- a resolved form of [`SectionSpacing`]. +#[derive(Copy, Clone, Debug)] +enum HeadingSpacing { + /// Add a line of padding above the heading. + Yes, + + /// Do not add a line of padding above the heading. + No, +} + +fn apply_normal_row_settings(table: &mut Table, special_rows: HashSet) { + for row in 0..table.count_rows() { + if special_rows.contains(&row) { + continue; + } + + table.with( + Modify::new((row, 0)) + // Adjust the first column to span 2 (the extra indent). + .with(ColumnSpan::new(2)), + ); + } +} + +fn apply_header_row_settings(table: &mut Table, header_rows: &[usize]) { + for &hr in header_rows { + table.with( + Modify::new(Rows::single(hr).intersect(Columns::new(1..))) + // Column 1 onwards (everything after the initial indent) have + // borders. + .with(Border::new( + // top/bottom + Some('-'), + Some('-'), + // no left/right + None, + None, + // corners + Some('-'), + Some('-'), + Some('-'), + Some('-'), + )), + ); + } +} + +fn apply_heading_settings( + table: &mut Table, + headings: &[(HeadingSpacing, usize)], +) { + for &(kind, h) in headings { + let padding = match kind { + HeadingSpacing::Yes => Padding::new(0, 0, 1, 0), + HeadingSpacing::No => Padding::new(0, 0, 0, 0), + }; + + table.with( + Modify::new((h, 0)) + // Adjust each heading row to span the whole row. + .with(ColumnSpan::max()) + .with(padding), + ); + } +} + +fn apply_spanned_row_settings(table: &mut Table, spanned_rows: &[usize]) { + for &sr in spanned_rows { + table.with( + Modify::new((sr, 0)) + // Adjust each spanned row to span the whole row. + .with(ColumnSpan::max()), + ); + } +} From 4eb81539ddad077116afe04124b96da9876d57b3 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 28 Mar 2024 19:40:10 -0700 Subject: [PATCH 012/334] Expose Sled Agent API for "control plane disk management", use it (#5172) # Overview ## Virtual Environment Changes - Acting on Disks, not Zpools - Previously, sled agent could operate on "user-supplied zpools", which were created by `./tools/virtual_hardware.sh` - Now, in a world where Nexus has more control over zpool allocation, the configuration can supply "virtual devices" instead of "zpools", to give RSS/Nexus control over "when zpools actually get placed on these devices". - Impact: - `sled-agent/src/config.rs` - `smf/sled-agent/non-gimlet/config.toml` - `tools/virtual_hardware.sh` ## Sled Agent Changes - HTTP API - The Sled Agent exposes an API to "set" and "get" the "control plane physical disks" specified by Nexus. The set of control plane physical disks (usable U.2s) are stored into a ledger on the M.2s (as `omicron-physical-disks.json`). The set of control plane physical disks also determines "which disks are available to the rest of the sled agent". - StorageManager - **Before**: When physical U.2 disks are detected by the Sled Agent, they are "auto-formatted if empty", and we notify Nexus about them. This "upserts" them into the DB, so they are basically automatically adopted into the control plane. 
- **After**: As we've discussed on RFD 457, we want to get to a world where physical U.2 disks are **detected** by Sled Agent, but not **used** until RSS/Nexus explicitly tells the Sled Agent to "use this sled as part of the control plane". This set of "in-use control plane disks" is stored on a "ledger" file in the M.2. - **Transition**: On deployed systems, we need to boot up to Nexus, even though we don't have a ledger of control plane disks. Within the implementation of `StorageManager::key_manager_ready`, we implement a workaround: if we detect a system with no ledger, but with zpools, we'll use that set of zpools unconditionally until told otherwise. This is a short-term workaround to migrate existing systems, but can be removed when deployed racks reliably have ledgers for control plane disks. - StorageManagerTestHarness - In an effort to reduce "test fakes" and replace them with real storage, `StorageManagerTestHarness` provides testing utilities for spinning up vdevs, formatting them with zpools, and managing them. This helps us avoid a fair bit of bifurcation for "test-only synthetic disks" vs "real disks", though it does mean many of our tests in the sled-agent are now 'illumos-only'. ## RSS Changes - RSS is now responsible for provisioning "control plane disks and zpools" during initial bootstrapping - RSS informs Nexus about the allocation decisions it makes via the RSS handoff ## Nexus Changes - Nexus exposes a smaller API (no notification of "disk add/remove, zpools add/remove"). It receives a handoff from RSS, and will later be in charge of provisioning decisions based on inventory. - Dynamically adding/removing disks/zpools after RSS will be appearing in a subsequent PR. --------- Co-authored-by: Andrew J. Stone --- .github/buildomat/jobs/deploy.sh | 13 +- Cargo.lock | 5 + Cargo.toml | 1 + clients/sled-agent-client/src/lib.rs | 11 +- common/src/api/external/mod.rs | 1 + common/src/ledger.rs | 5 +- illumos-utils/Cargo.toml | 4 +- illumos-utils/src/zfs.rs | 61 +- illumos-utils/src/zpool.rs | 17 +- installinator/src/hardware.rs | 13 +- installinator/src/write.rs | 1 + key-manager/src/lib.rs | 2 +- nexus/db-model/src/physical_disk.rs | 15 +- nexus/db-queries/src/authz/api_resources.rs | 2 +- .../src/authz/policy_test/resources.rs | 6 +- nexus/db-queries/src/db/datastore/dataset.rs | 5 +- nexus/db-queries/src/db/datastore/mod.rs | 17 +- .../src/db/datastore/physical_disk.rs | 39 +- nexus/db-queries/src/db/datastore/rack.rs | 46 +- nexus/db-queries/src/db/datastore/zpool.rs | 21 +- nexus/db-queries/src/db/lookup.rs | 20 +- nexus/db-queries/tests/output/authz-roles.out | 2 +- nexus/inventory/src/examples.rs | 8 +- .../reconfigurator/execution/src/datasets.rs | 4 +- nexus/src/app/rack.rs | 51 +- nexus/src/app/sled.rs | 69 +- nexus/src/internal_api/http_entrypoints.rs | 81 +- nexus/src/lib.rs | 28 +- nexus/test-interface/src/lib.rs | 14 +- nexus/test-utils/src/lib.rs | 2 + nexus/test-utils/src/resource_helpers.rs | 109 +- nexus/tests/integration_tests/mod.rs | 1 - nexus/tests/integration_tests/sleds.rs | 46 +- nexus/tests/integration_tests/switches.rs | 45 +- nexus/tests/integration_tests/zpools.rs | 128 -- nexus/types/src/internal_api/params.rs | 39 +- nexus/types/src/inventory.rs | 2 +- openapi/nexus-internal.json | 198 +- openapi/sled-agent.json | 219 +++ schema/omicron-physical-disks.json | 74 + schema/rss-service-plan-v3.json | 848 +++++++++ sled-agent/Cargo.toml | 2 +- sled-agent/src/bootstrap/bootstore_setup.rs | 16 +- sled-agent/src/bootstrap/pre_server.rs | 4 - 
sled-agent/src/bootstrap/server.rs | 27 +- sled-agent/src/config.rs | 5 +- sled-agent/src/dump_setup.rs | 101 +- sled-agent/src/hardware_monitor.rs | 27 +- sled-agent/src/http_entrypoints.rs | 36 +- sled-agent/src/instance.rs | 245 +-- sled-agent/src/instance_manager.rs | 2 +- sled-agent/src/long_running_tasks.rs | 46 +- sled-agent/src/nexus.rs | 10 - sled-agent/src/params.rs | 5 + sled-agent/src/probe_manager.rs | 2 +- sled-agent/src/rack_setup/plan/service.rs | 154 +- sled-agent/src/rack_setup/plan/sled.rs | 4 +- sled-agent/src/rack_setup/service.rs | 277 ++- sled-agent/src/server.rs | 4 - sled-agent/src/services.rs | 163 +- sled-agent/src/sim/http_entrypoints.rs | 30 +- sled-agent/src/sim/server.rs | 26 +- sled-agent/src/sim/sled_agent.rs | 49 +- sled-agent/src/sim/storage.rs | 159 +- sled-agent/src/sled_agent.rs | 77 +- sled-agent/src/storage_monitor.rs | 344 +--- sled-agent/src/vmm_reservoir.rs | 3 +- sled-agent/src/zone_bundle.rs | 217 +-- sled-hardware/src/disk.rs | 224 ++- sled-hardware/src/illumos/partitions.rs | 32 +- sled-hardware/src/non_illumos/mod.rs | 1 + sled-storage/Cargo.toml | 8 +- sled-storage/src/config.rs | 39 + sled-storage/src/dataset.rs | 22 +- sled-storage/src/disk.rs | 261 ++- sled-storage/src/error.rs | 48 +- sled-storage/src/keyfile.rs | 3 +- sled-storage/src/lib.rs | 3 + sled-storage/src/manager.rs | 1675 ++++++++++------- sled-storage/src/manager_test_harness.rs | 393 ++++ sled-storage/src/resources.rs | 578 ++++-- smf/sled-agent/non-gimlet/config.toml | 40 +- tools/create_gimlet_virtual_hardware.sh | 2 +- tools/create_scrimlet_virtual_hardware.sh | 2 +- tools/create_virtual_hardware.sh | 2 +- tools/virtual_hardware.sh | 28 +- 86 files changed, 5037 insertions(+), 2632 deletions(-) delete mode 100644 nexus/tests/integration_tests/zpools.rs create mode 100644 schema/omicron-physical-disks.json create mode 100644 schema/rss-service-plan-v3.json create mode 100644 sled-storage/src/config.rs create mode 100644 sled-storage/src/manager_test_harness.rs diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 6574ac839c..9f0629d4c1 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -205,7 +205,7 @@ PXA_END="$EXTRA_IP_END" export GATEWAY_IP GATEWAY_MAC PXA_START PXA_END pfexec zpool create -f scratch c1t1d0 c2t1d0 -ZPOOL_VDEV_DIR=/scratch ptime -m pfexec ./tools/create_virtual_hardware.sh +VDEV_DIR=/scratch ptime -m pfexec ./tools/create_virtual_hardware.sh # # Generate a self-signed certificate to use as the initial TLS certificate for @@ -214,7 +214,12 @@ ZPOOL_VDEV_DIR=/scratch ptime -m pfexec ./tools/create_virtual_hardware.sh # real system, the certificate would come from the customer during initial rack # setup on the technician port. 
# -tar xf out/omicron-sled-agent.tar pkg/config-rss.toml +tar xf out/omicron-sled-agent.tar pkg/config-rss.toml pkg/config.toml + +# Update the vdevs to point to where we've created them +sed -E -i~ "s/(m2|u2)(.*\.vdev)/\/scratch\/\1\2/g" pkg/config.toml +diff -u pkg/config.toml{~,} || true + SILO_NAME="$(sed -n 's/silo_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" EXTERNAL_DNS_DOMAIN="$(sed -n 's/external_dns_zone_name = "\(.*\)"/\1/p' pkg/config-rss.toml)" @@ -241,8 +246,8 @@ addresses = \\[\"$UPLINK_IP/24\"\\] " pkg/config-rss.toml diff -u pkg/config-rss.toml{~,} || true -tar rvf out/omicron-sled-agent.tar pkg/config-rss.toml -rm -f pkg/config-rss.toml* +tar rvf out/omicron-sled-agent.tar pkg/config-rss.toml pkg/config.toml +rm -f pkg/config-rss.toml* pkg/config.toml* # # By default, OpenSSL creates self-signed certificates with "CA:true". The TLS diff --git a/Cargo.lock b/Cargo.lock index e1d684da52..d1df69b608 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3565,6 +3565,7 @@ dependencies = [ "tokio", "toml 0.8.10", "uuid 1.7.0", + "whoami", "zone 0.3.0", ] @@ -8738,11 +8739,15 @@ dependencies = [ name = "sled-storage" version = "0.1.0" dependencies = [ + "anyhow", "async-trait", "camino", "camino-tempfile", "cfg-if", + "debug-ignore", "derive_more", + "expectorate", + "futures", "glob", "illumos-utils", "key-manager", diff --git a/Cargo.toml b/Cargo.toml index 0d91aa076b..a384c8bed6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -444,6 +444,7 @@ update-engine = { path = "update-engine" } usdt = "0.5.0" uuid = { version = "1.7.0", features = ["serde", "v4"] } walkdir = "2.4" +whoami = "1.5" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } wicketd-client = { path = "clients/wicketd-client" } diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 2901226d16..d500bdca3a 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -35,6 +35,7 @@ progenitor::generate_api!( // replace directives below? 
replace = { ByteCount = omicron_common::api::external::ByteCount, + DiskIdentity = omicron_common::disk::DiskIdentity, Generation = omicron_common::api::external::Generation, MacAddr = omicron_common::api::external::MacAddr, Name = omicron_common::api::external::Name, @@ -230,16 +231,6 @@ impl omicron_common::api::external::ClientError for types::Error { } } -impl From for omicron_common::disk::DiskIdentity { - fn from(identity: types::DiskIdentity) -> Self { - Self { - vendor: identity.vendor, - serial: identity.serial, - model: identity.model, - } - } -} - impl From for types::InstanceRuntimeState { diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 324231f469..4eecd74a04 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -881,6 +881,7 @@ pub enum ResourceType { ServiceNetworkInterface, Sled, SledInstance, + SledLedger, Switch, SagaDbg, Snapshot, diff --git a/common/src/ledger.rs b/common/src/ledger.rs index 71d03fa8ee..ed5f0b57cf 100644 --- a/common/src/ledger.rs +++ b/common/src/ledger.rs @@ -7,7 +7,7 @@ use async_trait::async_trait; use camino::{Utf8Path, Utf8PathBuf}; use serde::{de::DeserializeOwned, Serialize}; -use slog::{debug, info, warn, Logger}; +use slog::{debug, error, info, warn, Logger}; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -127,7 +127,7 @@ impl Ledger { let mut one_successful_write = false; for path in self.paths.iter() { if let Err(e) = self.atomic_write(&path).await { - warn!(self.log, "Failed to write to {}: {e}", path); + warn!(self.log, "Failed to write ledger"; "path" => ?path, "err" => ?e); failed_paths.push((path.to_path_buf(), e)); } else { one_successful_write = true; @@ -135,6 +135,7 @@ impl Ledger { } if !one_successful_write { + error!(self.log, "No successful writes to ledger"); return Err(Error::FailedToWrite { failed_paths }); } Ok(()) diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index e4a99095fd..39b24f7ccd 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -28,6 +28,7 @@ smf.workspace = true thiserror.workspace = true tokio.workspace = true uuid.workspace = true +whoami.workspace = true zone.workspace = true # only enabled via the `testing` feature @@ -46,6 +47,3 @@ toml.workspace = true [features] # Enable to generate MockZones testing = ["mockall"] -# Useful for tests that want real functionality and ability to run without -# pfexec -tmp_keypath = [] diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index c111955761..3dbf018ecc 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -5,7 +5,7 @@ //! Utilities for poking at ZFS. use crate::{execute, PFEXEC}; -use camino::Utf8PathBuf; +use camino::{Utf8Path, Utf8PathBuf}; use omicron_common::disk::DiskIdentity; use std::fmt; @@ -28,8 +28,6 @@ pub const ZFS: &str = "/usr/sbin/zfs"; /// the keys and recreate the files on demand when creating and mounting /// encrypted filesystems. We then zero them and unlink them. pub const KEYPATH_ROOT: &str = "/var/run/oxide/"; -// Use /tmp so we don't have to worry about running tests with pfexec -pub const TEST_KEYPATH_ROOT: &str = "/tmp"; /// Error returned by [`Zfs::list_datasets`]. 
#[derive(thiserror::Error, Debug)] @@ -168,27 +166,34 @@ impl fmt::Display for Keypath { } } -#[cfg(not(feature = "tmp_keypath"))] -impl From<&DiskIdentity> for Keypath { - fn from(id: &DiskIdentity) -> Self { - build_keypath(id, KEYPATH_ROOT) - } -} - -#[cfg(feature = "tmp_keypath")] -impl From<&DiskIdentity> for Keypath { - fn from(id: &DiskIdentity) -> Self { - build_keypath(id, TEST_KEYPATH_ROOT) +impl Keypath { + /// Constructs a Keypath for the specified disk within the supplied root + /// directory. + /// + /// By supplying "root", tests can override the location where these paths + /// are stored to non-global locations. + pub fn new>(id: &DiskIdentity, root: &P) -> Keypath { + let keypath_root = Utf8PathBuf::from(KEYPATH_ROOT); + let mut keypath = keypath_root.as_path(); + let keypath_directory = loop { + match keypath.strip_prefix("/") { + Ok(stripped) => keypath = stripped, + Err(_) => break root.as_ref().join(keypath), + } + }; + std::fs::create_dir_all(&keypath_directory) + .expect("Cannot ensure directory for keys"); + + let filename = format!( + "{}-{}-{}-zfs-aes-256-gcm.key", + id.vendor, id.serial, id.model + ); + let path: Utf8PathBuf = + [keypath_directory.as_str(), &filename].iter().collect(); + Keypath(path) } } -fn build_keypath(id: &DiskIdentity, root: &str) -> Keypath { - let filename = - format!("{}-{}-{}-zfs-aes-256-gcm.key", id.vendor, id.serial, id.model); - let path: Utf8PathBuf = [root, &filename].iter().collect(); - Keypath(path) -} - #[derive(Debug)] pub struct EncryptionDetails { pub keypath: Keypath, @@ -332,6 +337,20 @@ impl Zfs { err: err.into(), })?; + // We ensure that the currently running process has the ability to + // act on the underlying mountpoint. + if !zoned { + let mut command = std::process::Command::new(PFEXEC); + let user = whoami::username(); + let mount = format!("{mountpoint}"); + let cmd = command.args(["chown", "-R", &user, &mount]); + execute(cmd).map_err(|err| EnsureFilesystemError { + name: name.to_string(), + mountpoint: mountpoint.clone(), + err: err.into(), + })?; + } + if let Some(SizeDetails { quota, compression }) = size_details { // Apply any quota and compression mode. Self::apply_properties(name, &mountpoint, quota, compression)?; diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index f2c395e22b..27d7e0d700 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -12,10 +12,12 @@ use std::fmt; use std::str::FromStr; use uuid::Uuid; -const ZPOOL_EXTERNAL_PREFIX: &str = "oxp_"; -const ZPOOL_INTERNAL_PREFIX: &str = "oxi_"; +pub const ZPOOL_EXTERNAL_PREFIX: &str = "oxp_"; +pub const ZPOOL_INTERNAL_PREFIX: &str = "oxi_"; const ZPOOL: &str = "/usr/sbin/zpool"; +pub const ZPOOL_MOUNTPOINT_ROOT: &str = "/"; + #[derive(thiserror::Error, Debug, PartialEq, Eq)] #[error("Failed to parse output: {0}")] pub struct ParseError(String); @@ -192,7 +194,7 @@ impl Zpool { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear(); cmd.env("LC_ALL", "C.UTF-8"); - cmd.arg(ZPOOL).arg("create"); + cmd.arg(ZPOOL).args(["create", "-o", "ashift=12"]); cmd.arg(&name.to_string()); cmd.arg(vdev); execute(&mut cmd).map_err(Error::from)?; @@ -374,9 +376,14 @@ impl ZpoolName { /// Returns a path to a dataset's mountpoint within the zpool. 
/// /// For example: oxp_(UUID) -> /pool/ext/(UUID)/(dataset) - pub fn dataset_mountpoint(&self, dataset: &str) -> Utf8PathBuf { + pub fn dataset_mountpoint( + &self, + root: &Utf8Path, + dataset: &str, + ) -> Utf8PathBuf { let mut path = Utf8PathBuf::new(); - path.push("/pool"); + path.push(root); + path.push("pool"); match self.kind { ZpoolKind::External => path.push("ext"), ZpoolKind::Internal => path.push("int"), diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index b037384cbe..90859e3754 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -9,6 +9,7 @@ use anyhow::Result; use sled_hardware::DiskVariant; use sled_hardware::HardwareManager; use sled_hardware::SledMode; +use sled_storage::config::MountConfig; use sled_storage::disk::Disk; use sled_storage::disk::RawDisk; use slog::info; @@ -49,9 +50,15 @@ impl Hardware { ); } DiskVariant::M2 => { - let disk = Disk::new(log, disk, None) - .await - .context("failed to instantiate Disk handle for M.2")?; + let disk = Disk::new( + log, + &MountConfig::default(), + disk, + None, + None, + ) + .await + .context("failed to instantiate Disk handle for M.2")?; m2_disks.push(disk); } } diff --git a/installinator/src/write.rs b/installinator/src/write.rs index 380595b4cd..c7710baff7 100644 --- a/installinator/src/write.rs +++ b/installinator/src/write.rs @@ -116,6 +116,7 @@ impl WriteDestination { let zpool_name = disk.zpool_name().clone(); let control_plane_dir = zpool_name.dataset_mountpoint( + illumos_utils::zpool::ZPOOL_MOUNTPOINT_ROOT.into(), sled_storage::dataset::INSTALL_DATASET, ); diff --git a/key-manager/src/lib.rs b/key-manager/src/lib.rs index 7ca3cfa3bb..13dd9543a8 100644 --- a/key-manager/src/lib.rs +++ b/key-manager/src/lib.rs @@ -102,7 +102,7 @@ enum StorageKeyRequest { /// the sled-agent starts. The `HardwareMonitor` gets the StorageKeyRequester /// from the bootstrap agent. If this changes, we should remove the `Clone` to /// limit who has access to the storage keys. -#[derive(Clone)] +#[derive(Debug, Clone)] pub struct StorageKeyRequester { tx: mpsc::Sender, } diff --git a/nexus/db-model/src/physical_disk.rs b/nexus/db-model/src/physical_disk.rs index 3628f7077f..3a011d0c72 100644 --- a/nexus/db-model/src/physical_disk.rs +++ b/nexus/db-model/src/physical_disk.rs @@ -29,6 +29,7 @@ pub struct PhysicalDisk { impl PhysicalDisk { pub fn new( + id: Uuid, vendor: String, serial: String, model: String, @@ -36,7 +37,7 @@ impl PhysicalDisk { sled_id: Uuid, ) -> Self { Self { - identity: PhysicalDiskIdentity::new(Uuid::new_v4()), + identity: PhysicalDiskIdentity::new(id), time_deleted: None, rcgen: Generation::new(), vendor, @@ -47,20 +48,10 @@ impl PhysicalDisk { } } - pub fn uuid(&self) -> Uuid { + pub fn id(&self) -> Uuid { self.identity.id } - // This is slightly gross, but: - // the `authz_resource` macro really expects that the "primary_key" - // for an object can be acquired by "id()". - // - // The PhysicalDisk object does actually have a separate convenience - // UUID, but may be looked by up vendor/serial/model too. - pub fn id(&self) -> (String, String, String) { - (self.vendor.clone(), self.serial.clone(), self.model.clone()) - } - pub fn time_deleted(&self) -> Option> { self.time_deleted } diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs index 70bc9ab2eb..69b883a8cf 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/db-queries/src/authz/api_resources.rs @@ -1060,7 +1060,7 @@ authz_resource! 
{ authz_resource! { name = "PhysicalDisk", parent = "Fleet", - primary_key = (String, String, String), + primary_key = Uuid, roles_allowed = false, polar_snippet = FleetChild, } diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/authz/policy_test/resources.rs index 96cefb3db4..bc30e77fac 100644 --- a/nexus/db-queries/src/authz/policy_test/resources.rs +++ b/nexus/db-queries/src/authz/policy_test/resources.rs @@ -102,10 +102,12 @@ pub async fn make_resources( make_services(&mut builder).await; + let physical_disk_id = + "c9f923f6-caf3-4c83-96f9-8ffe8c627dd2".parse().unwrap(); builder.new_resource(authz::PhysicalDisk::new( authz::FLEET, - ("vendor".to_string(), "serial".to_string(), "model".to_string()), - LookupType::ByCompositeId("vendor-serial-model".to_string()), + physical_disk_id, + LookupType::ById(physical_disk_id), )); let device_user_code = String::from("a-device-user-code"); diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs index 292f13354f..bfc4d61926 100644 --- a/nexus/db-queries/src/db/datastore/dataset.rs +++ b/nexus/db-queries/src/db/datastore/dataset.rs @@ -230,7 +230,10 @@ mod test { // Create a fake zpool that backs our fake datasets. let zpool_id = Uuid::new_v4(); let zpool = Zpool::new(zpool_id, sled_id, Uuid::new_v4()); - datastore.zpool_upsert(zpool).await.expect("failed to upsert zpool"); + datastore + .zpool_upsert(opctx, zpool) + .await + .expect("failed to upsert zpool"); // Inserting a new dataset should succeed. let dataset1 = datastore diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 13d6bfcc8d..a6ae108376 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -640,6 +640,7 @@ mod test { kind: PhysicalDiskKind, ) -> Uuid { let physical_disk = PhysicalDisk::new( + Uuid::new_v4(), TEST_VENDOR.into(), TEST_SERIAL.into(), TEST_MODEL.into(), @@ -650,17 +651,19 @@ mod test { .physical_disk_upsert(opctx, physical_disk.clone()) .await .expect("Failed to upsert physical disk"); - physical_disk.uuid() + physical_disk.id() } // Creates a test zpool, returns its UUID. async fn create_test_zpool( datastore: &DataStore, + opctx: &OpContext, sled_id: Uuid, physical_disk_id: Uuid, ) -> Uuid { let zpool_id = create_test_zpool_not_in_inventory( datastore, + opctx, sled_id, physical_disk_id, ) @@ -676,12 +679,13 @@ mod test { // However, this helper doesn't add the zpool to the inventory just yet. 
async fn create_test_zpool_not_in_inventory( datastore: &DataStore, + opctx: &OpContext, sled_id: Uuid, physical_disk_id: Uuid, ) -> Uuid { let zpool_id = Uuid::new_v4(); let zpool = Zpool::new(zpool_id, sled_id, physical_disk_id); - datastore.zpool_upsert(zpool).await.unwrap(); + datastore.zpool_upsert(opctx, zpool).await.unwrap(); zpool_id } @@ -856,6 +860,7 @@ mod test { .then(|disk| { let pool_id_future = create_test_zpool( &datastore, + &opctx, disk.sled_id, disk.disk_id, ); @@ -1232,6 +1237,7 @@ mod test { .then(|_| { create_test_zpool_not_in_inventory( &datastore, + &opctx, sled_id, physical_disk_id, ) @@ -1327,7 +1333,12 @@ mod test { let zpool_ids: Vec = stream::iter(0..REGION_REDUNDANCY_THRESHOLD - 1) .then(|_| { - create_test_zpool(&datastore, sled_id, physical_disk_id) + create_test_zpool( + &datastore, + &opctx, + sled_id, + physical_disk_id, + ) }) .collect() .await; diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 81fc14d1d7..b977c4dffe 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -42,6 +42,15 @@ impl DataStore { &self, opctx: &OpContext, disk: PhysicalDisk, + ) -> CreateResult { + let conn = &*self.pool_connection_authorized(&opctx).await?; + Self::physical_disk_upsert_on_connection(&conn, opctx, disk).await + } + + pub async fn physical_disk_upsert_on_connection( + conn: &async_bb8_diesel::Connection, + opctx: &OpContext, + disk: PhysicalDisk, ) -> CreateResult { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::physical_disk::dsl; @@ -60,9 +69,7 @@ impl DataStore { dsl::time_modified.eq(now), )), ) - .insert_and_get_result_async( - &*self.pool_connection_authorized(&opctx).await?, - ) + .insert_and_get_result_async(conn) .await .map_err(|e| match e { AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { @@ -203,6 +210,7 @@ mod test { // Insert a disk let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -213,7 +221,7 @@ mod test { .physical_disk_upsert(&opctx, disk.clone()) .await .expect("Failed first attempt at upserting disk"); - assert_eq!(disk.uuid(), first_observed_disk.uuid()); + assert_eq!(disk.id(), first_observed_disk.id()); assert_disks_equal_ignore_uuid(&disk, &first_observed_disk); // Observe the inserted disk @@ -223,11 +231,12 @@ mod test { .await .expect("Failed to list physical disks"); assert_eq!(disks.len(), 1); - assert_eq!(disk.uuid(), disks[0].uuid()); + assert_eq!(disk.id(), disks[0].id()); assert_disks_equal_ignore_uuid(&disk, &disks[0]); // Insert the same disk, with a different UUID primary key let disk_again = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -240,8 +249,8 @@ mod test { .expect("Failed second upsert of physical disk"); // This check is pretty important - note that we return the original // UUID, not the new one. 
- assert_ne!(disk_again.uuid(), second_observed_disk.uuid()); - assert_eq!(disk_again.id(), second_observed_disk.id()); + assert_eq!(disk.id(), second_observed_disk.id()); + assert_ne!(disk_again.id(), second_observed_disk.id()); assert_disks_equal_ignore_uuid(&disk_again, &second_observed_disk); assert!( first_observed_disk.time_modified() @@ -255,8 +264,8 @@ mod test { // We'll use the old primary key assert_eq!(disks.len(), 1); - assert_eq!(disk.uuid(), disks[0].uuid()); - assert_ne!(disk_again.uuid(), disks[0].uuid()); + assert_eq!(disk.id(), disks[0].id()); + assert_ne!(disk_again.id(), disks[0].id()); assert_disks_equal_ignore_uuid(&disk, &disks[0]); assert_disks_equal_ignore_uuid(&disk_again, &disks[0]); @@ -276,6 +285,7 @@ mod test { // Insert a disk let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -286,14 +296,14 @@ mod test { .physical_disk_upsert(&opctx, disk.clone()) .await .expect("Failed first attempt at upserting disk"); - assert_eq!(disk.uuid(), first_observed_disk.uuid()); + assert_eq!(disk.id(), first_observed_disk.id()); // Insert a disk with an identical UUID let second_observed_disk = datastore .physical_disk_upsert(&opctx, disk.clone()) .await .expect("Should have succeeded upserting disk"); - assert_eq!(disk.uuid(), second_observed_disk.uuid()); + assert_eq!(disk.id(), second_observed_disk.id()); assert!( first_observed_disk.time_modified() <= second_observed_disk.time_modified() @@ -326,6 +336,7 @@ mod test { // Insert a disk let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -339,6 +350,7 @@ mod test { // Insert a second disk let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Noxide"), String::from("456"), String::from("UnrealDisk"), @@ -371,6 +383,7 @@ mod test { // Insert a disk let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -439,6 +452,7 @@ mod test { // Insert a disk let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -485,6 +499,7 @@ mod test { // "Report the disk" from the second sled let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -530,6 +545,7 @@ mod test { // Insert a disk let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), @@ -554,6 +570,7 @@ mod test { // "Report the disk" from the second sled let disk = PhysicalDisk::new( + Uuid::new_v4(), String::from("Oxide"), String::from("123"), String::from("FakeDisk"), diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index e753a0cf09..09f635e0f3 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -23,6 +23,7 @@ use crate::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET; use crate::db::identity::Asset; use crate::db::model::Dataset; use crate::db::model::IncompleteExternalIp; +use crate::db::model::PhysicalDisk; use crate::db::model::Rack; use crate::db::model::Zpool; use crate::db::pagination::paginated; @@ -70,6 +71,8 @@ pub struct RackInit { pub rack_subnet: IpNetwork, pub blueprint: Blueprint, pub services: Vec, + pub physical_disks: Vec, + pub zpools: Vec, pub datasets: Vec, pub service_ip_pool_ranges: Vec, pub internal_dns: InitialDnsGroup, @@ -90,6 +93,8 @@ enum RackInitError { 
BlueprintTargetSet(Error), ServiceInsert(Error), DatasetInsert { err: AsyncInsertError, zpool_id: Uuid }, + PhysicalDiskInsert(Error), + ZpoolInsert(Error), RackUpdate { err: DieselError, rack_id: Uuid }, DnsSerialization(Error), Silo(Error), @@ -126,6 +131,8 @@ impl From for Error { public_error_from_diesel(e, ErrorHandler::Server) } }, + RackInitError::PhysicalDiskInsert(err) => err, + RackInitError::ZpoolInsert(err) => err, RackInitError::ServiceInsert(err) => Error::internal_error( &format!("failed to insert Service record: {:#}", err), ), @@ -610,6 +617,8 @@ impl DataStore { let rack_id = rack_init.rack_id; let blueprint = rack_init.blueprint; let services = rack_init.services; + let physical_disks = rack_init.physical_disks; + let zpools = rack_init.zpools; let datasets = rack_init.datasets; let service_ip_pool_ranges = rack_init.service_ip_pool_ranges; @@ -640,7 +649,14 @@ impl DataStore { return Ok::<_, DieselError>(rack); } - // Otherwise, insert blueprint and datasets. + // Otherwise, insert: + // - Services + // - PhysicalDisks + // - Zpools + // - Datasets + // - A blueprint + // + // Which RSS has already allocated during bootstrapping. // Set up the IP pool for internal services. for range in service_ip_pool_ranges { @@ -713,12 +729,38 @@ impl DataStore { ) .await .map_err(|e| { + error!(log, "Failed to upsert physical disk"; "err" => ?e); err.set(e).unwrap(); DieselError::RollbackTransaction })?; } info!(log, "Inserted services"); + for physical_disk in physical_disks { + Self::physical_disk_upsert_on_connection(&conn, &opctx, physical_disk) + .await + .map_err(|e| { + error!(log, "Failed to upsert physical disk"; "err" => #%e); + err.set(RackInitError::PhysicalDiskInsert(e)) + .unwrap(); + DieselError::RollbackTransaction + })?; + } + + info!(log, "Inserted physical disks"); + + for zpool in zpools { + Self::zpool_upsert_on_connection(&conn, &opctx, zpool).await.map_err( + |e| { + error!(log, "Failed to upsert zpool"; "err" => #%e); + err.set(RackInitError::ZpoolInsert(e)).unwrap(); + DieselError::RollbackTransaction + }, + )?; + } + + info!(log, "Inserted zpools"); + for dataset in datasets { use db::schema::dataset::dsl; let zpool_id = dataset.pool_id; @@ -954,6 +996,8 @@ mod test { comment: "test suite".to_string(), }, services: vec![], + physical_disks: vec![], + zpools: vec![], datasets: vec![], service_ip_pool_ranges: vec![], internal_dns: InitialDnsGroup::new( diff --git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs index b894d5c509..0ab6bcf3af 100644 --- a/nexus/db-queries/src/db/datastore/zpool.rs +++ b/nexus/db-queries/src/db/datastore/zpool.rs @@ -32,8 +32,23 @@ use omicron_common::api::external::ResourceType; use uuid::Uuid; impl DataStore { + pub async fn zpool_upsert( + &self, + opctx: &OpContext, + zpool: Zpool, + ) -> CreateResult { + let conn = &*self.pool_connection_authorized(&opctx).await?; + Self::zpool_upsert_on_connection(&conn, opctx, zpool).await + } + /// Stores a new zpool in the database. 
- pub async fn zpool_upsert(&self, zpool: Zpool) -> CreateResult { + pub async fn zpool_upsert_on_connection( + conn: &async_bb8_diesel::Connection, + opctx: &OpContext, + zpool: Zpool, + ) -> CreateResult { + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + use db::schema::zpool::dsl; let sled_id = zpool.sled_id; @@ -48,9 +63,7 @@ impl DataStore { dsl::sled_id.eq(excluded(dsl::sled_id)), )), ) - .insert_and_get_result_async( - &*self.pool_connection_unauthorized().await?, - ) + .insert_and_get_result_async(conn) .await .map_err(|e| match e { AsyncInsertError::CollectionNotFound => Error::ObjectNotFound { diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index 380c9db140..487a68b517 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -364,18 +364,8 @@ impl<'a> LookupPath<'a> { } /// Select a resource of type PhysicalDisk, identified by its id - pub fn physical_disk( - self, - vendor: &str, - serial: &str, - model: &str, - ) -> PhysicalDisk<'a> { - PhysicalDisk::PrimaryKey( - Root { lookup_root: self }, - vendor.to_string(), - serial.to_string(), - model.to_string(), - ) + pub fn physical_disk(self, id: Uuid) -> PhysicalDisk<'a> { + PhysicalDisk::PrimaryKey(Root { lookup_root: self }, id) } pub fn silo_image_id(self, id: Uuid) -> SiloImage<'a> { @@ -836,11 +826,7 @@ lookup_resource! { children = [], lookup_by_name = false, soft_deletes = true, - primary_key_columns = [ - { column_name = "vendor", rust_type = String }, - { column_name = "serial", rust_type = String }, - { column_name = "model", rust_type = String } - ] + primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] } lookup_resource! { diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index ee55d775f0..0482cdfd2a 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -894,7 +894,7 @@ resource: Service id "7f7bb301-5dc9-41f1-ab29-d369f4835079" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! -resource: PhysicalDisk id "vendor-serial-model" +resource: PhysicalDisk id "c9f923f6-caf3-4c83-96f9-8ffe8c627dd2" USER Q R LC RP M MP CC D fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 5cc6b687d4..8af81d957d 100644 --- a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -276,7 +276,7 @@ pub fn representative() -> Representative { let disks = vec![ // Let's say we have one manufacturer for our M.2... sled_agent_client::types::InventoryDisk { - identity: sled_agent_client::types::DiskIdentity { + identity: omicron_common::disk::DiskIdentity { vendor: "macrohard".to_string(), model: "box".to_string(), serial: "XXIV".to_string(), @@ -286,7 +286,7 @@ pub fn representative() -> Representative { }, // ... 
and a couple different vendors for our U.2s sled_agent_client::types::InventoryDisk { - identity: sled_agent_client::types::DiskIdentity { + identity: omicron_common::disk::DiskIdentity { vendor: "memetendo".to_string(), model: "swatch".to_string(), serial: "0001".to_string(), @@ -295,7 +295,7 @@ pub fn representative() -> Representative { slot: 1, }, sled_agent_client::types::InventoryDisk { - identity: sled_agent_client::types::DiskIdentity { + identity: omicron_common::disk::DiskIdentity { vendor: "memetendo".to_string(), model: "swatch".to_string(), serial: "0002".to_string(), @@ -304,7 +304,7 @@ pub fn representative() -> Representative { slot: 2, }, sled_agent_client::types::InventoryDisk { - identity: sled_agent_client::types::DiskIdentity { + identity: omicron_common::disk::DiskIdentity { vendor: "tony".to_string(), model: "craystation".to_string(), serial: "5".to_string(), diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 1d08f3b294..361e23b7e6 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -202,7 +202,7 @@ mod tests { Uuid::new_v4(), // physical_disk_id ); datastore - .zpool_upsert(zpool) + .zpool_upsert(opctx, zpool) .await .expect("failed to upsert zpool"); } @@ -271,7 +271,7 @@ mod tests { Uuid::new_v4(), // physical_disk_id ); datastore - .zpool_upsert(zpool) + .zpool_upsert(opctx, zpool) .await .expect("failed to upsert zpool"); } diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 4a4a61142e..5b85acb929 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -87,7 +87,7 @@ impl super::Nexus { Ok(db_rack) } - /// Marks the rack as initialized with a set of services. + /// Marks the rack as initialized with information supplied by RSS. /// /// This function is a no-op if the rack has already been initialized. 
pub(crate) async fn rack_initialize( @@ -96,8 +96,37 @@ impl super::Nexus { rack_id: Uuid, request: RackInitializationRequest, ) -> Result<(), Error> { + let log = &opctx.log; + opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let physical_disks: Vec<_> = request + .physical_disks + .into_iter() + .map(|disk| { + db::model::PhysicalDisk::new( + disk.id, + disk.vendor, + disk.serial, + disk.model, + disk.variant.into(), + disk.sled_id, + ) + }) + .collect(); + + let zpools: Vec<_> = request + .zpools + .into_iter() + .map(|pool| { + db::model::Zpool::new( + pool.id, + pool.sled_id, + pool.physical_disk_id, + ) + }) + .collect(); + let datasets: Vec<_> = request .datasets .into_iter() @@ -224,10 +253,7 @@ impl super::Nexus { match request.external_port_count { ExternalPortDiscovery::Auto(switch_mgmt_addrs) => { use dpd_client::Client as DpdClient; - info!( - self.log, - "Using automatic external switchport discovery" - ); + info!(log, "Using automatic external switchport discovery"); for (switch, addr) in switch_mgmt_addrs { let dpd_client = DpdClient::new( @@ -238,7 +264,7 @@ impl super::Nexus { ), dpd_client::ClientState { tag: "nexus".to_string(), - log: self.log.new(o!("component" => "DpdClient")), + log: log.new(o!("component" => "DpdClient")), }, ); @@ -247,10 +273,7 @@ impl super::Nexus { Error::internal_error(&format!("encountered error while discovering ports for {switch:#?}: {e}")) })?; - info!( - self.log, - "discovered ports for {switch}: {all_ports:#?}" - ); + info!(log, "discovered ports for {switch}: {all_ports:#?}"); let qsfp_ports: Vec = all_ports .iter() @@ -261,7 +284,7 @@ impl super::Nexus { .collect(); info!( - self.log, + log, "populating ports for {switch}: {qsfp_ports:#?}" ); @@ -276,7 +299,7 @@ impl super::Nexus { // TODO: #3602 Eliminate need for static port mappings for switch ports ExternalPortDiscovery::Static(port_mappings) => { info!( - self.log, + log, "Using static configuration for external switchports" ); for (switch, ports) in port_mappings { @@ -295,7 +318,7 @@ impl super::Nexus { // Currently calling some of the apis directly, but should we be using sagas // going forward via self.run_saga()? Note that self.create_runnable_saga and // self.execute_saga are currently not available within this scope. - info!(self.log, "Recording Rack Network Configuration"); + info!(log, "Recording Rack Network Configuration"); let address_lot_name = Name::from_str(INFRA_LOT).map_err(|e| { Error::internal_error(&format!( "unable to use `initial-infra` as `Name`: {e}" @@ -591,6 +614,8 @@ impl super::Nexus { rack_id, blueprint, services: request.services, + physical_disks, + zpools, datasets, service_ip_pool_ranges, internal_dns, diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 06e50f2ecd..4bb4d6daef 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -5,14 +5,12 @@ //! Sleds, and the hardware and services within them. 
use crate::internal_api::params::{ - PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, SledAgentInfo, SledRole, - ZpoolPutRequest, + PhysicalDiskPutRequest, SledAgentInfo, SledRole, ZpoolPutRequest, }; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db; use nexus_db_queries::db::lookup; -use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::DatasetKind; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; @@ -200,12 +198,14 @@ impl super::Nexus { ) -> Result<(), Error> { info!( self.log, "upserting physical disk"; - "sled_id" => request.sled_id.to_string(), - "vendor" => request.vendor.to_string(), - "serial" => request.serial.to_string(), - "model" => request.model.to_string() + "physical_disk_id" => %request.id, + "sled_id" => %request.sled_id, + "vendor" => %request.vendor, + "serial" => %request.serial, + "model" => %request.model, ); let disk = db::model::PhysicalDisk::new( + request.id, request.vendor, request.serial, request.model, @@ -216,56 +216,27 @@ impl super::Nexus { Ok(()) } - /// Removes a physical disk from the database. - /// - /// TODO: Remove Zpools and datasets contained within this disk. - pub(crate) async fn delete_physical_disk( - &self, - opctx: &OpContext, - request: PhysicalDiskDeleteRequest, - ) -> Result<(), Error> { - info!( - self.log, "deleting physical disk"; - "sled_id" => request.sled_id.to_string(), - "vendor" => request.vendor.to_string(), - "serial" => request.serial.to_string(), - "model" => request.model.to_string() - ); - self.db_datastore - .physical_disk_delete( - &opctx, - request.vendor, - request.serial, - request.model, - request.sled_id, - ) - .await?; - Ok(()) - } - // Zpools (contained within sleds) /// Upserts a Zpool into the database, updating it if it already exists. 
pub(crate) async fn upsert_zpool( &self, opctx: &OpContext, - id: Uuid, - sled_id: Uuid, - info: ZpoolPutRequest, + request: ZpoolPutRequest, ) -> Result<(), Error> { - info!(self.log, "upserting zpool"; "sled_id" => sled_id.to_string(), "zpool_id" => id.to_string()); + info!( + self.log, "upserting zpool"; + "sled_id" => %request.sled_id, + "zpool_id" => %request.id, + "physical_disk_id" => %request.physical_disk_id, + ); - let (_authz_disk, db_disk) = - LookupPath::new(&opctx, &self.db_datastore) - .physical_disk( - &info.disk_vendor, - &info.disk_serial, - &info.disk_model, - ) - .fetch() - .await?; - let zpool = db::model::Zpool::new(id, sled_id, db_disk.uuid()); - self.db_datastore.zpool_upsert(zpool).await?; + let zpool = db::model::Zpool::new( + request.id, + request.sled_id, + request.physical_disk_id, + ); + self.db_datastore.zpool_upsert(&opctx, zpool).await?; Ok(()) } diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 3758b5289b..6d2484c19d 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -6,11 +6,7 @@ use crate::ServerContext; -use super::params::{ - OximeterInfo, PhysicalDiskDeleteRequest, PhysicalDiskPutRequest, - PhysicalDiskPutResponse, RackInitializationRequest, SledAgentInfo, - ZpoolPutRequest, ZpoolPutResponse, -}; +use super::params::{OximeterInfo, RackInitializationRequest}; use dropshot::endpoint; use dropshot::ApiDescription; use dropshot::FreeformBody; @@ -34,6 +30,7 @@ use nexus_types::external_api::params::SledSelector; use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::shared::UninitializedSled; use nexus_types::external_api::views::SledPolicy; +use nexus_types::internal_api::params::SledAgentInfo; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; use nexus_types::internal_api::views::to_list; @@ -75,9 +72,6 @@ pub(crate) fn internal_api() -> NexusApiDescription { api.register(sled_firewall_rules_request)?; api.register(switch_put)?; api.register(rack_initialization_complete)?; - api.register(physical_disk_put)?; - api.register(physical_disk_delete)?; - api.register(zpool_put)?; api.register(cpapi_instances_put)?; api.register(cpapi_disks_put)?; api.register(cpapi_volume_remove_read_only_parent)?; @@ -257,77 +251,6 @@ async fn switch_put( apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Report that a physical disk for the specified sled has come online. -#[endpoint { - method = PUT, - path = "/physical-disk", - }] -async fn physical_disk_put( - rqctx: RequestContext>, - body: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let disk = body.into_inner(); - let handler = async { - let opctx = crate::context::op_context_for_internal_api(&rqctx).await; - nexus.upsert_physical_disk(&opctx, disk).await?; - Ok(HttpResponseOk(PhysicalDiskPutResponse {})) - }; - apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - -/// Report that a physical disk for the specified sled has gone offline. 
-#[endpoint { - method = DELETE, - path = "/physical-disk", - }] -async fn physical_disk_delete( - rqctx: RequestContext>, - body: TypedBody, -) -> Result { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let disk = body.into_inner(); - - let handler = async { - let opctx = crate::context::op_context_for_internal_api(&rqctx).await; - nexus.delete_physical_disk(&opctx, disk).await?; - Ok(HttpResponseDeleted()) - }; - apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - -/// Path parameters for Zpool requests (internal API) -#[derive(Deserialize, JsonSchema)] -struct ZpoolPathParam { - sled_id: Uuid, - zpool_id: Uuid, -} - -/// Report that a pool for a specified sled has come online. -#[endpoint { - method = PUT, - path = "/sled-agents/{sled_id}/zpools/{zpool_id}", - }] -async fn zpool_put( - rqctx: RequestContext>, - path_params: Path, - pool_info: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let path = path_params.into_inner(); - let pi = pool_info.into_inner(); - - let handler = async { - let opctx = crate::context::op_context_for_internal_api(&rqctx).await; - nexus.upsert_zpool(&opctx, path.zpool_id, path.sled_id, pi).await?; - Ok(HttpResponseOk(ZpoolPutResponse {})) - }; - apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - /// Path parameters for Instance requests (internal API) #[derive(Deserialize, JsonSchema)] struct InstancePathParam { diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index bd5a13dfd1..80c972363f 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -29,7 +29,9 @@ use internal_api::http_entrypoints::internal_api; use nexus_config::NexusConfig; use nexus_types::deployment::Blueprint; use nexus_types::external_api::views::SledProvisionPolicy; -use nexus_types::internal_api::params::ServiceKind; +use nexus_types::internal_api::params::{ + PhysicalDiskPutRequest, ServiceKind, ZpoolPutRequest, +}; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; use omicron_common::api::external::Error; @@ -237,6 +239,10 @@ impl nexus_test_interface::NexusServer for Server { config: &NexusConfig, blueprint: Blueprint, services: Vec, + physical_disks: Vec< + nexus_types::internal_api::params::PhysicalDiskPutRequest, + >, + zpools: Vec, datasets: Vec, internal_dns_zone_config: nexus_types::internal_api::params::DnsConfigParams, external_dns_zone_name: &str, @@ -282,6 +288,8 @@ impl nexus_test_interface::NexusServer for Server { internal_api::params::RackInitializationRequest { blueprint, services, + physical_disks, + zpools, datasets, internal_services_ip_pool_ranges, certs, @@ -341,14 +349,26 @@ impl nexus_test_interface::NexusServer for Server { async fn upsert_crucible_dataset( &self, - id: Uuid, - zpool_id: Uuid, + physical_disk: PhysicalDiskPutRequest, + zpool: ZpoolPutRequest, + dataset_id: Uuid, address: SocketAddrV6, ) { + let opctx = self.apictx.nexus.opctx_for_internal_api(); + self.apictx + .nexus + .upsert_physical_disk(&opctx, physical_disk) + .await + .unwrap(); + + let zpool_id = zpool.id; + + self.apictx.nexus.upsert_zpool(&opctx, zpool).await.unwrap(); + self.apictx .nexus .upsert_dataset( - id, + dataset_id, zpool_id, address, nexus_db_queries::db::model::DatasetKind::Crucible, diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs index 2e3428a1dd..54478c0876 100644 --- a/nexus/test-interface/src/lib.rs +++ b/nexus/test-interface/src/lib.rs @@ -34,6 +34,9 @@ use async_trait::async_trait; 
use nexus_config::NexusConfig; use nexus_types::deployment::Blueprint; +use nexus_types::internal_api::params::{ + PhysicalDiskPutRequest, ZpoolPutRequest, +}; use nexus_types::inventory::Collection; use omicron_common::api::external::Error; use slog::Logger; @@ -55,6 +58,8 @@ pub trait NexusServer: Send + Sync + 'static { config: &NexusConfig, blueprint: Blueprint, services: Vec, + physical_disks: Vec, + zpools: Vec, datasets: Vec, internal_dns_config: nexus_types::internal_api::params::DnsConfigParams, external_dns_zone_name: &str, @@ -75,6 +80,10 @@ pub trait NexusServer: Send + Sync + 'static { // control over dataset provisioning is shifting to Nexus. There is // a short window where RSS controls dataset provisioning, but afterwards, // Nexus should be calling the shots on "when to provision datasets". + // Furthermore, with https://github.com/oxidecomputer/omicron/pull/5172, + // physical disk and zpool provisioning has already moved into Nexus. This + // provides a "back-door" for tests to control the set of control plane + // disks that are considered active. // // For test purposes, we have many situations where we want to carve up // zpools and datasets precisely for disk-based tests. As a result, we @@ -88,8 +97,9 @@ pub trait NexusServer: Send + Sync + 'static { // However, doing so would let us remove this test-only API. async fn upsert_crucible_dataset( &self, - id: Uuid, - zpool_id: Uuid, + physical_disk: PhysicalDiskPutRequest, + zpool: ZpoolPutRequest, + dataset_id: Uuid, address: SocketAddrV6, ); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 76ef600fbb..c124e3b58f 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -861,6 +861,8 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { // asynchronously, and we're not making any effort (currently) to // wait for them to be known to Nexus. 
vec![], + vec![], + vec![], dns_config, &external_dns_zone_name, recovery_silo, diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index b67028a996..b50a60eb8b 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -15,7 +15,6 @@ use http::StatusCode; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_test_interface::NexusServer; use nexus_types::external_api::params; -use nexus_types::external_api::params::PhysicalDiskKind; use nexus_types::external_api::params::UserId; use nexus_types::external_api::shared; use nexus_types::external_api::shared::Baseboard; @@ -37,6 +36,7 @@ use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Instance; use omicron_common::api::external::InstanceCpuCount; use omicron_common::api::external::NameOrId; +use omicron_common::disk::DiskIdentity; use omicron_sled_agent::sim::SledAgent; use omicron_test_utils::dev::poll::wait_for_condition; use omicron_test_utils::dev::poll::CondCheckError; @@ -340,55 +340,6 @@ pub async fn create_switch( .await } -pub async fn create_physical_disk( - client: &ClientTestContext, - vendor: &str, - serial: &str, - model: &str, - variant: PhysicalDiskKind, - sled_id: Uuid, -) -> internal_params::PhysicalDiskPutResponse { - object_put( - client, - "/physical-disk", - &internal_params::PhysicalDiskPutRequest { - vendor: vendor.to_string(), - serial: serial.to_string(), - model: model.to_string(), - variant, - sled_id, - }, - ) - .await -} - -pub async fn delete_physical_disk( - client: &ClientTestContext, - vendor: &str, - serial: &str, - model: &str, - sled_id: Uuid, -) { - let body = internal_params::PhysicalDiskDeleteRequest { - vendor: vendor.to_string(), - serial: serial.to_string(), - model: model.to_string(), - sled_id, - }; - - NexusRequest::new( - RequestBuilder::new(client, http::Method::DELETE, "/physical-disk") - .body(Some(&body)) - .expect_status(Some(http::StatusCode::NO_CONTENT)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap_or_else(|_| { - panic!("failed to make \"delete\" request of physical disk") - }); -} - pub async fn create_silo( client: &ClientTestContext, silo_name: &str, @@ -781,36 +732,60 @@ impl DiskTest { cptestctx: &ControlPlaneTestContext, gibibytes: u32, ) { + // To get a dataset, we actually need to create a new simulated physical + // disk, zpool, and dataset, all contained within one another. let zpool = TestZpool { id: Uuid::new_v4(), size: ByteCount::from_gibibytes_u32(gibibytes), datasets: vec![TestDataset { id: Uuid::new_v4() }], }; + let physical_disk_id = Uuid::new_v4(); + + let disk_identity = DiskIdentity { + vendor: "test-vendor".into(), + serial: "test-serial".into(), + model: "test-model".into(), + }; + + let physical_disk_request = + nexus_types::internal_api::params::PhysicalDiskPutRequest { + id: physical_disk_id, + vendor: disk_identity.vendor.clone(), + serial: disk_identity.serial.clone(), + model: disk_identity.model.clone(), + variant: + nexus_types::external_api::params::PhysicalDiskKind::U2, + sled_id: self.sled_agent.id, + }; + + let zpool_request = + nexus_types::internal_api::params::ZpoolPutRequest { + id: zpool.id, + physical_disk_id, + sled_id: self.sled_agent.id, + }; + + // Tell the simulated sled agent to create the disk and zpool containing + // these datasets. 
+ self.sled_agent .create_external_physical_disk( - "test-vendor".into(), - "test-serial".into(), - "test-model".into(), + physical_disk_id, + disk_identity.clone(), ) .await; self.sled_agent - .create_zpool( - zpool.id, - "test-vendor".into(), - "test-serial".into(), - "test-model".into(), - zpool.size.to_bytes(), - ) + .create_zpool(zpool.id, physical_disk_id, zpool.size.to_bytes()) .await; for dataset in &zpool.datasets { + // Sled Agent side: Create the Dataset, make sure regions can be + // created immediately if Nexus requests anything. let address = self .sled_agent .create_crucible_dataset(zpool.id, dataset.id) .await; - - // By default, regions are created immediately. let crucible = self .sled_agent .get_crucible_dataset(zpool.id, dataset.id) @@ -819,6 +794,9 @@ impl DiskTest { .set_create_callback(Box::new(|_| RegionState::Created)) .await; + // Nexus side: Notify Nexus of the physical disk/zpool/dataset + // combination that exists. + let address = match address { std::net::SocketAddr::V6(addr) => addr, _ => panic!("Unsupported address type: {address} "), @@ -826,7 +804,12 @@ impl DiskTest { cptestctx .server - .upsert_crucible_dataset(dataset.id, zpool.id, address) + .upsert_crucible_dataset( + physical_disk_request.clone(), + zpool_request.clone(), + dataset.id, + address, + ) .await; } diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 804694c0b2..80a5534790 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -50,7 +50,6 @@ mod vpc_firewall; mod vpc_routers; mod vpc_subnets; mod vpcs; -mod zpools; // This module is used only for shared data, not test cases. mod endpoints; diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index 743a76be17..b6ed9183a3 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -6,17 +6,17 @@ use camino::Utf8Path; use dropshot::test_util::ClientTestContext; +use nexus_db_model::PhysicalDisk as DbPhysicalDisk; +use nexus_db_model::PhysicalDiskKind as DbPhysicalDiskKind; +use nexus_db_queries::context::OpContext; use nexus_test_interface::NexusServer; use nexus_test_utils::resource_helpers::create_default_ip_pool; use nexus_test_utils::resource_helpers::create_instance; -use nexus_test_utils::resource_helpers::create_physical_disk; use nexus_test_utils::resource_helpers::create_project; -use nexus_test_utils::resource_helpers::delete_physical_disk; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::start_sled_agent; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; -use nexus_types::external_api::params::PhysicalDiskKind; use nexus_types::external_api::views::SledInstance; use nexus_types::external_api::views::{PhysicalDisk, Sled}; use omicron_sled_agent::sim; @@ -95,7 +95,6 @@ async fn test_physical_disk_create_list_delete( cptestctx: &ControlPlaneTestContext, ) { let external_client = &cptestctx.external_client; - let internal_client = &cptestctx.internal_client; // Verify that there are two sleds to begin with. 
let sleds_url = "/v1/system/hardware/sleds"; @@ -106,17 +105,26 @@ async fn test_physical_disk_create_list_delete( format!("/v1/system/hardware/sleds/{SLED_AGENT_UUID}/disks"); let disks_initial = physical_disks_list(&external_client, &disks_url).await; - // Insert a new disk using the internal API, observe it in the external API + // Inject a disk into the database, observe it in the external API + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); let sled_id = Uuid::from_str(&SLED_AGENT_UUID).unwrap(); - create_physical_disk( - &internal_client, - "v", - "s", - "m", - PhysicalDiskKind::U2, + let physical_disk = DbPhysicalDisk::new( + Uuid::new_v4(), + "v".into(), + "s".into(), + "m".into(), + DbPhysicalDiskKind::U2, sled_id, - ) - .await; + ); + + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + let _disk_id = datastore + .physical_disk_upsert(&opctx, physical_disk.clone()) + .await + .expect("Failed to upsert physical disk"); + let disks = physical_disks_list(&external_client, &disks_url).await; assert_eq!(disks.len(), disks_initial.len() + 1); let _new_disk = disks @@ -129,7 +137,17 @@ async fn test_physical_disk_create_list_delete( .expect("did not find the new disk"); // Delete that disk using the internal API, observe it in the external API - delete_physical_disk(&internal_client, "v", "s", "m", sled_id).await; + datastore + .physical_disk_delete( + &opctx, + "v".into(), + "s".into(), + "m".into(), + sled_id, + ) + .await + .expect("Failed to upsert physical disk"); + assert_eq!( physical_disks_list(&external_client, &disks_url).await, disks_initial diff --git a/nexus/tests/integration_tests/switches.rs b/nexus/tests/integration_tests/switches.rs index f56d42f6d1..d665d6ff8e 100644 --- a/nexus/tests/integration_tests/switches.rs +++ b/nexus/tests/integration_tests/switches.rs @@ -6,15 +6,11 @@ use dropshot::test_util::ClientTestContext; use nexus_test_interface::NexusServer; -use nexus_test_utils::resource_helpers::create_physical_disk; -use nexus_test_utils::resource_helpers::delete_physical_disk; use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::start_sled_agent; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils_macros::nexus_test; -use nexus_types::external_api::views::{ - PhysicalDisk, PhysicalDiskType, Sled, -}; +use nexus_types::external_api::views::Sled; use nexus_types::internal_api::params as internal_params; use omicron_sled_agent::sim; use std::str::FromStr; @@ -75,42 +71,3 @@ async fn test_switches_list(cptestctx: &ControlPlaneTestContext) { sa.http_server.close().await.unwrap(); } } - -#[nexus_test] -async fn test_physical_disk_create_list_delete( - cptestctx: &ControlPlaneTestContext, -) { - let external_client = &cptestctx.external_client; - let internal_client = &cptestctx.internal_client; - - // Verify that there is one sled to begin with. - let switches_url = "/v1/system/hardware/switches"; - assert_eq!(switches_list(&external_client, &switches_url).await.len(), 1); - - // Verify that there are no disks. 
- let disks_url = - format!("/v1/system/hardware/switches/{SLED_AGENT_UUID}/disks"); - assert!(physical_disks_list(&external_client, &disks_url).await.is_empty()); - - // Insert a new disk using the internal API, observe it in the external API - let sled_id = Uuid::from_str(&SLED_AGENT_UUID).unwrap(); - create_physical_disk( - &internal_client, - "v", - "s", - "m", - internal_params::PhysicalDiskKind::U2, - sled_id, - ) - .await; - let disks = physical_disks_list(&external_client, &disks_url).await; - assert_eq!(disks.len(), 1); - assert_eq!(disks[0].vendor, "v"); - assert_eq!(disks[0].serial, "s"); - assert_eq!(disks[0].model, "m"); - assert_eq!(disks[0].disk_type, PhysicalDiskType::External); - - // Delete that disk using the internal API, observe it in the external API - delete_physical_disk(&internal_client, "v", "s", "m", sled_id).await; - assert!(physical_disks_list(&external_client, &disks_url).await.is_empty()); -} diff --git a/nexus/tests/integration_tests/zpools.rs b/nexus/tests/integration_tests/zpools.rs deleted file mode 100644 index 8e058f9349..0000000000 --- a/nexus/tests/integration_tests/zpools.rs +++ /dev/null @@ -1,128 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use dropshot::test_util::ClientTestContext; -use http::method::Method; -use http::StatusCode; -use nexus_types::external_api::params::PhysicalDiskKind; -use nexus_types::internal_api::params::PhysicalDiskPutRequest; -use nexus_types::internal_api::params::ZpoolPutRequest; -use omicron_common::api::external::ByteCount; -use uuid::Uuid; - -use nexus_test_utils::SLED_AGENT_UUID; -use nexus_test_utils_macros::nexus_test; - -type ControlPlaneTestContext = - nexus_test_utils::ControlPlaneTestContext; - -const VENDOR: &str = "test-vendor"; -const SERIAL: &str = "test-serial"; -const MODEL: &str = "test-model"; - -async fn create_test_physical_disk(client: &ClientTestContext) { - let request = PhysicalDiskPutRequest { - vendor: VENDOR.into(), - serial: SERIAL.into(), - model: MODEL.into(), - variant: PhysicalDiskKind::U2, - sled_id: SLED_AGENT_UUID.parse().unwrap(), - }; - let physical_disk_put_url = "/physical-disk"; - client - .make_request( - Method::PUT, - &physical_disk_put_url, - Some(request), - StatusCode::OK, - ) - .await - .unwrap(); -} - -// Tests the "normal" case of zpool_put: inserting a known Zpool. -// -// This will typically be invoked by the Sled Agent, after performing inventory. -#[nexus_test] -async fn test_zpool_put_success(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.internal_client; - create_test_physical_disk(&client).await; - - let zpool_id = Uuid::new_v4(); - let zpool_put_url = - format!("/sled-agents/{}/zpools/{}", SLED_AGENT_UUID, zpool_id); - - let request = ZpoolPutRequest { - size: ByteCount::from_gibibytes_u32(1), - disk_vendor: VENDOR.into(), - disk_serial: SERIAL.into(), - disk_model: MODEL.into(), - }; - client - .make_request( - Method::PUT, - &zpool_put_url, - Some(request), - StatusCode::OK, - ) - .await - .unwrap(); -} - -// Tests a failure case of zpool_put: Inserting a zpool into a sled agent that -// does not exist. -#[nexus_test] -async fn test_zpool_put_bad_sled_returns_not_found( - cptestctx: &ControlPlaneTestContext, -) { - let client = &cptestctx.internal_client; - create_test_physical_disk(&client).await; - - // A sled with the "nil" UUID should not exist. 
- let sled_id = Uuid::nil(); - let zpool_id = Uuid::new_v4(); - let zpool_put_url = format!("/sled_agents/{}/zpools/{}", sled_id, zpool_id); - - let request = ZpoolPutRequest { - size: ByteCount::from_gibibytes_u32(1), - disk_vendor: VENDOR.into(), - disk_serial: SERIAL.into(), - disk_model: MODEL.into(), - }; - client - .make_request_error_body( - Method::PUT, - &zpool_put_url, - request, - StatusCode::NOT_FOUND, - ) - .await; -} - -// Tests a failure case of zpool_put: Inserting a zpool into a sled agent that -// exists, but into a disk that does not exist -#[nexus_test] -async fn test_zpool_put_bad_physical_disk_returns_not_found( - cptestctx: &ControlPlaneTestContext, -) { - let client = &cptestctx.internal_client; - let zpool_id = Uuid::new_v4(); - let zpool_put_url = - format!("/sled_agents/{}/zpools/{}", SLED_AGENT_UUID, zpool_id); - - let request = ZpoolPutRequest { - size: ByteCount::from_gibibytes_u32(1), - disk_vendor: VENDOR.into(), - disk_serial: SERIAL.into(), - disk_model: MODEL.into(), - }; - client - .make_request_error_body( - Method::PUT, - &zpool_put_url, - request, - StatusCode::NOT_FOUND, - ) - .await; -} diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index 9f80d313fd..a811106c2c 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -82,43 +82,25 @@ pub struct SwitchPutResponse {} #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] pub struct PhysicalDiskPutRequest { - pub vendor: String, - pub serial: String, - pub model: String, - - pub variant: PhysicalDiskKind, - pub sled_id: Uuid, -} - -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct PhysicalDiskPutResponse {} + pub id: Uuid, -#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] -pub struct PhysicalDiskDeleteRequest { pub vendor: String, pub serial: String, pub model: String, + pub variant: PhysicalDiskKind, pub sled_id: Uuid, } -/// Sent by a sled agent on startup to Nexus to request further instruction +/// Identifies information about a Zpool that should be part of the control +/// plane. #[derive(Clone, Debug, Serialize, Deserialize, JsonSchema)] pub struct ZpoolPutRequest { - /// Total size of the pool. - pub size: ByteCount, - - // Information to identify the disk to which this zpool belongs - pub disk_vendor: String, - pub disk_serial: String, - pub disk_model: String, - // TODO: We could include any other data from `ZpoolInfo` we want, - // such as "allocated/free" space and pool health? + pub id: Uuid, + pub sled_id: Uuid, + pub physical_disk_id: Uuid, } -#[derive(Serialize, Deserialize, JsonSchema)] -pub struct ZpoolPutResponse {} - /// Describes the purpose of the dataset. #[derive( Debug, Serialize, Deserialize, JsonSchema, Clone, Copy, PartialEq, Eq, @@ -253,6 +235,13 @@ pub struct RackInitializationRequest { pub blueprint: Blueprint, /// Services on the rack which have been created by RSS. pub services: Vec, + + /// "Managed" physical disks owned by the control plane + pub physical_disks: Vec, + + /// Zpools created within the physical disks created by the control plane. + pub zpools: Vec, + /// Datasets on the rack which have been provisioned by RSS. 
pub datasets: Vec, /// Ranges of the service IP pool which may be used for internal services, diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 40da26047b..bf2fd16971 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -360,7 +360,7 @@ pub struct PhysicalDisk { impl From for PhysicalDisk { fn from(disk: sled_agent_client::types::InventoryDisk) -> PhysicalDisk { PhysicalDisk { - identity: disk.identity.into(), + identity: disk.identity, variant: disk.variant.into(), slot: disk.slot, } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index db3199833e..fee389dfdc 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -919,65 +919,6 @@ } } }, - "/physical-disk": { - "put": { - "summary": "Report that a physical disk for the specified sled has come online.", - "operationId": "physical_disk_put", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/PhysicalDiskPutRequest" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/PhysicalDiskPutResponse" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - }, - "delete": { - "summary": "Report that a physical disk for the specified sled has gone offline.", - "operationId": "physical_disk_delete", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/PhysicalDiskDeleteRequest" - } - } - }, - "required": true - }, - "responses": { - "204": { - "description": "successful deletion" - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/probes/{sled}": { "get": { "summary": "Get all the probes associated with a given sled.", @@ -1277,60 +1218,6 @@ } } }, - "/sled-agents/{sled_id}/zpools/{zpool_id}": { - "put": { - "summary": "Report that a pool for a specified sled has come online.", - "operationId": "zpool_put", - "parameters": [ - { - "in": "path", - "name": "sled_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - }, - { - "in": "path", - "name": "zpool_id", - "required": true, - "schema": { - "type": "string", - "format": "uuid" - } - } - ], - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ZpoolPutRequest" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/ZpoolPutResponse" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/sleds/add": { "post": { "summary": "Add sled to initialized rack", @@ -5980,30 +5867,6 @@ "collector_id" ] }, - "PhysicalDiskDeleteRequest": { - "type": "object", - "properties": { - "model": { - "type": "string" - }, - "serial": { - "type": "string" - }, - "sled_id": { - "type": "string", - "format": "uuid" - }, - "vendor": { - "type": "string" - } - }, - "required": [ - "model", - "serial", - "sled_id", - "vendor" - ] - }, "PhysicalDiskKind": { "description": "Describes the form factor of physical disks.", "type": "string", @@ -6015,6 +5878,10 @@ "PhysicalDiskPutRequest": { "type": "object", "properties": { + "id": { + 
"type": "string", + "format": "uuid" + }, "model": { "type": "string" }, @@ -6033,6 +5900,7 @@ } }, "required": [ + "id", "model", "serial", "sled_id", @@ -6040,9 +5908,6 @@ "vendor" ] }, - "PhysicalDiskPutResponse": { - "type": "object" - }, "PortConfigV1": { "type": "object", "properties": { @@ -6468,6 +6333,13 @@ "$ref": "#/components/schemas/IpRange" } }, + "physical_disks": { + "description": "\"Managed\" physical disks owned by the control plane", + "type": "array", + "items": { + "$ref": "#/components/schemas/PhysicalDiskPutRequest" + } + }, "rack_network_config": { "description": "Initial rack network configuration", "allOf": [ @@ -6490,6 +6362,13 @@ "items": { "$ref": "#/components/schemas/ServicePutRequest" } + }, + "zpools": { + "description": "Zpools created within the physical disks created by the control plane.", + "type": "array", + "items": { + "$ref": "#/components/schemas/ZpoolPutRequest" + } } }, "required": [ @@ -6500,9 +6379,11 @@ "external_port_count", "internal_dns_zone_config", "internal_services_ip_pool_ranges", + "physical_disks", "rack_network_config", "recovery_silo", - "services" + "services", + "zpools" ] }, "RackNetworkConfigV1": { @@ -7667,37 +7548,28 @@ "type": "string" }, "ZpoolPutRequest": { - "description": "Sent by a sled agent on startup to Nexus to request further instruction", + "description": "Identifies information about a Zpool that should be part of the control plane.", "type": "object", "properties": { - "disk_model": { - "type": "string" - }, - "disk_serial": { - "type": "string" + "id": { + "type": "string", + "format": "uuid" }, - "disk_vendor": { - "type": "string" + "physical_disk_id": { + "type": "string", + "format": "uuid" }, - "size": { - "description": "Total size of the pool.", - "allOf": [ - { - "$ref": "#/components/schemas/ByteCount" - } - ] + "sled_id": { + "type": "string", + "format": "uuid" } }, "required": [ - "disk_model", - "disk_serial", - "disk_vendor", - "size" + "id", + "physical_disk_id", + "sled_id" ] }, - "ZpoolPutResponse": { - "type": "object" - }, "SemverVersion": { "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index e5b3a1c56f..07a42b461f 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -623,6 +623,60 @@ } } }, + "/omicron-physical-disks": { + "get": { + "operationId": "omicron_physical_disks_get", + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "put": { + "operationId": "omicron_physical_disks_put", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/DisksManagementResult" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/omicron-zones": { "get": { "operationId": "omicron_zones_get", @@ -3571,6 +3625,112 @@ "vendor" ] }, + 
"DiskManagementError": { + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "not_found" + ] + } + }, + "required": [ + "type" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "zpool_uuid_mismatch" + ] + }, + "value": { + "type": "object", + "properties": { + "expected": { + "type": "string", + "format": "uuid" + }, + "observed": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "expected", + "observed" + ] + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "key_manager" + ] + }, + "value": { + "type": "string" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "other" + ] + }, + "value": { + "type": "string" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, + "DiskManagementStatus": { + "description": "Identifies how a single disk management operation may have succeeded or failed.", + "type": "object", + "properties": { + "err": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/DiskManagementError" + } + ] + }, + "identity": { + "$ref": "#/components/schemas/DiskIdentity" + } + }, + "required": [ + "identity" + ] + }, "DiskRequest": { "description": "DiskRequest\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"device\", \"name\", \"read_only\", \"slot\", \"volume_construction_request\" ], \"properties\": { \"device\": { \"type\": \"string\" }, \"name\": { \"type\": \"string\" }, \"read_only\": { \"type\": \"boolean\" }, \"slot\": { \"$ref\": \"#/components/schemas/Slot\" }, \"volume_construction_request\": { \"$ref\": \"#/components/schemas/VolumeConstructionRequest\" } } } ```
", "type": "object", @@ -3911,6 +4071,21 @@ "M2" ] }, + "DisksManagementResult": { + "description": "The result from attempting to manage underlying disks.\n\nThis is more complex than a simple \"Error\" type because it's possible for some disks to be initialized correctly, while others can fail.\n\nThis structure provides a mechanism for callers to learn about partial failures, and handle them appropriately on a per-disk basis.", + "type": "object", + "properties": { + "status": { + "type": "array", + "items": { + "$ref": "#/components/schemas/DiskManagementStatus" + } + } + }, + "required": [ + "status" + ] + }, "Duration": { "type": "object", "properties": { @@ -5817,6 +5992,50 @@ } ] }, + "OmicronPhysicalDiskConfig": { + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "identity": { + "$ref": "#/components/schemas/DiskIdentity" + }, + "pool_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "id", + "identity", + "pool_id" + ] + }, + "OmicronPhysicalDisksConfig": { + "type": "object", + "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OmicronPhysicalDiskConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + } + }, + "required": [ + "disks", + "generation" + ] + }, "OmicronZoneConfig": { "description": "Describes one Omicron-managed zone running on a sled", "type": "object", diff --git a/schema/omicron-physical-disks.json b/schema/omicron-physical-disks.json new file mode 100644 index 0000000000..efc1b2cdd2 --- /dev/null +++ b/schema/omicron-physical-disks.json @@ -0,0 +1,74 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "OmicronPhysicalDisksConfig", + "type": "object", + "required": [ + "disks", + "generation" + ], + "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/definitions/OmicronPhysicalDiskConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). 
It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + } + }, + "definitions": { + "DiskIdentity": { + "description": "Uniquely identifies a disk.", + "type": "object", + "required": [ + "model", + "serial", + "vendor" + ], + "properties": { + "model": { + "type": "string" + }, + "serial": { + "type": "string" + }, + "vendor": { + "type": "string" + } + } + }, + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "OmicronPhysicalDiskConfig": { + "type": "object", + "required": [ + "id", + "identity", + "pool_id" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "identity": { + "$ref": "#/definitions/DiskIdentity" + }, + "pool_id": { + "type": "string", + "format": "uuid" + } + } + } + } +} \ No newline at end of file diff --git a/schema/rss-service-plan-v3.json b/schema/rss-service-plan-v3.json new file mode 100644 index 0000000000..fcc672a93b --- /dev/null +++ b/schema/rss-service-plan-v3.json @@ -0,0 +1,848 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "title": "Plan", + "type": "object", + "required": [ + "dns_config", + "services" + ], + "properties": { + "dns_config": { + "$ref": "#/definitions/DnsConfigParams" + }, + "services": { + "type": "object", + "additionalProperties": { + "$ref": "#/definitions/SledConfig" + } + } + }, + "definitions": { + "DiskIdentity": { + "description": "Uniquely identifies a disk.", + "type": "object", + "required": [ + "model", + "serial", + "vendor" + ], + "properties": { + "model": { + "type": "string" + }, + "serial": { + "type": "string" + }, + "vendor": { + "type": "string" + } + } + }, + "DnsConfigParams": { + "description": "DnsConfigParams\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"generation\", \"time_created\", \"zones\" ], \"properties\": { \"generation\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"time_created\": { \"type\": \"string\", \"format\": \"date-time\" }, \"zones\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsConfigZone\" } } } } ```
", + "type": "object", + "required": [ + "generation", + "time_created", + "zones" + ], + "properties": { + "generation": { + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "time_created": { + "type": "string", + "format": "date-time" + }, + "zones": { + "type": "array", + "items": { + "$ref": "#/definitions/DnsConfigZone" + } + } + } + }, + "DnsConfigZone": { + "description": "DnsConfigZone\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"records\", \"zone_name\" ], \"properties\": { \"records\": { \"type\": \"object\", \"additionalProperties\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsRecord\" } } }, \"zone_name\": { \"type\": \"string\" } } } ```
", + "type": "object", + "required": [ + "records", + "zone_name" + ], + "properties": { + "records": { + "type": "object", + "additionalProperties": { + "type": "array", + "items": { + "$ref": "#/definitions/DnsRecord" + } + } + }, + "zone_name": { + "type": "string" + } + } + }, + "DnsRecord": { + "description": "DnsRecord\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv4\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"A\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"AAAA\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"$ref\": \"#/components/schemas/Srv\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"SRV\" ] } } } ] } ```
", + "oneOf": [ + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "type": "string", + "format": "ipv4" + }, + "type": { + "type": "string", + "enum": [ + "A" + ] + } + } + }, + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "type": "string", + "format": "ipv6" + }, + "type": { + "type": "string", + "enum": [ + "AAAA" + ] + } + } + }, + { + "type": "object", + "required": [ + "data", + "type" + ], + "properties": { + "data": { + "$ref": "#/definitions/Srv" + }, + "type": { + "type": "string", + "enum": [ + "SRV" + ] + } + } + } + ] + }, + "Generation": { + "description": "Generation numbers stored in the database, used for optimistic concurrency control", + "type": "integer", + "format": "uint64", + "minimum": 0.0 + }, + "IpNet": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/definitions/Ipv4Net" + } + ] + }, + { + "title": "v6", + "allOf": [ + { + "$ref": "#/definitions/Ipv6Net" + } + ] + } + ] + }, + "Ipv4Net": { + "title": "An IPv4 subnet", + "description": "An IPv4 subnet, including prefix and subnet mask", + "examples": [ + "192.168.1.0/24" + ], + "type": "string", + "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$" + }, + "Ipv6Net": { + "title": "An IPv6 subnet", + "description": "An IPv6 subnet, including prefix and subnet mask", + "examples": [ + "fd12:3456::/64" + ], + "type": "string", + "pattern": "^([fF][dD])[0-9a-fA-F]{2}:(([0-9a-fA-F]{1,4}:){6}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,6}:)([0-9a-fA-F]{1,4})?\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$" + }, + "MacAddr": { + "title": "A MAC address", + "description": "A Media Access Control address, in EUI-48 format", + "examples": [ + "ff:ff:ff:ff:ff:ff" + ], + "type": "string", + "maxLength": 17, + "minLength": 5, + "pattern": "^([0-9a-fA-F]{0,2}:){5}[0-9a-fA-F]{0,2}$" + }, + "Name": { + "title": "A name unique within the parent collection", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. 
Names cannot be a UUID though they may contain a UUID.", + "type": "string", + "maxLength": 63, + "minLength": 1, + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$" + }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": "#/definitions/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/definitions/MacAddr" + }, + "name": { + "$ref": "#/definitions/Name" + }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0.0 + }, + "subnet": { + "$ref": "#/definitions/IpNet" + }, + "vni": { + "$ref": "#/definitions/Vni" + } + } + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ + { + "description": "A vNIC attached to a guest instance", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "instance" + ] + } + } + }, + { + "description": "A vNIC associated with an internal service", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "service" + ] + } + } + }, + { + "description": "A vNIC associated with a probe", + "type": "object", + "required": [ + "id", + "type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "type": { + "type": "string", + "enum": [ + "probe" + ] + } + } + } + ] + }, + "OmicronPhysicalDiskConfig": { + "type": "object", + "required": [ + "id", + "identity", + "pool_id" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "identity": { + "$ref": "#/definitions/DiskIdentity" + }, + "pool_id": { + "type": "string", + "format": "uuid" + } + } + }, + "OmicronPhysicalDisksConfig": { + "type": "object", + "required": [ + "disks", + "generation" + ], + "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/definitions/OmicronPhysicalDiskConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). 
It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/definitions/Generation" + } + ] + } + } + }, + "OmicronZoneConfig": { + "description": "Describes one Omicron-managed zone running on a sled", + "type": "object", + "required": [ + "id", + "underlay_address", + "zone_type" + ], + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "underlay_address": { + "type": "string", + "format": "ipv6" + }, + "zone_type": { + "$ref": "#/definitions/OmicronZoneType" + } + } + }, + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "type": "object", + "required": [ + "pool_name" + ], + "properties": { + "pool_name": { + "$ref": "#/definitions/ZpoolName" + } + } + }, + "OmicronZoneType": { + "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", + "oneOf": [ + { + "type": "object", + "required": [ + "address", + "dns_servers", + "nic", + "ntp_servers", + "snat_cfg", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "snat_cfg": { + "description": "The SNAT configuration for outbound connections.", + "allOf": [ + { + "$ref": "#/definitions/SourceNatConfig" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "boundary_ntp" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "cockroach_db" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dataset", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "crucible" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "crucible_pantry" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "http_address", + "nic", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "type": "string" + }, + "http_address": { + "description": "The 
address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "external_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", + "type" + ], + "properties": { + "dataset": { + "$ref": "#/definitions/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "http_address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "internal_dns" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "dns_servers", + "ntp_servers", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "type": [ + "string", + "null" + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "type": { + "type": "string", + "enum": [ + "internal_ntp" + ] + } + } + }, + { + "type": "object", + "required": [ + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", + "type" + ], + "properties": { + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "external_ip": { + "description": "The address at which the external nexus server is reachable.", + "type": "string", + "format": "ip" + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the internal nexus server is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/definitions/NetworkInterface" + } + ] + }, + "type": { + "type": "string", + "enum": [ + "nexus" + ] + } + } + }, + { + "type": "object", + "required": [ + "address", + "type" + ], + "properties": { + "address": { + "type": "string" + }, + "type": { + "type": "string", + "enum": [ + "oximeter" + ] + } + } + } + ] + }, + "SledConfig": { + "type": "object", + "required": [ + "disks", + "zones" + ], + "properties": { + "disks": { + "description": "Control plane disks configured for this sled", + "allOf": [ + { + "$ref": "#/definitions/OmicronPhysicalDisksConfig" + } + ] + }, + "zones": { + "description": "zones configured for this sled", + "type": "array", + "items": { + "$ref": "#/definitions/OmicronZoneConfig" + } + } + } + }, + "SourceNatConfig": { + "description": "An IP address and port range used for source NAT, i.e., making outbound network connections from guests or services.", + "type": "object", + "required": [ + "first_port", + "ip", + "last_port" + 
], + "properties": { + "first_port": { + "description": "The first port used for source NAT, inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "ip": { + "description": "The external address provided to the instance or service.", + "type": "string", + "format": "ip" + }, + "last_port": { + "description": "The last port used for source NAT, also inclusive.", + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "Srv": { + "description": "Srv\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"port\", \"prio\", \"target\", \"weight\" ], \"properties\": { \"port\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"prio\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"target\": { \"type\": \"string\" }, \"weight\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 } } } ```
", + "type": "object", + "required": [ + "port", + "prio", + "target", + "weight" + ], + "properties": { + "port": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "prio": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + }, + "target": { + "type": "string" + }, + "weight": { + "type": "integer", + "format": "uint16", + "minimum": 0.0 + } + } + }, + "Vni": { + "description": "A Geneve Virtual Network Identifier", + "type": "integer", + "format": "uint32", + "minimum": 0.0 + }, + "ZpoolName": { + "title": "The name of a Zpool", + "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", + "type": "string", + "pattern": "^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$" + } + } +} \ No newline at end of file diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index c941ee2625..734055b9e5 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -106,7 +106,7 @@ tempfile.workspace = true tokio-stream.workspace = true tokio-util.workspace = true -illumos-utils = { workspace = true, features = ["testing", "tmp_keypath"] } +illumos-utils = { workspace = true, features = ["testing"] } sled-storage = { workspace = true, features = ["testing"] } # diff --git a/sled-agent/src/bootstrap/bootstore_setup.rs b/sled-agent/src/bootstrap/bootstore_setup.rs index e5079b978e..ee9a321474 100644 --- a/sled-agent/src/bootstrap/bootstore_setup.rs +++ b/sled-agent/src/bootstrap/bootstore_setup.rs @@ -15,7 +15,7 @@ use omicron_ddm_admin_client::Client as DdmAdminClient; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; use sled_storage::dataset::CLUSTER_DATASET; -use sled_storage::resources::StorageResources; +use sled_storage::resources::AllDisks; use slog::Logger; use std::collections::BTreeSet; use std::net::Ipv6Addr; @@ -26,7 +26,7 @@ const BOOTSTORE_FSM_STATE_FILE: &str = "bootstore-fsm-state.json"; const BOOTSTORE_NETWORK_CONFIG_FILE: &str = "bootstore-network-config.json"; pub fn new_bootstore_config( - storage_resources: &StorageResources, + all_disks: &AllDisks, baseboard: Baseboard, global_zone_bootstrap_ip: Ipv6Addr, ) -> Result { @@ -37,17 +37,17 @@ pub fn new_bootstore_config( learn_timeout: Duration::from_secs(5), rack_init_timeout: Duration::from_secs(300), rack_secret_request_timeout: Duration::from_secs(5), - fsm_state_ledger_paths: bootstore_fsm_state_paths(&storage_resources)?, + fsm_state_ledger_paths: bootstore_fsm_state_paths(&all_disks)?, network_config_ledger_paths: bootstore_network_config_paths( - &storage_resources, + &all_disks, )?, }) } fn bootstore_fsm_state_paths( - storage: &StorageResources, + all_disks: &AllDisks, ) -> Result, StartError> { - let paths: Vec<_> = storage + let paths: Vec<_> = all_disks .all_m2_mountpoints(CLUSTER_DATASET) .into_iter() .map(|p| p.join(BOOTSTORE_FSM_STATE_FILE)) @@ -60,9 +60,9 @@ fn bootstore_fsm_state_paths( } fn bootstore_network_config_paths( - storage: &StorageResources, + all_disks: &AllDisks, ) -> Result, StartError> { - let paths: Vec<_> = storage + let paths: Vec<_> = all_disks .all_m2_mountpoints(CLUSTER_DATASET) .into_iter() .map(|p| p.join(BOOTSTORE_NETWORK_CONFIG_FILE)) diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 38bedf921c..0657004b72 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -20,7 +20,6 @@ use crate::long_running_tasks::{ use 
crate::services::ServiceManager; use crate::services::TimeSyncConfig; use crate::sled_agent::SledAgent; -use crate::storage_monitor::UnderlayAccess; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; use futures::stream; @@ -54,7 +53,6 @@ pub(super) struct BootstrapAgentStartup { pub(super) service_manager: ServiceManager, pub(super) long_running_task_handles: LongRunningTaskHandles, pub(super) sled_agent_started_tx: oneshot::Sender, - pub(super) underlay_available_tx: oneshot::Sender, } impl BootstrapAgentStartup { @@ -126,7 +124,6 @@ impl BootstrapAgentStartup { long_running_task_handles, sled_agent_started_tx, service_manager_ready_tx, - underlay_available_tx, ) = spawn_all_longrunning_tasks( &base_log, sled_mode, @@ -172,7 +169,6 @@ impl BootstrapAgentStartup { service_manager, long_running_task_handles, sled_agent_started_tx, - underlay_available_tx, }) } } diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index bca3350696..6f61e87663 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -26,7 +26,6 @@ use crate::long_running_tasks::LongRunningTaskHandles; use crate::server::Server as SledAgentServer; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_monitor::UnderlayAccess; use bootstore::schemes::v0 as bootstore; use camino::Utf8PathBuf; use cancel_safe_futures::TryStreamExt; @@ -179,7 +178,6 @@ impl Server { service_manager, long_running_task_handles, sled_agent_started_tx, - underlay_available_tx, } = BootstrapAgentStartup::run(config).await?; // Do we have a StartSledAgentRequest stored in the ledger? @@ -242,7 +240,6 @@ impl Server { &config, start_sled_agent_request, long_running_task_handles.clone(), - underlay_available_tx, service_manager.clone(), &ddm_admin_localhost_client, &base_log, @@ -264,10 +261,7 @@ impl Server { sled_agent.load_services().await; SledAgentState::ServerStarted(sled_agent_server) } else { - SledAgentState::Bootstrapping( - Some(sled_agent_started_tx), - Some(underlay_available_tx), - ) + SledAgentState::Bootstrapping(Some(sled_agent_started_tx)) }; // Spawn our inner task that handles any future hardware updates and any @@ -310,10 +304,7 @@ impl Server { // bootstrap server). enum SledAgentState { // We're still in the bootstrapping phase, waiting for a sled-agent request. - Bootstrapping( - Option>, - Option>, - ), + Bootstrapping(Option>), // ... or the sled agent server is running. 
ServerStarted(SledAgentServer), } @@ -357,7 +348,6 @@ async fn start_sled_agent( config: &SledConfig, request: StartSledAgentRequest, long_running_task_handles: LongRunningTaskHandles, - underlay_available_tx: oneshot::Sender, service_manager: ServiceManager, ddmd_client: &DdmAdminClient, base_log: &Logger, @@ -429,7 +419,6 @@ async fn start_sled_agent( request.clone(), long_running_task_handles.clone(), service_manager, - underlay_available_tx, ) .await .map_err(SledAgentServerStartError::FailedStartingServer)?; @@ -495,7 +484,7 @@ impl From for SledAgentServerStartError { async fn sled_config_paths( storage: &StorageHandle, ) -> Result, MissingM2Paths> { - let resources = storage.get_latest_resources().await; + let resources = storage.get_latest_disks().await; let paths: Vec<_> = resources .all_m2_mountpoints(CONFIG_DATASET) .into_iter() @@ -573,10 +562,7 @@ impl Inner { log: &Logger, ) { match &mut self.state { - SledAgentState::Bootstrapping( - sled_agent_started_tx, - underlay_available_tx, - ) => { + SledAgentState::Bootstrapping(sled_agent_started_tx) => { let request_id = request.body.id; // Extract from options to satisfy the borrow checker. @@ -587,14 +573,11 @@ impl Inner { // See https://github.com/oxidecomputer/omicron/issues/4494 let sled_agent_started_tx = sled_agent_started_tx.take().unwrap(); - let underlay_available_tx = - underlay_available_tx.take().unwrap(); let response = match start_sled_agent( &self.config, request, self.long_running_task_handles.clone(), - underlay_available_tx, self.service_manager.clone(), &self.ddm_admin_localhost_client, &self.base_log, @@ -664,7 +647,7 @@ impl Inner { let config_dirs = self .long_running_task_handles .storage_manager - .get_latest_resources() + .get_latest_disks() .await .all_m2_mountpoints(CONFIG_DATASET) .into_iter(); diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index 058f343e2a..d084f5f546 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -12,7 +12,6 @@ use illumos_utils::dladm::Dladm; use illumos_utils::dladm::FindPhysicalLinkError; use illumos_utils::dladm::PhysicalLink; use illumos_utils::dladm::CHELSIO_LINK_PREFIX; -use illumos_utils::zpool::ZpoolName; use omicron_common::vlan::VlanID; use serde::Deserialize; use sled_hardware::is_gimlet; @@ -65,8 +64,8 @@ pub struct Config { pub swap_device_size_gb: Option, /// Optional VLAN ID to be used for tagging guest VNICs. pub vlan: Option, - /// Optional list of zpools to be used as "discovered disks". - pub zpools: Option>, + /// Optional list of virtual devices to be used as "discovered disks". 
+ pub vdevs: Option>, /// Optionally skip waiting for time synchronization pub skip_timesync: Option, diff --git a/sled-agent/src/dump_setup.rs b/sled-agent/src/dump_setup.rs index bdbc008ccb..4717f8b49e 100644 --- a/sled-agent/src/dump_setup.rs +++ b/sled-agent/src/dump_setup.rs @@ -89,13 +89,12 @@ use illumos_utils::dumpadm::{DumpAdm, DumpContentType}; use illumos_utils::zone::ZONE_PREFIX; use illumos_utils::zpool::{ZpoolHealth, ZpoolName}; use illumos_utils::ExecutionError; -use omicron_common::disk::DiskIdentity; use sled_hardware::DiskVariant; +use sled_storage::config::MountConfig; use sled_storage::dataset::{CRASH_DATASET, DUMP_DATASET}; use sled_storage::disk::Disk; -use sled_storage::pool::Pool; use slog::Logger; -use std::collections::{BTreeMap, HashSet}; +use std::collections::HashSet; use std::ffi::OsString; use std::path::{Path, PathBuf}; use std::sync::{Arc, Weak}; @@ -119,32 +118,50 @@ struct DebugDataset(Utf8PathBuf); struct CoreDataset(Utf8PathBuf); #[derive(AsRef, Clone, From)] -pub(super) struct CoreZpool(pub ZpoolName); +struct CoreZpool { + mount_config: MountConfig, + name: ZpoolName, +} + #[derive(AsRef, Clone, From)] -pub(super) struct DebugZpool(pub ZpoolName); +struct DebugZpool { + mount_config: MountConfig, + name: ZpoolName, +} impl GetMountpoint for DebugZpool { type NewType = DebugDataset; const MOUNTPOINT: &'static str = DUMP_DATASET; + fn mount_config(&self) -> &MountConfig { + &self.mount_config + } } impl GetMountpoint for CoreZpool { type NewType = CoreDataset; const MOUNTPOINT: &'static str = CRASH_DATASET; + fn mount_config(&self) -> &MountConfig { + &self.mount_config + } } // only want to access these directories after they're mounted! trait GetMountpoint: AsRef { type NewType: From; const MOUNTPOINT: &'static str; + + fn mount_config(&self) -> &MountConfig; + fn mountpoint( &self, invoker: &dyn ZfsInvoker, ) -> Result, ZfsGetError> { if invoker.zfs_get_prop(&self.as_ref().to_string(), "mounted")? 
== "yes" { - Ok(Some(Self::NewType::from( - invoker.mountpoint(self.as_ref(), Self::MOUNTPOINT), - ))) + Ok(Some(Self::NewType::from(invoker.mountpoint( + self.mount_config(), + self.as_ref(), + Self::MOUNTPOINT, + )))) } else { Ok(None) } @@ -172,12 +189,13 @@ struct DumpSetupWorker { pub struct DumpSetup { worker: Arc>, + mount_config: MountConfig, _poller: std::thread::JoinHandle<()>, log: Logger, } impl DumpSetup { - pub fn new(log: &Logger) -> Self { + pub fn new(log: &Logger, mount_config: MountConfig) -> Self { let worker = Arc::new(std::sync::Mutex::new(DumpSetupWorker::new( Box::new(RealCoreDumpAdm {}), Box::new(RealZfs {}), @@ -190,18 +208,19 @@ impl DumpSetup { Self::poll_file_archival(worker_weak, log_poll) }); let log = log.new(o!("component" => "DumpSetup")); - Self { worker, _poller, log } + Self { worker, mount_config, _poller, log } } pub(crate) async fn update_dumpdev_setup( &self, - disks: &BTreeMap, + disks: impl Iterator, ) { let log = &self.log; let mut m2_dump_slices = Vec::new(); let mut u2_debug_datasets = Vec::new(); let mut m2_core_datasets = Vec::new(); - for (_id, (disk, _)) in disks.iter() { + let mount_config = self.mount_config.clone(); + for disk in disks { if disk.is_synthetic() { // We only setup dump devices on real disks continue; @@ -222,8 +241,10 @@ impl DumpSetup { illumos_utils::zpool::Zpool::get_info(&name.to_string()) { if info.health() == ZpoolHealth::Online { - m2_core_datasets - .push(CoreZpool::from(name.clone())); + m2_core_datasets.push(CoreZpool { + mount_config: mount_config.clone(), + name: name.clone(), + }); } else { warn!(log, "Zpool {name:?} not online, won't attempt to save process core dumps there"); } @@ -235,8 +256,10 @@ impl DumpSetup { illumos_utils::zpool::Zpool::get_info(&name.to_string()) { if info.health() == ZpoolHealth::Online { - u2_debug_datasets - .push(DebugZpool::from(name.clone())); + u2_debug_datasets.push(DebugZpool { + mount_config: mount_config.clone(), + name: name.clone(), + }); } else { warn!(log, "Zpool {name:?} not online, won't attempt to save kernel core dumps there"); } @@ -349,6 +372,7 @@ trait ZfsInvoker { fn mountpoint( &self, + mount_config: &MountConfig, zpool: &ZpoolName, mountpoint: &'static str, ) -> Utf8PathBuf; @@ -458,10 +482,11 @@ impl ZfsInvoker for RealZfs { fn mountpoint( &self, + mount_config: &MountConfig, zpool: &ZpoolName, mountpoint: &'static str, ) -> Utf8PathBuf { - zpool.dataset_mountpoint(mountpoint) + zpool.dataset_mountpoint(&mount_config.root, mountpoint) } } @@ -1120,6 +1145,7 @@ mod tests { fn mountpoint( &self, + _mount_config: &MountConfig, zpool: &ZpoolName, mountpoint: &'static str, ) -> Utf8PathBuf { @@ -1174,8 +1200,10 @@ mod tests { assert_eq!(worker.chosen_core_dir, None); // nothing when only a disk that's not ready - let non_mounted_zpool = - CoreZpool(ZpoolName::from_str(NOT_MOUNTED_INTERNAL).unwrap()); + let non_mounted_zpool = CoreZpool { + mount_config: MountConfig::default(), + name: ZpoolName::from_str(NOT_MOUNTED_INTERNAL).unwrap(), + }; worker.update_disk_loadout(vec![], vec![], vec![non_mounted_zpool]); assert_eq!(worker.chosen_core_dir, None); logctx.cleanup_successful(); @@ -1191,11 +1219,18 @@ mod tests { const MOUNTED_INTERNAL: &str = "oxi_474e554e-6174-616c-6965-4e677579656e"; const ERROR_INTERNAL: &str = "oxi_4861636b-2054-6865-2050-6c616e657421"; - let mounted_zpool = - CoreZpool(ZpoolName::from_str(MOUNTED_INTERNAL).unwrap()); - let non_mounted_zpool = - CoreZpool(ZpoolName::from_str(NOT_MOUNTED_INTERNAL).unwrap()); - let err_zpool = 
CoreZpool(ZpoolName::from_str(ERROR_INTERNAL).unwrap()); + let mounted_zpool = CoreZpool { + mount_config: MountConfig::default(), + name: ZpoolName::from_str(MOUNTED_INTERNAL).unwrap(), + }; + let non_mounted_zpool = CoreZpool { + mount_config: MountConfig::default(), + name: ZpoolName::from_str(NOT_MOUNTED_INTERNAL).unwrap(), + }; + let err_zpool = CoreZpool { + mount_config: MountConfig::default(), + name: ZpoolName::from_str(ERROR_INTERNAL).unwrap(), + }; const ZPOOL_MNT: &str = "/path/to/internal/zpool"; let mut worker = DumpSetupWorker::new( Box::::default(), @@ -1364,8 +1399,10 @@ mod tests { let tempdir = TempDir::new().unwrap(); let (occupied, _) = populate_tempdir_with_fake_dumps(&tempdir); - let mounted_zpool = - DebugZpool(ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap()); + let mounted_zpool = DebugZpool { + mount_config: MountConfig::default(), + name: ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap(), + }; worker.update_disk_loadout( vec![occupied.clone()], vec![mounted_zpool], @@ -1447,10 +1484,14 @@ mod tests { ) .unwrap(); - let mounted_core_zpool = - CoreZpool(ZpoolName::from_str(MOUNTED_INTERNAL).unwrap()); - let mounted_debug_zpool = - DebugZpool(ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap()); + let mounted_core_zpool = CoreZpool { + mount_config: MountConfig::default(), + name: ZpoolName::from_str(MOUNTED_INTERNAL).unwrap(), + }; + let mounted_debug_zpool = DebugZpool { + mount_config: MountConfig::default(), + name: ZpoolName::from_str(MOUNTED_EXTERNAL).unwrap(), + }; worker.update_disk_loadout( vec![], diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index cbd3134cf0..3708a642f3 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -177,10 +177,27 @@ impl HardwareMonitor { } } HardwareUpdate::DiskAdded(disk) => { - self.storage_manager.upsert_disk(disk.into()).await; + // We notify the storage manager of the hardware, but do not need to + // wait for the result to be fully processed. + // + // Here and below, we're "dropping a future" rather than + // awaiting it. That's intentional - the hardware monitor + // doesn't care when this work is finished, just when it's + // enqueued. + #[allow(clippy::let_underscore_future)] + let _ = self + .storage_manager + .detected_raw_disk(disk.into()) + .await; } HardwareUpdate::DiskRemoved(disk) => { - self.storage_manager.delete_disk(disk.into()).await; + // We notify the storage manager of the hardware, but do not need to + // wait for the result to be fully processed. + #[allow(clippy::let_underscore_future)] + let _ = self + .storage_manager + .detected_raw_disk_removal(disk.into()) + .await; } }, Err(broadcast::error::RecvError::Lagged(count)) => { @@ -251,7 +268,11 @@ impl HardwareMonitor { self.deactivate_switch().await; } - self.storage_manager + // We notify the storage manager of the hardware, but do not need to + // wait for the result to be fully processed. 
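                // The storage-handle methods used in this block return a
                // future whose output is itself awaitable: the `.await` below
                // enqueues the request with the storage manager, and the
                // discarded value could be awaited a second time to wait for
                // the outcome (compare the `.await.await` on
                // `detected_raw_disk` in long_running_tasks.rs in this
                // change). Dropping it therefore means "enqueue and move on",
                // which is why the clippy lint is allowed.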
+ #[allow(clippy::let_underscore_future)] + let _ = self + .storage_manager .ensure_using_exactly_these_disks( self.hardware_manager.disks().into_iter().map(RawDisk::from), ) diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index bf1102d897..23a1bde4d8 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -11,8 +11,8 @@ use crate::params::{ BootstoreStatus, CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody, InstancePutMigrationIdsBody, InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, Inventory, - OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRulesEnsureBody, - ZoneBundleId, ZoneBundleMetadata, Zpool, + OmicronPhysicalDisksConfig, OmicronZonesConfig, SledRole, TimeSync, + VpcFirewallRulesEnsureBody, ZoneBundleId, ZoneBundleMetadata, Zpool, }; use crate::sled_agent::Error as SledAgentError; use crate::zone_bundle; @@ -40,6 +40,7 @@ use oximeter_producer::ProducerIdPathParams; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::DiskVariant; +use sled_storage::resources::DisksManagementResult; use std::collections::BTreeMap; use uuid::Uuid; @@ -60,6 +61,8 @@ pub fn api() -> SledApiDescription { api.register(omicron_zones_get)?; api.register(omicron_zones_put)?; api.register(zones_list)?; + api.register(omicron_physical_disks_get)?; + api.register(omicron_physical_disks_put)?; api.register(zone_bundle_list)?; api.register(zone_bundle_list_all)?; api.register(zone_bundle_create)?; @@ -338,6 +341,31 @@ async fn omicron_zones_get( Ok(HttpResponseOk(sa.omicron_zones_list().await?)) } +#[endpoint { + method = PUT, + path = "/omicron-physical-disks", +}] +async fn omicron_physical_disks_put( + rqctx: RequestContext, + body: TypedBody, +) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.omicron_physical_disks_ensure(body_args).await?; + Ok(HttpResponseOk(result)) +} + +#[endpoint { + method = GET, + path = "/omicron-physical-disks", +}] +async fn omicron_physical_disks_get( + rqctx: RequestContext, +) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) +} + #[endpoint { method = PUT, path = "/omicron-zones", @@ -839,8 +867,8 @@ async fn host_os_write_start( // Find our corresponding disk. let maybe_disk_path = - sa.storage().get_latest_resources().await.disks().values().find_map( - |(disk, _pool)| { + sa.storage().get_latest_disks().await.iter_managed().find_map( + |(_identity, disk)| { // Synthetic disks panic if asked for their `slot()`, so filter // them out first; additionally, filter out any non-M2 disks. 
if disk.is_synthetic() || disk.variant() != DiskVariant::M2 { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index b859c08a94..d016715591 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -1340,7 +1340,7 @@ impl InstanceRunner { let mut rng = rand::rngs::StdRng::from_entropy(); let root = self .storage - .get_latest_resources() + .get_latest_disks() .await .all_u2_mountpoints(ZONE_DATASET) .choose(&mut rng) @@ -1520,17 +1520,15 @@ impl InstanceRunner { } } -#[cfg(test)] +#[cfg(all(test, target_os = "illumos"))] mod tests { use super::*; use crate::fakes::nexus::{FakeNexusServer, ServerContext}; - use crate::nexus::NexusClient; use crate::vmm_reservoir::VmmReservoirManagerHandle; use crate::zone_bundle::CleanupContext; use camino_tempfile::Utf8TempDir; - use dns_server::dns_server::ServerHandle as DnsServerHandle; - use dropshot::test_util::LogContext; - use dropshot::{HandlerTaskMode, HttpServer}; + use dns_server::TransientServer; + use dropshot::HttpServer; use illumos_utils::dladm::MockDladm; use illumos_utils::dladm::__mock_MockDladm::__create_vnic::Context as MockDladmCreateVnicContext; use illumos_utils::dladm::__mock_MockDladm::__delete_vnic::Context as MockDladmDeleteVnicContext; @@ -1539,15 +1537,13 @@ mod tests { use illumos_utils::zone::MockZones; use illumos_utils::zone::__mock_MockZones::__boot::Context as MockZonesBootContext; use illumos_utils::zone::__mock_MockZones::__id::Context as MockZonesIdContext; - use illumos_utils::zpool::ZpoolName; use internal_dns::resolver::Resolver; - use internal_dns::ServiceName; use omicron_common::api::external::{ ByteCount, Generation, Hostname, InstanceCpuCount, InstanceState, }; use omicron_common::api::internal::nexus::InstanceProperties; - use sled_storage::disk::{RawDisk, SyntheticDisk}; - use sled_storage::manager::FakeStorageManager; + use omicron_common::FileKv; + use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::net::Ipv6Addr; use std::str::FromStr; use tokio::sync::watch::Receiver; @@ -1584,26 +1580,42 @@ mod tests { } struct FakeNexusParts { - nexus_client: NexusClient, - nexus_server: HttpServer, + nexus_client: NexusClientWithResolver, + _nexus_server: HttpServer, state_rx: Receiver, + _dns_server: TransientServer, } impl FakeNexusParts { - fn new(logctx: &LogContext) -> Self { + async fn new(log: &Logger) -> Self { let (state_tx, state_rx) = tokio::sync::watch::channel(ReceivedInstanceState::None); - let nexus_server = crate::fakes::nexus::start_test_server( - logctx.log.new(o!("component" => "FakeNexusServer")), + let _nexus_server = crate::fakes::nexus::start_test_server( + log.new(o!("component" => "FakeNexusServer")), Box::new(NexusServer { observed_runtime_state: state_tx }), ); - let nexus_client = NexusClient::new( - &format!("http://{}", nexus_server.local_addr()), - logctx.log.new(o!("component" => "NexusClient")), + + let _dns_server = + crate::fakes::nexus::start_dns_server(&log, &_nexus_server) + .await; + + let resolver = Arc::new( + Resolver::new_from_addrs( + log.clone(), + &[_dns_server.dns_server.local_address()], + ) + .unwrap(), ); - Self { nexus_client, nexus_server, state_rx } + let nexus_client = + NexusClientWithResolver::new_from_resolver_with_port( + &log, + resolver, + _nexus_server.local_addr().port(), + ); + + Self { nexus_client, _nexus_server, state_rx, _dns_server } } } @@ -1639,65 +1651,6 @@ mod tests { (boot_ctx, wait_ctx, zone_id_ctx) } - async fn dns_server( - logctx: &LogContext, - nexus_server: &HttpServer, - ) -> 
(DnsServerHandle, Arc, Utf8TempDir) { - let storage_path = - Utf8TempDir::new().expect("Failed to create temporary directory"); - let config_store = dns_server::storage::Config { - keep_old_generations: 3, - storage_path: storage_path.path().to_owned(), - }; - - let (dns_server, dns_dropshot) = dns_server::start_servers( - logctx.log.new(o!("component" => "DnsServer")), - dns_server::storage::Store::new( - logctx.log.new(o!("component" => "DnsStore")), - &config_store, - ) - .unwrap(), - &dns_server::dns_server::Config { - bind_address: "[::1]:0".parse().unwrap(), - }, - &dropshot::ConfigDropshot { - bind_address: "[::1]:0".parse().unwrap(), - request_body_max_bytes: 8 * 1024, - default_handler_task_mode: HandlerTaskMode::Detached, - }, - ) - .await - .expect("starting DNS server"); - - let dns_dropshot_client = dns_service_client::Client::new( - &format!("http://{}", dns_dropshot.local_addr()), - logctx.log.new(o!("component" => "DnsDropshotClient")), - ); - let mut dns_config = internal_dns::DnsConfigBuilder::new(); - let IpAddr::V6(nexus_ip_addr) = nexus_server.local_addr().ip() else { - panic!("IPv6 address required for nexus_server") - }; - let zone = dns_config.host_zone(Uuid::new_v4(), nexus_ip_addr).unwrap(); - dns_config - .service_backend_zone( - ServiceName::Nexus, - &zone, - nexus_server.local_addr().port(), - ) - .unwrap(); - let dns_config = dns_config.build_full_config_for_initial_generation(); - dns_dropshot_client.dns_config_put(&dns_config).await.unwrap(); - - let resolver = Arc::new( - Resolver::new_from_addrs( - logctx.log.new(o!("component" => "Resolver")), - &[dns_server.local_address()], - ) - .unwrap(), - ); - (dns_server, resolver, storage_path) - } - // note the "mock" here is different from the vnic/zone contexts above. // this is actually running code for a dropshot server from propolis. 
// (might we want a locally-defined fake whose behavior we can control @@ -1736,19 +1689,22 @@ mod tests { (srv, client) } - // make a FakeStorageManager with a "U2" upserted - async fn fake_storage_manager_with_u2() -> StorageHandle { - let (storage_manager, storage_handle) = FakeStorageManager::new(); - tokio::spawn(storage_manager.run()); - let external_zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let external_disk: RawDisk = - SyntheticDisk::new(external_zpool_name, 0).into(); - storage_handle.upsert_disk(external_disk).await; - storage_handle + async fn setup_storage_manager(log: &Logger) -> StorageManagerTestHarness { + let mut harness = StorageManagerTestHarness::new(log).await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + harness.handle().key_manager_ready().await; + let config = harness.make_config(1, &raw_disks); + let _ = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + harness } async fn instance_struct( - logctx: &LogContext, + log: &Logger, propolis_addr: SocketAddr, nexus_client_with_resolver: NexusClientWithResolver, storage_handle: StorageHandle, @@ -1763,7 +1719,7 @@ mod tests { fake_instance_initial_state(propolis_id, propolis_addr); let services = fake_instance_manager_services( - logctx, + log, storage_handle, nexus_client_with_resolver, temp_dir, @@ -1775,7 +1731,7 @@ mod tests { }; Instance::new( - logctx.log.new(o!("component" => "Instance")), + log.new(o!("component" => "Instance")), id, propolis_id, ticket, @@ -1833,7 +1789,7 @@ mod tests { } fn fake_instance_manager_services( - logctx: &LogContext, + log: &Logger, storage_handle: StorageHandle, nexus_client_with_resolver: NexusClientWithResolver, temp_dir: &String, @@ -1841,13 +1797,13 @@ mod tests { let vnic_allocator = VnicAllocator::new("Foo", Etherstub("mystub".to_string())); let port_manager = PortManager::new( - logctx.log.new(o!("component" => "PortManager")), + log.new(o!("component" => "PortManager")), Ipv6Addr::new(0xfd00, 0x1de, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01), ); let cleanup_context = CleanupContext::default(); let zone_bundler = ZoneBundler::new( - logctx.log.new(o!("component" => "ZoneBundler")), + log.new(o!("component" => "ZoneBundler")), storage_handle.clone(), cleanup_context, ); @@ -1867,27 +1823,24 @@ mod tests { let logctx = omicron_test_utils::dev::test_setup_log( "test_instance_create_events_normal", ); + let log = logctx.log.new(o!(FileKv)); - let (propolis_server, _propolis_client) = - propolis_mock_server(&logctx.log); + let (propolis_server, _propolis_client) = propolis_mock_server(&log); let propolis_addr = propolis_server.local_addr(); // automock'd things used during this test let _mock_vnic_contexts = mock_vnic_contexts(); let _mock_zone_contexts = mock_zone_contexts(); - let FakeNexusParts { nexus_client, nexus_server, mut state_rx } = - FakeNexusParts::new(&logctx); - - let (_dns_server, resolver, _dns_config_dir) = - timeout(TIMEOUT_DURATION, dns_server(&logctx, &nexus_server)) - .await - .expect("timed out making DNS server and Resolver"); - - let nexus_client_with_resolver = - NexusClientWithResolver::new_with_client(nexus_client, resolver); + let FakeNexusParts { + nexus_client, + mut state_rx, + _dns_server, + _nexus_server, + } = FakeNexusParts::new(&log).await; - let storage_handle = fake_storage_manager_with_u2().await; + let mut storage_harness = setup_storage_manager(&log).await; + let storage_handle = 
storage_harness.handle().clone(); let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); @@ -1895,9 +1848,9 @@ mod tests { let inst = timeout( TIMEOUT_DURATION, instance_struct( - &logctx, + &log, propolis_addr, - nexus_client_with_resolver, + nexus_client, storage_handle, &temp_dir, ), @@ -1935,6 +1888,7 @@ mod tests { .expect("timed out waiting for InstanceState::Running in FakeNexus") .expect("failed to receive FakeNexus' InstanceState"); + storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -1944,23 +1898,21 @@ mod tests { let logctx = omicron_test_utils::dev::test_setup_log( "test_instance_create_timeout_while_starting_propolis", ); + let log = logctx.log.new(o!(FileKv)); // automock'd things used during this test let _mock_vnic_contexts = mock_vnic_contexts(); let _mock_zone_contexts = mock_zone_contexts(); - let FakeNexusParts { nexus_client, nexus_server, state_rx } = - FakeNexusParts::new(&logctx); - - let (_dns_server, resolver, _dns_config_dir) = - timeout(TIMEOUT_DURATION, dns_server(&logctx, &nexus_server)) - .await - .expect("timed out making DNS server and Resolver"); - - let nexus_client_with_resolver = - NexusClientWithResolver::new_with_client(nexus_client, resolver); + let FakeNexusParts { + nexus_client, + state_rx, + _dns_server, + _nexus_server, + } = FakeNexusParts::new(&log).await; - let storage_handle = fake_storage_manager_with_u2().await; + let mut storage_harness = setup_storage_manager(&logctx.log).await; + let storage_handle = storage_harness.handle().clone(); let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); @@ -1968,10 +1920,10 @@ mod tests { let inst = timeout( TIMEOUT_DURATION, instance_struct( - &logctx, + &log, // we want to test propolis not ever coming up SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 1, 0, 0)), - nexus_client_with_resolver, + nexus_client, storage_handle, &temp_dir, ), @@ -2007,6 +1959,7 @@ mod tests { panic!("Nexus's InstanceState should never have reached running if zone creation timed out"); } + storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -2015,6 +1968,7 @@ mod tests { let logctx = omicron_test_utils::dev::test_setup_log( "test_instance_create_timeout_while_creating_zone", ); + let log = logctx.log.new(o!(FileKv)); // automock'd things used during this test let _mock_vnic_contexts = mock_vnic_contexts(); @@ -2032,18 +1986,15 @@ mod tests { let zone_id_ctx = MockZones::id_context(); zone_id_ctx.expect().times(..).returning(|_| Ok(Some(1))); - let FakeNexusParts { nexus_client, nexus_server, state_rx } = - FakeNexusParts::new(&logctx); - - let (_dns_server, resolver, _dns_config_dir) = - timeout(TIMEOUT_DURATION, dns_server(&logctx, &nexus_server)) - .await - .expect("timed out making DNS server and Resolver"); - - let nexus_client_with_resolver = - NexusClientWithResolver::new_with_client(nexus_client, resolver); + let FakeNexusParts { + nexus_client, + state_rx, + _dns_server, + _nexus_server, + } = FakeNexusParts::new(&log).await; - let storage_handle = fake_storage_manager_with_u2().await; + let mut storage_harness = setup_storage_manager(&logctx.log).await; + let storage_handle = storage_harness.handle().clone(); let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); @@ -2051,10 +2002,10 @@ mod tests { let inst = timeout( TIMEOUT_DURATION, instance_struct( - &logctx, + &log, // isn't running because the "zone" never "boots" 
SocketAddr::V6(SocketAddrV6::new(Ipv6Addr::LOCALHOST, 1, 0, 0)), - nexus_client_with_resolver, + nexus_client, storage_handle, &temp_dir, ), @@ -2090,6 +2041,7 @@ mod tests { panic!("Nexus's InstanceState should never have reached running if zone creation timed out"); } + storage_harness.cleanup().await; logctx.cleanup_successful(); } @@ -2098,23 +2050,21 @@ mod tests { let logctx = omicron_test_utils::dev::test_setup_log( "test_instance_manager_creation", ); + let log = logctx.log.new(o!(FileKv)); // automock'd things used during this test let _mock_vnic_contexts = mock_vnic_contexts(); let _mock_zone_contexts = mock_zone_contexts(); - let storage_handle = fake_storage_manager_with_u2().await; + let mut storage_harness = setup_storage_manager(&logctx.log).await; + let storage_handle = storage_harness.handle().clone(); - let FakeNexusParts { nexus_client, nexus_server, mut state_rx } = - FakeNexusParts::new(&logctx); - - let (_dns_server, resolver, _dns_config_dir) = - timeout(TIMEOUT_DURATION, dns_server(&logctx, &nexus_server)) - .await - .expect("timed out making DNS server and Resolver"); - - let nexus_client_with_resolver = - NexusClientWithResolver::new_with_client(nexus_client, resolver); + let FakeNexusParts { + nexus_client, + mut state_rx, + _dns_server, + _nexus_server, + } = FakeNexusParts::new(&log).await; let temp_guard = Utf8TempDir::new().unwrap(); let temp_dir = temp_guard.path().to_string(); @@ -2127,9 +2077,9 @@ mod tests { zone_bundler, zone_builder_factory, } = fake_instance_manager_services( - &logctx, + &log, storage_handle, - nexus_client_with_resolver, + nexus_client, &temp_dir, ); @@ -2196,6 +2146,7 @@ mod tests { .expect("timed out waiting for InstanceState::Running in FakeNexus") .expect("failed to receive FakeNexus' InstanceState"); + storage_harness.cleanup().await; logctx.cleanup_successful(); } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index 2c9780b3ce..cf6563b117 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -750,7 +750,7 @@ impl InstanceTicket { InstanceTicket { id, terminate_tx: Some(terminate_tx) } } - #[cfg(test)] + #[cfg(all(test, target_os = "illumos"))] pub(crate) fn new_without_manager_for_test(id: Uuid) -> Self { Self { id, terminate_tx: None } } diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 3b29bdda60..9b0ea7ac6c 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -20,12 +20,13 @@ use crate::config::Config; use crate::hardware_monitor::HardwareMonitor; use crate::services::ServiceManager; use crate::sled_agent::SledAgent; -use crate::storage_monitor::{StorageMonitor, UnderlayAccess}; +use crate::storage_monitor::StorageMonitor; use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; use sled_hardware::{HardwareManager, SledMode}; -use sled_storage::disk::SyntheticDisk; +use sled_storage::config::MountConfig; +use sled_storage::disk::RawSyntheticDisk; use sled_storage::manager::{StorageHandle, StorageManager}; use slog::{info, Logger}; use std::net::Ipv6Addr; @@ -65,14 +66,12 @@ pub async fn spawn_all_longrunning_tasks( LongRunningTaskHandles, oneshot::Sender, oneshot::Sender, - oneshot::Sender, ) { let storage_key_requester = spawn_key_manager(log); let mut storage_manager = spawn_storage_manager(log, storage_key_requester.clone()); - let underlay_available_tx = - 
spawn_storage_monitor(log, storage_manager.clone()); + spawn_storage_monitor(log, storage_manager.clone()); let hardware_manager = spawn_hardware_manager(log, sled_mode).await; @@ -81,7 +80,7 @@ pub async fn spawn_all_longrunning_tasks( spawn_hardware_monitor(log, &hardware_manager, &storage_manager); // Add some synthetic disks if necessary. - upsert_synthetic_zpools_if_needed(&log, &storage_manager, &config).await; + upsert_synthetic_disks_if_needed(&log, &storage_manager, &config).await; // Wait for the boot disk so that we can work with any ledgers, // such as those needed by the bootstore and sled-agent @@ -109,7 +108,6 @@ pub async fn spawn_all_longrunning_tasks( }, sled_agent_started_tx, service_manager_ready_tx, - underlay_available_tx, ) } @@ -127,24 +125,21 @@ fn spawn_storage_manager( key_requester: StorageKeyRequester, ) -> StorageHandle { info!(log, "Starting StorageManager"); - let (manager, handle) = StorageManager::new(log, key_requester); + let (manager, handle) = + StorageManager::new(log, MountConfig::default(), key_requester); tokio::spawn(async move { manager.run().await; }); handle } -fn spawn_storage_monitor( - log: &Logger, - storage_handle: StorageHandle, -) -> oneshot::Sender { +fn spawn_storage_monitor(log: &Logger, storage_handle: StorageHandle) { info!(log, "Starting StorageMonitor"); - let (storage_monitor, underlay_available_tx) = - StorageMonitor::new(log, storage_handle); + let storage_monitor = + StorageMonitor::new(log, MountConfig::default(), storage_handle); tokio::spawn(async move { storage_monitor.run().await; }); - underlay_available_tx } async fn spawn_hardware_manager( @@ -188,9 +183,9 @@ async fn spawn_bootstore_tasks( hardware_manager: &HardwareManager, global_zone_bootstrap_ip: Ipv6Addr, ) -> bootstore::NodeHandle { - let storage_resources = storage_handle.get_latest_resources().await; + let iter_all = storage_handle.get_latest_disks().await; let config = new_bootstore_config( - &storage_resources, + &iter_all, hardware_manager.baseboard(), global_zone_bootstrap_ip, ) @@ -222,21 +217,22 @@ fn spawn_zone_bundler_tasks( ZoneBundler::new(log, storage_handle.clone(), CleanupContext::default()) } -async fn upsert_synthetic_zpools_if_needed( +async fn upsert_synthetic_disks_if_needed( log: &Logger, storage_manager: &StorageHandle, config: &Config, ) { - if let Some(pools) = &config.zpools { - for (i, pool) in pools.iter().enumerate() { + if let Some(vdevs) = &config.vdevs { + for (i, vdev) in vdevs.iter().enumerate() { info!( log, - "Upserting synthetic zpool to Storage Manager: {}", - pool.to_string() + "Upserting synthetic device to Storage Manager"; + "vdev" => vdev.to_string(), ); - let disk = - SyntheticDisk::new(pool.clone(), i.try_into().unwrap()).into(); - storage_manager.upsert_disk(disk).await; + let disk = RawSyntheticDisk::load(vdev, i.try_into().unwrap()) + .expect("Failed to parse synthetic disk") + .into(); + storage_manager.detected_raw_disk(disk).await.await.unwrap(); } } } diff --git a/sled-agent/src/nexus.rs b/sled-agent/src/nexus.rs index 3f24c6a806..12fcc05ce3 100644 --- a/sled-agent/src/nexus.rs +++ b/sled-agent/src/nexus.rs @@ -60,16 +60,6 @@ impl NexusClientWithResolver { } } - // for when we have a NexusClient constructed from a FakeNexusServer - // (no need to expose this function outside of tests) - #[cfg(test)] - pub(crate) fn new_with_client( - client: NexusClient, - resolver: Arc, - ) -> Self { - Self { client, resolver } - } - /// Access the progenitor-based Nexus Client. 
pub fn client(&self) -> &NexusClient { &self.client diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index c9e0211690..12c2907f49 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -294,6 +294,11 @@ impl std::fmt::Display for ZoneType { } } +pub type OmicronPhysicalDiskConfig = + sled_storage::disk::OmicronPhysicalDiskConfig; +pub type OmicronPhysicalDisksConfig = + sled_storage::disk::OmicronPhysicalDisksConfig; + /// Describes the set of Omicron-managed zones running on a sled #[derive( Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, diff --git a/sled-agent/src/probe_manager.rs b/sled-agent/src/probe_manager.rs index 8481dc4b79..16559039a2 100644 --- a/sled-agent/src/probe_manager.rs +++ b/sled-agent/src/probe_manager.rs @@ -206,7 +206,7 @@ impl ProbeManagerInner { let mut rng = rand::rngs::StdRng::from_entropy(); let root = self .storage - .get_latest_resources() + .get_latest_disks() .await .all_u2_mountpoints(ZONE_DATASET) .choose(&mut rng) diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 153031a545..9e0a2941c5 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -5,7 +5,10 @@ //! Plan generation for "where should services be initialized". use crate::bootstrap::params::StartSledAgentRequest; -use crate::params::{OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType}; +use crate::params::{ + OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, OmicronZoneConfig, + OmicronZoneDataset, OmicronZoneType, +}; use crate::rack_setup::config::SetupServiceConfig as Config; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; @@ -18,7 +21,7 @@ use omicron_common::address::{ MGD_PORT, MGS_PORT, NEXUS_REDUNDANCY, NTP_PORT, NUM_SOURCE_NAT_PORTS, RSS_RESERVED_ADDRESSES, SLED_PREFIX, }; -use omicron_common::api::external::{MacAddr, Vni}; +use omicron_common::api::external::{Generation, MacAddr, Vni}; use omicron_common::api::internal::shared::{ NetworkInterface, NetworkInterfaceKind, SourceNatConfig, }; @@ -59,7 +62,7 @@ const CLICKHOUSE_COUNT: usize = 1; const CLICKHOUSE_KEEPER_COUNT: usize = 0; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove. // when Nexus provisions Crucible. -const MINIMUM_U2_ZPOOL_COUNT: usize = 3; +const MINIMUM_U2_COUNT: usize = 3; // TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove. // when Nexus provisions the Pantry. 
const PANTRY_COUNT: usize = 3; @@ -94,10 +97,16 @@ pub enum PlanError { #[error("Found only v1 service plan")] FoundV1, + + #[error("Found only v2 service plan")] + FoundV2, } #[derive(Clone, Debug, Default, Serialize, Deserialize, JsonSchema)] pub struct SledConfig { + /// Control plane disks configured for this sled + pub disks: OmicronPhysicalDisksConfig, + /// zones configured for this sled pub zones: Vec, } @@ -115,7 +124,8 @@ impl Ledgerable for Plan { fn generation_bump(&mut self) {} } const RSS_SERVICE_PLAN_V1_FILENAME: &str = "rss-service-plan.json"; -const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v2.json"; +const RSS_SERVICE_PLAN_V2_FILENAME: &str = "rss-service-plan-v2.json"; +const RSS_SERVICE_PLAN_FILENAME: &str = "rss-service-plan-v3.json"; impl Plan { pub async fn load( @@ -123,7 +133,7 @@ impl Plan { storage_manager: &StorageHandle, ) -> Result, PlanError> { let paths: Vec = storage_manager - .get_latest_resources() + .get_latest_disks() .await .all_m2_mountpoints(CONFIG_DATASET) .into_iter() @@ -167,6 +177,14 @@ impl Plan { // support a condition that we do not believe can ever happen in any // system. Err(PlanError::FoundV1) + } else if Self::has_v2(storage_manager).await.map_err(|err| { + // Same as the comment above, but for version 2. + PlanError::Io { + message: String::from("looking for v2 RSS plan"), + err, + } + })? { + Err(PlanError::FoundV2) } else { Ok(None) } @@ -176,7 +194,7 @@ impl Plan { storage_manager: &StorageHandle, ) -> Result { let paths = storage_manager - .get_latest_resources() + .get_latest_disks() .await .all_m2_mountpoints(CONFIG_DATASET) .into_iter() @@ -191,6 +209,25 @@ impl Plan { Ok(false) } + async fn has_v2( + storage_manager: &StorageHandle, + ) -> Result { + let paths = storage_manager + .get_latest_disks() + .await + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(RSS_SERVICE_PLAN_V2_FILENAME)); + + for p in paths { + if p.try_exists()? { + return Ok(true); + } + } + + Ok(false) + } + async fn is_sled_scrimlet( log: &Logger, address: SocketAddrV6, @@ -214,11 +251,10 @@ impl Plan { } } - // Gets zpool UUIDs from U.2 devices on the sled. 
- async fn get_u2_zpools_from_sled( + async fn get_inventory( log: &Logger, address: SocketAddrV6, - ) -> Result, PlanError> { + ) -> Result { let dur = std::time::Duration::from_secs(60); let client = reqwest::ClientBuilder::new() .connect_timeout(dur) @@ -231,52 +267,47 @@ impl Plan { log.new(o!("SledAgentClient" => address.to_string())), ); - let get_u2_zpools = || async { - let zpools: Vec = client - .zpools_get() + let get_inventory = || async { + let inventory = client + .inventory() .await - .map(|response| { - response - .into_inner() - .into_iter() - .filter_map(|zpool| match zpool.disk_type { - SledAgentTypes::DiskType::U2 => { - Some(ZpoolName::new_external(zpool.id)) - } - SledAgentTypes::DiskType::M2 => None, - }) - .collect() - }) + .map(|response| response.into_inner()) .map_err(|err| { BackoffError::transient(PlanError::SledApi(err)) })?; - if zpools.len() < MINIMUM_U2_ZPOOL_COUNT { + if inventory + .disks + .iter() + .filter(|disk| { + matches!(disk.variant, SledAgentTypes::DiskVariant::U2) + }) + .count() + < MINIMUM_U2_COUNT + { return Err(BackoffError::transient( - PlanError::SledInitialization( - "Awaiting zpools".to_string(), - ), + PlanError::SledInitialization("Awaiting disks".to_string()), )); } - Ok(zpools) + Ok(inventory) }; - let log_failure = |error, call_count, total_duration| { + let log_failure = |error: PlanError, call_count, total_duration| { if call_count == 0 { - info!(log, "failed to get zpools from {address}"; "error" => ?error); + info!(log, "failed to get inventory from {address}"; "error" => ?error); } else if total_duration > std::time::Duration::from_secs(20) { - warn!(log, "failed to get zpools from {address}"; "error" => ?error, "total duration" => ?total_duration); + warn!(log, "failed to get inventory from {address}"; "error" => ?error, "total duration" => ?total_duration); } }; - let u2_zpools = retry_notify_ext( + let inventory = retry_notify_ext( retry_policy_internal_service_aggressive(), - get_u2_zpools, + get_inventory, log_failure, ) .await?; - Ok(u2_zpools) + Ok(inventory) } pub fn create_transient( @@ -307,6 +338,37 @@ impl Plan { .unwrap(); } + // Set up storage early, as it'll be necessary for placement of + // many subsequent services. + // + // Our policy at RSS time is currently "adopt all the U.2 disks we can see". + for sled_info in sled_info.iter_mut() { + let disks = sled_info + .inventory + .disks + .iter() + .filter(|disk| { + matches!(disk.variant, SledAgentTypes::DiskVariant::U2) + }) + .map(|disk| OmicronPhysicalDiskConfig { + identity: disk.identity.clone(), + id: Uuid::new_v4(), + pool_id: Uuid::new_v4(), + }) + .collect(); + sled_info.request.disks = OmicronPhysicalDisksConfig { + generation: Generation::new(), + disks, + }; + sled_info.u2_zpools = sled_info + .request + .disks + .disks + .iter() + .map(|disk| ZpoolName::new_external(disk.pool_id)) + .collect(); + } + // We'll stripe most services across all available Sleds, round-robin // style. In development and CI, this might only be one Sled. 
We'll // only report `NotEnoughSleds` below if there are zero Sleds or if we @@ -708,16 +770,15 @@ impl Plan { |sled_request| async { let subnet = sled_request.body.subnet; let sled_address = get_sled_address(subnet); - let u2_zpools = - Self::get_u2_zpools_from_sled(log, sled_address) - .await?; + let inventory = + Self::get_inventory(log, sled_address).await?; let is_scrimlet = Self::is_sled_scrimlet(log, sled_address).await?; Ok(SledInfo::new( sled_request.body.id, subnet, sled_address, - u2_zpools, + inventory, is_scrimlet, )) }, @@ -730,7 +791,7 @@ impl Plan { // Once we've constructed a plan, write it down to durable storage. let paths: Vec = storage_manager - .get_latest_resources() + .get_latest_disks() .await .all_m2_mountpoints(CONFIG_DATASET) .into_iter() @@ -771,7 +832,9 @@ pub struct SledInfo { subnet: Ipv6Subnet, /// the address of the Sled Agent on the sled's subnet pub sled_address: SocketAddrV6, - /// the list of zpools on the Sled + /// the inventory returned by the Sled + inventory: SledAgentTypes::Inventory, + /// The Zpools available for usage by services u2_zpools: Vec, /// spreads components across a Sled's zpools u2_zpool_allocators: @@ -789,14 +852,15 @@ impl SledInfo { sled_id: Uuid, subnet: Ipv6Subnet, sled_address: SocketAddrV6, - u2_zpools: Vec, + inventory: SledAgentTypes::Inventory, is_scrimlet: bool, ) -> SledInfo { SledInfo { sled_id, subnet, sled_address, - u2_zpools, + inventory, + u2_zpools: vec![], u2_zpool_allocators: HashMap::new(), is_scrimlet, addr_alloc: AddressBumpAllocator::new(subnet), @@ -1207,10 +1271,10 @@ mod tests { } #[test] - fn test_rss_service_plan_v2_schema() { + fn test_rss_service_plan_v3_schema() { let schema = schemars::schema_for!(Plan); expectorate::assert_contents( - "../schema/rss-service-plan-v2.json", + "../schema/rss-service-plan-v3.json", &serde_json::to_string_pretty(&schema).unwrap(), ); } diff --git a/sled-agent/src/rack_setup/plan/sled.rs b/sled-agent/src/rack_setup/plan/sled.rs index efdd86d2f9..a3fd57369a 100644 --- a/sled-agent/src/rack_setup/plan/sled.rs +++ b/sled-agent/src/rack_setup/plan/sled.rs @@ -59,7 +59,7 @@ impl Plan { storage: &StorageHandle, ) -> Result, PlanError> { let paths: Vec = storage - .get_latest_resources() + .get_latest_disks() .await .all_m2_mountpoints(CONFIG_DATASET) .into_iter() @@ -126,7 +126,7 @@ impl Plan { // Once we've constructed a plan, write it down to durable storage. let paths: Vec = storage_manager - .get_latest_resources() + .get_latest_disks() .await .all_m2_mountpoints(CONFIG_DATASET) .into_iter() diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 587625fe7b..5ff6074249 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -17,7 +17,7 @@ //! state files that get generated as RSS executes: //! //! - /pool/int/UUID/config/rss-sled-plan.json (Sled Plan) -//! - /pool/int/UUID/config/rss-service-plan-v2.json (Service Plan) +//! - /pool/int/UUID/config/rss-service-plan-v3.json (Service Plan) //! - /pool/int/UUID/config/rss-plan-completed.marker (Plan Execution Complete) //! //! These phases are described below. 
As each phase completes, a corresponding @@ -115,6 +115,7 @@ use std::collections::{btree_map, BTreeMap, BTreeSet}; use std::collections::{HashMap, HashSet}; use std::iter; use std::net::{Ipv6Addr, SocketAddrV6}; +use std::time::Duration; use thiserror::Error; use uuid::Uuid; @@ -276,6 +277,125 @@ impl ServiceInner { ServiceInner { log } } + // Ensures that all storage for a particular generation is configured. + // + // This will either return: + // - Ok if the requests are all successful (where "successful" also + // includes any of the sleds having a storage configuration more recent than + // what we've requested), or + // - An error from attempting to configure storage on the underlying sleds + async fn ensure_storage_config_at_least( + &self, + plan: &ServicePlan, + ) -> Result<(), SetupServiceError> { + cancel_safe_futures::future::join_all_then_try( + plan.services.iter().map(|(sled_address, config)| async move { + self.initialize_storage_on_sled( + *sled_address, + SledAgentTypes::OmicronPhysicalDisksConfig { + generation: config.disks.generation, + disks: config + .disks + .disks + .iter() + .map(|disk| { + SledAgentTypes::OmicronPhysicalDiskConfig { + identity: disk.identity.clone(), + id: disk.id, + pool_id: disk.pool_id, + } + }) + .collect(), + }, + ) + .await + }), + ) + .await?; + Ok(()) + } + + /// Requests that the specified sled configure storage as described + /// by `storage_config`. + /// + /// This function succeeds if either the configuration is supplied, or if + /// the configuration on the target sled is newer than what we're supplying. + // This function shares a lot of implementation details with + // [Self::initialize_zones_on_sled]. Although it has a different meaning, + // the usage (and expectations around generation numbers) are similar. + async fn initialize_storage_on_sled( + &self, + sled_address: SocketAddrV6, + storage_config: SledAgentTypes::OmicronPhysicalDisksConfig, + ) -> Result<(), SetupServiceError> { + let dur = std::time::Duration::from_secs(60); + let client = reqwest::ClientBuilder::new() + .connect_timeout(dur) + .build() + .map_err(SetupServiceError::HttpClient)?; + let log = self.log.new(o!("sled_address" => sled_address.to_string())); + let client = SledAgentClient::new_with_client( + &format!("http://{}", sled_address), + client, + log.clone(), + ); + + let storage_put = || async { + info!( + log, + "attempting to set up sled's storage: {:?}", storage_config, + ); + let result = client + .omicron_physical_disks_put(&storage_config.clone()) + .await; + let Err(error) = result else { + return Ok::< + (), + BackoffError>, + >(()); + }; + + if let sled_agent_client::Error::ErrorResponse(response) = &error { + if response.status() == http::StatusCode::CONFLICT { + warn!( + log, + "ignoring attempt to initialize storage because \ + the server seems to be newer"; + "attempted_generation" => i64::from(&storage_config.generation), + "req_id" => &response.request_id, + "server_message" => &response.message, + ); + + // If we attempt to initialize storage at generation X, and + // the server refuses because it's at some generation newer + // than X, then we treat that as success. See the doc + // comment on this function. + return Ok(()); + } + } + + // TODO Many other codes here should not be retried. See + // omicron#4578. 
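            // A possible refinement of that TODO (a sketch only, not part of
            // this change; it assumes `BackoffError::permanent` is available
            // alongside the `transient` constructor used below): classify
            // other client errors as permanent so they fail fast instead of
            // being retried indefinitely:
            //
            //     if let sled_agent_client::Error::ErrorResponse(response) = &error {
            //         if response.status().is_client_error() {
            //             return Err(BackoffError::permanent(error));
            //         }
            //     }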
+ return Err(BackoffError::transient(error)); + }; + let log_failure = |error, delay| { + warn!( + log, + "failed to initialize Omicron storage"; + "error" => #%error, + "retry_after" => ?delay, + ); + }; + retry_notify( + retry_policy_internal_service_aggressive(), + storage_put, + log_failure, + ) + .await?; + + Ok(()) + } + /// Requests that the specified sled configure zones as described by /// `zones_config` /// @@ -345,7 +465,7 @@ impl ServiceInner { warn!( log, "failed to initialize Omicron zones"; - "error" => ?error, + "error" => #%error, "retry_after" => ?delay, ); }; @@ -564,8 +684,16 @@ impl ServiceInner { info!(self.log, "Nexus address: {}", nexus_address.to_string()); - let nexus_client = NexusClient::new( + const CLIENT_TIMEOUT: Duration = Duration::from_secs(60); + let client = reqwest::Client::builder() + .connect_timeout(CLIENT_TIMEOUT) + .timeout(CLIENT_TIMEOUT) + .build() + .map_err(SetupServiceError::HttpClient)?; + + let nexus_client = NexusClient::new_with_client( &format!("http://{}", nexus_address), + client, self.log.new(o!("component" => "NexusClient")), ); @@ -687,9 +815,44 @@ impl ServiceInner { info!(self.log, "rack_network_config: {:#?}", rack_network_config); + let physical_disks: Vec<_> = service_plan + .services + .iter() + .flat_map(|(addr, config)| { + let sled_id = id_map.get(addr).expect("Missing sled"); + config.disks.disks.iter().map(|config| { + NexusTypes::PhysicalDiskPutRequest { + id: config.id, + vendor: config.identity.vendor.clone(), + serial: config.identity.serial.clone(), + model: config.identity.model.clone(), + variant: NexusTypes::PhysicalDiskKind::U2, + sled_id: *sled_id, + } + }) + }) + .collect(); + + let zpools = service_plan + .services + .iter() + .flat_map(|(addr, config)| { + let sled_id = id_map.get(addr).expect("Missing sled"); + config.disks.disks.iter().map(|config| { + NexusTypes::ZpoolPutRequest { + id: config.pool_id, + physical_disk_id: config.id, + sled_id: *sled_id, + } + }) + }) + .collect(); + let request = NexusTypes::RackInitializationRequest { blueprint, services, + physical_disks, + zpools, datasets, internal_services_ip_pool_ranges, certs: config.external_certificates.clone(), @@ -789,7 +952,7 @@ impl ServiceInner { warn!( self.log, "Failed to initialize CockroachDB"; - "error" => ?error, + "error" => #%error, "retry_after" => ?delay ); }; @@ -839,7 +1002,7 @@ impl ServiceInner { )?; let marker_paths: Vec = storage_manager - .get_latest_resources() + .get_latest_disks() .await .all_m2_mountpoints(CONFIG_DATASET) .into_iter() @@ -1004,6 +1167,10 @@ impl ServiceInner { .await? }; + // Before we can ask for any services, we need to ensure that storage is + // operational. + self.ensure_storage_config_at_least(&service_plan).await?; + // Set up internal DNS services first and write the initial // DNS configuration to the internal DNS servers. 
let v1generator = OmicronZonesConfigGenerator::initial_version( @@ -1301,57 +1468,65 @@ mod test { params::OmicronZoneType, rack_setup::plan::service::{Plan as ServicePlan, SledInfo}, }; - use illumos_utils::zpool::ZpoolName; - use omicron_common::{address::Ipv6Subnet, api::external::Generation}; + use omicron_common::{ + address::{get_sled_address, Ipv6Subnet, SLED_PREFIX}, + api::external::{ByteCount, Generation}, + disk::DiskIdentity, + }; + use sled_agent_client::types as SledAgentTypes; + use uuid::Uuid; + + fn make_sled_info( + sled_id: Uuid, + subnet: Ipv6Subnet, + u2_count: usize, + ) -> SledInfo { + let sled_agent_address = get_sled_address(subnet); + SledInfo::new( + sled_id, + subnet, + sled_agent_address, + SledAgentTypes::Inventory { + sled_id, + sled_agent_address: sled_agent_address.to_string(), + sled_role: SledAgentTypes::SledRole::Scrimlet, + baseboard: SledAgentTypes::Baseboard::Unknown, + usable_hardware_threads: 32, + usable_physical_ram: ByteCount::from_gibibytes_u32(16), + reservoir_size: ByteCount::from_gibibytes_u32(0), + disks: (0..u2_count) + .map(|i| SledAgentTypes::InventoryDisk { + identity: DiskIdentity { + vendor: "test-manufacturer".to_string(), + serial: format!("test-{sled_id}-#{i}"), + model: "v1".to_string(), + }, + variant: SledAgentTypes::DiskVariant::U2, + slot: i.try_into().unwrap(), + }) + .collect(), + zpools: vec![], + }, + true, + ) + } fn make_test_service_plan() -> ServicePlan { let rss_config = crate::bootstrap::params::test_config(); let fake_sleds = vec![ - SledInfo::new( - "d4ba4bbe-8542-4907-bc8f-48df53eb5089".parse().unwrap(), - Ipv6Subnet::new("fd00:1122:3344:101::1".parse().unwrap()), - "[fd00:1122:3344:101::1]:80".parse().unwrap(), - vec![ - ZpoolName::new_internal( - "c5885278-0ae2-4f1e-9223-07f2ada818e1".parse().unwrap(), - ), - ZpoolName::new_internal( - "57465977-8275-43aa-a320-b6cd5cb20ca6".parse().unwrap(), - ), - ZpoolName::new_external( - "886f9fe7-bf70-4ddd-ae92-764dc3ed14ab".parse().unwrap(), - ), - ZpoolName::new_external( - "4c9061b1-345b-4985-8cbd-a2a899f15b68".parse().unwrap(), - ), - ZpoolName::new_external( - "b2bd488e-b187-42a0-b157-9ab0f70d91a8".parse().unwrap(), - ), - ], - true, + make_sled_info( + Uuid::new_v4(), + Ipv6Subnet::::new( + "fd00:1122:3344:101::1".parse().unwrap(), + ), + 5, ), - SledInfo::new( - "b4359dea-665d-41ca-a681-f55912f2d5d0".parse().unwrap(), - Ipv6Subnet::new("fd00:1122:3344:102::1".parse().unwrap()), - "[fd00:1122:3344:102::1]:80".parse().unwrap(), - vec![ - ZpoolName::new_internal( - "34d6b5e5-a09f-4e96-a599-fa306ce6d983".parse().unwrap(), - ), - ZpoolName::new_internal( - "e9b8d1ea-da29-4b61-a493-c0ed319098da".parse().unwrap(), - ), - ZpoolName::new_external( - "37f8e903-2adb-4613-b78c-198122c289f0".parse().unwrap(), - ), - ZpoolName::new_external( - "b50f787c-97b3-4b91-a5bd-99d11fc86fb8".parse().unwrap(), - ), - ZpoolName::new_external( - "809e50c8-930e-413a-950c-69a540b688e2".parse().unwrap(), - ), - ], - true, + make_sled_info( + Uuid::new_v4(), + Ipv6Subnet::::new( + "fd00:1122:3344:102::1".parse().unwrap(), + ), + 5, ), ]; let service_plan = diff --git a/sled-agent/src/server.rs b/sled-agent/src/server.rs index b93ad0721c..f702e4c67d 100644 --- a/sled-agent/src/server.rs +++ b/sled-agent/src/server.rs @@ -11,12 +11,10 @@ use crate::bootstrap::params::StartSledAgentRequest; use crate::long_running_tasks::LongRunningTaskHandles; use crate::nexus::NexusClientWithResolver; use crate::services::ServiceManager; -use crate::storage_monitor::UnderlayAccess; use 
internal_dns::resolver::Resolver; use slog::Logger; use std::net::SocketAddr; use std::sync::Arc; -use tokio::sync::oneshot; use uuid::Uuid; /// Packages up a [`SledAgent`], running the sled agent API under a Dropshot @@ -42,7 +40,6 @@ impl Server { request: StartSledAgentRequest, long_running_tasks_handles: LongRunningTaskHandles, services: ServiceManager, - underlay_available_tx: oneshot::Sender, ) -> Result { info!(log, "setting up sled agent server"); @@ -65,7 +62,6 @@ impl Server { request, services, long_running_tasks_handles, - underlay_available_tx, ) .await .map_err(|e| e.to_string())?; diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index e23cdf58b9..bfc0b91a71 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -94,6 +94,7 @@ use sled_hardware::underlay; use sled_hardware::SledMode; use sled_hardware_types::underlay::BOOTSTRAP_PREFIX; use sled_hardware_types::Baseboard; +use sled_storage::config::MountConfig; use sled_storage::dataset::{ DatasetKind, DatasetName, CONFIG_DATASET, INSTALL_DATASET, ZONE_DATASET, }; @@ -661,7 +662,7 @@ pub(crate) enum TimeSyncConfig { // Skips timesync unconditionally. Skip, // Fails timesync unconditionally. - #[cfg(test)] + #[cfg(all(test, target_os = "illumos"))] Fail, } @@ -734,12 +735,12 @@ impl ServiceManager { } } - #[cfg(test)] + #[cfg(all(test, target_os = "illumos"))] fn override_ledger_directory(&self, path: Utf8PathBuf) { self.inner.ledger_directory_override.set(path).unwrap(); } - #[cfg(test)] + #[cfg(all(test, target_os = "illumos"))] fn override_image_directory(&self, path: Utf8PathBuf) { self.inner.image_directory_override.set(path).unwrap(); } @@ -752,7 +753,7 @@ impl ServiceManager { if let Some(dir) = self.inner.ledger_directory_override.get() { return vec![dir.join(SERVICES_LEDGER_FILENAME)]; } - let resources = self.inner.storage.get_latest_resources().await; + let resources = self.inner.storage.get_latest_disks().await; resources .all_m2_mountpoints(CONFIG_DATASET) .into_iter() @@ -764,7 +765,7 @@ impl ServiceManager { if let Some(dir) = self.inner.ledger_directory_override.get() { return vec![dir.join(ZONES_LEDGER_FILENAME)]; } - let resources = self.inner.storage.get_latest_resources().await; + let resources = self.inner.storage.get_latest_disks().await; resources .all_m2_mountpoints(CONFIG_DATASET) .into_iter() @@ -1508,11 +1509,12 @@ impl ServiceManager { // If the boot disk exists, look for the image in the "install" dataset // there too. - if let Some((_, boot_zpool)) = - self.inner.storage.get_latest_resources().await.boot_disk() - { - zone_image_paths - .push(boot_zpool.dataset_mountpoint(INSTALL_DATASET)); + let all_disks = self.inner.storage.get_latest_disks().await; + if let Some((_, boot_zpool)) = all_disks.boot_disk() { + zone_image_paths.push(boot_zpool.dataset_mountpoint( + &all_disks.mount_config.root, + INSTALL_DATASET, + )); } let zone_type_str = match &request { @@ -2906,6 +2908,7 @@ impl ServiceManager { // storage configuration against the reality of the current sled. async fn start_omicron_zone( &self, + mount_config: &MountConfig, zone: &OmicronZoneConfig, time_is_synchronized: bool, all_u2_pools: &Vec, @@ -2924,7 +2927,11 @@ impl ServiceManager { // Ensure that this zone's storage is ready. 
let root = self - .validate_storage_and_pick_mountpoint(&zone, &all_u2_pools) + .validate_storage_and_pick_mountpoint( + mount_config, + &zone, + &all_u2_pools, + ) .await?; let config = OmicronZoneConfigLocal { zone: zone.clone(), root }; @@ -2953,6 +2960,7 @@ impl ServiceManager { // to start. async fn start_omicron_zones( &self, + mount_config: &MountConfig, requests: impl Iterator + Clone, time_is_synchronized: bool, all_u2_pools: &Vec, @@ -2969,6 +2977,7 @@ impl ServiceManager { let futures = requests.map(|zone| async move { self.start_omicron_zone( + mount_config, &zone, time_is_synchronized, all_u2_pools, @@ -3192,7 +3201,8 @@ impl ServiceManager { } // Collect information that's necessary to start new zones - let storage = self.inner.storage.get_latest_resources().await; + let storage = self.inner.storage.get_latest_disks().await; + let mount_config = &storage.mount_config; let all_u2_pools = storage.all_u2_zpools(); let time_is_synchronized = match self.timesync_get_locked(&existing_zones).await { @@ -3205,6 +3215,7 @@ impl ServiceManager { // Concurrently boot all new zones let StartZonesResult { new_zones, errors } = self .start_omicron_zones( + mount_config, zones_to_be_added, time_is_synchronized, &all_u2_pools, @@ -3305,6 +3316,7 @@ impl ServiceManager { // is valid. async fn validate_storage_and_pick_mountpoint( &self, + mount_config: &MountConfig, zone: &OmicronZoneConfig, all_u2_pools: &Vec, ) -> Result { @@ -3363,14 +3375,16 @@ impl ServiceManager { device: format!("zpool: {data_pool}"), }); } - data_pool.dataset_mountpoint(ZONE_DATASET) + data_pool.dataset_mountpoint(&mount_config.root, ZONE_DATASET) } else { // If the zone it not coupled to other datsets, we pick one // arbitrarily. let mut rng = rand::thread_rng(); all_u2_pools .choose(&mut rng) - .map(|pool| pool.dataset_mountpoint(ZONE_DATASET)) + .map(|pool| { + pool.dataset_mountpoint(&mount_config.root, ZONE_DATASET) + }) .ok_or_else(|| Error::U2NotFound)? 
.clone() }; @@ -3477,7 +3491,7 @@ impl ServiceManager { let skip_timesync = match &self.inner.time_sync_config { TimeSyncConfig::Normal => false, TimeSyncConfig::Skip => true, - #[cfg(test)] + #[cfg(all(test, target_os = "illumos"))] TimeSyncConfig::Fail => { info!(self.inner.log, "Configured to fail timesync checks"); return Err(Error::TimeNotSynchronized); @@ -4128,10 +4142,9 @@ impl ServiceManager { } } -#[cfg(test)] +#[cfg(all(test, target_os = "illumos"))] mod test { use super::*; - use illumos_utils::zpool::ZpoolName; use illumos_utils::{ dladm::{ Etherstub, MockDladm, BOOTSTRAP_ETHERSTUB_NAME, @@ -4140,9 +4153,8 @@ mod test { svc, zone::MockZones, }; - use sled_storage::disk::{RawDisk, SyntheticDisk}; - use sled_storage::manager::{FakeStorageManager, StorageHandle}; + use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::net::{Ipv6Addr, SocketAddrV6}; use std::os::unix::process::ExitStatusExt; use uuid::Uuid; @@ -4366,18 +4378,21 @@ mod test { ) -> Result<(), Error> { let zone_prefix = format!("oxz_{}", zone_type.zone_type_str()); let _expectations = expect_new_service(&zone_prefix); - mgr.ensure_all_omicron_zones_persistent( - OmicronZonesConfig { - generation, - zones: vec![OmicronZoneConfig { - id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type, - }], - }, - Some(&tmp_dir), - ) - .await + let r = mgr + .ensure_all_omicron_zones_persistent( + OmicronZonesConfig { + generation, + zones: vec![OmicronZoneConfig { + id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type, + }], + }, + Some(&tmp_dir), + ) + .await; + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + r } // Prepare to call "ensure" for a service which already exists. We should @@ -4460,31 +4475,25 @@ mod test { } } - async fn setup_storage() -> StorageHandle { - let (manager, handle) = FakeStorageManager::new(); - - // Spawn the storage manager as done by sled-agent - tokio::spawn(async move { - manager.run().await; - }); - - let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); - let internal_disk: RawDisk = - SyntheticDisk::new(internal_zpool_name, 0).into(); - handle.upsert_disk(internal_disk).await; - let external_zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let external_disk: RawDisk = - SyntheticDisk::new(external_zpool_name, 1).into(); - handle.upsert_disk(external_disk).await; - - handle + async fn setup_storage(log: &Logger) -> StorageManagerTestHarness { + let mut harness = StorageManagerTestHarness::new(&log).await; + let raw_disks = + harness.add_vdevs(&["u2_test.vdev", "m2_test.vdev"]).await; + harness.handle().key_manager_ready().await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Failed to ensure disks"); + assert!(!result.has_error(), "{:?}", result); + harness } - #[derive(Clone)] struct LedgerTestHelper<'a> { log: slog::Logger, ddmd_client: DdmAdminClient, - storage_handle: StorageHandle, + storage_test_harness: StorageManagerTestHarness, zone_bundler: ZoneBundler, test_config: &'a TestConfig, } @@ -4495,41 +4504,45 @@ mod test { test_config: &'a TestConfig, ) -> LedgerTestHelper { let ddmd_client = DdmAdminClient::localhost(&log).unwrap(); - let storage_handle = setup_storage().await; + let storage_test_harness = setup_storage(&log).await; let zone_bundler = ZoneBundler::new( log.clone(), - storage_handle.clone(), + storage_test_harness.handle().clone(), Default::default(), ); LedgerTestHelper { log, ddmd_client, - storage_handle, + 
storage_test_harness, zone_bundler, test_config, } } - fn new_service_manager(self) -> ServiceManager { + async fn cleanup(&mut self) { + self.storage_test_harness.cleanup().await; + } + + fn new_service_manager(&self) -> ServiceManager { self.new_service_manager_with_timesync(TimeSyncConfig::Skip) } fn new_service_manager_with_timesync( - self, + &self, time_sync_config: TimeSyncConfig, ) -> ServiceManager { let log = &self.log; let mgr = ServiceManager::new( log, - self.ddmd_client, + self.ddmd_client.clone(), make_bootstrap_networking_config(), SledMode::Auto, time_sync_config, SidecarRevision::Physical("rev-test".to_string()), vec![], - self.storage_handle, - self.zone_bundler, + self.storage_test_harness.handle().clone(), + self.zone_bundler.clone(), ); self.test_config.override_paths(&mgr); mgr @@ -4563,7 +4576,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log("test_ensure_service"); let test_config = TestConfig::new().await; - let helper = + let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); @@ -4592,6 +4605,7 @@ mod test { drop_service_manager(mgr); + helper.cleanup().await; logctx.cleanup_successful(); } @@ -4601,7 +4615,7 @@ mod test { "test_ensure_service_before_timesync", ); let test_config = TestConfig::new().await; - let helper = + let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = @@ -4666,6 +4680,7 @@ mod test { .unwrap(); drop_service_manager(mgr); + helper.cleanup().await; logctx.cleanup_successful(); } @@ -4675,7 +4690,7 @@ mod test { "test_ensure_service_which_already_exists", ); let test_config = TestConfig::new().await; - let helper = + let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); @@ -4694,6 +4709,7 @@ mod test { drop_service_manager(mgr); + helper.cleanup().await; logctx.cleanup_successful(); } @@ -4703,12 +4719,12 @@ mod test { "test_services_are_recreated_on_reboot", ); let test_config = TestConfig::new().await; - let helper = + let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; // First, spin up a ServiceManager, create a new zone, and then tear // down the ServiceManager. - let mgr = helper.clone().new_service_manager(); + let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); let v2 = Generation::new().next(); @@ -4727,6 +4743,7 @@ mod test { let _expectations = expect_new_service(EXPECTED_ZONE_NAME_PREFIX); let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let found = mgr.omicron_zones_list().await.expect("failed to list zones"); @@ -4736,6 +4753,7 @@ mod test { drop_service_manager(mgr); + helper.cleanup().await; logctx.cleanup_successful(); } @@ -4745,12 +4763,12 @@ mod test { "test_services_do_not_persist_without_config", ); let test_config = TestConfig::new().await; - let helper = + let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; // First, spin up a ServiceManager, create a new zone, and then tear // down the ServiceManager. 
- let mgr = helper.clone().new_service_manager(); + let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); let v1 = Generation::new(); @@ -4783,6 +4801,7 @@ mod test { drop_service_manager(mgr); + helper.cleanup().await; logctx.cleanup_successful(); } @@ -4792,7 +4811,7 @@ mod test { let logctx = omicron_test_utils::dev::test_setup_log("test_bad_generations"); let test_config = TestConfig::new().await; - let helper = + let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); @@ -4900,6 +4919,8 @@ mod test { drop_service_manager(mgr); + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + helper.cleanup().await; logctx.cleanup_successful(); } @@ -4921,9 +4942,9 @@ mod test { .expect("failed to copy example old-format services ledger into place"); // Now start the service manager. - let helper = + let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; - let mgr = helper.clone().new_service_manager(); + let mgr = helper.new_service_manager(); LedgerTestHelper::sled_agent_started(&logctx.log, &test_config, &mgr); // Trigger the migration code. (Yes, it's hokey that we create this @@ -4964,6 +4985,7 @@ mod test { assert_eq!(found, expected_config); drop_service_manager(mgr); + helper.cleanup().await; logctx.cleanup_successful(); } @@ -4973,7 +4995,7 @@ mod test { "test_old_ledger_migration_bad", ); let test_config = TestConfig::new().await; - let helper = + let mut helper = LedgerTestHelper::new(logctx.log.clone(), &test_config).await; // Before we start things, stuff a broken ledger into place. For this @@ -5001,6 +5023,7 @@ mod test { format!("{:#}", error) ); + helper.cleanup().await; logctx.cleanup_successful(); } diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index c3c92eb6fe..7d0d513a14 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -11,7 +11,7 @@ use crate::params::{ DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody, InstancePutMigrationIdsBody, InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, Inventory, - OmicronZonesConfig, VpcFirewallRulesEnsureBody, + OmicronPhysicalDisksConfig, OmicronZonesConfig, VpcFirewallRulesEnsureBody, }; use dropshot::endpoint; use dropshot::ApiDescription; @@ -31,6 +31,7 @@ use omicron_common::api::internal::shared::RackNetworkConfig; use omicron_common::api::internal::shared::SwitchPorts; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use sled_storage::resources::DisksManagementResult; use std::net::{Ipv4Addr, Ipv6Addr}; use std::sync::Arc; use uuid::Uuid; @@ -60,6 +61,8 @@ pub fn api() -> SledApiDescription { api.register(read_network_bootstore_config)?; api.register(write_network_bootstore_config)?; api.register(inventory)?; + api.register(omicron_physical_disks_get)?; + api.register(omicron_physical_disks_put)?; api.register(omicron_zones_get)?; api.register(omicron_zones_put)?; @@ -441,6 +444,31 @@ async fn inventory( )) } +#[endpoint { + method = PUT, + path = "/omicron-physical-disks", +}] +async fn omicron_physical_disks_put( + rqctx: RequestContext>, + body: TypedBody, +) -> Result, HttpError> { + let sa = rqctx.context(); + let body_args = body.into_inner(); + let result = sa.omicron_physical_disks_ensure(body_args).await?; + Ok(HttpResponseOk(result)) +} + +#[endpoint { + method = 
GET, + path = "/omicron-physical-disks", +}] +async fn omicron_physical_disks_get( + rqctx: RequestContext>, +) -> Result, HttpError> { + let sa = rqctx.context(); + Ok(HttpResponseOk(sa.omicron_physical_disks_list().await?)) +} + #[endpoint { method = GET, path = "/omicron-zones", diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index dc770d179d..3a0ab2484a 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -31,6 +31,7 @@ use omicron_common::api::external::Vni; use omicron_common::backoff::{ retry_notify, retry_policy_internal_service_aggressive, BackoffError, }; +use omicron_common::disk::DiskIdentity; use omicron_common::FileKv; use slog::{info, Drain, Logger}; use std::collections::BTreeMap; @@ -163,20 +164,24 @@ impl Server { // Crucible dataset for each. This emulates the setup we expect to have // on the physical rack. for zpool in &config.storage.zpools { + let physical_disk_id = Uuid::new_v4(); let zpool_id = Uuid::new_v4(); let vendor = "synthetic-vendor".to_string(); let serial = format!("synthetic-serial-{zpool_id}"); let model = "synthetic-model".to_string(); sled_agent .create_external_physical_disk( - vendor.clone(), - serial.clone(), - model.clone(), + physical_disk_id, + DiskIdentity { + vendor: vendor.clone(), + serial: serial.clone(), + model: model.clone(), + }, ) .await; sled_agent - .create_zpool(zpool_id, vendor, serial, model, zpool.size) + .create_zpool(zpool_id, physical_disk_id, zpool.size) .await; let dataset_id = Uuid::new_v4(); let address = @@ -470,12 +475,14 @@ pub async fn run_standalone_server( }; let mut datasets = vec![]; - for zpool_id in server.sled_agent.get_zpools().await { + let physical_disks = server.sled_agent.get_all_physical_disks().await; + let zpools = server.sled_agent.get_zpools().await; + for zpool in &zpools { for (dataset_id, address) in - server.sled_agent.get_datasets(zpool_id).await + server.sled_agent.get_datasets(zpool.id).await { datasets.push(NexusTypes::DatasetCreateRequest { - zpool_id, + zpool_id: zpool.id, dataset_id, request: NexusTypes::DatasetPutRequest { address: address.to_string(), @@ -490,10 +497,11 @@ pub async fn run_standalone_server( None => vec![], }; + let disks = server.sled_agent.omicron_physical_disks_list().await?; let services = zones.iter().map(|z| z.to_nexus_service_req(config.id)).collect(); let mut sled_configs = BTreeMap::new(); - sled_configs.insert(config.id, SledConfig { zones }); + sled_configs.insert(config.id, SledConfig { disks, zones }); let rack_init_request = NexusTypes::RackInitializationRequest { blueprint: build_initial_blueprint_from_sled_configs( @@ -501,6 +509,8 @@ pub async fn run_standalone_server( internal_dns_version, ), services, + physical_disks, + zpools, datasets, internal_services_ip_pool_ranges, certs, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 1edde622a1..455c2988d3 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -15,13 +15,13 @@ use crate::params::{ DiskStateRequested, InstanceExternalIpBody, InstanceHardware, InstanceMetadata, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, Inventory, - OmicronZonesConfig, SledRole, + OmicronPhysicalDisksConfig, OmicronZonesConfig, SledRole, }; use crate::sim::simulatable::Simulatable; use crate::updates::UpdateManager; use anyhow::bail; use anyhow::Context; -use dropshot::HttpServer; +use dropshot::{HttpError, HttpServer}; use 
futures::lock::Mutex; use illumos_utils::opte::params::{ DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost, @@ -35,10 +35,12 @@ use omicron_common::api::internal::nexus::{ use omicron_common::api::internal::nexus::{ InstanceRuntimeState, VmmRuntimeState, }; +use omicron_common::disk::DiskIdentity; use propolis_client::{ types::VolumeConstructionRequest, Client as PropolisClient, }; use propolis_mock_server::Context as PropolisContext; +use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::{HashMap, HashSet}; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; @@ -156,7 +158,6 @@ impl SledAgent { )), storage: Mutex::new(Storage::new( id, - Arc::clone(&nexus_client), config.storage.ip, storage_log, )), @@ -521,19 +522,26 @@ impl SledAgent { /// Adds a Physical Disk to the simulated sled agent. pub async fn create_external_physical_disk( &self, - vendor: String, - serial: String, - model: String, + id: Uuid, + identity: DiskIdentity, ) { let variant = sled_hardware::DiskVariant::U2; self.storage .lock() .await - .insert_physical_disk(vendor, serial, model, variant) + .insert_physical_disk(id, identity, variant) .await; } - pub async fn get_zpools(&self) -> Vec { + pub async fn get_all_physical_disks( + &self, + ) -> Vec { + self.storage.lock().await.get_all_physical_disks() + } + + pub async fn get_zpools( + &self, + ) -> Vec { self.storage.lock().await.get_all_zpools() } @@ -548,15 +556,13 @@ impl SledAgent { pub async fn create_zpool( &self, id: Uuid, - vendor: String, - serial: String, - model: String, + physical_disk_id: Uuid, size: u64, ) { self.storage .lock() .await - .insert_zpool(id, vendor, serial, model, size) + .insert_zpool(id, physical_disk_id, size) .await; } @@ -780,9 +786,9 @@ impl SledAgent { .context("reservoir_size")?, disks: storage .physical_disks() - .iter() - .map(|(identity, info)| crate::params::InventoryDisk { - identity: identity.clone(), + .values() + .map(|info| crate::params::InventoryDisk { + identity: info.identity.clone(), variant: info.variant, slot: info.slot, }) @@ -800,6 +806,19 @@ impl SledAgent { }) } + pub async fn omicron_physical_disks_list( + &self, + ) -> Result { + self.storage.lock().await.omicron_physical_disks_list().await + } + + pub async fn omicron_physical_disks_ensure( + &self, + config: OmicronPhysicalDisksConfig, + ) -> Result { + self.storage.lock().await.omicron_physical_disks_ensure(config).await + } + pub async fn omicron_zones_list(&self) -> OmicronZonesConfig { self.fake_zones.lock().await.clone() } diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 8fb362c5b7..13c3da4fd0 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -8,7 +8,7 @@ //! than the representation of "virtual disks" which would be presented //! through Nexus' external API. 
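// Usage sketch (hypothetical test code, not part of this change): with this
// rework, the simulated sled agent above is seeded with a physical disk and
// its zpool explicitly, by ID, rather than notifying Nexus as a side effect:
//
//     let disk_id = Uuid::new_v4();
//     let zpool_id = Uuid::new_v4();
//     sled_agent
//         .create_external_physical_disk(
//             disk_id,
//             DiskIdentity {
//                 vendor: "synthetic-vendor".to_string(),
//                 serial: "synthetic-serial".to_string(),
//                 model: "synthetic-model".to_string(),
//             },
//         )
//         .await;
//     sled_agent.create_zpool(zpool_id, disk_id, 10 << 30).await;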
-use crate::nexus::NexusClient; +use crate::params::OmicronPhysicalDisksConfig; use crate::sim::http_entrypoints_pantry::ExpectedDigest; use crate::sim::SledAgent; use anyhow::{self, bail, Result}; @@ -19,12 +19,11 @@ use crucible_agent_client::types::{ use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; -use nexus_client::types::{ - ByteCount, PhysicalDiskKind, PhysicalDiskPutRequest, ZpoolPutRequest, -}; use omicron_common::disk::DiskIdentity; use propolis_client::types::VolumeConstructionRequest; use sled_hardware::DiskVariant; +use sled_storage::resources::DiskManagementStatus; +use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::HashMap; use std::collections::HashSet; @@ -474,18 +473,21 @@ impl CrucibleServer { } pub(crate) struct PhysicalDisk { + pub(crate) identity: DiskIdentity, pub(crate) variant: DiskVariant, pub(crate) slot: i64, } pub(crate) struct Zpool { - datasets: HashMap, + id: Uuid, + physical_disk_id: Uuid, total_size: u64, + datasets: HashMap, } impl Zpool { - fn new(total_size: u64) -> Self { - Zpool { datasets: HashMap::new(), total_size } + fn new(id: Uuid, physical_disk_id: Uuid, total_size: u64) -> Self { + Zpool { id, physical_disk_id, total_size, datasets: HashMap::new() } } fn insert_dataset( @@ -541,9 +543,9 @@ impl Zpool { /// Simulated representation of all storage on a sled. pub struct Storage { sled_id: Uuid, - nexus_client: Arc, log: Logger, - physical_disks: HashMap, + config: Option, + physical_disks: HashMap, next_disk_slot: i64, zpools: HashMap, crucible_ip: IpAddr, @@ -551,16 +553,11 @@ pub struct Storage { } impl Storage { - pub fn new( - sled_id: Uuid, - nexus_client: Arc, - crucible_ip: IpAddr, - log: Logger, - ) -> Self { + pub fn new(sled_id: Uuid, crucible_ip: IpAddr, log: Logger) -> Self { Self { sled_id, - nexus_client, log, + config: None, physical_disks: HashMap::new(), next_disk_slot: 0, zpools: HashMap::new(), @@ -570,68 +567,70 @@ impl Storage { } /// Returns an immutable reference to all (currently known) physical disks - pub fn physical_disks(&self) -> &HashMap { + pub fn physical_disks(&self) -> &HashMap { &self.physical_disks } + pub async fn omicron_physical_disks_list( + &mut self, + ) -> Result { + let Some(config) = self.config.as_ref() else { + return Err(HttpError::for_not_found( + None, + "No control plane disks".into(), + )); + }; + Ok(config.clone()) + } + + pub async fn omicron_physical_disks_ensure( + &mut self, + config: OmicronPhysicalDisksConfig, + ) -> Result { + if let Some(stored_config) = self.config.as_ref() { + if stored_config.generation < config.generation { + return Err(HttpError::for_client_error( + None, + http::StatusCode::BAD_REQUEST, + "Generation number too old".to_string(), + )); + } + } + self.config.replace(config.clone()); + + Ok(DisksManagementResult { + status: config + .disks + .into_iter() + .map(|config| DiskManagementStatus { + identity: config.identity, + err: None, + }) + .collect(), + }) + } + pub async fn insert_physical_disk( &mut self, - vendor: String, - serial: String, - model: String, + id: Uuid, + identity: DiskIdentity, variant: DiskVariant, ) { - let identifier = DiskIdentity { - vendor: vendor.clone(), - serial: serial.clone(), - model: model.clone(), - }; let slot = self.next_disk_slot; self.next_disk_slot += 1; - self.physical_disks.insert(identifier, PhysicalDisk { variant, slot }); - - let variant = match variant { - DiskVariant::U2 => PhysicalDiskKind::U2, - DiskVariant::M2 => PhysicalDiskKind::M2, - }; - - // 
Notify Nexus - let request = PhysicalDiskPutRequest { - vendor, - serial, - model, - variant, - sled_id: self.sled_id, - }; - self.nexus_client - .physical_disk_put(&request) - .await - .expect("Failed to notify Nexus about new Physical Disk"); + self.physical_disks + .insert(id, PhysicalDisk { identity, variant, slot }); } - /// Adds a Zpool to the sled's simulated storage and notifies Nexus. + /// Adds a Zpool to the sled's simulated storage. pub async fn insert_zpool( &mut self, zpool_id: Uuid, - disk_vendor: String, - disk_serial: String, - disk_model: String, + disk_id: Uuid, size: u64, ) { // Update our local data - self.zpools.insert(zpool_id, Zpool::new(size)); - - // Notify Nexus - let request = ZpoolPutRequest { - size: ByteCount(size), - disk_vendor, - disk_serial, - disk_model, - }; - self.nexus_client - .zpool_put(&self.sled_id, &zpool_id, &request) - .await - .expect("Failed to notify Nexus about new Zpool"); + self.zpools.insert(zpool_id, Zpool::new(zpool_id, disk_id, size)); } /// Returns an immutable reference to all zpools @@ -661,8 +660,42 @@ impl Storage { dataset.address() } - pub fn get_all_zpools(&self) -> Vec { - self.zpools.keys().cloned().collect() + pub fn get_all_physical_disks( + &self, + ) -> Vec { + self.physical_disks + .iter() + .map(|(id, disk)| { + let variant = match disk.variant { + DiskVariant::U2 => { + nexus_client::types::PhysicalDiskKind::U2 + } + DiskVariant::M2 => { + nexus_client::types::PhysicalDiskKind::M2 + } + }; + + nexus_client::types::PhysicalDiskPutRequest { + id: *id, + vendor: disk.identity.vendor.clone(), + serial: disk.identity.serial.clone(), + model: disk.identity.model.clone(), + variant, + sled_id: self.sled_id, + } + }) + .collect() + } + + pub fn get_all_zpools(&self) -> Vec { + self.zpools + .values() + .map(|pool| nexus_client::types::ZpoolPutRequest { + id: pool.id, + sled_id: self.sled_id, + physical_disk_id: pool.physical_disk_id, + }) + .collect() } pub fn get_all_datasets(&self, zpool_id: Uuid) -> Vec<(Uuid, SocketAddr)> { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index cbda32bbe1..e42f708006 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -22,12 +22,11 @@ use crate::params::{ DiskStateRequested, InstanceExternalIpBody, InstanceHardware, InstanceMetadata, InstanceMigrationSourceParams, InstancePutStateResponse, InstanceStateRequested, InstanceUnregisterResponse, Inventory, - OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRule, - ZoneBundleMetadata, Zpool, + OmicronPhysicalDisksConfig, OmicronZonesConfig, SledRole, TimeSync, + VpcFirewallRule, ZoneBundleMetadata, Zpool, }; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; -use crate::storage_monitor::UnderlayAccess; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; use crate::zone_bundle; @@ -70,11 +69,11 @@ use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; use sled_storage::manager::StorageHandle; +use sled_storage::resources::DisksManagementResult; use slog::Logger; use std::collections::BTreeMap; use std::net::{Ipv6Addr, SocketAddr, SocketAddrV6}; use std::sync::Arc; -use tokio::sync::oneshot; use uuid::Uuid; use illumos_utils::running_zone::ZoneBuilderFactory; @@ -161,8 +160,9 @@ pub enum Error { impl From for omicron_common::api::external::Error { fn from(err: Error) -> Self { match err { - // Service errors can 
convert themselves into the external error + // Some errors can convert themselves into the external error Error::Services(err) => err.into(), + Error::Storage(err) => err.into(), _ => omicron_common::api::external::Error::InternalError { internal_message: err.to_string(), }, @@ -342,7 +342,6 @@ impl SledAgent { request: StartSledAgentRequest, services: ServiceManager, long_running_task_handles: LongRunningTaskHandles, - underlay_available_tx: oneshot::Sender, ) -> Result { // Pass the "parent_log" to all subcomponents that want to set their own // "component" value. @@ -357,7 +356,7 @@ impl SledAgent { let storage_manager = &long_running_task_handles.storage_manager; let boot_disk = storage_manager - .get_latest_resources() + .get_latest_disks() .await .boot_disk() .ok_or_else(|| Error::BootDiskNotFound)?; @@ -461,16 +460,6 @@ impl SledAgent { *sled_address.ip(), ); - // Inform the `StorageMonitor` that the underlay is available so that - // it can try to contact nexus. - underlay_available_tx - .send(UnderlayAccess { - nexus_client: nexus_client.clone(), - sled_id: request.body.id, - }) - .map_err(|_| ()) - .expect("Failed to send to StorageMonitor"); - // Configure the VMM reservoir as either a percentage of DRAM or as an // exact size in MiB. let reservoir_mode = ReservoirMode::from_config( @@ -802,6 +791,28 @@ impl SledAgent { self.inner.zone_bundler.cleanup().await.map_err(Error::from) } + /// Requests the set of physical disks currently managed by the Sled Agent. + /// + /// This should be contrasted by the set of disks in the inventory, which + /// may contain a slightly different set, if certain disks are not expected + /// to be in-use by the broader control plane. + pub async fn omicron_physical_disks_list( + &self, + ) -> Result { + Ok(self.storage().omicron_physical_disks_list().await?) + } + + /// Ensures that the specific set of Omicron Physical Disks are running + /// on this sled, and that no other disks are being used by the control + /// plane (with the exception of M.2s, which are always automatically + /// in-use). + pub async fn omicron_physical_disks_ensure( + &self, + config: OmicronPhysicalDisksConfig, + ) -> Result { + Ok(self.storage().omicron_physical_disks_ensure(config).await?) 
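        // The returned `DisksManagementResult` can carry per-disk errors even
        // when the call itself succeeds, so callers check it explicitly. A
        // minimal sketch, mirroring the test-harness usage elsewhere in this
        // change (`agent` here is a hypothetical handle):
        //
        //     let result = agent.omicron_physical_disks_ensure(config).await?;
        //     assert!(!result.has_error(), "{:?}", result);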
+ } + /// List the Omicron zone configuration that's currently running pub async fn omicron_zones_list( &self, @@ -849,7 +860,7 @@ impl SledAgent { pub async fn zpools_get(&self) -> Vec { self.inner .storage - .get_latest_resources() + .get_latest_disks() .await .get_all_zpools() .into_iter() @@ -1105,17 +1116,33 @@ impl SledAgent { let mut disks = vec![]; let mut zpools = vec![]; - for (identity, (disk, pool)) in - self.storage().get_latest_resources().await.disks().iter() - { + let all_disks = self.storage().get_latest_disks().await; + for (identity, variant, slot) in all_disks.iter_all() { disks.push(crate::params::InventoryDisk { identity: identity.clone(), - variant: disk.variant(), - slot: disk.slot(), + variant, + slot, }); + } + for zpool in all_disks.all_u2_zpools() { + let info = + match illumos_utils::zpool::Zpool::get_info(&zpool.to_string()) + { + Ok(info) => info, + Err(err) => { + warn!( + self.log, + "Failed to access zpool info"; + "zpool" => %zpool, + "err" => %err + ); + continue; + } + }; + zpools.push(crate::params::InventoryZpool { - id: pool.name.id(), - total_size: ByteCount::try_from(pool.info.size())?, + id: zpool.id(), + total_size: ByteCount::try_from(info.size())?, }); } diff --git a/sled-agent/src/storage_monitor.rs b/sled-agent/src/storage_monitor.rs index 0c9b287396..8cb63e31f8 100644 --- a/sled-agent/src/storage_monitor.rs +++ b/sled-agent/src/storage_monitor.rs @@ -3,67 +3,19 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. //! A task that listens for storage events from [`sled_storage::manager::StorageManager`] -//! and dispatches them to other parst of the bootstrap agent and sled agent +//! and dispatches them to other parts of the bootstrap agent and sled agent //! code. use crate::dump_setup::DumpSetup; -use crate::nexus::{ConvertInto, NexusClientWithResolver}; -use derive_more::From; -use futures::stream::FuturesOrdered; -use futures::FutureExt; -use futures::StreamExt; -use nexus_client::types::PhysicalDiskDeleteRequest; -use nexus_client::types::PhysicalDiskPutRequest; -use nexus_client::types::ZpoolPutRequest; -use omicron_common::api::external::ByteCount; -use omicron_common::backoff; -use omicron_common::disk::DiskIdentity; +use sled_storage::config::MountConfig; use sled_storage::manager::StorageHandle; -use sled_storage::pool::Pool; -use sled_storage::resources::StorageResources; +use sled_storage::resources::AllDisks; use slog::Logger; -use std::fmt::Debug; -use std::pin::Pin; -use tokio::sync::oneshot; -use uuid::Uuid; - -#[derive(From, Clone, Debug)] -enum NexusDiskRequest { - Put(PhysicalDiskPutRequest), - Delete(PhysicalDiskDeleteRequest), -} - -/// Describes the access to the underlay used by the StorageManager. 
-#[derive(Clone)] -pub struct UnderlayAccess { - pub nexus_client: NexusClientWithResolver, - pub sled_id: Uuid, -} - -impl Debug for UnderlayAccess { - fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - f.debug_struct("UnderlayAccess") - .field("sled_id", &self.sled_id) - .finish() - } -} pub struct StorageMonitor { log: Logger, storage_manager: StorageHandle, - // Receive a onetime notification that the underlay is available - underlay_available_rx: oneshot::Receiver, - - // A cached copy of the `StorageResources` from the last update - storage_resources: StorageResources, - - // Ability to access the underlay network - underlay: Option, - - // A queue for sending nexus notifications in order - nexus_notifications: FuturesOrdered, - // Invokes dumpadm(8) and savecore(8) when new disks are encountered dump_setup: DumpSetup, } @@ -71,24 +23,12 @@ pub struct StorageMonitor { impl StorageMonitor { pub fn new( log: &Logger, + mount_config: MountConfig, storage_manager: StorageHandle, - ) -> (StorageMonitor, oneshot::Sender) { - let (underlay_available_tx, underlay_available_rx) = oneshot::channel(); - let storage_resources = StorageResources::default(); - let dump_setup = DumpSetup::new(&log); + ) -> StorageMonitor { + let dump_setup = DumpSetup::new(&log, mount_config); let log = log.new(o!("component" => "StorageMonitor")); - ( - StorageMonitor { - log, - storage_manager, - underlay_available_rx, - storage_resources, - underlay: None, - nexus_notifications: FuturesOrdered::new(), - dump_setup, - }, - underlay_available_tx, - ) + StorageMonitor { log, storage_manager, dump_setup } } /// Run the main receive loop of the `StorageMonitor` @@ -97,277 +37,23 @@ impl StorageMonitor { pub async fn run(mut self) { loop { tokio::select! { - res = self.nexus_notifications.next(), - if !self.nexus_notifications.is_empty() => - { - match res { - Some(Ok(s)) => { - info!(self.log, "Nexus notification complete: {s}"); - } - e => error!(self.log, "Nexus notification error: {e:?}") - } - } - resources = self.storage_manager.wait_for_changes() => { + disks = self.storage_manager.wait_for_changes() => { info!( self.log, "Received storage manager update"; - "resources" => ?resources + "disks" => ?disks ); - self.handle_resource_update(resources).await; + self.handle_resource_update(disks).await; } - Ok(underlay) = &mut self.underlay_available_rx, - if self.underlay.is_none() => - { - let sled_id = underlay.sled_id; - info!( - self.log, - "Underlay Available"; "sled_id" => %sled_id - ); - self.underlay = Some(underlay); - self.notify_nexus_about_existing_resources(sled_id).await; - } - } - } - } - - /// When the underlay becomes available, we need to notify nexus about any - /// discovered disks and pools, since we don't attempt to notify until there - /// is an underlay available. - async fn notify_nexus_about_existing_resources(&mut self, sled_id: Uuid) { - let current = StorageResources::default(); - let updated = &self.storage_resources; - let nexus_updates = - compute_resource_diffs(&self.log, &sled_id, ¤t, updated); - for put in nexus_updates.disk_puts { - self.physical_disk_notify(put.into()).await; - } - for (pool, put) in nexus_updates.zpool_puts { - self.add_zpool_notify(pool, put).await; - } - } - - async fn handle_resource_update( - &mut self, - updated_resources: StorageResources, - ) { - // If the underlay isn't available, we only record the changes. Nexus - // isn't yet reachable to notify. 
- if self.underlay.is_some() { - let nexus_updates = compute_resource_diffs( - &self.log, - &self.underlay.as_ref().unwrap().sled_id, - &self.storage_resources, - &updated_resources, - ); - - for put in nexus_updates.disk_puts { - self.physical_disk_notify(put.into()).await; - } - for del in nexus_updates.disk_deletes { - self.physical_disk_notify(del.into()).await; - } - for (pool, put) in nexus_updates.zpool_puts { - self.add_zpool_notify(pool, put).await; } } - self.dump_setup.update_dumpdev_setup(updated_resources.disks()).await; - - // Save the updated `StorageResources` - self.storage_resources = updated_resources; - } - - // Adds a "notification to nexus" to `self.nexus_notifications`, informing it - // about the addition/removal of a physical disk to this sled. - async fn physical_disk_notify(&mut self, disk: NexusDiskRequest) { - let underlay = self.underlay.as_ref().unwrap().clone(); - let disk2 = disk.clone(); - let notify_nexus = move || { - let underlay = underlay.clone(); - let disk = disk.clone(); - async move { - let nexus_client = underlay.nexus_client.client().clone(); - - match &disk { - NexusDiskRequest::Put(request) => { - nexus_client - .physical_disk_put(&request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - NexusDiskRequest::Delete(request) => { - nexus_client - .physical_disk_delete(&request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - } - } - let msg = format!("{:?}", disk); - Ok(msg) - } - }; - - let log = self.log.clone(); - // This notification is often invoked before Nexus has started - // running, so avoid flagging any errors as concerning until some - // time has passed. - let log_post_failure = move |err, call_count, total_duration| { - if call_count == 0 { - info!(log, "failed to notify nexus about {disk2:?}"; - "err" => ?err - ); - } else if total_duration > std::time::Duration::from_secs(30) { - warn!(log, "failed to notify nexus about {disk2:?}"; - "err" => ?err, - "total duration" => ?total_duration); - } - }; - self.nexus_notifications.push_back( - backoff::retry_notify_ext( - backoff::retry_policy_internal_service_aggressive(), - notify_nexus, - log_post_failure, - ) - .boxed(), - ); } - // Adds a "notification to nexus" to `nexus_notifications`, - // informing it about the addition of `pool_id` to this sled. 
- async fn add_zpool_notify( - &mut self, - pool: Pool, - zpool_request: ZpoolPutRequest, - ) { - let pool_id = pool.name.id(); - let underlay = self.underlay.as_ref().unwrap().clone(); - - let notify_nexus = move || { - let underlay = underlay.clone(); - let zpool_request = zpool_request.clone(); - async move { - let sled_id = underlay.sled_id; - let nexus_client = underlay.nexus_client.client().clone(); - nexus_client - .zpool_put(&sled_id, &pool_id, &zpool_request) - .await - .map_err(|e| { - backoff::BackoffError::transient(e.to_string()) - })?; - let msg = format!("{:?}", zpool_request); - Ok(msg) - } - }; - - let log = self.log.clone(); - let name = pool.name.clone(); - let disk = pool.parent.clone(); - let log_post_failure = move |err, call_count, total_duration| { - if call_count == 0 { - info!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; - "err" => ?err); - } else if total_duration > std::time::Duration::from_secs(30) { - warn!(log, "failed to notify nexus about a new pool {name} on disk {disk:?}"; - "err" => ?err, - "total duration" => ?total_duration); - } - }; - self.nexus_notifications.push_back( - backoff::retry_notify_ext( - backoff::retry_policy_internal_service_aggressive(), - notify_nexus, - log_post_failure, + async fn handle_resource_update(&mut self, updated_disks: AllDisks) { + self.dump_setup + .update_dumpdev_setup( + updated_disks.iter_managed().map(|(_id, disk)| disk), ) - .boxed(), - ); + .await; } } - -// The type of a future which is used to send a notification to Nexus. -type NotifyFut = - Pin> + Send>>; - -struct NexusUpdates { - disk_puts: Vec, - disk_deletes: Vec, - zpool_puts: Vec<(Pool, ZpoolPutRequest)>, -} - -fn compute_resource_diffs( - log: &Logger, - sled_id: &Uuid, - current: &StorageResources, - updated: &StorageResources, -) -> NexusUpdates { - let mut disk_puts = vec![]; - let mut disk_deletes = vec![]; - let mut zpool_puts = vec![]; - - let mut put_pool = |disk_id: &DiskIdentity, updated_pool: &Pool| { - match ByteCount::try_from(updated_pool.info.size()) { - Ok(size) => zpool_puts.push(( - updated_pool.clone(), - ZpoolPutRequest { - size: size.into(), - disk_model: disk_id.model.clone(), - disk_serial: disk_id.serial.clone(), - disk_vendor: disk_id.vendor.clone(), - }, - )), - Err(err) => { - error!( - log, - "Error parsing pool size"; - "name" => updated_pool.name.to_string(), - "err" => ?err); - } - } - }; - - // Diff the existing resources with the update to see what has changed - // This loop finds disks and pools that were modified or deleted - for (disk_id, (disk, pool)) in current.disks().iter() { - match updated.disks().get(disk_id) { - Some((updated_disk, updated_pool)) => { - if disk != updated_disk { - disk_puts.push(PhysicalDiskPutRequest { - sled_id: *sled_id, - model: disk_id.model.clone(), - serial: disk_id.serial.clone(), - vendor: disk_id.vendor.clone(), - variant: updated_disk.variant().convert(), - }); - } - if pool != updated_pool { - put_pool(disk_id, updated_pool); - } - } - None => disk_deletes.push(PhysicalDiskDeleteRequest { - model: disk_id.model.clone(), - serial: disk_id.serial.clone(), - vendor: disk_id.vendor.clone(), - sled_id: *sled_id, - }), - } - } - - // Diff the existing resources with the update to see what has changed - // This loop finds new disks and pools - for (disk_id, (updated_disk, updated_pool)) in updated.disks().iter() { - if !current.disks().contains_key(disk_id) { - disk_puts.push(PhysicalDiskPutRequest { - sled_id: *sled_id, - model: disk_id.model.clone(), - serial: 
disk_id.serial.clone(), - vendor: disk_id.vendor.clone(), - variant: updated_disk.variant().convert(), - }); - put_pool(disk_id, updated_pool); - } - } - - NexusUpdates { disk_puts, disk_deletes, zpool_puts } -} diff --git a/sled-agent/src/vmm_reservoir.rs b/sled-agent/src/vmm_reservoir.rs index b16286f5f5..caa1d88254 100644 --- a/sled-agent/src/vmm_reservoir.rs +++ b/sled-agent/src/vmm_reservoir.rs @@ -120,7 +120,8 @@ impl VmmReservoirManagerHandle { rx.await.map_err(|_| Error::ReplySenderDropped)? } - #[cfg(test)] + /// TODO: We should be able run to tests in VMs that can use the real VmmReservoir + #[cfg(all(test, target_os = "illumos"))] pub fn stub_for_test() -> Self { let (tx, _) = flume::bounded(1); let (size_updated_tx, _) = broadcast::channel(1); diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 7b0d9b8071..57d3cb1049 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -255,7 +255,7 @@ impl Inner { // that can exist but do not, i.e., those whose parent datasets already // exist; and returns those. async fn bundle_directories(&self) -> Vec { - let resources = self.storage_handle.get_latest_resources().await; + let resources = self.storage_handle.get_latest_disks().await; let expected = resources.all_zone_bundle_directories(); let mut out = Vec::with_capacity(expected.len()); for each in expected.into_iter() { @@ -263,6 +263,7 @@ impl Inner { out.push(each); } } + out.sort(); out } } @@ -427,7 +428,7 @@ impl ZoneBundler { ) -> Result { let inner = self.inner.lock().await; let storage_dirs = inner.bundle_directories().await; - let resources = inner.storage_handle.get_latest_resources().await; + let resources = inner.storage_handle.get_latest_disks().await; let extra_log_dirs = resources .all_u2_mountpoints(U2_DEBUG_DATASET) .into_iter() @@ -2168,26 +2169,22 @@ mod illumos_tests { use super::StorageLimit; use super::Utf8Path; use super::Utf8PathBuf; - use super::Uuid; use super::ZoneBundleCause; use super::ZoneBundleId; use super::ZoneBundleInfo; use super::ZoneBundleMetadata; use super::ZoneBundler; - use super::ZFS; use anyhow::Context; use chrono::DateTime; use chrono::TimeZone; use chrono::Timelike; use chrono::Utc; - use illumos_utils::zpool::ZpoolName; use rand::RngCore; - use sled_storage::disk::RawDisk; - use sled_storage::disk::SyntheticDisk; - use sled_storage::manager::{FakeStorageManager, StorageHandle}; + use sled_storage::manager_test_harness::StorageManagerTestHarness; use slog::Drain; use slog::Logger; - use tokio::process::Command; + use std::sync::Arc; + use tokio::sync::Mutex; /// An iterator that returns the date of consecutive days beginning with 1st /// January 2020. The time portion of each returned date will be fixed at @@ -2239,77 +2236,58 @@ mod illumos_tests { assert!(zfs_quota(&path).await.is_err()); } - struct CleanupTestContext { + struct CleanupTestContextInner { resource_wrapper: ResourceWrapper, context: CleanupContext, bundler: ZoneBundler, } + // Practically, we only expect one thread to "own" this context at a time. + // However, with the "run_test_with_zfs_dataset", it's hard to pass an + // async function as a parameter ("test") that acts on a mutable reference + // without some fancy HRTB shenanigans. + // + // Reader: If you think you can pass a "&mut CleanupTestContextInner" + // there instead of an "Arc>", I welcome you to try! 
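// Minimal sketch (not from this patch) of the constraint described in the
// comment above: the shared test runner is generic over an async closure, and
// handing each test an owned, cloneable handle keeps those bounds simple,
// whereas a `&mut` argument would need higher-ranked lifetime bounds tying
// the borrow to the returned future. Names here are stand-ins.
use std::future::Future;
use std::sync::Arc;
use tokio::sync::Mutex;

#[derive(Clone)]
struct Ctx(Arc<Mutex<u32>>); // stand-in for CleanupTestContext

async fn run_test<T, Fut>(test: T)
where
    T: FnOnce(Ctx) -> Fut,
    Fut: Future<Output = anyhow::Result<()>>,
{
    let ctx = Ctx(Arc::new(Mutex::new(0)));
    // The closure gets its own clone; the runner keeps one for cleanup.
    let result = test(ctx.clone()).await;
    // ... cleanup using `ctx` would go here ...
    result.expect("test failed");
}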
+ #[derive(Clone)] + struct CleanupTestContext { + ctx: Arc>, + } + // A wrapper around `StorageResources`, that automatically creates dummy // directories in the provided test locations and removes them on drop. // - // I'd much prefer this to be done in $TEMPDIR. However, `StorageResources` - // is difficult to mock out or modify in such a way that the underlying - // dataset locations can be controlled. - // - // This creates completely BS disks, and fake names for the zpools on them. - // Those pools are _supposed_ to live at directories like: - // - // `/pool/int/` - // // They don't exist when you just do `StorageResources::new_for_test()`. // This type creates the datasets at the expected mountpoints, backed by the // ramdisk, and removes them on drop. This is basically a tempdir-like // system, that creates the directories implied by the `StorageResources` // expected disk structure. struct ResourceWrapper { - storage_handle: StorageHandle, + storage_test_harness: StorageManagerTestHarness, dirs: Vec, } - async fn setup_storage() -> StorageHandle { - let (manager, handle) = FakeStorageManager::new(); - - // Spawn the storage manager as done by sled-agent - tokio::spawn(async move { - manager.run().await; - }); + async fn setup_storage(log: &Logger) -> StorageManagerTestHarness { + let mut harness = StorageManagerTestHarness::new(&log).await; - // These must be internal zpools - for i in 0..2 { - let internal_zpool_name = ZpoolName::new_internal(Uuid::new_v4()); - let internal_disk: RawDisk = - SyntheticDisk::new(internal_zpool_name.clone(), i).into(); - handle.upsert_disk(internal_disk).await; - } - handle + harness.handle().key_manager_ready().await; + let _raw_disks = + harness.add_vdevs(&["m2_left.vdev", "m2_right.vdev"]).await; + harness } impl ResourceWrapper { - // Create new storage resources, and mount fake datasets at the required + // Create new storage resources, and mount datasets at the required // locations. - async fn new() -> Self { + async fn new(log: &Logger) -> Self { // Spawn the storage related tasks required for testing and insert // synthetic disks. 
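// Sketch (assumption, for illustration only): the vdev file naming convention
// the harness relies on when it adds synthetic disks. A file named
// "<m2_|u2_><serial>.vdev" is treated as a synthetic disk of that variant,
// with a synthetic identity derived from <serial>; "u2_example.vdev" and the
// 64 MiB size are arbitrary choices here, not values from this patch.
use sled_storage::disk::RawSyntheticDisk;

fn load_synthetic_u2() -> anyhow::Result<RawSyntheticDisk> {
    // Creates the backing file and parses the name into variant + identity.
    let disk =
        RawSyntheticDisk::new_with_length("u2_example.vdev", 64 << 20, 0)?;
    assert_eq!(disk.identity.serial, "synthetic-serial-example");
    Ok(disk)
}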
- let storage_handle = setup_storage().await; - let resources = storage_handle.get_latest_resources().await; - let dirs = resources.all_zone_bundle_directories(); - for d in dirs.iter() { - let id = - d.components().nth(3).unwrap().as_str().parse().unwrap(); - create_test_dataset(&id, d).await.unwrap(); - } - Self { storage_handle, dirs } - } - } - - impl Drop for ResourceWrapper { - fn drop(&mut self) { - for d in self.dirs.iter() { - let id = - d.components().nth(3).unwrap().as_str().parse().unwrap(); - remove_test_dataset(&id).unwrap(); - } + let storage_test_harness = setup_storage(log).await; + let resources = + storage_test_harness.handle().get_latest_disks().await; + let mut dirs = resources.all_zone_bundle_directories(); + dirs.sort(); + Self { storage_test_harness, dirs } } } @@ -2325,25 +2303,34 @@ mod illumos_tests { async fn setup_fake_cleanup_task() -> anyhow::Result { let log = test_logger(); let context = CleanupContext::default(); - let resource_wrapper = ResourceWrapper::new().await; + let resource_wrapper = ResourceWrapper::new(&log).await; let bundler = ZoneBundler::new( log, - resource_wrapper.storage_handle.clone(), + resource_wrapper.storage_test_harness.handle().clone(), context, ); - Ok(CleanupTestContext { resource_wrapper, context, bundler }) + Ok(CleanupTestContext { + ctx: Arc::new(Mutex::new(CleanupTestContextInner { + resource_wrapper, + context, + bundler, + })), + }) } #[tokio::test] async fn test_context() { - let ctx = setup_fake_cleanup_task().await.unwrap(); + let context = setup_fake_cleanup_task().await.unwrap(); + let mut ctx = context.ctx.lock().await; let context = ctx.bundler.cleanup_context().await; assert_eq!(context, ctx.context, "received incorrect context"); + ctx.resource_wrapper.storage_test_harness.cleanup().await; } #[tokio::test] async fn test_update_context() { - let ctx = setup_fake_cleanup_task().await.unwrap(); + let context = setup_fake_cleanup_task().await.unwrap(); + let mut ctx = context.ctx.lock().await; let new_context = CleanupContext { period: CleanupPeriod::new(ctx.context.period.as_duration() / 2) .unwrap(), @@ -2363,6 +2350,7 @@ mod illumos_tests { .expect("failed to set context"); let context = ctx.bundler.cleanup_context().await; assert_eq!(context, new_context, "failed to update context"); + ctx.resource_wrapper.storage_test_harness.cleanup().await; } // Quota applied to test datasets. @@ -2374,59 +2362,7 @@ mod illumos_tests { // i.e., the "ashift" value. An empty dataset is unlikely to contain more // than one megabyte of overhead, so use that as a conservative test size to // avoid issues. - const TEST_QUOTA: u64 = 1024 * 1024; - - async fn create_test_dataset( - id: &Uuid, - mountpoint: &Utf8PathBuf, - ) -> anyhow::Result<()> { - let output = Command::new("/usr/bin/pfexec") - .arg(ZFS) - .arg("create") - .arg("-o") - .arg(format!("quota={TEST_QUOTA}")) - .arg("-o") - .arg(format!("mountpoint={mountpoint}")) - .arg(format!("rpool/{id}")) - .output() - .await - .context("failed to spawn zfs create operation")?; - anyhow::ensure!( - output.status.success(), - "zfs create operation failed: {}", - String::from_utf8_lossy(&output.stderr), - ); - - // Make the path operable by the test code. 
- let output = Command::new("/usr/bin/pfexec") - .arg("chmod") - .arg("a+rw") - .arg(&mountpoint) - .output() - .await - .context("failed to spawn chmod operation")?; - anyhow::ensure!( - output.status.success(), - "chmod-ing the dataset failed: {}", - String::from_utf8_lossy(&output.stderr), - ); - Ok(()) - } - - fn remove_test_dataset(id: &Uuid) -> anyhow::Result<()> { - let output = std::process::Command::new("/usr/bin/pfexec") - .arg(ZFS) - .arg("destroy") - .arg(format!("rpool/{id}")) - .output() - .context("failed to spawn zfs destroy operation")?; - anyhow::ensure!( - output.status.success(), - "zfs destroy operation failed: {}", - String::from_utf8_lossy(&output.stderr), - ); - Ok(()) - } + const TEST_QUOTA: usize = sled_storage::dataset::DEBUG_DATASET_QUOTA; async fn run_test_with_zfs_dataset(test: T) where @@ -2436,7 +2372,14 @@ mod illumos_tests { let context = setup_fake_cleanup_task() .await .expect("failed to create cleanup task"); - let result = test(context).await; + let result = test(context.clone()).await; + + let mut ctx = context.ctx.lock().await; + info!( + &ctx.bundler.log, + "Test completed, performing cleanup before emitting result" + ); + ctx.resource_wrapper.storage_test_harness.cleanup().await; result.expect("test failed!"); } @@ -2448,6 +2391,7 @@ mod illumos_tests { async fn test_utilization_body( ctx: CleanupTestContext, ) -> anyhow::Result<()> { + let ctx = ctx.ctx.lock().await; let utilization = ctx.bundler.utilization().await?; let paths = utilization.keys().cloned().collect::>(); @@ -2462,8 +2406,22 @@ mod illumos_tests { .values() .next() .context("no utilization information?")?; + + // If this needs to change, go modify the "add_vdevs" call in + // "setup_storage". + assert!( + TEST_QUOTA + < StorageManagerTestHarness::DEFAULT_VDEV_SIZE + .try_into() + .unwrap(), + "Quota larger than underlying device (quota: {}, device size: {})", + TEST_QUOTA, + StorageManagerTestHarness::DEFAULT_VDEV_SIZE, + ); + anyhow::ensure!( - bundle_utilization.dataset_quota == TEST_QUOTA, + bundle_utilization.dataset_quota + == u64::try_from(TEST_QUOTA).unwrap(), "computed incorrect dataset quota" ); @@ -2489,9 +2447,13 @@ mod illumos_tests { DaysOfOurBundles::new().next().unwrap(), ZoneBundleCause::ExplicitRequest, ) - .await?; + .await + .context("Failed to insert_fake_bundle")?; - let new_utilization = ctx.bundler.utilization().await?; + let new_utilization = + ctx.bundler.utilization().await.context( + "Failed to get utilization after inserting fake bundle", + )?; anyhow::ensure!( paths == new_utilization.keys().cloned().collect::>(), "paths should not change" @@ -2545,6 +2507,7 @@ mod illumos_tests { } async fn test_cleanup_body(ctx: CleanupTestContext) -> anyhow::Result<()> { + let ctx = ctx.ctx.lock().await; // Let's add a bunch of fake bundles, until we should be over the // storage limit. 
These will all be explicit requests, so the priority // should be decided based on time, i.e., the ones first added should be @@ -2560,16 +2523,18 @@ mod illumos_tests { let mut days = DaysOfOurBundles::new(); let mut info = Vec::new(); let mut utilization = ctx.bundler.utilization().await?; + let bundle_dir = &ctx.resource_wrapper.dirs[0]; loop { let us = utilization - .values() - .next() + .get(bundle_dir) .context("no utilization information")?; + if us.bytes_used > us.bytes_available { break; } + let it = insert_fake_bundle( - &ctx.resource_wrapper.dirs[0], + bundle_dir, days.next().unwrap(), ZoneBundleCause::ExplicitRequest, ) @@ -2582,15 +2547,8 @@ mod illumos_tests { let counts = ctx.bundler.cleanup().await.context("failed to run cleanup")?; - // We should have cleaned up items in the same paths that we have in the - // context. - anyhow::ensure!( - counts.keys().zip(ctx.resource_wrapper.dirs.iter()).all(|(a, b)| a == b), - "cleaned-up directories do not match the context's storage directories", - ); - // We should have cleaned up the first-inserted bundle. - let count = counts.values().next().context("no cleanup counts")?; + let count = counts.get(bundle_dir).context("no cleanup counts")?; anyhow::ensure!(count.bundles == 1, "expected to cleanup one bundle"); anyhow::ensure!( count.bytes == info[0].bytes, @@ -2621,6 +2579,7 @@ mod illumos_tests { async fn test_list_with_filter_body( ctx: CleanupTestContext, ) -> anyhow::Result<()> { + let ctx = ctx.ctx.lock().await; let mut days = DaysOfOurBundles::new(); let mut info = Vec::new(); const N_BUNDLES: usize = 3; diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index a649b205e1..adea1d182a 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -32,6 +32,14 @@ pub enum PooledDiskError { BadPartitionLayout { path: Utf8PathBuf, why: String }, #[error("Requested partition {partition:?} not found on device {path}")] NotFound { path: Utf8PathBuf, partition: Partition }, + #[error("Zpool UUID required to format this disk")] + MissingZpoolUuid, + #[error("Observed Zpool with unexpected UUID (saw: {observed}, expected: {expected})")] + UnexpectedUuid { expected: Uuid, observed: Uuid }, + #[error("Unexpected disk variant")] + UnexpectedVariant, + #[error("Zpool does not exist")] + ZpoolDoesNotExist, #[error(transparent)] ZpoolCreate(#[from] illumos_utils::zpool::CreateError), #[error("Cannot import zpool: {0}")] @@ -58,7 +66,7 @@ pub enum Partition { ZfsPool, } -#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] pub struct DiskPaths { // Full path to the disk under "/devices". // Should NOT end with a ":partition_letter". @@ -69,7 +77,11 @@ pub struct DiskPaths { impl DiskPaths { // Returns the "illumos letter-indexed path" for a device. - fn partition_path(&self, index: usize, raw: bool) -> Option { + pub fn partition_path( + &self, + index: usize, + raw: bool, + ) -> Option { let index = u8::try_from(index).ok()?; let path = &self.devfs_path; @@ -125,7 +137,7 @@ impl DiskPaths { /// This exists as a distinct entity from `Disk` in `sled-storage` because it /// may be desirable to monitor for hardware in one context, and conform disks /// to partition layouts in a different context. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] pub struct UnparsedDisk { paths: DiskPaths, slot: i64, @@ -135,7 +147,6 @@ pub struct UnparsedDisk { } impl UnparsedDisk { - #[allow(dead_code)] pub fn new( devfs_path: Utf8PathBuf, dev_path: Option, @@ -153,6 +164,10 @@ impl UnparsedDisk { } } + pub fn paths(&self) -> &DiskPaths { + &self.paths + } + pub fn devfs_path(&self) -> &Utf8PathBuf { &self.paths.devfs_path } @@ -168,6 +183,10 @@ impl UnparsedDisk { pub fn is_boot_disk(&self) -> bool { self.is_boot_disk } + + pub fn slot(&self) -> i64 { + self.slot + } } /// A physical disk that is partitioned to contain exactly one zpool @@ -197,14 +216,15 @@ impl PooledDisk { pub fn new( log: &Logger, unparsed_disk: UnparsedDisk, + zpool_id: Option, ) -> Result { let paths = &unparsed_disk.paths; let variant = unparsed_disk.variant; - let identity = unparsed_disk.identity(); + let identity = &unparsed_disk.identity; // Ensure the GPT has the right format. This does not necessarily // mean that the partitions are populated with the data we need. let partitions = - ensure_partition_layout(&log, &paths, variant, identity)?; + ensure_partition_layout(&log, &paths, variant, identity, zpool_id)?; // Find the path to the zpool which exists on this disk. // @@ -216,9 +236,10 @@ impl PooledDisk { false, )?; - let zpool_name = Self::ensure_zpool_exists(log, variant, &zpool_path)?; - Self::ensure_zpool_imported(log, &zpool_name)?; - Self::ensure_zpool_failmode_is_continue(log, &zpool_name)?; + let zpool_name = + ensure_zpool_exists(log, variant, &zpool_path, zpool_id)?; + ensure_zpool_imported(log, &zpool_name)?; + ensure_zpool_failmode_is_continue(log, &zpool_name)?; Ok(Self { paths: unparsed_disk.paths, @@ -230,83 +251,130 @@ impl PooledDisk { zpool_name, }) } +} - fn ensure_zpool_exists( - log: &Logger, - variant: DiskVariant, - zpool_path: &Utf8Path, - ) -> Result { - let zpool_name = match Fstyp::get_zpool(&zpool_path) { - Ok(zpool_name) => zpool_name, - Err(_) => { - // What happened here? - // - We saw that a GPT exists for this Disk (or we didn't, and - // made our own). - // - However, this particular partition does not appear to have - // a zpool. - // - // This can happen in situations where "zpool create" - // initialized a zpool, and "zpool destroy" removes the zpool - // but still leaves the partition table untouched. - // - // To remedy: Let's enforce that the partition exists. - info!( - log, - "GPT exists without Zpool: formatting zpool at {}", - zpool_path, - ); - // If a zpool does not already exist, create one. - let zpool_name = match variant { - DiskVariant::M2 => ZpoolName::new_internal(Uuid::new_v4()), - DiskVariant::U2 => ZpoolName::new_external(Uuid::new_v4()), - }; - Zpool::create(&zpool_name, &zpool_path)?; - zpool_name +/// Checks if the zpool exists, but makes no modifications, +/// and does not attempt to import the zpool. 
+pub fn check_if_zpool_exists( + zpool_path: &Utf8Path, +) -> Result { + let zpool_name = match Fstyp::get_zpool(&zpool_path) { + Ok(zpool_name) => zpool_name, + Err(_) => return Err(PooledDiskError::ZpoolDoesNotExist), + }; + Ok(zpool_name) +} + +pub fn ensure_zpool_exists( + log: &Logger, + variant: DiskVariant, + zpool_path: &Utf8Path, + zpool_id: Option, +) -> Result { + let zpool_name = match Fstyp::get_zpool(&zpool_path) { + Ok(zpool_name) => { + if let Some(expected) = zpool_id { + info!(log, "Checking that UUID in storage matches request"; "expected" => ?expected); + let observed = zpool_name.id(); + if expected != observed { + warn!(log, "Zpool UUID mismatch"; "expected" => ?expected, "observed" => ?observed); + return Err(PooledDiskError::UnexpectedUuid { + expected, + observed, + }); + } } - }; - Zpool::import(&zpool_name).map_err(|e| { - warn!(log, "Failed to import zpool {zpool_name}: {e}"); - PooledDiskError::ZpoolImport(e) - })?; + zpool_name + } + Err(_) => { + // What happened here? + // - We saw that a GPT exists for this Disk (or we didn't, and + // made our own). + // - However, this particular partition does not appear to have + // a zpool. + // + // This can happen in situations where "zpool create" + // initialized a zpool, and "zpool destroy" removes the zpool + // but still leaves the partition table untouched. + // + // To remedy: Let's enforce that the partition exists. + info!( + log, + "GPT exists without Zpool: formatting zpool at {}", zpool_path, + ); + let id = match zpool_id { + Some(id) => { + info!(log, "Formatting zpool with requested ID"; "id" => ?id); + id + } + None => { + let id = Uuid::new_v4(); + info!(log, "Formatting zpool with generated ID"; "id" => ?id); + id + } + }; + + // If a zpool does not already exist, create one. + let zpool_name = match variant { + DiskVariant::M2 => ZpoolName::new_internal(id), + DiskVariant::U2 => ZpoolName::new_external(id), + }; + Zpool::create(&zpool_name, &zpool_path)?; + zpool_name + } + }; + Zpool::import(&zpool_name).map_err(|e| { + warn!(log, "Failed to import zpool {zpool_name}: {e}"); + PooledDiskError::ZpoolImport(e) + })?; - Ok(zpool_name) - } + Ok(zpool_name) +} - fn ensure_zpool_imported( - log: &Logger, - zpool_name: &ZpoolName, - ) -> Result<(), PooledDiskError> { - Zpool::import(&zpool_name).map_err(|e| { - warn!(log, "Failed to import zpool {zpool_name}: {e}"); - PooledDiskError::ZpoolImport(e) - })?; - Ok(()) - } +pub fn ensure_zpool_imported( + log: &Logger, + zpool_name: &ZpoolName, +) -> Result<(), PooledDiskError> { + Zpool::import(&zpool_name).map_err(|e| { + warn!(log, "Failed to import zpool {zpool_name}: {e}"); + PooledDiskError::ZpoolImport(e) + })?; + Ok(()) +} - fn ensure_zpool_failmode_is_continue( - log: &Logger, - zpool_name: &ZpoolName, - ) -> Result<(), PooledDiskError> { - // Ensure failmode is set to `continue`. See - // https://github.com/oxidecomputer/omicron/issues/2766 for details. The - // short version is, each pool is only backed by one vdev. There is no - // recovery if one starts breaking, so if connectivity to one dies it's - // actively harmful to try to wait for it to come back; we'll be waiting - // forever and get stuck. We'd rather get the errors so we can deal with - // them ourselves. 
- Zpool::set_failmode_continue(&zpool_name).map_err(|e| { - warn!( - log, - "Failed to set failmode=continue on zpool {zpool_name}: {e}" - ); - PooledDiskError::ZpoolImport(e) - })?; - Ok(()) - } +pub fn ensure_zpool_failmode_is_continue( + log: &Logger, + zpool_name: &ZpoolName, +) -> Result<(), PooledDiskError> { + // Ensure failmode is set to `continue`. See + // https://github.com/oxidecomputer/omicron/issues/2766 for details. The + // short version is, each pool is only backed by one vdev. There is no + // recovery if one starts breaking, so if connectivity to one dies it's + // actively harmful to try to wait for it to come back; we'll be waiting + // forever and get stuck. We'd rather get the errors so we can deal with + // them ourselves. + Zpool::set_failmode_continue(&zpool_name).map_err(|e| { + warn!( + log, + "Failed to set failmode=continue on zpool {zpool_name}: {e}" + ); + PooledDiskError::ZpoolImport(e) + })?; + Ok(()) } #[derive( - Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema, + Debug, + Clone, + Copy, + PartialEq, + Eq, + Hash, + Serialize, + Deserialize, + JsonSchema, + Ord, + PartialOrd, )] pub enum DiskVariant { U2, diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 3b8e0af2ee..32debfc3e1 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -148,9 +148,10 @@ pub fn ensure_partition_layout( paths: &DiskPaths, variant: DiskVariant, identity: &DiskIdentity, + zpool_id: Option, ) -> Result, PooledDiskError> { internal_ensure_partition_layout::( - log, paths, variant, identity, + log, paths, variant, identity, zpool_id, ) } @@ -161,23 +162,26 @@ fn internal_ensure_partition_layout( paths: &DiskPaths, variant: DiskVariant, identity: &DiskIdentity, + zpool_id: Option, ) -> Result, PooledDiskError> { // Open the "Whole Disk" as a raw device to be parsed by the // libefi-illumos library. This lets us peek at the GPT before // making too many assumptions about it. let raw = true; let path = paths.whole_disk(raw); + let devfs_path_str = paths.devfs_path.as_str().to_string(); + let log = log.new(slog::o!("path" => devfs_path_str)); let gpt = match GPT::read(&path) { Ok(gpt) => { // This should be the common steady-state case - info!(log, "Disk at {} already has a GPT", paths.devfs_path); + info!(log, "Disk already has a GPT"); gpt } Err(libefi_illumos::Error::LabelNotFound) => { // Fresh U.2 disks are an example of devices where "we don't expect // a GPT to exist". - info!(log, "Disk at {} does not have a GPT", paths.devfs_path); + info!(log, "Disk does not have a GPT"); // For ZFS-implementation-specific reasons, Zpool create can only // act on devices under the "/dev" hierarchy, rather than the device @@ -193,12 +197,19 @@ fn internal_ensure_partition_layout( DiskVariant::U2 => { // First we need to check that this disk is of the proper // size and correct logical block address formatting. - ensure_size_and_formatting(log, identity)?; + ensure_size_and_formatting(&log, identity)?; + + info!( + log, + "Formatting zpool on disk"; + "uuid" => ?zpool_id, + ); + let Some(zpool_id) = zpool_id else { + return Err(PooledDiskError::MissingZpoolUuid); + }; - // If we were successful we can create a zpool on this disk. - info!(log, "Formatting zpool on disk {}", paths.devfs_path); // If a zpool does not already exist, create one. 
- let zpool_name = ZpoolName::new_external(Uuid::new_v4()); + let zpool_name = ZpoolName::new_external(zpool_id); Zpool::create(&zpool_name, dev_path)?; return Ok(vec![Partition::ZfsPool]); } @@ -385,6 +396,7 @@ mod test { &DiskPaths { devfs_path, dev_path: None }, DiskVariant::U2, &mock_disk_identity(), + None, ); match result { Err(PooledDiskError::CannotFormatMissingDevPath { .. }) => {} @@ -419,6 +431,7 @@ mod test { }, DiskVariant::U2, &mock_disk_identity(), + Some(Uuid::new_v4()), ) .expect("Should have succeeded partitioning disk"); @@ -444,6 +457,7 @@ mod test { }, DiskVariant::M2, &mock_disk_identity(), + None, ) .is_err()); @@ -482,6 +496,7 @@ mod test { }, DiskVariant::U2, &mock_disk_identity(), + None, ) .expect("Should be able to parse disk"); @@ -525,6 +540,7 @@ mod test { }, DiskVariant::M2, &mock_disk_identity(), + None, ) .expect("Should be able to parse disk"); @@ -565,6 +581,7 @@ mod test { }, DiskVariant::M2, &mock_disk_identity(), + None, ) .expect_err("Should have failed parsing empty GPT"), PooledDiskError::BadPartitionLayout { .. } @@ -591,6 +608,7 @@ mod test { }, DiskVariant::U2, &mock_disk_identity(), + None, ) .expect_err("Should have failed parsing empty GPT"), PooledDiskError::BadPartitionLayout { .. } diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index e990567b7c..a47bb0d2bc 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -68,6 +68,7 @@ pub fn ensure_partition_layout( _paths: &DiskPaths, _variant: DiskVariant, _identity: &DiskIdentity, + _zpool_id: Option, ) -> Result, PooledDiskError> { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index cb3a790631..839908effb 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -4,11 +4,15 @@ version = "0.1.0" edition = "2021" [dependencies] +anyhow.workspace = true async-trait.workspace = true camino.workspace = true +camino-tempfile.workspace = true cfg-if.workspace = true +debug-ignore.workspace = true derive_more.workspace = true glob.workspace = true +futures.workspace = true illumos-utils.workspace = true key-manager.workspace = true omicron-common.workspace = true @@ -24,9 +28,9 @@ uuid.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] -illumos-utils = { workspace = true, features = ["tmp_keypath", "testing"] } +expectorate.workspace = true +illumos-utils = { workspace = true, features = ["testing"] } omicron-test-utils.workspace = true -camino-tempfile.workspace = true [features] # Quotas and the like can be shrunk via this feature diff --git a/sled-storage/src/config.rs b/sled-storage/src/config.rs new file mode 100644 index 0000000000..a3baf220b2 --- /dev/null +++ b/sled-storage/src/config.rs @@ -0,0 +1,39 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Describes a handful of configuration options that can be +//! used to tweak behavior under test. + +use camino::Utf8PathBuf; + +/// Options to alter the mount path of datasets. +/// +/// By default, datasets within a pool are mounted under "/pool/ext/..." and +/// "/pool/int/...". For more context, see: +/// [illumos_utils::zpool::ZpoolName::dataset_mountpoint]. 
+/// +/// However, under test, it can be desirable to have a root filesystem +/// which is isolated from other tests, and which doesn't need to exist under +/// the root filesystem. [MountConfig] provides options to tweak which path is +/// used to set up and access these datasets. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct MountConfig { + /// The root path under which datasets are located. + pub root: Utf8PathBuf, + + /// The path where synthetic disks are stored, + /// if their paths are not absolute. + pub synthetic_disk_root: Utf8PathBuf, +} + +impl Default for MountConfig { + fn default() -> Self { + Self { + root: Utf8PathBuf::from( + illumos_utils::zpool::ZPOOL_MOUNTPOINT_ROOT, + ), + synthetic_disk_root: Utf8PathBuf::from("/var/tmp"), + } + } +} diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 41b77ea38b..06eea367b9 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -4,6 +4,7 @@ //! ZFS dataset related functionality +use crate::config::MountConfig; use crate::keyfile::KeyFile; use camino::Utf8PathBuf; use cfg_if::cfg_if; @@ -33,7 +34,7 @@ pub const M2_BACKING_DATASET: &'static str = "backing"; cfg_if! { if #[cfg(any(test, feature = "testing"))] { // Tuned for zone_bundle tests - pub const DEBUG_DATASET_QUOTA: usize = 100 * (1 << 10); + pub const DEBUG_DATASET_QUOTA: usize = 1 << 20; } else { // TODO-correctness: This value of 100GiB is a pretty wild guess, and should be // tuned as needed. @@ -279,10 +280,12 @@ pub enum DatasetError { /// `None` is for the M.2s touched by the Installinator. pub(crate) async fn ensure_zpool_has_datasets( log: &Logger, + mount_config: &MountConfig, zpool_name: &ZpoolName, disk_identity: &DiskIdentity, key_requester: Option<&StorageKeyRequester>, ) -> Result<(), DatasetError> { + info!(log, "Ensuring zpool has datasets"; "zpool" => ?zpool_name, "disk_identity" => ?disk_identity); let (root, datasets) = match zpool_name.kind().into() { DiskVariant::M2 => (None, M2_EXPECTED_DATASETS.iter()), DiskVariant::U2 => (Some(CRYPT_DATASET), U2_EXPECTED_DATASETS.iter()), @@ -297,8 +300,10 @@ pub(crate) async fn ensure_zpool_has_datasets( let Some(key_requester) = key_requester else { return Err(DatasetError::MissingStorageKeyRequester); }; - let mountpoint = zpool_name.dataset_mountpoint(dataset); - let keypath: Keypath = disk_identity.into(); + let mountpoint = + zpool_name.dataset_mountpoint(&mount_config.root, dataset); + let keypath: Keypath = + illumos_utils::zfs::Keypath::new(disk_identity, &mount_config.root); let epoch = if let Ok(epoch_str) = Zfs::get_oxide_value(dataset, "epoch") @@ -324,15 +329,15 @@ pub(crate) async fn ensure_zpool_has_datasets( // other reason, but the dataset actually existed, we will // try to create the dataset below and that will fail. So // there is no harm in just loading the latest secret here. 
- info!(log, "Loading latest secret"; "disk_id"=>#?disk_identity); + info!(log, "Loading latest secret"; "disk_id"=>?disk_identity); let epoch = key_requester.load_latest_secret().await?; - info!(log, "Loaded latest secret"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + info!(log, "Loaded latest secret"; "epoch"=>%epoch, "disk_id"=>?disk_identity); epoch }; - info!(log, "Retrieving key"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + info!(log, "Retrieving key"; "epoch"=>%epoch, "disk_id"=>?disk_identity); let key = key_requester.get_key(epoch, disk_identity.clone()).await?; - info!(log, "Got key"; "epoch"=>%epoch, "disk_id"=>#?disk_identity); + info!(log, "Got key"; "epoch"=>%epoch, "disk_id"=>?disk_identity); let mut keyfile = KeyFile::create(keypath.clone(), key.expose_secret(), log) @@ -366,7 +371,8 @@ pub(crate) async fn ensure_zpool_has_datasets( }; for dataset in datasets.into_iter() { - let mountpoint = zpool_name.dataset_mountpoint(dataset.name); + let mountpoint = + zpool_name.dataset_mountpoint(&mount_config.root, dataset.name); let name = &format!("{}/{}", zpool_name, dataset.name); // Use a value that's alive for the duration of this sled agent diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index 705b38718a..7383475cb9 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -4,19 +4,72 @@ //! Disk related types +use anyhow::bail; use camino::{Utf8Path, Utf8PathBuf}; use derive_more::From; -use illumos_utils::zpool::{Zpool, ZpoolKind, ZpoolName}; +use illumos_utils::zpool::{ZpoolKind, ZpoolName}; use key_manager::StorageKeyRequester; +use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; +use omicron_common::ledger::Ledgerable; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use sled_hardware::{ DiskVariant, Partition, PooledDisk, PooledDiskError, UnparsedDisk, }; -use slog::Logger; -use std::fs::File; +use slog::{info, Logger}; +use uuid::Uuid; +use crate::config::MountConfig; use crate::dataset; +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct OmicronPhysicalDiskConfig { + pub identity: DiskIdentity, + pub id: Uuid, + pub pool_id: Uuid, +} + +#[derive( + Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, +)] +pub struct OmicronPhysicalDisksConfig { + /// generation number of this configuration + /// + /// This generation number is owned by the control plane (i.e., RSS or + /// Nexus, depending on whether RSS-to-Nexus handoff has happened). It + /// should not be bumped within Sled Agent. + /// + /// Sled Agent rejects attempts to set the configuration to a generation + /// older than the one it's currently running. + pub generation: Generation, + + pub disks: Vec, +} + +impl Default for OmicronPhysicalDisksConfig { + fn default() -> Self { + Self { generation: Generation::new(), disks: vec![] } + } +} + +impl Ledgerable for OmicronPhysicalDisksConfig { + fn is_newer_than(&self, other: &OmicronPhysicalDisksConfig) -> bool { + self.generation > other.generation + } + + // No need to do this, the generation number is provided externally. 
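// Sketch (assumption): the shape of the generation check implied by the doc
// comment on `generation` and by the `PhysicalDiskConfigurationOutdated`
// error added in this patch; the actual check lives in the storage manager.
use crate::disk::OmicronPhysicalDisksConfig;
use crate::error::Error;

fn check_generation(
    current: &OmicronPhysicalDisksConfig,
    requested: &OmicronPhysicalDisksConfig,
) -> Result<(), Error> {
    // The control plane owns the generation number; never accept a request
    // that would roll it backwards.
    if requested.generation < current.generation {
        return Err(Error::PhysicalDiskConfigurationOutdated {
            requested: requested.generation,
            current: current.generation,
        });
    }
    Ok(())
}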
+ fn generation_bump(&mut self) {} +} + +impl OmicronPhysicalDisksConfig { + pub fn new() -> Self { + Self { generation: Generation::new(), disks: vec![] } + } +} + #[derive(Debug, thiserror::Error)] pub enum DiskError { #[error(transparent)] @@ -25,13 +78,11 @@ pub enum DiskError { PooledDisk(#[from] sled_hardware::PooledDiskError), } -// A synthetic disk that acts as one "found" by the hardware and that is backed -// by a zpool +/// A synthetic disk which has been formatted with a zpool. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct SyntheticDisk { - pub identity: DiskIdentity, - pub zpool_name: ZpoolName, - pub slot: i64, + raw: RawSyntheticDisk, + zpool_name: ZpoolName, } // By adding slots at an "offset", this acts as a barrier against synthetic @@ -43,45 +94,111 @@ pub struct SyntheticDisk { const SYNTHETIC_SLOT_OFFSET: i64 = 1024; impl SyntheticDisk { - // Create a zpool and import it for the synthetic disk - // Zpools willl be set to the min size of 64Mib - pub fn create_zpool( - dir: &Utf8Path, - zpool_name: &ZpoolName, + // "Manages" a SyntheticDisk by ensuring that it has a Zpool and importing + // it. If the zpool already exists, it is imported, but not re-created. + pub fn new( + log: &Logger, + mount_config: &MountConfig, + raw: RawSyntheticDisk, + zpool_id: Option, + ) -> Self { + let path = if raw.path.is_absolute() { + raw.path.clone() + } else { + mount_config.synthetic_disk_root.join(&raw.path) + }; + + info!( + log, + "Invoking SyntheticDisk::new"; + "identity" => ?raw.identity, + "path" => %path, + ); + + let zpool_name = sled_hardware::disk::ensure_zpool_exists( + log, + raw.variant, + &path, + zpool_id, + ) + .unwrap(); + sled_hardware::disk::ensure_zpool_imported(log, &zpool_name).unwrap(); + sled_hardware::disk::ensure_zpool_failmode_is_continue( + log, + &zpool_name, + ) + .unwrap(); + + Self { raw, zpool_name } + } +} + +// A synthetic disk that acts as one "found" by the hardware and that is backed +// by a vdev. +#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +pub struct RawSyntheticDisk { + pub path: Utf8PathBuf, + pub identity: DiskIdentity, + pub variant: DiskVariant, + pub slot: i64, +} + +impl RawSyntheticDisk { + /// Creates the file with a specified length, and also parses it as + /// a [RawSyntheticDisk]. + pub fn new_with_length>( + vdev: P, + length: u64, slot: i64, - ) -> SyntheticDisk { - // 64 MiB (min size of zpool) - const DISK_SIZE: u64 = 64 * 1024 * 1024; - let path = dir.join(zpool_name.to_string()); - let file = File::create(&path).unwrap(); - file.set_len(DISK_SIZE).unwrap(); - drop(file); - Zpool::create(zpool_name, &path).unwrap(); - Zpool::import(zpool_name).unwrap(); - Zpool::set_failmode_continue(zpool_name).unwrap(); - Self::new(zpool_name.clone(), slot) + ) -> Result { + let file = std::fs::File::create(vdev.as_ref())?; + file.set_len(length)?; + Self::load(vdev, slot) } - pub fn new(zpool_name: ZpoolName, slot: i64) -> SyntheticDisk { - let id = zpool_name.id(); + /// Treats a file at path `vdev` as a synthetic disk. The file + /// should already exist, and have the desired length. 
+ pub fn load>( + vdev: P, + slot: i64, + ) -> Result { + let path = vdev.as_ref(); + let Some(file) = path.file_name() else { + bail!("Missing file name for synthetic disk"); + }; + + let Some(file) = file.strip_suffix(".vdev") else { + bail!("Missing '.vdev' suffix for synthetic disk"); + }; + + let (serial, variant) = if let Some(serial) = file.strip_prefix("m2_") { + (serial, DiskVariant::M2) + } else if let Some(serial) = file.strip_prefix("u2_") { + (serial, DiskVariant::U2) + } else { + bail!("Unknown file prefix: {file}. Try one of {{m2_,u2_}}"); + }; + let identity = DiskIdentity { vendor: "synthetic-vendor".to_string(), - serial: format!("synthetic-serial-{id}"), - model: "synthetic-model".to_string(), + serial: format!("synthetic-serial-{serial}"), + model: format!("synthetic-model-{variant:?}"), }; - SyntheticDisk { + + Ok(Self { + path: path.into(), identity, - zpool_name, + variant, slot: slot + SYNTHETIC_SLOT_OFFSET, - } + }) } } // An [`UnparsedDisk`] disk learned about from the hardware or a wrapped zpool -#[derive(Debug, Clone, PartialEq, Eq, Hash, From)] +#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, From)] pub enum RawDisk { Real(UnparsedDisk), - Synthetic(SyntheticDisk), + Synthetic(RawSyntheticDisk), } impl RawDisk { @@ -90,7 +207,7 @@ impl RawDisk { Self::Real(disk) => disk.is_boot_disk(), Self::Synthetic(disk) => { // Just label any M.2 the boot disk. - disk.zpool_name.kind() == ZpoolKind::Internal + disk.variant == DiskVariant::M2 } } } @@ -105,18 +222,7 @@ impl RawDisk { pub fn variant(&self) -> DiskVariant { match self { Self::Real(disk) => disk.variant(), - Self::Synthetic(disk) => match disk.zpool_name.kind() { - ZpoolKind::External => DiskVariant::U2, - ZpoolKind::Internal => DiskVariant::M2, - }, - } - } - - #[cfg(test)] - pub fn zpool_name(&self) -> &ZpoolName { - match self { - Self::Real(_) => unreachable!(), - Self::Synthetic(disk) => &disk.zpool_name, + Self::Synthetic(disk) => disk.variant, } } @@ -131,12 +237,37 @@ impl RawDisk { !self.is_synthetic() } + pub fn u2_zpool_path(&self) -> Result { + if !matches!(self.variant(), DiskVariant::U2) { + return Err(PooledDiskError::UnexpectedVariant); + } + match self { + Self::Real(disk) => { + let paths = disk.paths(); + // This is hard-coded to be "0", but that's because we aren't + // really parsing the whole partition table before considering + // where this would be see. 
+ paths + .partition_path(0, false) + .ok_or_else(|| PooledDiskError::ZpoolDoesNotExist) + } + Self::Synthetic(raw) => Ok(raw.path.clone()), + } + } + pub fn devfs_path(&self) -> &Utf8PathBuf { match self { Self::Real(disk) => disk.devfs_path(), Self::Synthetic(_) => unreachable!(), } } + + pub fn slot(&self) -> i64 { + match self { + Self::Real(disk) => disk.slot(), + Self::Synthetic(disk) => disk.slot, + } + } } /// A physical [`PooledDisk`] or a [`SyntheticDisk`] that contains or is backed @@ -151,15 +282,23 @@ pub enum Disk { impl Disk { pub async fn new( log: &Logger, + mount_config: &MountConfig, raw_disk: RawDisk, + pool_id: Option, key_requester: Option<&StorageKeyRequester>, ) -> Result { - let disk = match raw_disk { - RawDisk::Real(disk) => PooledDisk::new(log, disk)?.into(), - RawDisk::Synthetic(disk) => Disk::Synthetic(disk), + let disk: Disk = match raw_disk { + RawDisk::Real(disk) => PooledDisk::new(log, disk, pool_id)?.into(), + RawDisk::Synthetic(disk) => Disk::Synthetic(SyntheticDisk::new( + log, + mount_config, + disk, + pool_id, + )), }; dataset::ensure_zpool_has_datasets( log, + mount_config, disk.zpool_name(), disk.identity(), key_requester, @@ -194,7 +333,7 @@ impl Disk { Self::Real(disk) => disk.is_boot_disk, Self::Synthetic(disk) => { // Just label any M.2 the boot disk. - disk.zpool_name.kind() == ZpoolKind::Internal + disk.raw.variant == DiskVariant::M2 } } } @@ -202,7 +341,7 @@ impl Disk { pub fn identity(&self) -> &DiskIdentity { match self { Self::Real(disk) => &disk.identity, - Self::Synthetic(disk) => &disk.identity, + Self::Synthetic(disk) => &disk.raw.identity, } } @@ -261,7 +400,25 @@ impl Disk { pub fn slot(&self) -> i64 { match self { Self::Real(disk) => disk.slot, - Self::Synthetic(disk) => disk.slot, + Self::Synthetic(disk) => disk.raw.slot, + } + } +} + +impl From for RawDisk { + fn from(disk: Disk) -> RawDisk { + match disk { + Disk::Real(pooled_disk) => RawDisk::Real(UnparsedDisk::new( + pooled_disk.paths.devfs_path, + pooled_disk.paths.dev_path, + pooled_disk.slot, + pooled_disk.variant, + pooled_disk.identity, + pooled_disk.is_boot_disk, + )), + Disk::Synthetic(synthetic_disk) => { + RawDisk::Synthetic(synthetic_disk.raw) + } } } } diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index b9f97ee428..4c5582fd79 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -8,6 +8,7 @@ use crate::dataset::{DatasetError, DatasetName}; use crate::disk::DiskError; use camino::Utf8PathBuf; use omicron_common::api::external::ByteCountRangeError; +use omicron_common::api::external::Generation; use uuid::Uuid; #[derive(thiserror::Error, Debug)] @@ -49,9 +50,6 @@ pub enum Error { #[error(transparent)] ZoneInstall(#[from] illumos_utils::running_zone::InstallZoneError), - #[error("No U.2 Zpools found")] - NoU2Zpool, - #[error("Failed to parse UUID from {path}: {err}")] ParseUuid { path: Utf8PathBuf, @@ -76,6 +74,50 @@ pub enum Error { err: uuid::Error, }, + #[error("Not ready to manage U.2s (key manager is not ready)")] + KeyManagerNotReady, + + #[error("Physical disk configuration out-of-date (asked for {requested}, but latest is {current})")] + PhysicalDiskConfigurationOutdated { + requested: Generation, + current: Generation, + }, + + #[error("Failed to update ledger in internal storage")] + Ledger(#[from] omicron_common::ledger::Error), + + #[error("No ledger found on internal storage")] + LedgerNotFound, + #[error("Zpool Not Found: {0}")] ZpoolNotFound(String), } + +impl From for omicron_common::api::external::Error { + fn 
from(err: Error) -> Self { + use omicron_common::api::external::Error as ExternalError; + use omicron_common::api::external::LookupType; + use omicron_common::api::external::ResourceType; + + match err { + Error::LedgerNotFound => ExternalError::ObjectNotFound { + type_name: ResourceType::SledLedger, + lookup_type: LookupType::ByOther( + "Could not find record on M.2s".to_string(), + ), + }, + Error::ZpoolNotFound(name) => ExternalError::ObjectNotFound { + type_name: ResourceType::Zpool, + lookup_type: LookupType::ByName(name), + }, + Error::KeyManagerNotReady => ExternalError::ServiceUnavailable { + internal_message: + "Not ready to manage disks, try again after trust quorum" + .to_string(), + }, + _ => omicron_common::api::external::Error::InternalError { + internal_message: err.to_string(), + }, + } + } +} diff --git a/sled-storage/src/keyfile.rs b/sled-storage/src/keyfile.rs index 48e5d9a528..2c0524aec7 100644 --- a/sled-storage/src/keyfile.rs +++ b/sled-storage/src/keyfile.rs @@ -27,6 +27,7 @@ impl KeyFile { key: &[u8; 32], log: &Logger, ) -> std::io::Result { + info!(log, "About to create keyfile"; "path" => ?path); // We want to overwrite any existing contents. let mut file = tokio::fs::OpenOptions::new() .create(true) @@ -34,7 +35,7 @@ impl KeyFile { .open(&path.0) .await?; file.write_all(key).await?; - info!(log, "Created keyfile {}", path); + info!(log, "Created keyfile"; "path" => ?path); Ok(KeyFile { path, file, diff --git a/sled-storage/src/lib.rs b/sled-storage/src/lib.rs index d4b64c55a5..681f003b52 100644 --- a/sled-storage/src/lib.rs +++ b/sled-storage/src/lib.rs @@ -8,10 +8,13 @@ //! hardware partitions from the `sled-hardware` crate. It utilizes the //! `illumos-utils` crate to actually perform ZFS related OS calls. +pub mod config; pub mod dataset; pub mod disk; pub mod error; pub(crate) mod keyfile; pub mod manager; +#[cfg(any(feature = "testing", test))] +pub mod manager_test_harness; pub mod pool; pub mod resources; diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index bb749cc366..2cd79e6556 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -6,17 +6,24 @@ use std::collections::HashSet; -use crate::dataset::{DatasetError, DatasetName}; -use crate::disk::{Disk, DiskError, RawDisk}; +use crate::config::MountConfig; +use crate::dataset::{DatasetName, CONFIG_DATASET}; +use crate::disk::{ + OmicronPhysicalDiskConfig, OmicronPhysicalDisksConfig, RawDisk, +}; use crate::error::Error; -use crate::resources::{AddDiskResult, StorageResources}; +use crate::resources::{AllDisks, DisksManagementResult, StorageResources}; use camino::Utf8PathBuf; +use debug_ignore::DebugIgnore; +use futures::future::FutureExt; use illumos_utils::zfs::{Mountpoint, Zfs}; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; +use omicron_common::ledger::Ledger; use sled_hardware::DiskVariant; -use slog::{error, info, o, warn, Logger}; +use slog::{info, o, warn, Logger}; +use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; use tokio::time::{interval, Duration, MissedTickBehavior}; use uuid::Uuid; @@ -48,80 +55,199 @@ use uuid::Uuid; // large messages. // // Here we start relatively small so that we can evaluate our choice over time. 
-const QUEUE_SIZE: usize = 256; +pub(crate) const QUEUE_SIZE: usize = 256; + +const SYNCHRONIZE_INTERVAL: Duration = Duration::from_secs(10); + +// The filename of the ledger storing physical disk info +const DISKS_LEDGER_FILENAME: &str = "omicron-physical-disks.json"; #[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum StorageManagerState { +enum StorageManagerState { + // We know that any attempts to manage disks will fail, as the key manager + // is not ready yet. WaitingForKeyManager, - QueueingDisks, - Normal, + + // This state is used to indicate that the set of "control plane" physical + // disks and the set of "observed" disks may be out-of-sync. + // + // This can happen when: + // - The sled boots, and the ledger of "control plane disks" is initially + // loaded. + // - A U.2 is added to the disk after initial boot. + // + // In both of these cases, if trust quorum hasn't been established, it's + // possible that the request to [Self::manage_disks] will need to retry. + SynchronizationNeeded, + + // This state indicates the key manager is ready, and the storage manager + // believes that the set of control plane disks is in-sync with the set of + // observed disks. + Synchronized, } #[derive(Debug)] -struct NewFilesystemRequest { +pub(crate) struct NewFilesystemRequest { dataset_id: Uuid, dataset_name: DatasetName, - responder: oneshot::Sender>, + responder: DebugIgnore>>, } #[derive(Debug)] -enum StorageRequest { - AddDisk(RawDisk), - RemoveDisk(RawDisk), - DisksChanged(HashSet), +pub(crate) enum StorageRequest { + // Requests to manage which devices the sled considers active. + // These are manipulated by hardware management. + DetectedRawDisk { + raw_disk: RawDisk, + tx: DebugIgnore>>, + }, + DetectedRawDiskRemoval { + raw_disk: RawDisk, + tx: DebugIgnore>>, + }, + DetectedRawDisksChanged { + raw_disks: HashSet, + tx: DebugIgnore>>, + }, + + // Requests to explicitly manage or stop managing a set of devices + OmicronPhysicalDisksEnsure { + config: OmicronPhysicalDisksConfig, + tx: DebugIgnore>>, + }, + + // Reads the last set of physical disks that were successfully ensured. + OmicronPhysicalDisksList { + tx: DebugIgnore< + oneshot::Sender>, + >, + }, + + // Requests the creation of a new dataset within a managed disk. NewFilesystem(NewFilesystemRequest), + KeyManagerReady, + /// This will always grab the latest state after any new updates, as it /// serializes through the `StorageManager` task after all prior requests. /// This serialization is particularly useful for tests. - GetLatestResources(oneshot::Sender), - - /// Get the internal task state of the manager - GetManagerState(oneshot::Sender), -} - -/// Data managed internally to the StorageManagerTask that can be useful -/// to clients for debugging purposes, and that isn't exposed in other ways. -#[derive(Debug, Clone)] -pub struct StorageManagerData { - pub state: StorageManagerState, - pub queued_u2_drives: HashSet, + GetLatestResources(DebugIgnore>), } /// A mechanism for interacting with the [`StorageManager`] #[derive(Clone)] pub struct StorageHandle { tx: mpsc::Sender, - resource_updates: watch::Receiver, + disk_updates: watch::Receiver, } impl StorageHandle { + pub(crate) fn new( + tx: mpsc::Sender, + disk_updates: watch::Receiver, + ) -> Self { + Self { tx, disk_updates } + } + /// Adds a disk and associated zpool to the storage manager. 
- pub async fn upsert_disk(&self, disk: RawDisk) { - self.tx.send(StorageRequest::AddDisk(disk)).await.unwrap(); + /// + /// Returns a future which completes once the notification has been + /// processed. Awaiting this future is optional. + pub async fn detected_raw_disk( + &self, + raw_disk: RawDisk, + ) -> impl Future> { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DetectedRawDisk { raw_disk, tx: tx.into() }) + .await + .unwrap(); + + rx.map(|result| result.unwrap()) } /// Removes a disk, if it's tracked by the storage manager, as well /// as any associated zpools. - pub async fn delete_disk(&self, disk: RawDisk) { - self.tx.send(StorageRequest::RemoveDisk(disk)).await.unwrap(); + /// + /// Returns a future which completes once the notification has been + /// processed. Awaiting this future is optional. + pub async fn detected_raw_disk_removal( + &self, + raw_disk: RawDisk, + ) -> impl Future> { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::DetectedRawDiskRemoval { + raw_disk, + tx: tx.into(), + }) + .await + .unwrap(); + + rx.map(|result| result.unwrap()) } /// Ensures that the storage manager tracks exactly the provided disks. /// - /// This acts similar to a batch [Self::upsert_disk] for all new disks, and - /// [Self::delete_disk] for all removed disks. + /// This acts similar to a batch [Self::detected_raw_disk] for all new disks, and + /// [Self::detected_raw_disk_removal] for all removed disks. /// /// If errors occur, an arbitrary "one" of them will be returned, but a /// best-effort attempt to add all disks will still be attempted. - pub async fn ensure_using_exactly_these_disks(&self, raw_disks: I) + /// + /// Returns a future which completes once the notification has been + /// processed. Awaiting this future is optional. + pub async fn ensure_using_exactly_these_disks( + &self, + raw_disks: I, + ) -> impl Future> where I: IntoIterator, { + let (tx, rx) = oneshot::channel(); self.tx - .send(StorageRequest::DisksChanged(raw_disks.into_iter().collect())) + .send(StorageRequest::DetectedRawDisksChanged { + raw_disks: raw_disks.into_iter().collect(), + tx: tx.into(), + }) .await .unwrap(); + rx.map(|result| result.unwrap()) + } + + pub async fn omicron_physical_disks_ensure( + &self, + config: OmicronPhysicalDisksConfig, + ) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::OmicronPhysicalDisksEnsure { + config, + tx: tx.into(), + }) + .await + .unwrap(); + + rx.await.unwrap() + } + + /// Reads the last value written to storage by + /// [Self::omicron_physical_disks_ensure]. + /// + /// This should be contrasted with both inventory and the result + /// of [Self::get_latest_disks] -- since this function focuses on + /// "Control Plane disks", it may return information about disks + /// that are no longer detected within the hardware of this sled. + pub async fn omicron_physical_disks_list( + &self, + ) -> Result { + let (tx, rx) = oneshot::channel(); + self.tx + .send(StorageRequest::OmicronPhysicalDisksList { tx: tx.into() }) + .await + .unwrap(); + + rx.await.unwrap() } /// Notify the [`StorageManager`] that the [`key_manager::KeyManager`] @@ -139,36 +265,35 @@ impl StorageHandle { /// Wait for a boot disk to be initialized pub async fn wait_for_boot_disk(&mut self) -> (DiskIdentity, ZpoolName) { + // We create a distinct receiver to avoid colliding with + // the receiver used by [Self::wait_for_changes]. 
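+        // Each cloned `watch::Receiver` tracks its own "last seen" version, so
+        // waiting here does not consume notifications that a concurrent
+        // `wait_for_changes` caller still needs.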
+ let mut receiver = self.disk_updates.clone(); loop { - let resources = self.resource_updates.borrow_and_update(); + let resources = receiver.borrow_and_update(); if let Some((disk_id, zpool_name)) = resources.boot_disk() { return (disk_id, zpool_name); } drop(resources); // We panic if the sender is dropped, as this means // the StorageManager has gone away, which it should not do. - self.resource_updates.changed().await.unwrap(); + receiver.changed().await.unwrap(); } } /// Wait for any storage resource changes - pub async fn wait_for_changes(&mut self) -> StorageResources { - self.resource_updates.changed().await.unwrap(); - self.resource_updates.borrow_and_update().clone() + pub async fn wait_for_changes(&mut self) -> AllDisks { + self.disk_updates.changed().await.unwrap(); + self.disk_updates.borrow_and_update().clone() } - /// Retrieve the latest value of `StorageResources` from the + /// Retrieve the latest value of `AllDisks` from the /// `StorageManager` task. - pub async fn get_latest_resources(&self) -> StorageResources { - let (tx, rx) = oneshot::channel(); - self.tx.send(StorageRequest::GetLatestResources(tx)).await.unwrap(); - rx.await.unwrap() - } - - /// Return internal data useful for debugging and testing - pub async fn get_manager_state(&self) -> StorageManagerData { + pub async fn get_latest_disks(&self) -> AllDisks { let (tx, rx) = oneshot::channel(); - self.tx.send(StorageRequest::GetManagerState(tx)).await.unwrap(); + self.tx + .send(StorageRequest::GetLatestResources(tx.into())) + .await + .unwrap(); rx.await.unwrap() } @@ -178,112 +303,42 @@ impl StorageHandle { dataset_name: DatasetName, ) -> Result<(), Error> { let (tx, rx) = oneshot::channel(); - let request = - NewFilesystemRequest { dataset_id, dataset_name, responder: tx }; + let request = NewFilesystemRequest { + dataset_id, + dataset_name, + responder: tx.into(), + }; self.tx.send(StorageRequest::NewFilesystem(request)).await.unwrap(); rx.await.unwrap() } } - -// Some sled-agent tests cannot currently use the real StorageManager -// and want to fake the entire behavior, but still have access to the -// `StorageResources`. We allow this via use of the `FakeStorageManager` -// that will respond to real storage requests from a real `StorageHandle`. 
-#[cfg(feature = "testing")] -pub struct FakeStorageManager { - rx: mpsc::Receiver, - resources: StorageResources, - resource_updates: watch::Sender, -} - -#[cfg(feature = "testing")] -impl FakeStorageManager { - pub fn new() -> (Self, StorageHandle) { - let (tx, rx) = mpsc::channel(QUEUE_SIZE); - let resources = StorageResources::default(); - let (update_tx, update_rx) = watch::channel(resources.clone()); - ( - Self { rx, resources, resource_updates: update_tx }, - StorageHandle { tx, resource_updates: update_rx }, - ) - } - - /// Run the main receive loop of the `FakeStorageManager` - /// - /// This should be spawned into a tokio task - pub async fn run(mut self) { - loop { - match self.rx.recv().await { - Some(StorageRequest::AddDisk(raw_disk)) => { - if self.add_disk(raw_disk).disk_inserted() { - self.resource_updates - .send_replace(self.resources.clone()); - } - } - Some(StorageRequest::GetLatestResources(tx)) => { - let _ = tx.send(self.resources.clone()); - } - Some(_) => { - unreachable!(); - } - None => break, - } - } - } - - // Add a disk to `StorageResources` if it is new and return true if so - fn add_disk(&mut self, raw_disk: RawDisk) -> AddDiskResult { - let disk = match raw_disk { - RawDisk::Real(_) => { - panic!( - "Only synthetic disks can be used with `FakeStorageManager`" - ); - } - RawDisk::Synthetic(synthetic_disk) => { - Disk::Synthetic(synthetic_disk) - } - }; - self.resources.insert_fake_disk(disk) - } -} - /// The storage manager responsible for the state of the storage /// on a sled. The storage manager runs in its own task and is interacted /// with via the [`StorageHandle`]. pub struct StorageManager { log: Logger, state: StorageManagerState, - // Used to find the capacity of the channel for tracking purposes - tx: mpsc::Sender, rx: mpsc::Receiver, resources: StorageResources, - queued_u2_drives: HashSet, - key_requester: StorageKeyRequester, - resource_updates: watch::Sender, - last_logged_capacity: usize, } impl StorageManager { pub fn new( log: &Logger, + mount_config: MountConfig, key_requester: StorageKeyRequester, ) -> (StorageManager, StorageHandle) { let (tx, rx) = mpsc::channel(QUEUE_SIZE); - let resources = StorageResources::default(); - let (update_tx, update_rx) = watch::channel(resources.clone()); + let resources = StorageResources::new(log, mount_config, key_requester); + let disk_updates = resources.watch_disks(); ( StorageManager { log: log.new(o!("component" => "StorageManager")), state: StorageManagerState::WaitingForKeyManager, - tx: tx.clone(), rx, resources, - queued_u2_drives: HashSet::new(), - key_requester, - resource_updates: update_tx, - last_logged_capacity: QUEUE_SIZE, }, - StorageHandle { tx, resource_updates: update_rx }, + StorageHandle::new(tx, disk_updates), ) } @@ -291,22 +346,29 @@ impl StorageManager { /// /// This should be spawned into a tokio task pub async fn run(mut self) { + let mut interval = interval(SYNCHRONIZE_INTERVAL); + interval.set_missed_tick_behavior(MissedTickBehavior::Delay); + tokio::pin!(interval); + loop { - const QUEUED_DISK_RETRY_TIMEOUT: Duration = Duration::from_secs(10); - let mut interval = interval(QUEUED_DISK_RETRY_TIMEOUT); - interval.set_missed_tick_behavior(MissedTickBehavior::Delay); tokio::select! { - res = self.step() => { - if let Err(e) = res { + Some(req) = self.rx.recv() => { + // It's critical that we don't "step" directly in the select + // branch, as that could cancel an ongoing request if it + // fires while a request is being processed. 
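+                    // (When one select! branch completes, the futures in the
+                    // other branches are dropped mid-poll.)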
+ // + // Instead, if we receive any request, we stop + // "select!"-ing and fully process the request before + // continuing. + if let Err(e) = self.step(req).await { warn!(self.log, "{e}"); } } _ = interval.tick(), - if self.state == StorageManagerState::QueueingDisks => + if self.state == StorageManagerState::SynchronizationNeeded => { - if self.add_queued_disks().await { - let _ = self.resource_updates.send_replace(self.resources.clone()); - } + info!(self.log, "automatically managing disks"); + self.manage_disks().await; } } } @@ -315,191 +377,387 @@ impl StorageManager { /// Process the next event /// /// This is useful for testing/debugging - pub async fn step(&mut self) -> Result<(), Error> { - const CAPACITY_LOG_THRESHOLD: usize = 10; - // We check the capacity and log it every time it changes by at least 10 - // entries in either direction. - let current = self.tx.capacity(); - if self.last_logged_capacity.saturating_sub(current) - >= CAPACITY_LOG_THRESHOLD - { - info!( - self.log, - "Channel capacity decreased"; - "previous" => ?self.last_logged_capacity, - "current" => ?current - ); - self.last_logged_capacity = current; - } else if current.saturating_sub(self.last_logged_capacity) - >= CAPACITY_LOG_THRESHOLD - { - info!( - self.log, - "Channel capacity increased"; - "previous" => ?self.last_logged_capacity, - "current" => ?current - ); - self.last_logged_capacity = current; - } - // The sending side never disappears because we hold a copy - let req = self.rx.recv().await.unwrap(); + async fn step(&mut self, req: StorageRequest) -> Result<(), Error> { info!(self.log, "Received {:?}", req); - let should_send_updates = match req { - StorageRequest::AddDisk(raw_disk) => { - self.add_disk(raw_disk).await?.disk_inserted() + + match req { + StorageRequest::DetectedRawDisk { raw_disk, tx } => { + let result = self.detected_raw_disk(raw_disk).await; + if let Err(ref err) = &result { + warn!(self.log, "Failed to add raw disk"; "err" => ?err); + } + let _ = tx.0.send(result); + } + StorageRequest::DetectedRawDiskRemoval { raw_disk, tx } => { + self.detected_raw_disk_removal(raw_disk); + let _ = tx.0.send(Ok(())); + } + StorageRequest::DetectedRawDisksChanged { raw_disks, tx } => { + self.ensure_using_exactly_these_disks(raw_disks).await; + let _ = tx.0.send(Ok(())); + } + StorageRequest::OmicronPhysicalDisksEnsure { config, tx } => { + let _ = + tx.0.send(self.omicron_physical_disks_ensure(config).await); } - StorageRequest::RemoveDisk(raw_disk) => self.remove_disk(raw_disk), - StorageRequest::DisksChanged(raw_disks) => { - self.ensure_using_exactly_these_disks(raw_disks).await + StorageRequest::OmicronPhysicalDisksList { tx } => { + let _ = tx.0.send(self.omicron_physical_disks_list().await); } StorageRequest::NewFilesystem(request) => { let result = self.add_dataset(&request).await; - if result.is_err() { - warn!(self.log, "{result:?}"); + if let Err(ref err) = &result { + warn!(self.log, "Failed to add dataset"; "err" => ?err); } - let _ = request.responder.send(result); - false + let _ = request.responder.0.send(result); } StorageRequest::KeyManagerReady => { - self.state = StorageManagerState::Normal; - self.add_queued_disks().await + self.key_manager_ready().await?; } StorageRequest::GetLatestResources(tx) => { - let _ = tx.send(self.resources.clone()); - false - } - StorageRequest::GetManagerState(tx) => { - let _ = tx.send(StorageManagerData { - state: self.state, - queued_u2_drives: self.queued_u2_drives.clone(), - }); - false + let _ = tx.0.send(self.resources.disks().clone()); 
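+                    // (As in the arms above, a send error only means the
+                    // caller stopped waiting for the reply.)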
} }; - if should_send_updates { - let _ = self.resource_updates.send_replace(self.resources.clone()); - } - Ok(()) } - // Loop through all queued disks inserting them into [`StorageResources`] - // unless we hit a transient error. If we hit a transient error, we return - // and wait for the next retry window to re-call this method. If we hit a - // permanent error we log it, but we continue inserting queued disks. - // - // Return true if updates should be sent to watchers, false otherwise - async fn add_queued_disks(&mut self) -> bool { + async fn manage_disks(&mut self) { + let result = self.resources.synchronize_disk_management().await; + + if result.has_retryable_error() { + // This is logged as "info", not "warn", as it can happen before + // trust quorum has been established. + info!( + self.log, + "Failed to synchronize disks, but will retry"; + "result" => ?result, + ); + return; + } + + self.state = StorageManagerState::Synchronized; + + if result.has_error() { + warn!( + self.log, + "Failed to synchronize disks due to permanant error"; + "result" => #?result, + ); + return; + } + info!( self.log, - "Attempting to add queued disks"; - "num_disks" => %self.queued_u2_drives.len() + "Successfully synchronized disks without error"; + "result" => ?result, ); - self.state = StorageManagerState::Normal; - - let mut send_updates = false; - - // Disks that should be requeued. - let queued = self.queued_u2_drives.clone(); - let mut to_dequeue = HashSet::new(); - for disk in queued.iter() { - if self.state == StorageManagerState::QueueingDisks { - // We hit a transient error in a prior iteration. - break; - } else { - match self.add_u2_disk(disk.clone()).await { - Err(_) => { - // This is an unrecoverable error, so we don't queue the - // disk again. - to_dequeue.insert(disk); - } - Ok(AddDiskResult::DiskInserted) => { - send_updates = true; - to_dequeue.insert(disk); - } - Ok(AddDiskResult::DiskAlreadyInserted) => { - to_dequeue.insert(disk); - } - Ok(AddDiskResult::DiskQueued) => (), + } + + async fn all_omicron_disk_ledgers(&self) -> Vec { + self.resources + .disks() + .all_m2_mountpoints(CONFIG_DATASET) + .into_iter() + .map(|p| p.join(DISKS_LEDGER_FILENAME)) + .collect() + } + + // Manages a newly detected disk that has been attached to this sled. + // + // For U.2s: we update our inventory. + // For M.2s: we do the same, but also begin "managing" the disk so + // it can automatically be in-use. + async fn detected_raw_disk( + &mut self, + raw_disk: RawDisk, + ) -> Result<(), Error> { + // In other words, the decision of "should we use this U.2" requires + // coordination with the control plane at large. + let needs_synchronization = + matches!(raw_disk.variant(), DiskVariant::U2); + self.resources.insert_disk(raw_disk).await?; + + if needs_synchronization { + match self.state { + // We'll synchronize once the key manager comes up. + StorageManagerState::WaitingForKeyManager => (), + // In these cases, we'd benefit from another call + // to "manage_disks" from StorageManager task runner. 
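+                // That call happens on the next SYNCHRONIZE_INTERVAL tick of
+                // the run() loop, once the state is set below.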
+ StorageManagerState::SynchronizationNeeded + | StorageManagerState::Synchronized => { + self.state = StorageManagerState::SynchronizationNeeded; + + // TODO(https://github.com/oxidecomputer/omicron/issues/5328): + // We can remove this call once we've migrated everyone to a + // world that uses the ledger -- normally we'd only need to + // load the storage config once, when we know that the key + // manager is ready, but without a ledger, we may need to + // retry auto-management when any new U.2 appears. + self.load_storage_config().await?; } } } - // Dequeue any inserted disks - self.queued_u2_drives.retain(|k| !to_dequeue.contains(k)); - send_updates + + Ok(()) } - // Add a disk to `StorageResources` if it is new, - // updated, or its pool has been updated as determined by - // [`$crate::resources::StorageResources::insert_disk`] and we decide not to - // queue the disk for later addition. - async fn add_disk( - &mut self, - raw_disk: RawDisk, - ) -> Result { - match raw_disk.variant() { - DiskVariant::U2 => self.add_u2_disk(raw_disk).await, - DiskVariant::M2 => self.add_m2_disk(raw_disk).await, + async fn load_ledger(&self) -> Option> { + let ledger_paths = self.all_omicron_disk_ledgers().await; + let log = self.log.new(o!("request" => "load_ledger")); + let maybe_ledger = Ledger::::new( + &log, + ledger_paths.clone(), + ) + .await; + + match maybe_ledger { + Some(ledger) => { + info!(self.log, "Ledger of physical disks exists"); + return Some(ledger); + } + None => { + info!(self.log, "No ledger of physical disks exists"); + return None; + } } } - // Add a U.2 disk to [`StorageResources`] or queue it to be added later - async fn add_u2_disk( + async fn key_manager_ready(&mut self) -> Result<(), Error> { + self.load_storage_config().await + } + + async fn load_storage_config(&mut self) -> Result<(), Error> { + info!(self.log, "Loading storage config"); + // Set the state to "synchronization needed", to force us to try to + // asynchronously ensure that disks are ready. + self.state = StorageManagerState::SynchronizationNeeded; + + // Now that we're actually able to unpack U.2s, attempt to load the + // set of disks which we previously stored in the ledger, if one + // existed. + let ledger = self.load_ledger().await; + if let Some(ledger) = ledger { + info!(self.log, "Setting StorageResources state to match ledger"); + + // Identify which disks should be managed by the control + // plane, and adopt all requested disks into the control plane + // in a background task (see: [Self::manage_disks]). + self.resources.set_config(&ledger.data().disks); + } else { + info!(self.log, "KeyManager ready, but no ledger detected"); + let mut synthetic_config = + self.resources.get_config().values().cloned().collect(); + // TODO(https://github.com/oxidecomputer/omicron/issues/5328): Once + // we are confident that we have migrated to a world where this + // ledger is universally used, we should remove the following + // kludge. The sled agent should not need to "self-manage" anything! + let changed = self + .self_manage_disks_with_zpools(&mut synthetic_config) + .await?; + if !changed { + info!(self.log, "No disks to be automatically managed"); + return Ok(()); + } + info!(self.log, "auto-managed disks"; "count" => synthetic_config.len()); + self.resources.set_config(&synthetic_config); + } + + Ok(()) + } + + // NOTE: What follows is an exceptional case: one where we have + // no record of "Control Plane Physical Disks", but we have zpools + // on our U.2s, and we want to use them regardless. 
+ // + // THIS WOULD NORMALLY BE INCORRECT BEHAVIOR. In the future, these + // zpools will not be "automatically imported", and instead, we'll + // let Nexus decide whether or not to reformat the disks. + // + // However, because we are transitioning from "the set of disks / + // zpools is implicit" to a world where that set is explicit, this + // is a necessary transitional tool. + // + // Returns "true" if the synthetic_config has changed. + async fn self_manage_disks_with_zpools( &mut self, - raw_disk: RawDisk, - ) -> Result { - if self.state != StorageManagerState::Normal { - self.queued_u2_drives.insert(raw_disk); - return Ok(AddDiskResult::DiskQueued); + synthetic_config: &mut Vec, + ) -> Result { + let mut changed = false; + for (identity, disk) in self.resources.disks().values.iter() { + match disk { + crate::resources::ManagedDisk::Unmanaged(raw) => { + let zpool_path = match raw.u2_zpool_path() { + Ok(zpool_path) => zpool_path, + Err(err) => { + info!(self.log, "Cannot find zpool path"; "identity" => ?identity, "err" => ?err); + continue; + } + }; + + let zpool_name = + match sled_hardware::disk::check_if_zpool_exists( + &zpool_path, + ) { + Ok(zpool_name) => zpool_name, + Err(err) => { + info!(self.log, "Zpool does not exist"; "identity" => ?identity, "err" => ?err); + continue; + } + }; + + info!(self.log, "Found existing zpool on device without ledger"; + "identity" => ?identity, + "zpool" => ?zpool_name); + + // We found an unmanaged disk with a zpool, even though + // we have no prior record of a ledger of control-plane + // disks. + synthetic_config.push( + // These disks don't have a control-plane UUID -- + // report "nil" until they're overwritten with real + // values. + OmicronPhysicalDiskConfig { + identity: identity.clone(), + id: Uuid::nil(), + pool_id: zpool_name.id(), + }, + ); + changed = true; + } + _ => continue, + } } + Ok(changed) + } - match Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) - .await - { - Ok(disk) => self.resources.insert_disk(disk), - Err(err @ DiskError::Dataset(DatasetError::KeyManager(_))) => { - warn!( - self.log, - "Transient error: {err}: queuing disk"; - "disk_id" => ?raw_disk.identity() + // Makes an U.2 disk managed by the control plane within [`StorageResources`]. + async fn omicron_physical_disks_ensure( + &mut self, + mut config: OmicronPhysicalDisksConfig, + ) -> Result { + let log = + self.log.new(o!("request" => "omicron_physical_disks_ensure")); + + // Ensure that the set of disks arrives in a consistent order. + config + .disks + .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap()); + + // We rely on the schema being stable across reboots -- observe + // "test_omicron_physical_disks_schema" below for that property + // guarantee. 
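+        //
+        // For a rough idea of what gets ledgered, the serialized config looks
+        // approximately like (illustrative only):
+        //
+        //   {
+        //     "generation": 1,
+        //     "disks": [
+        //       { "identity": { .. }, "id": "<uuid>", "pool_id": "<uuid>" }
+        //     ]
+        //   }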
+ let ledger_paths = self.all_omicron_disk_ledgers().await; + let maybe_ledger = Ledger::::new( + &log, + ledger_paths.clone(), + ) + .await; + + let mut ledger = match maybe_ledger { + Some(ledger) => { + info!( + log, + "Comparing 'requested disks' to ledger on internal storage" ); - self.queued_u2_drives.insert(raw_disk); - self.state = StorageManagerState::QueueingDisks; - Ok(AddDiskResult::DiskQueued) + let ledger_data = ledger.data(); + if config.generation < ledger_data.generation { + warn!( + log, + "Request looks out-of-date compared to prior request" + ); + return Err(Error::PhysicalDiskConfigurationOutdated { + requested: config.generation, + current: ledger_data.generation, + }); + } + + // TODO: If the generation is equal, check that the values are + // also equal. + + info!(log, "Request looks newer than prior requests"); + ledger } - Err(err) => { - error!( - self.log, - "Persistent error:not queueing disk"; - "err" => ?err, - "disk_id" => ?raw_disk.identity() - ); - Err(err.into()) + None => { + info!(log, "No previously-stored 'requested disks', creating new ledger"); + Ledger::::new_with( + &log, + ledger_paths.clone(), + OmicronPhysicalDisksConfig::new(), + ) } + }; + + let result = + self.omicron_physical_disks_ensure_internal(&log, &config).await?; + + let ledger_data = ledger.data_mut(); + if *ledger_data == config { + return Ok(result); } + *ledger_data = config; + ledger.commit().await?; + + Ok(result) } - // Add a U.2 disk to [`StorageResources`] if new and return `Ok(true)` if so - // + // Updates [StorageResources] to manage the disks requested by `config`, if + // those disks exist. // - // We never queue M.2 drives, as they don't rely on [`KeyManager`] based - // encryption - async fn add_m2_disk( + // Makes no attempts to manipulate the ledger storage. + async fn omicron_physical_disks_ensure_internal( &mut self, - raw_disk: RawDisk, - ) -> Result { - let disk = - Disk::new(&self.log, raw_disk.clone(), Some(&self.key_requester)) - .await?; - self.resources.insert_disk(disk) + log: &Logger, + config: &OmicronPhysicalDisksConfig, + ) -> Result { + if self.state == StorageManagerState::WaitingForKeyManager { + warn!( + log, + "Not ready to manage storage yet (waiting for the key manager)" + ); + return Err(Error::KeyManagerNotReady); + } + + // Identify which disks should be managed by the control + // plane, and adopt all requested disks into the control plane. + self.resources.set_config(&config.disks); + + // Actually try to "manage" those disks, which may involve formatting + // zpools and conforming partitions to those expected by the control + // plane. + Ok(self.resources.synchronize_disk_management().await) + } + + async fn omicron_physical_disks_list( + &mut self, + ) -> Result { + let log = self.log.new(o!("request" => "omicron_physical_disks_list")); + + // TODO(https://github.com/oxidecomputer/omicron/issues/5328): This + // could just use "resources.get_config", but that'll be more feasible + // once we don't have to cons up a fake "Generation" number. 
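+        //
+        // Until then, the latest config is read straight back from the ledger
+        // on the M.2 config datasets.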
+ + let ledger_paths = self.all_omicron_disk_ledgers().await; + let maybe_ledger = Ledger::::new( + &log, + ledger_paths.clone(), + ) + .await; + + match maybe_ledger { + Some(ledger) => { + info!(log, "Found ledger on internal storage"); + return Ok(ledger.data().clone()); + } + None => { + info!(log, "No ledger detected on internal storage"); + return Err(Error::LedgerNotFound); + } + } } // Delete a real disk and return `true` if the disk was actually removed - fn remove_disk(&mut self, raw_disk: RawDisk) -> bool { - // If the disk is a U.2, we want to first delete it from any queued disks - let _ = self.queued_u2_drives.remove(&raw_disk); - self.resources.remove_disk(raw_disk.identity()) + fn detected_raw_disk_removal(&mut self, raw_disk: RawDisk) { + self.resources.remove_disk(raw_disk.identity()); } // Find all disks to remove that are not in raw_disks and remove them. Then @@ -509,13 +767,7 @@ impl StorageManager { async fn ensure_using_exactly_these_disks( &mut self, raw_disks: HashSet, - ) -> bool { - let mut should_update = false; - - // Clear out any queued U.2 disks that are real. - // We keep synthetic disks, as they are only added once. - self.queued_u2_drives.retain(|d| d.is_synthetic()); - + ) { let all_ids: HashSet<_> = raw_disks.iter().map(|d| d.identity()).collect(); @@ -523,8 +775,8 @@ impl StorageManager { let to_remove: Vec = self .resources .disks() - .keys() - .filter_map(|id| { + .iter_all() + .filter_map(|(id, _variant, _slot)| { if !all_ids.contains(id) { Some(id.clone()) } else { @@ -534,27 +786,19 @@ impl StorageManager { .collect(); for id in to_remove { - if self.resources.remove_disk(&id) { - should_update = true; - } + self.resources.remove_disk(&id); } for raw_disk in raw_disks { let disk_id = raw_disk.identity().clone(); - match self.add_disk(raw_disk).await { - Ok(AddDiskResult::DiskInserted) => should_update = true, - Ok(_) => (), - Err(err) => { - warn!( - self.log, - "Failed to add disk to storage resources: {err}"; - "disk_id" => ?disk_id - ); - } + if let Err(err) = self.detected_raw_disk(raw_disk).await { + warn!( + self.log, + "Failed to add disk to storage resources: {err}"; + "disk_id" => ?disk_id + ); } } - - should_update } // Attempts to add a dataset within a zpool, according to `request`. 
@@ -562,15 +806,15 @@ impl StorageManager { &mut self, request: &NewFilesystemRequest, ) -> Result<(), Error> { - info!(self.log, "add_dataset: {:?}", request); + info!(self.log, "add_dataset"; "request" => ?request); if !self .resources .disks() - .values() - .any(|(_, pool)| &pool.name == request.dataset_name.pool()) + .iter_managed() + .any(|(_, disk)| disk.zpool_name() == request.dataset_name.pool()) { return Err(Error::ZpoolNotFound(format!( - "{}, looked up while trying to add dataset", + "{}", request.dataset_name.pool(), ))); } @@ -617,271 +861,313 @@ impl StorageManager { #[cfg(all(test, target_os = "illumos"))] mod tests { use crate::dataset::DatasetKind; - use crate::disk::SyntheticDisk; + use crate::disk::RawSyntheticDisk; + use crate::manager_test_harness::StorageManagerTestHarness; + use crate::resources::DiskManagementError; use super::*; - use async_trait::async_trait; - use camino_tempfile::tempdir; - use illumos_utils::zpool::Zpool; - use key_manager::{ - KeyManager, SecretRetriever, SecretRetrieverError, SecretState, - VersionedIkm, - }; + use camino_tempfile::tempdir_in; + use omicron_common::api::external::Generation; + use omicron_common::ledger; use omicron_test_utils::dev::test_setup_log; - use std::sync::{ - atomic::{AtomicBool, Ordering}, - Arc, - }; + use std::sync::atomic::Ordering; use uuid::Uuid; - /// A [`key-manager::SecretRetriever`] that only returns hardcoded IKM for - /// epoch 0 - #[derive(Debug, Default)] - struct HardcodedSecretRetriever { - inject_error: Arc, - } + // A helper struct to advance time. + struct TimeTravel {} - #[async_trait] - impl SecretRetriever for HardcodedSecretRetriever { - async fn get_latest( - &self, - ) -> Result { - if self.inject_error.load(Ordering::SeqCst) { - return Err(SecretRetrieverError::Bootstore( - "Timeout".to_string(), - )); - } - - let epoch = 0; - let salt = [0u8; 32]; - let secret = [0x1d; 32]; - - Ok(VersionedIkm::new(epoch, salt, &secret)) + impl TimeTravel { + pub fn new() -> Self { + tokio::time::pause(); + Self {} } - /// We don't plan to do any key rotation before trust quorum is ready - async fn get( - &self, - epoch: u64, - ) -> Result { - if self.inject_error.load(Ordering::SeqCst) { - return Err(SecretRetrieverError::Bootstore( - "Timeout".to_string(), - )); - } - if epoch != 0 { - return Err(SecretRetrieverError::NoSuchEpoch(epoch)); - } - Ok(SecretState::Current(self.get_latest().await?)) + pub async fn enough_to_start_synchronization(&self) { + tokio::time::advance(SYNCHRONIZE_INTERVAL).await; } } #[tokio::test] - async fn add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued() { + async fn add_control_plane_disks_requires_keymanager() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); - let logctx = test_setup_log( - "add_u2_disk_while_not_in_normal_stage_and_ensure_it_gets_queued", - ); - let (mut _key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); - let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let raw_disk: RawDisk = SyntheticDisk::new(zpool_name, 0).into(); - assert_eq!(StorageManagerState::WaitingForKeyManager, manager.state); - manager.add_u2_disk(raw_disk.clone()).await.unwrap(); - assert!(manager.resources.all_u2_zpools().is_empty()); - assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk.clone()])); - - // Check other non-normal stages and ensure disk gets queued - manager.queued_u2_drives.clear(); - manager.state = 
StorageManagerState::QueueingDisks; - manager.add_u2_disk(raw_disk.clone()).await.unwrap(); - assert!(manager.resources.all_u2_zpools().is_empty()); - assert_eq!(manager.queued_u2_drives, HashSet::from([raw_disk])); + let logctx = + test_setup_log("add_control_plane_disks_requires_keymanager"); + + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + + // These disks should exist, but only the M.2 should have a zpool. + let all_disks = harness.handle().get_latest_disks().await; + assert_eq!(2, all_disks.iter_all().collect::>().len()); + assert_eq!(0, all_disks.all_u2_zpools().len()); + assert_eq!(1, all_disks.all_m2_zpools().len()); + + // If we try to "act like nexus" and request a control-plane disk, we'll + // see a failure because the key manager isn't ready. + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await; + assert!(matches!(result, Err(Error::KeyManagerNotReady))); + + // If we make the key manager ready and try again, it'll work. + harness.handle().key_manager_ready().await; + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // If we look at the disks again, we'll now see one U.2 zpool. + let all_disks = harness.handle().get_latest_disks().await; + assert_eq!(2, all_disks.iter_all().collect::>().len()); + assert_eq!(1, all_disks.all_u2_zpools().len()); + assert_eq!(1, all_disks.all_m2_zpools().len()); + + harness.cleanup().await; logctx.cleanup_successful(); } #[tokio::test] - async fn ensure_u2_gets_added_to_resources() { - illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); - let logctx = test_setup_log("ensure_u2_gets_added_to_resources"); - let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (mut manager, _) = StorageManager::new(&logctx.log, key_requester); - let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let dir = tempdir().unwrap(); - let disk = - SyntheticDisk::create_zpool(dir.path(), &zpool_name, 0).into(); - - // Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); - - // Set the stage to pretend we've progressed enough to have a key_manager available. - manager.state = StorageManagerState::Normal; - manager.add_u2_disk(disk).await.unwrap(); - assert_eq!(manager.resources.all_u2_zpools().len(), 1); - Zpool::destroy(&zpool_name).unwrap(); + async fn ledger_writes_require_at_least_one_m2() { + let logctx = test_setup_log("ledger_writes_require_at_least_one_m2"); + + // Create a single U.2 under test, with a ready-to-go key manager. + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + let raw_disks = harness.add_vdevs(&["u2_under_test.vdev"]).await; + harness.handle().key_manager_ready().await; + let config = harness.make_config(1, &raw_disks); + + // Attempting to adopt this U.2 fails (we don't have anywhere to put the + // ledger). + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await; + assert!( + matches!( + result, + Err(Error::Ledger(ledger::Error::FailedToWrite { .. })) + ), + "Saw unexpected result: {:?}", + result + ); + + // Add an M.2 which can store the ledger. 
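+        // (Ledgers live in the CONFIG_DATASET on each M.2.)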
+ let _raw_disks = + harness.add_vdevs(&["m2_finally_showed_up.vdev"]).await; + harness.handle_mut().wait_for_boot_disk().await; + + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("After adding an M.2, the ledger write should have worked"); + assert!(!result.has_error(), "{:?}", result); + + // Wait for the add disk notification + let tt = TimeTravel::new(); + tt.enough_to_start_synchronization().await; + let all_disks = harness.handle_mut().wait_for_changes().await; + assert_eq!(all_disks.all_u2_zpools().len(), 1); + assert_eq!(all_disks.all_m2_zpools().len(), 1); + + harness.cleanup().await; logctx.cleanup_successful(); } #[tokio::test] - async fn wait_for_bootdisk() { + async fn add_raw_u2_does_not_create_zpool() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); - let logctx = test_setup_log("wait_for_bootdisk"); - let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (manager, mut handle) = - StorageManager::new(&logctx.log, key_requester); - // Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); - - // Spawn the storage manager as done by sled-agent - tokio::spawn(async move { - manager.run().await; - }); - - // Create a synthetic internal disk - let zpool_name = ZpoolName::new_internal(Uuid::new_v4()); - let dir = tempdir().unwrap(); - let disk = - SyntheticDisk::create_zpool(dir.path(), &zpool_name, 0).into(); - - handle.upsert_disk(disk).await; - handle.wait_for_boot_disk().await; - Zpool::destroy(&zpool_name).unwrap(); + let logctx = test_setup_log("add_raw_u2_does_not_create_zpool"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + harness.handle().key_manager_ready().await; + + // Add a representative scenario for a small sled: a U.2 and M.2. + let _raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + + // This disks should exist, but only the M.2 should have a zpool. + let all_disks = harness.handle().get_latest_disks().await; + assert_eq!(2, all_disks.iter_all().collect::>().len()); + assert_eq!(0, all_disks.all_u2_zpools().len()); + assert_eq!(1, all_disks.all_m2_zpools().len()); + + harness.cleanup().await; logctx.cleanup_successful(); } #[tokio::test] - async fn queued_disks_get_added_as_resources() { + async fn wait_for_boot_disk() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); - let logctx = test_setup_log("queued_disks_get_added_as_resources"); - let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (manager, handle) = StorageManager::new(&logctx.log, key_requester); + let logctx = test_setup_log("wait_for_boot_disk"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + let _raw_disks = harness.add_vdevs(&["u2_under_test.vdev"]).await; + + // When we wait for changes, we can see the U.2 being added, but no boot + // disk. + let all_disks = harness.handle_mut().wait_for_changes().await; + assert_eq!(1, all_disks.iter_all().collect::>().len()); + assert!(all_disks.boot_disk().is_none()); + + // Waiting for the boot disk should time out. 
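+        // (Only a U.2 has been added so far, and the boot disk must be an M.2.)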
+ assert!(tokio::time::timeout( + tokio::time::Duration::from_millis(10), + harness.handle_mut().wait_for_boot_disk(), + ) + .await + .is_err()); - // Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); + // Now we add a boot disk. + let boot_disk = harness.add_vdevs(&["m2_under_test.vdev"]).await; - // Spawn the storage manager as done by sled-agent - tokio::spawn(async move { - manager.run().await; - }); + // It shows up through the general "wait for changes" API. + let all_disks = harness.handle_mut().wait_for_changes().await; + assert_eq!(2, all_disks.iter_all().collect::>().len()); + assert!(all_disks.boot_disk().is_some()); - // Queue up a disks, as we haven't told the `StorageManager` that - // the `KeyManager` is ready yet. - let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let dir = tempdir().unwrap(); - let disk = - SyntheticDisk::create_zpool(dir.path(), &zpool_name, 0).into(); - handle.upsert_disk(disk).await; - let resources = handle.get_latest_resources().await; - assert!(resources.all_u2_zpools().is_empty()); - - // Now inform the storage manager that the key manager is ready - // The queued disk should be successfully added - handle.key_manager_ready().await; - let resources = handle.get_latest_resources().await; - assert_eq!(resources.all_u2_zpools().len(), 1); - Zpool::destroy(&zpool_name).unwrap(); + // We can wait for, and see, the boot disk. + let (id, _) = harness.handle_mut().wait_for_boot_disk().await; + assert_eq!(&id, boot_disk[0].identity()); + + // We can keep calling this function without blocking. + let (id, _) = harness.handle_mut().wait_for_boot_disk().await; + assert_eq!(&id, boot_disk[0].identity()); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn disks_automatically_managed_after_key_manager_ready() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log( + "disks_automatically_managed_after_key_manager_ready", + ); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Boot normally, add an M.2 and a U.2, and let them + // create pools. + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + harness.handle().key_manager_ready().await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .unwrap(); + assert!(!result.has_error(), "{:?}", result); + + // Both pools exist + let all_disks = harness.handle().get_latest_disks().await; + assert_eq!(2, all_disks.iter_all().collect::>().len()); + assert_eq!(1, all_disks.all_u2_zpools().len()); + assert_eq!(1, all_disks.all_m2_zpools().len()); + + // "reboot" the storage manager, and let it see the disks before + // the key manager is ready. + let mut harness = harness.reboot(&logctx.log).await; + + // Both disks exist, but the U.2's pool is not yet accessible. + let all_disks = harness.handle_mut().wait_for_changes().await; + assert_eq!(2, all_disks.iter_all().collect::>().len()); + assert_eq!(0, all_disks.all_u2_zpools().len()); + assert_eq!(1, all_disks.all_m2_zpools().len()); + + // Mark the key manaager ready. This should eventually lead to the + // U.2 being managed, since it exists in the M.2 ledger. 
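+        // (key_manager_ready reloads the ledgered config, and the periodic
+        // synchronization task then adopts the disks it names.)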
+ harness.handle().key_manager_ready().await; + let all_disks = harness.handle_mut().wait_for_changes().await; + assert_eq!(1, all_disks.all_u2_zpools().len()); + + harness.cleanup().await; logctx.cleanup_successful(); } - /// For this test, we are going to step through the msg recv loop directly - /// without running the `StorageManager` in a tokio task. - /// This allows us to control timing precisely. #[tokio::test] async fn queued_disks_get_requeued_on_secret_retriever_error() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log( "queued_disks_get_requeued_on_secret_retriever_error", ); - let inject_error = Arc::new(AtomicBool::new(false)); - let (mut key_manager, key_requester) = KeyManager::new( - &logctx.log, - HardcodedSecretRetriever { inject_error: inject_error.clone() }, - ); - let (mut manager, handle) = - StorageManager::new(&logctx.log, key_requester); - - // Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; // Queue up a disks, as we haven't told the `StorageManager` that // the `KeyManager` is ready yet. - let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let dir = tempdir().unwrap(); - let disk = - SyntheticDisk::create_zpool(dir.path(), &zpool_name, 0).into(); - handle.upsert_disk(disk).await; - manager.step().await.unwrap(); - - // We can't wait for a reply through the handle as the storage manager task - // isn't actually running. We just check the resources directly. - assert!(manager.resources.all_u2_zpools().is_empty()); - - // Let's inject an error to the `SecretRetriever` to simulate a trust - // quorum timeout - inject_error.store(true, Ordering::SeqCst); - - // Now inform the storage manager that the key manager is ready - // The queued disk should not be added due to the error - handle.key_manager_ready().await; - manager.step().await.unwrap(); - assert!(manager.resources.all_u2_zpools().is_empty()); - - // Manually simulating a timer tick to add queued disks should also - // still hit the error - manager.add_queued_disks().await; - assert!(manager.resources.all_u2_zpools().is_empty()); - - // Clearing the injected error will cause the disk to get added - inject_error.store(false, Ordering::SeqCst); - manager.add_queued_disks().await; - assert_eq!(1, manager.resources.all_u2_zpools().len()); - - Zpool::destroy(&zpool_name).unwrap(); + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await; + assert!(matches!(result, Err(Error::KeyManagerNotReady))); + + // As usual, the U.2 isn't ready yet. + let all_disks = harness.handle().get_latest_disks().await; + assert_eq!(2, all_disks.iter_all().collect::>().len()); + assert_eq!(0, all_disks.all_u2_zpools().len()); + + // Mark the key manager ready, but throwing errors. 
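+        // (The injected failure stands in for trust quorum not being
+        // available yet.)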
+ harness.key_manager_error_injector().store(true, Ordering::SeqCst); + harness.handle().key_manager_ready().await; + + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .unwrap(); + assert!(result.has_error()); + assert!(matches!( + result.status[0].err.as_ref(), + Some(DiskManagementError::KeyManager(_)) + )); + let all_disks = harness.handle().get_latest_disks().await; + assert_eq!(0, all_disks.all_u2_zpools().len()); + + // After toggling KeyManager errors off, the U.2 can be successfully added. + harness.key_manager_error_injector().store(false, Ordering::SeqCst); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring control plane disks should have worked"); + assert!(!result.has_error(), "{:?}", result); + let all_disks = harness.handle().get_latest_disks().await; + assert_eq!(1, all_disks.all_u2_zpools().len()); + + harness.cleanup().await; logctx.cleanup_successful(); } #[tokio::test] - async fn delete_disk_triggers_notification() { + async fn detected_raw_disk_removal_triggers_notification() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); - let logctx = test_setup_log("delete_disk_triggers_notification"); - let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (manager, mut handle) = - StorageManager::new(&logctx.log, key_requester); - - // Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); - - // Spawn the storage manager as done by sled-agent - tokio::spawn(async move { - manager.run().await; - }); - - // Inform the storage manager that the key manager is ready, so disks - // don't get queued - handle.key_manager_ready().await; - - // Create and add a disk - let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let dir = tempdir().unwrap(); - let disk: RawDisk = - SyntheticDisk::create_zpool(dir.path(), &zpool_name, 0).into(); - handle.upsert_disk(disk.clone()).await; + let logctx = + test_setup_log("detected_raw_disk_removal_triggers_notification"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + harness.handle().key_manager_ready().await; + let mut raw_disks = harness.add_vdevs(&["u2_under_test.vdev"]).await; - // Wait for the add disk notification - let resources = handle.wait_for_changes().await; - assert_eq!(resources.all_u2_zpools().len(), 1); + // Access the add disk notification + let all_disks = harness.handle_mut().wait_for_changes().await; + assert_eq!(1, all_disks.iter_all().collect::>().len()); // Delete the disk and wait for a notification - handle.delete_disk(disk).await; - let resources = handle.wait_for_changes().await; - assert!(resources.all_u2_zpools().is_empty()); + harness + .handle() + .detected_raw_disk_removal(raw_disks.remove(0)) + .await + .await + .unwrap(); + let all_disks = harness.handle_mut().wait_for_changes().await; + assert_eq!(0, all_disks.iter_all().collect::>().len()); - Zpool::destroy(&zpool_name).unwrap(); + harness.cleanup().await; logctx.cleanup_successful(); } @@ -889,122 +1175,81 @@ mod tests { async fn ensure_using_exactly_these_disks() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log("ensure_using_exactly_these_disks"); - let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (manager, mut handle) = - StorageManager::new(&logctx.log, key_requester); - - // 
Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); - - // Spawn the storage manager as done by sled-agent - tokio::spawn(async move { - manager.run().await; - }); - - // Create a bunch of file backed external disks with zpools - let dir = tempdir().unwrap(); - let zpools: Vec = - (0..10).map(|_| ZpoolName::new_external(Uuid::new_v4())).collect(); - let disks: Vec = zpools - .iter() - .enumerate() - .map(|(slot, zpool_name)| { - SyntheticDisk::create_zpool( - dir.path(), - zpool_name, - slot.try_into().unwrap(), - ) - .into() + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Create a bunch of file backed external disks + let vdev_dir = tempdir_in("/var/tmp").unwrap(); + let disks: Vec = (0..10) + .map(|serial| { + let vdev_path = + vdev_dir.path().join(format!("u2_{serial}.vdev")); + RawSyntheticDisk::new_with_length(&vdev_path, 1 << 20, serial) + .unwrap() + .into() }) .collect(); - // Add the first 3 disks, and ensure they get queued, as we haven't - // marked our key manager ready yet - handle + // Observe the first three disks + harness + .handle() .ensure_using_exactly_these_disks(disks.iter().take(3).cloned()) - .await; - let state = handle.get_manager_state().await; - assert_eq!(state.queued_u2_drives.len(), 3); - assert_eq!(state.state, StorageManagerState::WaitingForKeyManager); - assert!(handle.get_latest_resources().await.all_u2_zpools().is_empty()); - - // Mark the key manager ready and wait for the storage update - handle.key_manager_ready().await; - let resources = handle.wait_for_changes().await; - let expected: HashSet<_> = - disks.iter().take(3).map(|d| d.identity()).collect(); - let actual: HashSet<_> = resources.disks().keys().collect(); - assert_eq!(expected, actual); + .await + .await + .unwrap(); - // Add first three disks after the initial one. The returned resources + let all_disks = harness.handle().get_latest_disks().await; + assert_eq!(3, all_disks.iter_all().collect::>().len()); + + // Add first three disks after the initial one. The returned disks // should not contain the first disk. - handle + harness + .handle() .ensure_using_exactly_these_disks( disks.iter().skip(1).take(3).cloned(), ) - .await; - let resources = handle.wait_for_changes().await; + .await + .await + .unwrap(); + + let all_disks = harness.handle_mut().wait_for_changes().await; + assert_eq!(3, all_disks.iter_all().collect::>().len()); + let expected: HashSet<_> = disks.iter().skip(1).take(3).map(|d| d.identity()).collect(); - let actual: HashSet<_> = resources.disks().keys().collect(); + let actual: HashSet<_> = all_disks.values.keys().collect(); assert_eq!(expected, actual); // Ensure the same set of disks and make sure no change occurs - // Note that we directly request the resources this time so we aren't + // Note that we directly request the disks this time so we aren't // waiting forever for a change notification. 
- handle + harness + .handle() .ensure_using_exactly_these_disks( disks.iter().skip(1).take(3).cloned(), ) - .await; - let resources2 = handle.get_latest_resources().await; - assert_eq!(resources, resources2); + .await + .await + .unwrap(); + let all_disks2 = harness.handle().get_latest_disks().await; + assert_eq!(all_disks.values, all_disks2.values); // Add a disjoint set of disks and see that only they come through - handle + harness + .handle() .ensure_using_exactly_these_disks( disks.iter().skip(4).take(5).cloned(), ) - .await; - let resources = handle.wait_for_changes().await; + .await + .await + .unwrap(); + + let all_disks = harness.handle().get_latest_disks().await; let expected: HashSet<_> = disks.iter().skip(4).take(5).map(|d| d.identity()).collect(); - let actual: HashSet<_> = resources.disks().keys().collect(); + let actual: HashSet<_> = all_disks.values.keys().collect(); assert_eq!(expected, actual); - // Finally, change the zpool backing of the 5th disk to be that of the 10th - // and ensure that disk changes. Note that we don't change the identity - // of the 5th disk. - let mut modified_disk = disks[4].clone(); - if let RawDisk::Synthetic(disk) = &mut modified_disk { - disk.zpool_name = disks[9].zpool_name().clone(); - } else { - panic!(); - } - let mut expected: HashSet<_> = - disks.iter().skip(5).take(4).cloned().collect(); - expected.insert(modified_disk); - - handle - .ensure_using_exactly_these_disks(expected.clone().into_iter()) - .await; - let resources = handle.wait_for_changes().await; - - // Ensure the one modified disk changed as we expected - assert_eq!(5, resources.disks().len()); - for raw_disk in expected { - let (disk, pool) = - resources.disks().get(raw_disk.identity()).unwrap(); - assert_eq!(disk.zpool_name(), raw_disk.zpool_name()); - assert_eq!(&pool.name, disk.zpool_name()); - assert_eq!(raw_disk.identity(), &pool.parent); - } - - // Cleanup - for zpool in zpools { - Zpool::destroy(&zpool).unwrap(); - } + harness.cleanup().await; logctx.cleanup_successful(); } @@ -1012,34 +1257,194 @@ mod tests { async fn upsert_filesystem() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); let logctx = test_setup_log("upsert_filesystem"); - let (mut key_manager, key_requester) = - KeyManager::new(&logctx.log, HardcodedSecretRetriever::default()); - let (manager, handle) = StorageManager::new(&logctx.log, key_requester); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add a U.2 and M.2, adopt them into the "control plane" + // for usage. 
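+        // (Adoption happens via omicron_physical_disks_ensure; the U.2's
+        // pool_id from that config is what names the zpool for the dataset
+        // created below.)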
+ harness.handle().key_manager_ready().await; + let raw_disks = + harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; + let config = harness.make_config(1, &raw_disks); + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); - // Spawn the key_manager so that it will respond to requests for encryption keys - tokio::spawn(async move { key_manager.run().await }); + // Create a filesystem on the newly formatted U.2 + let dataset_id = Uuid::new_v4(); + let zpool_name = ZpoolName::new_external(config.disks[0].pool_id); + let dataset_name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + harness + .handle() + .upsert_filesystem(dataset_id, dataset_name) + .await + .unwrap(); - // Spawn the storage manager as done by sled-agent - tokio::spawn(async move { - manager.run().await; - }); + harness.cleanup().await; + logctx.cleanup_successful(); + } - handle.key_manager_ready().await; + #[tokio::test] + async fn ledgerless_to_ledgered_migration() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("ledgerless_to_ledgered_migration"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Create two U.2s and an M.2 + let raw_disks = harness + .add_vdevs(&[ + "u2_under_test.vdev", + "u2_that_shows_up_late.vdev", + "m2_helping.vdev", + ]) + .await; - // Create and add a disk - let zpool_name = ZpoolName::new_external(Uuid::new_v4()); - let dir = tempdir().unwrap(); - let disk: RawDisk = - SyntheticDisk::create_zpool(dir.path(), &zpool_name, 0).into(); - handle.upsert_disk(disk.clone()).await; + // First, we format the U.2s to have a zpool. This should work, even + // without looping in the StorageManager. + let first_u2 = &raw_disks[0]; + let first_pool_id = Uuid::new_v4(); + let _disk = crate::disk::Disk::new( + &logctx.log, + &harness.mount_config(), + first_u2.clone(), + Some(first_pool_id), + Some(harness.key_requester()), + ) + .await + .expect("Failed to format U.2"); - // Create a filesystem - let dataset_id = Uuid::new_v4(); - let dataset_name = - DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); - handle.upsert_filesystem(dataset_id, dataset_name).await.unwrap(); + let second_u2 = &raw_disks[1]; + let second_pool_id = Uuid::new_v4(); + let _disk = crate::disk::Disk::new( + &logctx.log, + &harness.mount_config(), + second_u2.clone(), + Some(second_pool_id), + Some(harness.key_requester()), + ) + .await + .expect("Failed to format U.2"); + + // Because we did that formatting "behind the back" of the + // StorageManager, we should see no evidence of the U.2 being managed. + // + // This currently matches the format of "existing systems, which were + // initialized before the storage ledger was created". + + // We should still see no ledger. + let result = harness.handle().omicron_physical_disks_list().await; + assert!(matches!(result, Err(Error::LedgerNotFound)), "{:?}", result); + + // We should also not see any managed U.2s. + let disks = harness.handle().get_latest_disks().await; + assert!(disks.all_u2_zpools().is_empty()); + + // Leave one of the U.2s attached, but "remove" the other one. + harness.remove_vdev(second_u2).await; + + // When the system activates, we should see a single Zpool, and + // "auto-manage" it. 
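+        // (A single zpool, because only the first U.2 is still attached.)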
+ harness.handle().key_manager_ready().await; + + // It might take a moment for synchronization to be handled by the + // background task, but we'll eventually see the U.2 zpool. + // + // This is the equivalent of us "loading a zpool, even though + // it was not backed by a ledger". + let tt = TimeTravel::new(); + tt.enough_to_start_synchronization().await; + while harness + .handle_mut() + .wait_for_changes() + .await + .all_u2_zpools() + .is_empty() + { + info!(&logctx.log, "Waiting for U.2 to automatically show up"); + } + let u2s = harness.handle().get_latest_disks().await.all_u2_zpools(); + assert_eq!(u2s.len(), 1, "{:?}", u2s); + + // If we attach the second U.2 -- the equivalent of it appearing after + // the key manager is ready -- it'll also be included in the set of + // auto-managed U.2s. + harness.add_vdev_as(second_u2.clone()).await; + tt.enough_to_start_synchronization().await; + while harness + .handle_mut() + .wait_for_changes() + .await + .all_u2_zpools() + .len() + == 1 + { + info!(&logctx.log, "Waiting for U.2 to automatically show up"); + } + let u2s = harness.handle().get_latest_disks().await.all_u2_zpools(); + assert_eq!(u2s.len(), 2, "{:?}", u2s); + + // This is the equivalent of the "/omicron-physical-disks GET" API, + // which Nexus might use to contact this sled. + // + // This means that we'll bootstrap the sled successfully, but report a + // 404 if nexus asks us for the latest configuration. + let result = harness.handle().omicron_physical_disks_list().await; + assert!(matches!(result, Err(Error::LedgerNotFound),), "{:?}", result); + + // At this point, Nexus may want to explicitly tell sled agent which + // disks it should use. This is the equivalent of invoking + // "/omicron-physical-disks PUT". + let mut disks = vec![ + OmicronPhysicalDiskConfig { + identity: first_u2.identity().clone(), + id: Uuid::new_v4(), + pool_id: first_pool_id, + }, + OmicronPhysicalDiskConfig { + identity: second_u2.identity().clone(), + id: Uuid::new_v4(), + pool_id: second_pool_id, + }, + ]; + // Sort the disks to ensure the "output" matches the "input" when we + // query later. + disks.sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap()); + let config = + OmicronPhysicalDisksConfig { generation: Generation::new(), disks }; + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Failed to ensure disks with 'new' Config"); + assert!(!result.has_error(), "{:?}", result); + + let observed_config = harness + .handle() + .omicron_physical_disks_list() + .await + .expect("Failed to retrieve config after ensuring it"); + assert_eq!(observed_config, config); + + let u2s = harness.handle().get_latest_disks().await.all_u2_zpools(); + assert_eq!(u2s.len(), 2, "{:?}", u2s); - Zpool::destroy(&zpool_name).unwrap(); + harness.cleanup().await; logctx.cleanup_successful(); } } + +#[cfg(test)] +mod test { + use super::*; + #[test] + fn test_omicron_physical_disks_schema() { + let schema = schemars::schema_for!(OmicronPhysicalDisksConfig); + expectorate::assert_contents( + "../schema/omicron-physical-disks.json", + &serde_json::to_string_pretty(&schema).unwrap(), + ); + } +} diff --git a/sled-storage/src/manager_test_harness.rs b/sled-storage/src/manager_test_harness.rs new file mode 100644 index 0000000000..efdbb0b9f6 --- /dev/null +++ b/sled-storage/src/manager_test_harness.rs @@ -0,0 +1,393 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0.
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for creating a StorageManager under test. + +use crate::config::MountConfig; +use crate::disk::{OmicronPhysicalDisksConfig, RawDisk}; +use crate::manager::{StorageHandle, StorageManager}; +use camino::Utf8PathBuf; +use key_manager::StorageKeyRequester; +use slog::{info, Logger}; +use std::sync::{ + atomic::{AtomicBool, Ordering}, + Arc, +}; +use uuid::Uuid; + +/// A [`key-manager::SecretRetriever`] that only returns hardcoded IKM for +/// epoch 0 +#[derive(Debug, Default)] +struct HardcodedSecretRetriever { + inject_error: Arc, +} + +#[async_trait::async_trait] +impl key_manager::SecretRetriever for HardcodedSecretRetriever { + async fn get_latest( + &self, + ) -> Result + { + if self.inject_error.load(Ordering::SeqCst) { + return Err(key_manager::SecretRetrieverError::Bootstore( + "Timeout".to_string(), + )); + } + + let epoch = 0; + let salt = [0u8; 32]; + let secret = [0x1d; 32]; + + Ok(key_manager::VersionedIkm::new(epoch, salt, &secret)) + } + + /// We don't plan to do any key rotation before trust quorum is ready + async fn get( + &self, + epoch: u64, + ) -> Result + { + if self.inject_error.load(Ordering::SeqCst) { + return Err(key_manager::SecretRetrieverError::Bootstore( + "Timeout".to_string(), + )); + } + if epoch != 0 { + return Err(key_manager::SecretRetrieverError::NoSuchEpoch(epoch)); + } + Ok(key_manager::SecretState::Current(self.get_latest().await?)) + } +} + +/// Helper utility for tests that want to use a StorageManager. +/// +/// Attempts to make it easy to create a set of vdev-based M.2 and U.2 +/// devices, which can be formatted with arbitrary zpools. +pub struct StorageManagerTestHarness { + handle: StorageHandle, + vdev_dir: Option, + vdevs: std::collections::BTreeSet, + next_slot: i64, + #[allow(unused)] + key_requester: StorageKeyRequester, + key_manager_error_injector: Arc, + key_manager_task: tokio::task::JoinHandle<()>, + storage_manager_task: tokio::task::JoinHandle<()>, +} + +impl Drop for StorageManagerTestHarness { + fn drop(&mut self) { + if let Some(vdev_dir) = self.vdev_dir.take() { + eprintln!( + "WARNING: StorageManagerTestHarness called without 'cleanup()'.\n\ + We may have leaked zpools, and not correctly deleted {}", + vdev_dir.path() + ); + + let pools = [ + ( + illumos_utils::zpool::ZPOOL_INTERNAL_PREFIX, + vdev_dir.path().join("pool/int"), + ), + ( + illumos_utils::zpool::ZPOOL_EXTERNAL_PREFIX, + vdev_dir.path().join("pool/ext"), + ), + ]; + + eprintln!( + "The following commands may need to be run to clean up state:" + ); + eprintln!("---"); + for (prefix, pool) in pools { + let Ok(entries) = pool.read_dir_utf8() else { + continue; + }; + for entry in entries.flatten() { + eprintln!( + " pfexec zpool destroy {prefix}{} ", + entry.file_name() + ); + } + } + eprintln!(" pfexec rm -rf {}", vdev_dir.path()); + eprintln!("---"); + + panic!("Dropped without cleanup. See stderr for cleanup advice"); + } + } +} + +impl StorageManagerTestHarness { + /// Creates a new StorageManagerTestHarness with no associated disks. 
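+    ///
+    /// A rough usage sketch (hedged: `log` stands for whatever test logger is
+    /// in scope, and the vdev file names are arbitrary placeholders): create
+    /// the harness, add vdevs, exercise the manager through its handle, and
+    /// call `cleanup()` before the test ends so any zpools are destroyed.
+    ///
+    /// ```ignore
+    /// let mut harness = StorageManagerTestHarness::new(&log).await;
+    /// harness.handle().key_manager_ready().await;
+    /// let _raw_disks =
+    ///     harness.add_vdevs(&["u2_test.vdev", "m2_helping.vdev"]).await;
+    /// // ... drive the StorageManager through harness.handle() ...
+    /// harness.cleanup().await;
+    /// ```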
+ pub async fn new(log: &Logger) -> Self { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let tmp = camino_tempfile::tempdir_in("/var/tmp") + .expect("Failed to make temporary directory"); + info!(log, "Using tmp: {}", tmp.path()); + Self::new_with_tmp_dir(log, tmp).await + } + + async fn new_with_tmp_dir( + log: &Logger, + tmp: camino_tempfile::Utf8TempDir, + ) -> Self { + let mount_config = + MountConfig { root: tmp.path().into(), ..Default::default() }; + + let key_manager_error_injector = Arc::new(AtomicBool::new(false)); + let (mut key_manager, key_requester) = key_manager::KeyManager::new( + &log, + HardcodedSecretRetriever { + inject_error: key_manager_error_injector.clone(), + }, + ); + let (manager, handle) = + StorageManager::new(&log, mount_config, key_requester.clone()); + + // Spawn the key_manager so that it will respond to requests for encryption keys + let key_manager_task = + tokio::spawn(async move { key_manager.run().await }); + + // Spawn the storage manager as done by sled-agent + let storage_manager_task = tokio::spawn(async move { + manager.run().await; + }); + + Self { + handle, + vdev_dir: Some(tmp), + vdevs: std::collections::BTreeSet::new(), + next_slot: 0, + key_requester, + key_manager_error_injector, + key_manager_task, + storage_manager_task, + } + } + + /// Emulate a system rebooting. + /// + /// - Stops the currently running tasks and restarts them + /// - Re-inserts all vdevs previously created by [Self::add_vdevs]. + pub async fn reboot(mut self, log: &Logger) -> Self { + // Abort ongoing tasks, in lieu of a cleaner shutdown mechanism. + self.key_manager_task.abort(); + self.storage_manager_task.abort(); + + // Deconstruct the test harness + let vdev_dir = + std::mem::take(&mut self.vdev_dir).expect("Already terminated"); + let vdevs = std::mem::take(&mut self.vdevs); + + // Re-create all the state we created during the constructor, but + // leave the temporary directory as it was "before reboot". + let mut slef = Self::new_with_tmp_dir(log, vdev_dir).await; + slef.next_slot = self.next_slot; + + // Notify ourselves of the new disks, just as the hardware would. + // + // NOTE: Technically, if these disks have pools, they're still imported. + // However, the SledManager doesn't know about them, and wouldn't + // assume they're being managed right now. + for raw_disk in vdevs { + slef.handle + .detected_raw_disk(raw_disk.clone()) + .await // Notify StorageManager + .await // Wait for it to finish processing + .unwrap(); + slef.vdevs.insert(raw_disk.clone()); + } + + slef + } + + #[allow(unused)] + pub(crate) fn mount_config(&self) -> MountConfig { + MountConfig { + root: self + .vdev_dir + .as_ref() + .expect("Harness destroyed?") + .path() + .into(), + ..Default::default() + } + } + + #[allow(unused)] + pub(crate) fn key_requester(&self) -> &StorageKeyRequester { + &self.key_requester + } + + pub const DEFAULT_VDEV_SIZE: u64 = 64 * (1 << 20); + + /// Adds raw devices to the [crate::manager::StorageManager], as if they were detected via + /// hardware. Can be called several times. + /// + /// Each device is [Self::DEFAULT_VDEV_SIZE] in size. + /// Use [Self::add_vdevs_with_size] if you need more control + /// over device sizes. 
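+    ///
+    /// A minimal sketch (the vdev names below are placeholders; judging from
+    /// the non-gimlet config, the "u2_"/"m2_" prefix selects which disk
+    /// variant the synthetic device reports):
+    ///
+    /// ```ignore
+    /// let raw_disks = harness
+    ///     .add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"])
+    ///     .await;
+    /// assert_eq!(raw_disks.len(), 2);
+    /// ```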
+ pub async fn add_vdevs + ?Sized>( + &mut self, + vdevs: &[&P], + ) -> Vec { + self.add_vdevs_with_size( + &vdevs + .iter() + .map(|vdev| (vdev, Self::DEFAULT_VDEV_SIZE)) + .collect::>(), + ) + .await + } + + pub async fn add_vdevs_with_size + ?Sized>( + &mut self, + vdevs: &[(&P, u64)], + ) -> Vec { + let vdev_dir = self + .vdev_dir + .as_ref() + .expect("Cannot add vdevs, test harness terminated"); + let mut added = vec![]; + for (vdev, size) in vdevs + .iter() + .map(|(vdev, size)| (Utf8PathBuf::from(vdev.as_ref()), size)) + { + assert!(vdev.is_relative()); + let vdev_path = vdev_dir.path().join(&vdev); + let raw_disk: RawDisk = + crate::disk::RawSyntheticDisk::new_with_length( + &vdev_path, + *size, + self.next_slot, + ) + .unwrap_or_else(|err| { + panic!( + "Failed to create synthetic disk for {vdev}: {err:?}" + ) + }) + .into(); + self.next_slot += 1; + self.handle + .detected_raw_disk(raw_disk.clone()) + .await // Notify StorageManager + .await // Wait for it to finish processing + .unwrap(); + + self.vdevs.insert(raw_disk.clone()); + added.push(raw_disk); + } + added + } + + // Removes a vdev from the set of "tracked" devices. + // + // This is equivalent to having the hardware monitor unplug a device. + // + // If this device has an associated zpool, it must be either re-attached + // to the harness or manually destroyed before the test completes. + // Otherwise, removing the temporary directory containing that zpool + // will likely fail with a "device busy" error. + pub async fn remove_vdev(&mut self, raw: &RawDisk) { + assert!(self.vdevs.remove(&raw), "Vdev does not exist"); + self.handle + .detected_raw_disk_removal(raw.clone()) + .await + .await + .expect("Failed to remove vdev"); + } + + // Adds a vdev to the set of "tracked" devices. + pub async fn add_vdev_as(&mut self, raw_disk: RawDisk) { + self.handle + .detected_raw_disk(raw_disk.clone()) + .await // Notify StorageManager + .await // Wait for it to finish processing + .unwrap(); + self.vdevs.insert(raw_disk.clone()); + } + + pub fn make_config( + &self, + generation: u32, + disks: &[RawDisk], + ) -> OmicronPhysicalDisksConfig { + let disks = disks + .into_iter() + .map(|raw| { + let identity = raw.identity(); + + crate::disk::OmicronPhysicalDiskConfig { + identity: identity.clone(), + id: Uuid::new_v4(), + pool_id: Uuid::new_v4(), + } + }) + .collect(); + + OmicronPhysicalDisksConfig { + generation: omicron_common::api::external::Generation::from( + generation, + ), + disks, + } + } + + /// Returns the underlying [crate::manager::StorageHandle]. + pub fn handle_mut(&mut self) -> &mut StorageHandle { + &mut self.handle + } + + /// Returns the underlying [crate::manager::StorageHandle]. + pub fn handle(&self) -> &StorageHandle { + &self.handle + } + + /// Set to "true" to throw errors, "false" to not inject errors. 
+ pub fn key_manager_error_injector(&self) -> &Arc { + &self.key_manager_error_injector + } + + /// Cleanly terminates the test harness + pub async fn cleanup(&mut self) { + let Some(vdev_dir) = self.vdev_dir.take() else { + // Already terminated + return; + }; + + eprintln!("Terminating StorageManagerTestHarness"); + let disks = self.handle().get_latest_disks().await; + let pools = disks.get_all_zpools(); + for (pool, _) in pools { + eprintln!("Destroying pool: {pool:?}"); + if let Err(e) = illumos_utils::zpool::Zpool::destroy(&pool) { + eprintln!("Failed to destroy {pool:?}: {e:?}"); + } + } + + self.key_manager_task.abort(); + self.storage_manager_task.abort(); + + // Make sure that we're actually able to delete everything within the + // temporary directory. + // + // This is necessary because the act of mounting datasets within this + // directory may have created directories owned by root, and the test + // process may not have been started as root. + // + // Since we're about to delete all these files anyway, make them + // accessible to everyone before destroying them. + let mut command = std::process::Command::new("/usr/bin/pfexec"); + let mount = vdev_dir.path(); + let cmd = command.args(["chmod", "-R", "a+rw", mount.as_str()]); + cmd.output().expect( + "Failed to change ownership of the temporary directory we're trying to delete" + ); + + // Actually delete everything, and check the result to fail loud if + // something goes wrong. + vdev_dir.close().expect("Failed to clean up temporary directory"); + } +} diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index c1f460dc92..34b30f1bfd 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -4,17 +4,23 @@ //! Discovered and usable disks and zpools -use crate::dataset::M2_DEBUG_DATASET; -use crate::disk::Disk; +use crate::config::MountConfig; +use crate::dataset::{DatasetError, M2_DEBUG_DATASET}; +use crate::disk::{Disk, DiskError, OmicronPhysicalDiskConfig, RawDisk}; use crate::error::Error; -use crate::pool::Pool; use camino::Utf8PathBuf; use cfg_if::cfg_if; use illumos_utils::zpool::ZpoolName; +use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; use sled_hardware::DiskVariant; +use slog::{info, o, warn, Logger}; use std::collections::BTreeMap; use std::sync::Arc; +use tokio::sync::watch; +use uuid::Uuid; // The directory within the debug dataset in which bundles are created. const BUNDLE_DIRECTORY: &str = "bundle"; @@ -22,129 +28,131 @@ const BUNDLE_DIRECTORY: &str = "bundle"; // The directory for zone bundles. const ZONE_BUNDLE_DIRECTORY: &str = "zone"; -pub enum AddDiskResult { - DiskInserted, - DiskAlreadyInserted, - DiskQueued, +#[derive(Debug, thiserror::Error, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum DiskManagementError { + #[error("Disk requested by control plane, but not found on device")] + NotFound, + + #[error("Expected zpool UUID of {expected}, but saw {observed}")] + ZpoolUuidMismatch { expected: Uuid, observed: Uuid }, + + #[error("Failed to access keys necessary to unlock storage. 
This error may be transient.")] + KeyManager(String), + + #[error("Other error starting disk management: {0}")] + Other(String), } -impl AddDiskResult { - pub fn disk_inserted(&self) -> bool { +impl DiskManagementError { + fn retryable(&self) -> bool { match self { - AddDiskResult::DiskInserted => true, + DiskManagementError::KeyManager(_) => true, _ => false, } } } -/// Storage related resources: disks and zpools -/// -/// This state is internal to the [`crate::manager::StorageManager`] task. Clones -/// of this state can be retrieved by requests to the `StorageManager` task -/// from the [`crate::manager::StorageHandle`]. This state is not `Sync`, and -/// as such does not require any mutexes. However, we do expect to share it -/// relatively frequently, and we want copies of it to be as cheaply made -/// as possible. So any large state is stored inside `Arc`s. On the other -/// hand, we expect infrequent updates to this state, and as such, we use -/// [`std::sync::Arc::make_mut`] to implement clone on write functionality -/// inside the `StorageManager` task if there are any outstanding copies. -/// Therefore, we only pay the cost to update infrequently, and no locks are -/// required by callers when operating on cloned data. The only contention here -/// is for the reference counters of the internal Arcs when `StorageResources` -/// gets cloned or dropped. -#[derive(Debug, Clone, Default, PartialEq, Eq)] -pub struct StorageResources { - // All disks, real and synthetic, being managed by this sled - disks: Arc>, +/// Identifies how a single disk management operation may have succeeded or +/// failed. +#[derive(Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub struct DiskManagementStatus { + pub identity: DiskIdentity, + pub err: Option, } -impl StorageResources { - /// Return a reference to the current snapshot of disks - pub fn disks(&self) -> &BTreeMap { - &self.disks - } +/// The result from attempting to manage underlying disks. +/// +/// This is more complex than a simple "Error" type because it's possible +/// for some disks to be initialized correctly, while others can fail. +/// +/// This structure provides a mechanism for callers to learn about partial +/// failures, and handle them appropriately on a per-disk basis. +#[derive(Default, Debug, JsonSchema, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +#[must_use = "this `DiskManagementResult` may contain errors, which should be handled"] +pub struct DisksManagementResult { + pub status: Vec, +} - /// Insert a disk and its zpool - /// - /// If the disk passed in is new or modified, or its pool size or pool - /// name changed, then insert the changed values and return `DiskInserted`. - /// Otherwise, do not insert anything and return `DiskAlreadyInserted`. - /// For instance, if only the pool health changes, because it is not one - /// of the checked values, we will not insert the update and will return - /// `DiskAlreadyInserted`. 
- pub(crate) fn insert_disk( - &mut self, - disk: Disk, - ) -> Result { - let disk_id = disk.identity().clone(); - let zpool_name = disk.zpool_name().clone(); - let zpool = Pool::new(zpool_name, disk_id.clone())?; - if let Some((stored_disk, stored_pool)) = self.disks.get(&disk_id) { - if stored_disk == &disk - && stored_pool.info.size() == zpool.info.size() - && stored_pool.name == zpool.name - { - return Ok(AddDiskResult::DiskAlreadyInserted); +impl DisksManagementResult { + pub fn has_error(&self) -> bool { + for status in &self.status { + if status.err.is_some() { + return true; } } - // Either the disk or zpool changed - Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool)); - Ok(AddDiskResult::DiskInserted) - } - - /// Insert a disk while creating a fake pool - /// This is a workaround for current mock based testing strategies - /// in the sled-agent. - #[cfg(feature = "testing")] - pub fn insert_fake_disk(&mut self, disk: Disk) -> AddDiskResult { - let disk_id = disk.identity().clone(); - let zpool_name = disk.zpool_name().clone(); - let zpool = Pool::new_with_fake_info(zpool_name, disk_id.clone()); - if self.disks.contains_key(&disk_id) { - return AddDiskResult::DiskAlreadyInserted; - } - // Either the disk or zpool changed - Arc::make_mut(&mut self.disks).insert(disk_id, (disk, zpool)); - AddDiskResult::DiskInserted + false } - /// Delete a disk and its zpool - /// - /// Return true, if data was changed, false otherwise - /// - /// Note: We never allow removal of synthetic disks in production as they - /// are only added once. - pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) -> bool { - let Some((disk, _)) = self.disks.get(id) else { - return false; - }; - - cfg_if! { - if #[cfg(test)] { - // For testing purposes, we allow synthetic disks to be deleted. - // Silence an unused variable warning. - _ = disk; - } else { - // In production, we disallow removal of synthetic disks as they - // are only added once. - if disk.is_synthetic() { - return false; + pub fn has_retryable_error(&self) -> bool { + for status in &self.status { + if let Some(err) = &status.err { + if err.retryable() { + return true; } } } - - // Safe to unwrap as we just checked the key existed above - Arc::make_mut(&mut self.disks).remove(id).unwrap(); - true + false } +} + +// The Sled Agent is responsible for both observing disks and managing them at +// the request of the broader control plane. This enum encompasses that duality, +// by representing all disks that can exist, managed or not. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ManagedDisk { + // A disk explicitly managed by the control plane. + // + // This includes U.2s which Nexus has told us to format and use. + ExplicitlyManaged(Disk), + + // A disk implicitly managed by the control plane. + // + // This includes M.2s which the sled agent auto-detects and uses. + ImplicitlyManaged(Disk), + + // A disk which has been observed by the sled, but which is not yet being + // managed by the control plane. + // + // This disk should be treated as "read-only" until we're explicitly told to + // use it. + Unmanaged(RawDisk), +} + +/// The disks, keyed by their identity, managed by the sled agent. +/// +/// This state is owned by [`crate::manager::StorageManager`], through +/// [`crate::resources::StorageResources`]. Clones of this state can be +/// retrieved by requests to the `StorageManager` task from the +/// [`crate::manager::StorageHandle`]. This state is not `Sync`, and as such +/// does not require any mutexes. 
However, we do expect to share it relatively +/// frequently, and we want copies of it to be as cheaply made as possible. So +/// any large state is stored inside `Arc`s. On the other hand, we expect +/// infrequent updates to this state, and as such, we use +/// [`std::sync::Arc::make_mut`] to implement clone on write functionality +/// inside the `StorageManager` task if there are any outstanding copies. +/// Therefore, we only pay the cost to update infrequently, and no locks are +/// required by callers when operating on cloned data. The only contention here +/// is for the reference counters of the internal Arcs when `AllDisks` +/// gets cloned or dropped. +#[derive(Debug, Clone, Eq, PartialEq)] +pub struct AllDisks { + pub values: Arc>, + pub mount_config: MountConfig, +} +impl AllDisks { /// Returns the identity of the boot disk. /// /// If this returns `None`, we have not processed the boot disk yet. pub fn boot_disk(&self) -> Option<(DiskIdentity, ZpoolName)> { - for (id, (disk, _)) in self.disks.iter() { - if disk.is_boot_disk() { - return Some((id.clone(), disk.zpool_name().clone())); + for (id, disk) in self.values.iter() { + if let ManagedDisk::ImplicitlyManaged(disk) = disk { + if disk.is_boot_disk() { + return Some((id.clone(), disk.zpool_name().clone())); + } } } None @@ -164,7 +172,9 @@ impl StorageResources { pub fn all_m2_mountpoints(&self, dataset: &str) -> Vec { self.all_m2_zpools() .iter() - .map(|zpool| zpool.dataset_mountpoint(dataset)) + .map(|zpool| { + zpool.dataset_mountpoint(&self.mount_config.root, dataset) + }) .collect() } @@ -172,26 +182,41 @@ impl StorageResources { pub fn all_u2_mountpoints(&self, dataset: &str) -> Vec { self.all_u2_zpools() .iter() - .map(|zpool| zpool.dataset_mountpoint(dataset)) + .map(|zpool| { + zpool.dataset_mountpoint(&self.mount_config.root, dataset) + }) .collect() } + /// Returns all zpools managed by the control plane pub fn get_all_zpools(&self) -> Vec<(ZpoolName, DiskVariant)> { - self.disks + self.values .values() - .map(|(disk, _)| (disk.zpool_name().clone(), disk.variant())) + .filter_map(|disk| match disk { + ManagedDisk::ExplicitlyManaged(disk) + | ManagedDisk::ImplicitlyManaged(disk) => { + Some((disk.zpool_name().clone(), disk.variant())) + } + ManagedDisk::Unmanaged(_) => None, + }) .collect() } - // Returns all zpools of a particular variant + // Returns all zpools of a particular variant. + // + // Only returns zpools from disks actively being managed. fn all_zpools(&self, variant: DiskVariant) -> Vec { - self.disks + self.values .values() - .filter_map(|(disk, _)| { - if disk.variant() == variant { - return Some(disk.zpool_name().clone()); + .filter_map(|disk| match disk { + ManagedDisk::ExplicitlyManaged(disk) + | ManagedDisk::ImplicitlyManaged(disk) => { + if disk.variant() == variant { + return Some(disk.zpool_name().clone()); + } + None } - None + ManagedDisk::Unmanaged(_) => None, }) .collect() } @@ -203,4 +228,333 @@ impl StorageResources { .map(|p| p.join(BUNDLE_DIRECTORY).join(ZONE_BUNDLE_DIRECTORY)) .collect() } + + /// Returns an iterator over all managed disks. + pub fn iter_managed(&self) -> impl Iterator { + self.values.iter().filter_map(|(identity, disk)| match disk { + ManagedDisk::ExplicitlyManaged(disk) => Some((identity, disk)), + ManagedDisk::ImplicitlyManaged(disk) => Some((identity, disk)), + _ => None, + }) + } + + /// Returns an iterator over all disks, managed or not. 
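+    ///
+    /// For example (a sketch; `all_disks` stands for an `AllDisks` snapshot
+    /// obtained from the `StorageHandle`):
+    ///
+    /// ```ignore
+    /// for (identity, variant, slot) in all_disks.iter_all() {
+    ///     println!("disk {identity:?} ({variant:?}) in slot {slot:?}");
+    /// }
+    /// ```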
+ pub fn iter_all( + &self, + ) -> impl Iterator<Item = (&DiskIdentity, DiskVariant, i64)> { + self.values.iter().map(|(identity, disk)| match disk { + ManagedDisk::ExplicitlyManaged(disk) => { + (identity, disk.variant(), disk.slot()) + } + ManagedDisk::ImplicitlyManaged(disk) => { + (identity, disk.variant(), disk.slot()) + } + ManagedDisk::Unmanaged(raw) => { + (identity, raw.variant(), raw.slot()) + } + }) + } +} + +/// The intersection of "physical disks noticed by hardware" and "physical +/// disks requested by the control plane". +#[derive(Debug)] +pub struct StorageResources { + log: Logger, + + key_requester: StorageKeyRequester, + + // All disks, real and synthetic, that exist within this sled + disks: AllDisks, + + // The last set of disks the control plane explicitly told us to manage. + // + // Only includes external storage (U.2s). + control_plane_disks: BTreeMap<DiskIdentity, OmicronPhysicalDiskConfig>, + + // Many clients are interested in when changes to the set of [AllDisks] + // occur. This watch channel is updated once these disks get updated. + disk_updates: watch::Sender<AllDisks>, +} + +impl StorageResources { + pub fn new( + log: &Logger, + mount_config: MountConfig, + key_requester: StorageKeyRequester, + ) -> Self { + let disks = + AllDisks { values: Arc::new(BTreeMap::new()), mount_config }; + Self { + log: log.new(o!("component" => "StorageResources")), + key_requester, + disks: disks.clone(), + control_plane_disks: BTreeMap::new(), + disk_updates: watch::Sender::new(disks), + } + } + + /// Monitors the set of disks for any updates + pub fn watch_disks(&self) -> watch::Receiver<AllDisks> { + self.disk_updates.subscribe() + } + + /// Gets the set of all disks + pub fn disks(&self) -> &AllDisks { + &self.disks + } + + /// Sets the "control plane disk" state, as last requested by Nexus. + /// + /// Does not attempt to manage any of the physical disks previously + /// observed. To synchronize the "set of requested disks" with the "set of + /// observed disks", call [Self::synchronize_disk_management]. + pub fn set_config(&mut self, config: &Vec<OmicronPhysicalDiskConfig>) { + self.control_plane_disks = config + .iter() + .map(|disk| (disk.identity.clone(), disk.clone())) + .collect(); + } + + pub fn get_config( + &self, + ) -> &BTreeMap<DiskIdentity, OmicronPhysicalDiskConfig> { + &self.control_plane_disks + } + + /// Attempts to "manage" all the U.2 disks requested by the control plane. + /// + /// If any requested physical disks have not been observed by the hardware + /// monitor, they are ignored. + /// If the hardware monitor has observed disks that are not requested, they + /// are ignored. + /// + /// Attempts to manage all disks possible, and returns an error on partial + /// failure, indicating "which disks have failed to be synchronized". + pub async fn synchronize_disk_management( + &mut self, + ) -> DisksManagementResult { + let mut updated = false; + let disks = Arc::make_mut(&mut self.disks.values); + info!(self.log, "Synchronizing disk management"); + + // "Unmanage" all disks no longer requested by the control plane. + // + // This updates the reported sets of "managed" disks, and performs no + // other modifications to the underlying storage. + for (identity, managed_disk) in &mut *disks { + match managed_disk { + // This leaves the presence of the disk still in "Self", but + // downgrades the disk to an unmanaged status. + ManagedDisk::ExplicitlyManaged(disk) => { + if self.control_plane_disks.get(identity).is_none() { + *managed_disk = + ManagedDisk::Unmanaged(RawDisk::from(disk.clone())); + updated = true; + } + } + _ => (), + } + } + + // "Manage" all disks that the control plane wants.
+ // + // If the disk can be successfully managed, and it's new, it will be + // formatted with a zpool identified by the Nexus-specified + // configuration. + let mut result = DisksManagementResult::default(); + for (identity, config) in &self.control_plane_disks { + let Some(managed_disk) = disks.get_mut(identity) else { + warn!( + self.log, + "Control plane disk requested, but not detected within sled"; + "disk_identity" => ?identity + ); + result.status.push(DiskManagementStatus { + identity: identity.clone(), + err: Some(DiskManagementError::NotFound), + }); + continue; + }; + info!(self.log, "Managing disk"; "disk_identity" => ?identity); + match managed_disk { + // Disk is currently unmanaged. Try to adopt the disk, which may + // involve formatting it, and emplacing the zpool. + ManagedDisk::Unmanaged(raw_disk) => { + match Self::begin_disk_management( + &self.log, + &self.disks.mount_config, + raw_disk, + config, + Some(&self.key_requester), + ) + .await + { + Ok(disk) => { + info!(self.log, "Disk management started successfully"; "disk_identity" => ?identity); + *managed_disk = disk; + updated = true; + } + Err(err) => { + warn!(self.log, "Cannot parse disk"; "err" => ?err); + result.status.push(DiskManagementStatus { + identity: identity.clone(), + err: Some(err), + }); + continue; + } + } + } + // Disk is already managed. Check that the configuration + // matches what we expect. + ManagedDisk::ExplicitlyManaged(disk) => { + let expected = config.pool_id; + let observed = disk.zpool_name().id(); + if expected != observed { + warn!( + self.log, + "Observed an unexpected zpool uuid"; + "expected" => ?expected, "observed" => ?observed + ); + result.status.push(DiskManagementStatus { + identity: identity.clone(), + err: Some(DiskManagementError::ZpoolUuidMismatch { + expected, + observed, + }), + }); + continue; + } + info!(self.log, "Disk already managed successfully"; "disk_identity" => ?identity); + } + // Skip disks that are managed implicitly + ManagedDisk::ImplicitlyManaged(_) => continue, + } + + result.status.push(DiskManagementStatus { + identity: identity.clone(), + err: None, + }); + } + + if updated { + self.disk_updates.send_replace(self.disks.clone()); + } + + return result; + } + + // Helper function to help transition an "unmanaged" disk to a "managed" + // disk. + async fn begin_disk_management( + log: &Logger, + mount_config: &MountConfig, + raw_disk: &RawDisk, + config: &OmicronPhysicalDiskConfig, + key_requester: Option<&StorageKeyRequester>, + ) -> Result { + info!(log, "Invoking Disk::new on an unmanaged disk"); + let disk = Disk::new( + &log, + mount_config, + raw_disk.clone(), + Some(config.pool_id), + key_requester, + ) + .await + .map_err(|err| { + warn!(log, "Disk::new failed"; "err" => ?err); + match err { + // We pick this error out and identify it separately because + // it may be transient, and should sometimes be handled with + // a retry. + DiskError::Dataset(DatasetError::KeyManager(_)) => { + DiskManagementError::KeyManager(err.to_string()) + } + err => DiskManagementError::Other(err.to_string()), + } + })?; + info!(log, "Disk::new completed successfully"; "disk_identity" => ?raw_disk.identity()); + Ok(ManagedDisk::ExplicitlyManaged(disk)) + } + + /// Tracks a new disk. + /// + /// For U.2s: Does not automatically attempt to manage disks -- for this, + /// the caller will need to also invoke + /// [`Self::synchronize_disk_management`]. 
+ /// + /// For M.2s: As no additional control plane guidance is necessary to adopt + /// M.2s, these are automatically managed. + pub(crate) async fn insert_disk( + &mut self, + disk: RawDisk, + ) -> Result<(), Error> { + let disk_identity = disk.identity().clone(); + info!(self.log, "Inserting disk"; "identity" => ?disk_identity); + if self.disks.values.contains_key(&disk_identity) { + info!(self.log, "Disk already exists"; "identity" => ?disk_identity); + return Ok(()); + } + + let disks = Arc::make_mut(&mut self.disks.values); + match disk.variant() { + DiskVariant::U2 => { + disks.insert(disk_identity, ManagedDisk::Unmanaged(disk)); + } + DiskVariant::M2 => { + let managed_disk = Disk::new( + &self.log, + &self.disks.mount_config, + disk, + None, + Some(&self.key_requester), + ) + .await?; + disks.insert( + disk_identity, + ManagedDisk::ImplicitlyManaged(managed_disk), + ); + } + } + self.disk_updates.send_replace(self.disks.clone()); + + Ok(()) + } + + /// Delete a disk and its zpool + /// + /// Return true, if data was changed, false otherwise + /// + /// Note: We never allow removal of synthetic disks in production as they + /// are only added once. + pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) { + info!(self.log, "Removing disk"; "identity" => ?id); + let Some(entry) = self.disks.values.get(id) else { + return; + }; + let synthetic = match entry { + ManagedDisk::ExplicitlyManaged(disk) + | ManagedDisk::ImplicitlyManaged(disk) => disk.is_synthetic(), + ManagedDisk::Unmanaged(raw) => raw.is_synthetic(), + }; + + cfg_if! { + if #[cfg(test)] { + // For testing purposes, we allow synthetic disks to be deleted. + // Silence an unused variable warning. + _ = synthetic; + } else { + // In production, we disallow removal of synthetic disks as they + // are only added once. + if synthetic { + return; + } + } + } + + // Safe to unwrap as we just checked the key existed above + Arc::make_mut(&mut self.disks.values).remove(id).unwrap(); + self.disk_updates.send_replace(self.disks.clone()); + } } diff --git a/smf/sled-agent/non-gimlet/config.toml b/smf/sled-agent/non-gimlet/config.toml index 432652c50b..9efdcfbb93 100644 --- a/smf/sled-agent/non-gimlet/config.toml +++ b/smf/sled-agent/non-gimlet/config.toml @@ -18,27 +18,29 @@ sidecar_revision.soft_zone = { front_port_count = 1, rear_port_count = 1 } # in-sync, rather than querying its NTP zone. skip_timesync = false -# For testing purposes, A file-backed zpool can be manually created with the -# following: +# For testing purposes, a file-backed virtual disk can be manually created with +# the following: # -# # truncate -s 10GB testpool.vdev -# # zpool create oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b "$PWD/testpool.vdev" +# # truncate -s 10GB .vdev # -# Note that you'll need to create one such zpool for each below, with a -# different vdev for each. The `create_virtual_hardware.sh` script does this -# for you. -zpools = [ - "oxi_a462a7f7-b628-40fe-80ff-4e4189e2d62b", - "oxi_b462a7f7-b628-40fe-80ff-4e4189e2d62b", - "oxp_d462a7f7-b628-40fe-80ff-4e4189e2d62b", - "oxp_e4b4dc87-ab46-49fb-a4b4-d361ae214c03", - "oxp_f4b4dc87-ab46-49fb-a4b4-d361ae214c03", - "oxp_14b4dc87-ab46-49fb-a4b4-d361ae214c03", - "oxp_24b4dc87-ab46-49fb-a4b4-d361ae214c03", - "oxp_cd70d7f6-2354-4bf2-8012-55bf9eaf7930", - "oxp_ceb4461c-cf56-4719-ad3c-14430bfdfb60", - "oxp_31bd71cd-4736-4a12-a387-9b74b050396f", - "oxp_616b26df-e62a-4c68-b506-f4a923d8aaf7", +# Note that you'll need to create one such file for each disk below. 
+# The `create_virtual_hardware.sh` script does this for you. +# +# These paths have the prefix of either "u2" or "m2", followed by an underscore, +# followed by a string that is embedded into their fake serial values. +vdevs = [ + "m2_0.vdev", + "m2_1.vdev", + + "u2_0.vdev", + "u2_1.vdev", + "u2_2.vdev", + "u2_3.vdev", + "u2_4.vdev", + "u2_5.vdev", + "u2_6.vdev", + "u2_7.vdev", + "u2_8.vdev", ] # Percentage of usable physical DRAM to use for the VMM reservoir, which diff --git a/tools/create_gimlet_virtual_hardware.sh b/tools/create_gimlet_virtual_hardware.sh index ad22cc26e7..da26bef3cd 100755 --- a/tools/create_gimlet_virtual_hardware.sh +++ b/tools/create_gimlet_virtual_hardware.sh @@ -29,4 +29,4 @@ if [[ -f "$MARKER" ]]; then fi ensure_run_as_root -ensure_zpools +ensure_vdevs diff --git a/tools/create_scrimlet_virtual_hardware.sh b/tools/create_scrimlet_virtual_hardware.sh index be7785a90d..5ae4e52258 100755 --- a/tools/create_scrimlet_virtual_hardware.sh +++ b/tools/create_scrimlet_virtual_hardware.sh @@ -60,6 +60,6 @@ function ensure_softnpu_zone { } ensure_run_as_root -ensure_zpools +ensure_vdevs ensure_uplink_vnic "$PHYSICAL_LINK" ensure_softnpu_zone diff --git a/tools/create_virtual_hardware.sh b/tools/create_virtual_hardware.sh index ef01af92bb..116032dc22 100755 --- a/tools/create_virtual_hardware.sh +++ b/tools/create_virtual_hardware.sh @@ -84,7 +84,7 @@ in the SoftNPU zone later to add those entries." } ensure_run_as_root -ensure_zpools +ensure_vdevs if [[ "$SOFTNPU_MODE" == "zone" ]]; then ensure_simulated_links "$PHYSICAL_LINK" diff --git a/tools/virtual_hardware.sh b/tools/virtual_hardware.sh index ade7ac58b3..883b98a04e 100755 --- a/tools/virtual_hardware.sh +++ b/tools/virtual_hardware.sh @@ -23,27 +23,23 @@ function fail { exit 1 } -# Create the ZFS zpools required for the sled agent, backed by file-based vdevs. -function ensure_zpools { - # Find the list of zpools the sled agent expects, from its configuration +# Create the virtual devices required by the sled agent. +function ensure_vdevs { + # Find the list of virtual devices the sled agent expects, from its configuration # file. - ZPOOL_TYPES=('oxp_' 'oxi_') - for ZPOOL_TYPE in "${ZPOOL_TYPES[@]}"; do - readarray -t ZPOOLS < <( \ - grep "\"$ZPOOL_TYPE" "$OMICRON_TOP/smf/sled-agent/non-gimlet/config.toml" | \ + VDEV_TYPES=('m2_' 'u2_') + for VDEV_TYPE in "${VDEV_TYPES[@]}"; do + readarray -t VDEVS < <( \ + grep "\"$VDEV_TYPE" "$OMICRON_TOP/smf/sled-agent/non-gimlet/config.toml" | \ sed 's/[ ",]//g' \ ) - for ZPOOL in "${ZPOOLS[@]}"; do - echo "Zpool: [$ZPOOL]" - VDEV_PATH="${ZPOOL_VDEV_DIR:-$OMICRON_TOP}/$ZPOOL.vdev" + for VDEV in "${VDEVS[@]}"; do + echo "Device: [$VDEV]" + VDEV_PATH="${VDEV_DIR:-/var/tmp}/$VDEV" if ! 
[[ -f "$VDEV_PATH" ]]; then dd if=/dev/zero of="$VDEV_PATH" bs=1 count=0 seek=20G fi - success "ZFS vdev $VDEV_PATH exists" - if [[ -z "$(zpool list -o name | grep $ZPOOL)" ]]; then - zpool create -o ashift=12 -f "$ZPOOL" "$VDEV_PATH" - fi - success "ZFS zpool $ZPOOL exists" + success "vdev $VDEV_PATH exists" done done } @@ -53,7 +49,7 @@ function try_destroy_zpools { for ZPOOL_TYPE in "${ZPOOL_TYPES[@]}"; do readarray -t ZPOOLS < <(zfs list -d 0 -o name | grep "^$ZPOOL_TYPE") for ZPOOL in "${ZPOOLS[@]}"; do - VDEV_FILE="${ZPOOL_VDEV_DIR:-$OMICRON_TOP}/$ZPOOL.vdev" + VDEV_FILE="${VDEV_DIR:-/var/tmp}/$VDEV" zfs destroy -r "$ZPOOL" && \ (zfs unmount "$ZPOOL" || true) && \ zpool destroy "$ZPOOL" && \ From e76251015baad13793821d8eabbba6b7708d87af Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 28 Mar 2024 21:05:51 -0700 Subject: [PATCH 013/334] [nexus] Add Physical Disk Policy, State (#5335) - Adds physical disk state, policy as defined in RFD 457 - Does not expose any way to modify this information - In the future, these values will toggled by both sled expungement, and explicitly through a physical disk API Fixes https://github.com/oxidecomputer/omicron/issues/5153 --- nexus/db-model/src/lib.rs | 4 + nexus/db-model/src/physical_disk.rs | 11 +- nexus/db-model/src/physical_disk_policy.rs | 54 ++++++ nexus/db-model/src/physical_disk_state.rs | 54 ++++++ nexus/db-model/src/schema.rs | 2 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/mod.rs | 160 ++++++++++++++++-- .../src/db/datastore/physical_disk.rs | 42 +++++ nexus/db-queries/src/db/pool_connection.rs | 2 + .../src/db/queries/region_allocation.rs | 18 +- .../output/region_allocate_distinct_sleds.sql | 3 + .../output/region_allocate_random_sleds.sql | 3 + nexus/src/app/sled.rs | 10 ++ nexus/src/external_api/http_entrypoints.rs | 24 +++ nexus/test-utils/src/lib.rs | 1 + nexus/test-utils/src/resource_helpers.rs | 42 +++-- nexus/tests/integration_tests/disks.rs | 12 +- nexus/tests/integration_tests/endpoints.rs | 15 +- nexus/tests/integration_tests/unauthorized.rs | 12 +- .../integration_tests/volume_management.rs | 4 +- nexus/tests/output/nexus_tags.txt | 1 + nexus/types/src/external_api/params.rs | 1 + nexus/types/src/external_api/views.rs | 96 +++++++++++ openapi/nexus.json | 111 ++++++++++++ schema/crdb/dbinit.sql | 39 ++++- .../physical-disk-state-and-policy/up01.sql | 4 + .../physical-disk-state-and-policy/up02.sql | 4 + .../physical-disk-state-and-policy/up03.sql | 5 + .../physical-disk-state-and-policy/up04.sql | 3 + 29 files changed, 689 insertions(+), 51 deletions(-) create mode 100644 nexus/db-model/src/physical_disk_policy.rs create mode 100644 nexus/db-model/src/physical_disk_state.rs create mode 100644 schema/crdb/physical-disk-state-and-policy/up01.sql create mode 100644 schema/crdb/physical-disk-state-and-policy/up02.sql create mode 100644 schema/crdb/physical-disk-state-and-policy/up03.sql create mode 100644 schema/crdb/physical-disk-state-and-policy/up04.sql diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 7124103b30..a2e9565d46 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -46,6 +46,8 @@ mod network_interface; mod oximeter_info; mod physical_disk; mod physical_disk_kind; +mod physical_disk_policy; +mod physical_disk_state; mod probe; mod producer_endpoint; mod project; @@ -150,6 +152,8 @@ pub use network_interface::*; pub use oximeter_info::*; pub use physical_disk::*; pub use physical_disk_kind::*; +pub use physical_disk_policy::*; +pub 
use physical_disk_state::*; pub use probe::*; pub use producer_endpoint::*; pub use project::*; diff --git a/nexus/db-model/src/physical_disk.rs b/nexus/db-model/src/physical_disk.rs index 3a011d0c72..c6ef97ee1f 100644 --- a/nexus/db-model/src/physical_disk.rs +++ b/nexus/db-model/src/physical_disk.rs @@ -2,7 +2,9 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use super::{Generation, PhysicalDiskKind}; +use super::{ + Generation, PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, +}; use crate::collection::DatastoreCollectionConfig; use crate::schema::{physical_disk, zpool}; use chrono::{DateTime, Utc}; @@ -25,9 +27,12 @@ pub struct PhysicalDisk { pub variant: PhysicalDiskKind, pub sled_id: Uuid, + pub disk_policy: PhysicalDiskPolicy, + pub disk_state: PhysicalDiskState, } impl PhysicalDisk { + /// Creates a new in-service, active disk pub fn new( id: Uuid, vendor: String, @@ -45,6 +50,8 @@ impl PhysicalDisk { model, variant, sled_id, + disk_policy: PhysicalDiskPolicy::InService, + disk_state: PhysicalDiskState::Active, } } @@ -61,6 +68,8 @@ impl From<PhysicalDisk> for views::PhysicalDisk { fn from(disk: PhysicalDisk) -> Self { Self { identity: disk.identity(), + policy: disk.disk_policy.into(), + state: disk.disk_state.into(), sled_id: Some(disk.sled_id), vendor: disk.vendor, serial: disk.serial, diff --git a/nexus/db-model/src/physical_disk_policy.rs b/nexus/db-model/src/physical_disk_policy.rs new file mode 100644 index 0000000000..85b6feccf2 --- /dev/null +++ b/nexus/db-model/src/physical_disk_policy.rs @@ -0,0 +1,54 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database representation of a disk's operator-defined policy. +//! +//! This is related to, but different from `PhysicalDiskState`: a disk's **policy** is +//! its disposition as specified by the operator, while its **state** refers to +//! what's currently on it, as determined by Nexus. +//! +//! For example, a disk might be in the `Active` state, but have a policy of +//! `Expunged` -- this would mean that Nexus knows about resources currently +//! provisioned on the disk, but the operator has said that it should be marked +//! as gone.
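+//!
+//! A small sketch of how this maps to the external API view (both
+//! conversions are defined below; the assertion relies only on the
+//! `PartialEq` and `Debug` derives on the model type):
+//!
+//! ```ignore
+//! let policy = PhysicalDiskPolicy::InService;
+//! let view: views::PhysicalDiskPolicy = policy.into();
+//! assert_eq!(PhysicalDiskPolicy::from(view), policy);
+//! ```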
+ +use super::impl_enum_type; +use nexus_types::external_api::views; +use serde::{Deserialize, Serialize}; + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "physical_disk_policy", schema = "public"))] + pub struct PhysicalDiskPolicyEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] + #[diesel(sql_type = PhysicalDiskPolicyEnum)] + pub enum PhysicalDiskPolicy; + + // Enum values + InService => b"in_service" + Expunged => b"expunged" +); + +impl From for views::PhysicalDiskPolicy { + fn from(policy: PhysicalDiskPolicy) -> Self { + match policy { + PhysicalDiskPolicy::InService => { + views::PhysicalDiskPolicy::InService + } + PhysicalDiskPolicy::Expunged => views::PhysicalDiskPolicy::Expunged, + } + } +} + +impl From for PhysicalDiskPolicy { + fn from(policy: views::PhysicalDiskPolicy) -> Self { + match policy { + views::PhysicalDiskPolicy::InService => { + PhysicalDiskPolicy::InService + } + views::PhysicalDiskPolicy::Expunged => PhysicalDiskPolicy::Expunged, + } + } +} diff --git a/nexus/db-model/src/physical_disk_state.rs b/nexus/db-model/src/physical_disk_state.rs new file mode 100644 index 0000000000..0dcc8f139a --- /dev/null +++ b/nexus/db-model/src/physical_disk_state.rs @@ -0,0 +1,54 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Database representation of a physical disk's state as understood by Nexus. + +use super::impl_enum_type; +use nexus_types::external_api::views; +use serde::{Deserialize, Serialize}; +use std::fmt; +use strum::EnumIter; + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "physical_disk_state", schema = "public"))] + pub struct PhysicalDiskStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq, Eq, EnumIter)] + #[diesel(sql_type = PhysicalDiskStateEnum)] + pub enum PhysicalDiskState; + + // Enum values + Active => b"active" + Decommissioned => b"decommissioned" +); + +impl fmt::Display for PhysicalDiskState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // Forward to the canonical implementation in nexus-types. + views::PhysicalDiskState::from(*self).fmt(f) + } +} + +impl From for views::PhysicalDiskState { + fn from(state: PhysicalDiskState) -> Self { + match state { + PhysicalDiskState::Active => views::PhysicalDiskState::Active, + PhysicalDiskState::Decommissioned => { + views::PhysicalDiskState::Decommissioned + } + } + } +} + +impl From for PhysicalDiskState { + fn from(state: views::PhysicalDiskState) -> Self { + match state { + views::PhysicalDiskState::Active => PhysicalDiskState::Active, + views::PhysicalDiskState::Decommissioned => { + PhysicalDiskState::Decommissioned + } + } + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index c533b426bd..a5b217d222 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -900,6 +900,8 @@ table! 
{ model -> Text, variant -> crate::PhysicalDiskKindEnum, + disk_policy -> crate::PhysicalDiskPolicyEnum, + disk_state -> crate::PhysicalDiskStateEnum, sled_id -> Uuid, } } diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 487d557c06..64ddc7c451 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(48, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(49, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(49, "physical-disk-state-and-policy"), KnownVersion::new(48, "add-metrics-producers-time-modified-index"), KnownVersion::new(47, "add-view-for-bgp-peer-configs"), KnownVersion::new(46, "first-named-migration"), diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index a6ae108376..af6cc73350 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -383,9 +383,9 @@ mod test { use crate::db::lookup::LookupPath; use crate::db::model::{ BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, - PhysicalDisk, PhysicalDiskKind, Project, Rack, Region, Service, - ServiceKind, SiloUser, SledBaseboard, SledSystemHardware, SledUpdate, - SshKey, VpcSubnet, Zpool, + PhysicalDisk, PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, + Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard, + SledSystemHardware, SledUpdate, SshKey, VpcSubnet, Zpool, }; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use chrono::{Duration, Utc}; @@ -630,19 +630,23 @@ mod test { } const TEST_VENDOR: &str = "test-vendor"; - const TEST_SERIAL: &str = "test-serial"; const TEST_MODEL: &str = "test-model"; + /// Creates a disk on a sled of a particular kind. + /// + /// The "serial" value of the disk is supplied by the + /// caller, and is arbitrary, but should be unique. 
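+    ///
+    /// A sketch of a typical call from these tests (the serial string is
+    /// arbitrary, as long as it is unique per disk):
+    ///
+    /// ```ignore
+    /// let physical_disk_id = create_test_physical_disk(
+    ///     &datastore,
+    ///     &opctx,
+    ///     sled_id,
+    ///     PhysicalDiskKind::U2,
+    ///     "serial-0".to_string(),
+    /// )
+    /// .await;
+    /// ```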
async fn create_test_physical_disk( datastore: &DataStore, opctx: &OpContext, sled_id: Uuid, kind: PhysicalDiskKind, + serial: String, ) -> Uuid { let physical_disk = PhysicalDisk::new( Uuid::new_v4(), TEST_VENDOR.into(), - TEST_SERIAL.into(), + serial, TEST_MODEL.into(), kind, sled_id, @@ -832,18 +836,21 @@ mod test { .map(|sled_id| { let sled_id_iter: Vec = (0..9).map(|_| sled_id).collect(); - stream::iter(sled_id_iter).then(|sled_id| { - let disk_id_future = create_test_physical_disk( - &datastore, - opctx, - sled_id, - PhysicalDiskKind::U2, - ); - async move { - let disk_id = disk_id_future.await; - PhysicalDisk { sled_id, disk_id } - } - }) + stream::iter(sled_id_iter).enumerate().then( + |(i, sled_id)| { + let disk_id_future = create_test_physical_disk( + &datastore, + opctx, + sled_id, + PhysicalDiskKind::U2, + format!("{sled_id}, disk index {i}"), + ); + async move { + let disk_id = disk_id_future.await; + PhysicalDisk { sled_id, disk_id } + } + }, + ) }) .flatten() .collect() @@ -1229,6 +1236,7 @@ mod test { &opctx, sled_id, PhysicalDiskKind::U2, + "fake serial".to_string(), ) .await; @@ -1326,6 +1334,7 @@ mod test { &opctx, sled_id, PhysicalDiskKind::U2, + "fake serial".to_string(), ) .await; @@ -1393,6 +1402,123 @@ mod test { logctx.cleanup_successful(); } + #[tokio::test] + async fn test_region_allocation_only_considers_disks_in_service() { + let logctx = dev::test_setup_log( + "test_region_allocation_only_considers_disks_in_service", + ); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a sled... + let sled_id = create_test_sled(&datastore).await; + + // ... and create several disks on that sled, each with a zpool/dataset. + let mut physical_disk_ids = vec![]; + for i in 0..REGION_REDUNDANCY_THRESHOLD { + let physical_disk_id = create_test_physical_disk( + &datastore, + &opctx, + sled_id, + PhysicalDiskKind::U2, + format!("fake serial #{i}"), + ) + .await; + let zpool_id = create_test_zpool( + &datastore, + &opctx, + sled_id, + physical_disk_id, + ) + .await; + let bogus_addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 8080, 0, 0); + let dataset = Dataset::new( + Uuid::new_v4(), + zpool_id, + bogus_addr, + DatasetKind::Crucible, + ); + datastore.dataset_upsert(dataset).await.unwrap(); + physical_disk_ids.push(physical_disk_id); + } + + // Check the following combinations of physical disk policy/state + // on region allocation. Since we only created + // REGION_REDUNDANCY_THRESHOLD disks/zpools/datasets, updating the + // state of a single disk should be sufficient to prevent the + // allocations from occurring. + use PhysicalDiskPolicy as Policy; + use PhysicalDiskState as State; + + // Just a bool with a fancier name -- determines whether or not + // we expect the policy/state combinations to pass or not. + enum AllocationShould { + Fail, + Succeed, + } + + let policy_state_combos = [ + (Policy::Expunged, State::Active, AllocationShould::Fail), + (Policy::Expunged, State::Decommissioned, AllocationShould::Fail), + (Policy::InService, State::Decommissioned, AllocationShould::Fail), + // Save this one for last, since it actually leaves an allocation + // lying around. + (Policy::InService, State::Active, AllocationShould::Succeed), + ]; + + let volume_id = Uuid::new_v4(); + let params = create_test_disk_create_params( + "disk", + ByteCount::from_mebibytes_u32(500), + ); + + for (policy, state, expected) in policy_state_combos { + // Update policy/state only on a single physical disk. 
+ // + // The rest are assumed "in service" + "active". + datastore + .physical_disk_update_policy( + &opctx, + physical_disk_ids[0], + policy, + ) + .await + .unwrap(); + datastore + .physical_disk_update_state(&opctx, physical_disk_ids[0], state) + .await + .unwrap(); + + let result = datastore + .region_allocate( + &opctx, + volume_id, + ¶ms.disk_source, + params.size, + &RegionAllocationStrategy::Random { seed: Some(0) }, + ) + .await; + + match expected { + AllocationShould::Fail => { + let err = result.unwrap_err(); + let expected = "Not enough zpool space to allocate disks"; + assert!( + err.to_string().contains(expected), + "Saw error: \'{err}\', but expected \'{expected}\'" + ); + assert!(matches!(err, Error::InsufficientCapacity { .. })); + } + AllocationShould::Succeed => { + let _ = result.expect("Allocation should have succeeded"); + } + } + } + + let _ = db.cleanup().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_region_allocation_out_of_space_fails() { let logctx = diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index b977c4dffe..b97853dd06 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -13,6 +13,8 @@ use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::model::PhysicalDisk; +use crate::db::model::PhysicalDiskPolicy; +use crate::db::model::PhysicalDiskState; use crate::db::model::Sled; use crate::db::pagination::paginated; use async_bb8_diesel::AsyncRunQueryDsl; @@ -84,6 +86,46 @@ impl DataStore { Ok(disk_in_db) } + pub async fn physical_disk_update_policy( + &self, + opctx: &OpContext, + id: Uuid, + policy: PhysicalDiskPolicy, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::physical_disk::dsl; + + diesel::update(dsl::physical_disk.filter(dsl::id.eq(id))) + .filter(dsl::time_deleted.is_null()) + .set(dsl::disk_policy.eq(policy)) + .execute_async(&*self.pool_connection_authorized(&opctx).await?) + .await + .map_err(|err| { + public_error_from_diesel(err, ErrorHandler::Server) + })?; + Ok(()) + } + + pub async fn physical_disk_update_state( + &self, + opctx: &OpContext, + id: Uuid, + state: PhysicalDiskState, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + use db::schema::physical_disk::dsl; + + diesel::update(dsl::physical_disk.filter(dsl::id.eq(id))) + .filter(dsl::time_deleted.is_null()) + .set(dsl::disk_state.eq(state)) + .execute_async(&*self.pool_connection_authorized(&opctx).await?) 
+ .await + .map_err(|err| { + public_error_from_diesel(err, ErrorHandler::Server) + })?; + Ok(()) + } + pub async fn physical_disk_list( &self, opctx: &OpContext, diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index 0331a3a103..bb455cbf2d 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -55,6 +55,8 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ "ip_pool_resource_type", "network_interface_kind", "physical_disk_kind", + "physical_disk_policy", + "physical_disk_state", "producer_kind", "provider_type", "root_of_trust_page_which", diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 2e4f4cd776..971090ccaa 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -131,21 +131,21 @@ pub fn allocation_query( }; let builder = builder.sql(" old_zpool_usage.pool_id - FROM ( + FROM old_zpool_usage - INNER JOIN + INNER JOIN (zpool INNER JOIN sled ON (zpool.sled_id = sled.id)) ON (zpool.id = old_zpool_usage.pool_id) - ) + INNER JOIN + physical_disk ON (zpool.physical_disk_id = physical_disk.id) WHERE ( - ((old_zpool_usage.size_used + ").param().sql(" ) <= + (old_zpool_usage.size_used + ").param().sql(" ) <= (SELECT total_size FROM omicron.public.inv_zpool WHERE inv_zpool.id = old_zpool_usage.pool_id ORDER BY inv_zpool.time_collected DESC LIMIT 1) - ) - AND - (sled.sled_policy = 'in_service') - AND - (sled.sled_state = 'active') + AND sled.sled_policy = 'in_service' + AND sled.sled_state = 'active' + AND physical_disk.disk_policy = 'in_service' + AND physical_disk.disk_state = 'active' )" ).bind::(size_delta as i64); diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index 7aa85458a6..e84d47d2bb 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -34,6 +34,7 @@ WITH old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id + INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id WHERE (old_zpool_usage.size_used + $2) <= ( @@ -50,6 +51,8 @@ WITH ) AND sled.sled_policy = 'in_service' AND sled.sled_state = 'active' + AND physical_disk.disk_policy = 'in_service' + AND physical_disk.disk_state = 'active' ORDER BY zpool.sled_id, md5(CAST(zpool.id AS BYTES) || $3) ), diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index 0918c8f2d1..85e5dc85ef 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -34,6 +34,7 @@ WITH old_zpool_usage INNER JOIN (zpool INNER JOIN sled ON zpool.sled_id = sled.id) ON zpool.id = old_zpool_usage.pool_id + INNER JOIN physical_disk ON zpool.physical_disk_id = physical_disk.id WHERE (old_zpool_usage.size_used + $2) <= ( @@ -50,6 +51,8 @@ WITH ) AND sled.sled_policy = 'in_service' AND sled.sled_state = 'active' + AND physical_disk.disk_policy = 'in_service' + AND physical_disk.disk_state = 'active' ), candidate_datasets AS ( diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 4bb4d6daef..de6eb84334 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -4,6 
+4,7 @@ //! Sleds, and the hardware and services within them. +use crate::external_api::params; use crate::internal_api::params::{ PhysicalDiskPutRequest, SledAgentInfo, SledRole, ZpoolPutRequest, }; @@ -171,6 +172,15 @@ impl super::Nexus { // Physical disks + pub async fn physical_disk_lookup<'a>( + &'a self, + opctx: &'a OpContext, + disk_selector: ¶ms::PhysicalDiskPath, + ) -> Result, Error> { + Ok(lookup::LookupPath::new(&opctx, &self.db_datastore) + .physical_disk(disk_selector.disk_id)) + } + pub(crate) async fn sled_list_physical_disks( &self, opctx: &OpContext, diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 551ef00817..6fa530b49d 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -235,6 +235,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(sled_instance_list)?; api.register(sled_physical_disk_list)?; api.register(physical_disk_list)?; + api.register(physical_disk_view)?; api.register(switch_list)?; api.register(switch_view)?; api.register(sled_list_uninitialized)?; @@ -5389,6 +5390,29 @@ async fn physical_disk_list( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Get a physical disk +#[endpoint { + method = GET, + path = "/v1/system/hardware/disks/{disk_id}", + tags = ["system/hardware"], +}] +async fn physical_disk_view( + rqctx: RequestContext>, + path_params: Path, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + + let (.., physical_disk) = + nexus.physical_disk_lookup(&opctx, &path).await?.fetch().await?; + Ok(HttpResponseOk(physical_disk.into())) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Switches /// List switches diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index c124e3b58f..81814efda3 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -82,6 +82,7 @@ pub const SLED_AGENT_UUID: &str = "b6d65341-167c-41df-9b5c-41cded99c229"; pub const SLED_AGENT2_UUID: &str = "039be560-54cc-49e3-88df-1a29dadbf913"; pub const RACK_UUID: &str = "c19a698f-c6f9-4a17-ae30-20d711b8f7dc"; pub const SWITCH_UUID: &str = "dae4e1f1-410e-4314-bff1-fec0504be07e"; +pub const PHYSICAL_DISK_UUID: &str = "fbf4e1f1-410e-4314-bff1-fec0504be07e"; pub const OXIMETER_UUID: &str = "39e6175b-4df2-4730-b11d-cbc1e60a2e78"; pub const PRODUCER_UUID: &str = "a6458b7d-87c3-4483-be96-854d814c20de"; pub const RACK_SUBNET: &str = "fd00:1122:3344:0100::/56"; diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index b50a60eb8b..942ca63f58 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -708,8 +708,17 @@ pub struct DiskTest { impl DiskTest { pub const DEFAULT_ZPOOL_SIZE_GIB: u32 = 10; + pub const DEFAULT_ZPOOL_COUNT: u32 = 3; + + /// Creates a new "DiskTest", but does not actually add any zpools. + pub async fn empty( + cptestctx: &ControlPlaneTestContext, + ) -> Self { + let sled_agent = cptestctx.sled_agent.sled_agent.clone(); + + Self { sled_agent, zpools: vec![] } + } - // Creates fake physical storage, an organization, and a project. 
pub async fn new( cptestctx: &ControlPlaneTestContext, ) -> Self { @@ -718,10 +727,8 @@ impl DiskTest { let mut disk_test = Self { sled_agent, zpools: vec![] }; // Create three Zpools, each 10 GiB, each with one Crucible dataset. - for _ in 0..3 { - disk_test - .add_zpool_with_dataset(cptestctx, Self::DEFAULT_ZPOOL_SIZE_GIB) - .await; + for _ in 0..Self::DEFAULT_ZPOOL_COUNT { + disk_test.add_zpool_with_dataset(cptestctx).await; } disk_test @@ -730,21 +737,36 @@ impl DiskTest { pub async fn add_zpool_with_dataset( &mut self, cptestctx: &ControlPlaneTestContext, + ) { + self.add_zpool_with_dataset_ext( + cptestctx, + Uuid::new_v4(), + Uuid::new_v4(), + Uuid::new_v4(), + Self::DEFAULT_ZPOOL_SIZE_GIB, + ) + .await + } + + pub async fn add_zpool_with_dataset_ext( + &mut self, + cptestctx: &ControlPlaneTestContext, + physical_disk_id: Uuid, + zpool_id: Uuid, + dataset_id: Uuid, gibibytes: u32, ) { // To get a dataset, we actually need to create a new simulated physical // disk, zpool, and dataset, all contained within one another. let zpool = TestZpool { - id: Uuid::new_v4(), + id: zpool_id, size: ByteCount::from_gibibytes_u32(gibibytes), - datasets: vec![TestDataset { id: Uuid::new_v4() }], + datasets: vec![TestDataset { id: dataset_id }], }; - let physical_disk_id = Uuid::new_v4(); - let disk_identity = DiskIdentity { vendor: "test-vendor".into(), - serial: "test-serial".into(), + serial: format!("totally-unique-serial: {}", physical_disk_id), model: "test-model".into(), }; diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 5464d7e589..7337d1b009 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -967,9 +967,9 @@ async fn test_disk_backed_by_multiple_region_sets( assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); // Create another three zpools, all 10 gibibytes, each with one dataset - test.add_zpool_with_dataset(cptestctx, 10).await; - test.add_zpool_with_dataset(cptestctx, 10).await; - test.add_zpool_with_dataset(cptestctx, 10).await; + test.add_zpool_with_dataset(cptestctx).await; + test.add_zpool_with_dataset(cptestctx).await; + test.add_zpool_with_dataset(cptestctx).await; create_project_and_pool(client).await; @@ -1682,9 +1682,9 @@ async fn test_multiple_disks_multiple_zpools( // Assert default is still 10 GiB assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); - test.add_zpool_with_dataset(cptestctx, 10).await; - test.add_zpool_with_dataset(cptestctx, 10).await; - test.add_zpool_with_dataset(cptestctx, 10).await; + test.add_zpool_with_dataset(cptestctx).await; + test.add_zpool_with_dataset(cptestctx).await; + test.add_zpool_with_dataset(cptestctx).await; create_project_and_pool(client).await; diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index b2b1e72c23..1003722723 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -15,6 +15,7 @@ use nexus_db_queries::authn; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_db_queries::db::identity::Resource; use nexus_test_utils::resource_helpers::DiskTest; +use nexus_test_utils::PHYSICAL_DISK_UUID; use nexus_test_utils::RACK_UUID; use nexus_test_utils::SLED_AGENT_UUID; use nexus_test_utils::SWITCH_UUID; @@ -56,7 +57,9 @@ pub static DEMO_SLED_PROVISION_POLICY: Lazy = pub static HARDWARE_SWITCH_URL: Lazy = Lazy::new(|| format!("/v1/system/hardware/switches/{}", SWITCH_UUID)); -pub const HARDWARE_DISK_URL: &'static str = 
"/v1/system/hardware/disks"; +pub const HARDWARE_DISKS_URL: &'static str = "/v1/system/hardware/disks"; +pub static HARDWARE_DISK_URL: Lazy = + Lazy::new(|| format!("/v1/system/hardware/disks/{}", PHYSICAL_DISK_UUID)); pub static HARDWARE_SLED_DISK_URL: Lazy = Lazy::new(|| { format!("/v1/system/hardware/sleds/{}/disks", SLED_AGENT_UUID) }); @@ -1956,12 +1959,20 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { }, VerifyEndpoint { - url: &HARDWARE_DISK_URL, + url: &HARDWARE_DISKS_URL, visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![AllowedMethod::Get], }, + VerifyEndpoint { + url: &HARDWARE_DISK_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![AllowedMethod::Get], + }, + + VerifyEndpoint { url: &HARDWARE_SLED_DISK_URL, visibility: Visibility::Public, diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs index 3671564866..d9f5f38c1f 100644 --- a/nexus/tests/integration_tests/unauthorized.rs +++ b/nexus/tests/integration_tests/unauthorized.rs @@ -54,7 +54,17 @@ type ControlPlaneTestContext = // 403). #[nexus_test] async fn test_unauthorized(cptestctx: &ControlPlaneTestContext) { - DiskTest::new(cptestctx).await; + let mut disk_test = DiskTest::new(cptestctx).await; + disk_test + .add_zpool_with_dataset_ext( + cptestctx, + nexus_test_utils::PHYSICAL_DISK_UUID.parse().unwrap(), + uuid::Uuid::new_v4(), + uuid::Uuid::new_v4(), + DiskTest::DEFAULT_ZPOOL_SIZE_GIB, + ) + .await; + let client = &cptestctx.external_client; let log = &cptestctx.logctx.log; let mut setup_results = std::collections::BTreeMap::new(); diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index 289446fe85..daf78823ed 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -2052,9 +2052,7 @@ async fn test_keep_your_targets_straight(cptestctx: &ControlPlaneTestContext) { // Four zpools, one dataset each let mut disk_test = DiskTest::new(&cptestctx).await; - disk_test - .add_zpool_with_dataset(&cptestctx, DiskTest::DEFAULT_ZPOOL_SIZE_GIB) - .await; + disk_test.add_zpool_with_dataset(&cptestctx).await; // This bug occurs when region_snapshot records share a snapshot_addr, so // insert those here manually. diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 64413f396e..91d2504a57 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -131,6 +131,7 @@ networking_switch_port_apply_settings POST /v1/system/hardware/switch-por networking_switch_port_clear_settings DELETE /v1/system/hardware/switch-port/{port}/settings networking_switch_port_list GET /v1/system/hardware/switch-port physical_disk_list GET /v1/system/hardware/disks +physical_disk_view GET /v1/system/hardware/disks/{disk_id} rack_list GET /v1/system/hardware/racks rack_view GET /v1/system/hardware/racks/{rack_id} sled_add POST /v1/system/hardware/sleds diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 51d0869821..1ba373ff56 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -88,6 +88,7 @@ id_path_param!(GroupPath, group_id, "group"); // ID that can be used to deterministically generate the UUID. 
id_path_param!(SledPath, sled_id, "sled"); id_path_param!(SwitchPath, switch_id, "switch"); +id_path_param!(PhysicalDiskPath, disk_id, "physical disk"); // Internal API parameters id_path_param!(BlueprintPath, blueprint_id, "blueprint"); diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index fcea302f72..f8997d6ff9 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -742,6 +742,11 @@ pub struct PhysicalDisk { #[serde(flatten)] pub identity: AssetIdentityMetadata, + /// The operator-defined policy for a physical disk. + pub policy: PhysicalDiskPolicy, + /// The current state Nexus believes the disk to be in. + pub state: PhysicalDiskState, + /// The sled to which this disk is attached, if any. pub sled_id: Option, @@ -752,6 +757,97 @@ pub struct PhysicalDisk { pub form_factor: PhysicalDiskKind, } +/// The operator-defined policy of a physical disk. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq, +)] +#[serde(rename_all = "snake_case", tag = "kind")] +pub enum PhysicalDiskPolicy { + /// The operator has indicated that the disk is in-service. + InService, + + /// The operator has indicated that the disk has been permanently removed + /// from service. + /// + /// This is a terminal state: once a particular disk ID is expunged, it + /// will never return to service. (The actual hardware may be reused, but + /// it will be treated as a brand-new disk.) + /// + /// An expunged disk is always non-provisionable. + Expunged, + // NOTE: if you add a new value here, be sure to add it to + // the `IntoEnumIterator` impl below! +} + +// Can't automatically derive strum::EnumIter because that doesn't provide a +// way to iterate over nested enums. +impl IntoEnumIterator for PhysicalDiskPolicy { + type Iterator = std::array::IntoIter; + + fn iter() -> Self::Iterator { + [Self::InService, Self::Expunged].into_iter() + } +} + +impl PhysicalDiskPolicy { + /// Creates a new `PhysicalDiskPolicy` that is in-service. + pub fn in_service() -> Self { + Self::InService + } + + /// Returns true if the disk can be decommissioned in this state. + pub fn is_decommissionable(&self) -> bool { + // This should be kept in sync with decommissionable_states below. + match self { + Self::InService => false, + Self::Expunged => true, + } + } +} + +impl fmt::Display for PhysicalDiskPolicy { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PhysicalDiskPolicy::InService => write!(f, "in service"), + PhysicalDiskPolicy::Expunged => write!(f, "expunged"), + } + } +} + +/// The current state of the disk, as determined by Nexus. +#[derive( + Copy, + Clone, + Debug, + Deserialize, + Serialize, + JsonSchema, + PartialEq, + Eq, + EnumIter, +)] +#[serde(rename_all = "snake_case")] +pub enum PhysicalDiskState { + /// The disk is currently active, and has resources allocated on it. + Active, + + /// The disk has been permanently removed from service. + /// + /// This is a terminal state: once a particular disk ID is decommissioned, + /// it will never return to service. (The actual hardware may be reused, + /// but it will be treated as a brand-new disk.) 
+ Decommissioned, +} + +impl fmt::Display for PhysicalDiskState { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + PhysicalDiskState::Active => write!(f, "active"), + PhysicalDiskState::Decommissioned => write!(f, "decommissioned"), + } + } +} + // SILO USERS /// View of a User diff --git a/openapi/nexus.json b/openapi/nexus.json index 7d236de7a3..3cc991126d 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -4113,6 +4113,45 @@ } } }, + "/v1/system/hardware/disks/{disk_id}": { + "get": { + "tags": [ + "system/hardware" + ], + "summary": "Get a physical disk", + "operationId": "physical_disk_view", + "parameters": [ + { + "in": "path", + "name": "disk_id", + "description": "ID of the physical disk", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/PhysicalDisk" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/system/hardware/racks": { "get": { "tags": [ @@ -14419,6 +14458,14 @@ "model": { "type": "string" }, + "policy": { + "description": "The operator-defined policy for a physical disk.", + "allOf": [ + { + "$ref": "#/components/schemas/PhysicalDiskPolicy" + } + ] + }, "serial": { "type": "string" }, @@ -14428,6 +14475,14 @@ "type": "string", "format": "uuid" }, + "state": { + "description": "The current state Nexus believes the disk to be in.", + "allOf": [ + { + "$ref": "#/components/schemas/PhysicalDiskState" + } + ] + }, "time_created": { "description": "timestamp when this resource was created", "type": "string", @@ -14446,7 +14501,9 @@ "form_factor", "id", "model", + "policy", "serial", + "state", "time_created", "time_modified", "vendor" @@ -14460,6 +14517,41 @@ "u2" ] }, + "PhysicalDiskPolicy": { + "description": "The operator-defined policy of a physical disk.", + "oneOf": [ + { + "description": "The operator has indicated that the disk is in-service.", + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "in_service" + ] + } + }, + "required": [ + "kind" + ] + }, + { + "description": "The operator has indicated that the disk has been permanently removed from service.\n\nThis is a terminal state: once a particular disk ID is expunged, it will never return to service. (The actual hardware may be reused, but it will be treated as a brand-new disk.)\n\nAn expunged disk is always non-provisionable.", + "type": "object", + "properties": { + "kind": { + "type": "string", + "enum": [ + "expunged" + ] + } + }, + "required": [ + "kind" + ] + } + ] + }, "PhysicalDiskResultsPage": { "description": "A single page of results", "type": "object", @@ -14481,6 +14573,25 @@ "items" ] }, + "PhysicalDiskState": { + "description": "The current state of the disk, as determined by Nexus.", + "oneOf": [ + { + "description": "The disk is currently active, and has resources allocated on it.", + "type": "string", + "enum": [ + "active" + ] + }, + { + "description": "The disk has been permanently removed from service.\n\nThis is a terminal state: once a particular disk ID is decommissioned, it will never return to service. 
(The actual hardware may be reused, but it will be treated as a brand-new disk.)", + "type": "string", + "enum": [ + "decommissioned" + ] + } + ] + }, "Ping": { "type": "object", "properties": { diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 89546415e7..da3dbb3f4c 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -331,6 +331,40 @@ CREATE TYPE IF NOT EXISTS omicron.public.physical_disk_kind AS ENUM ( 'u2' ); +-- The disposition for a particular physical disk. +-- This is updated by the operator, either explicitly through an operator API, +-- or implicitly when altering sled policy. +CREATE TYPE IF NOT EXISTS omicron.public.physical_disk_policy AS ENUM ( + -- The disk is in service, and new resources can be provisioned onto it. + 'in_service', + -- The disk has been, or will be, removed from the rack, and it should be + -- assumed that any resources currently on it are now permanently missing. + 'expunged' +); + +-- The actual state of a physical disk. This is updated exclusively by Nexus. +-- +-- Nexus's goal is to match the physical disk's state with the +-- operator-indicated policy. For example, if the policy is "expunged" and the +-- state is "active", Nexus will assume that the physical disk is gone. Based +-- on that, Nexus will reallocate resources currently on the expunged disk +-- elsewhere, etc. Once the expunged disk no longer has any resources attached +-- to it, Nexus will mark it as decommissioned. +CREATE TYPE IF NOT EXISTS omicron.public.physical_disk_state AS ENUM ( + -- The disk has resources of any kind allocated on it, or, is available for + -- new resources. + -- + -- The disk can be in this state and have a different policy, e.g. + -- "expunged". + 'active', + + -- The disk no longer has resources allocated on it, now or in the future. + -- + -- This is a terminal state. This state is only valid if the policy is + -- 'expunged'. + 'decommissioned' +); + -- A physical disk which exists inside the rack. CREATE TABLE IF NOT EXISTS omicron.public.physical_disk ( id UUID PRIMARY KEY, @@ -348,6 +382,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.physical_disk ( -- FK into the Sled table sled_id UUID NOT NULL, + disk_policy omicron.public.physical_disk_policy NOT NULL, + disk_state omicron.public.physical_disk_state NOT NULL, + -- This constraint should be upheld, even for deleted disks -- in the fleet. 
CONSTRAINT vendor_serial_model_unique UNIQUE ( @@ -3733,7 +3770,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '48.0.0', NULL) + ( TRUE, NOW(), NOW(), '49.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/physical-disk-state-and-policy/up01.sql b/schema/crdb/physical-disk-state-and-policy/up01.sql new file mode 100644 index 0000000000..5589f3a6ee --- /dev/null +++ b/schema/crdb/physical-disk-state-and-policy/up01.sql @@ -0,0 +1,4 @@ +CREATE TYPE IF NOT EXISTS omicron.public.physical_disk_policy AS ENUM ( + 'in_service', + 'expunged' +); diff --git a/schema/crdb/physical-disk-state-and-policy/up02.sql b/schema/crdb/physical-disk-state-and-policy/up02.sql new file mode 100644 index 0000000000..fbe5ba6e51 --- /dev/null +++ b/schema/crdb/physical-disk-state-and-policy/up02.sql @@ -0,0 +1,4 @@ +CREATE TYPE IF NOT EXISTS omicron.public.physical_disk_state AS ENUM ( + 'active', + 'decommissioned' +); diff --git a/schema/crdb/physical-disk-state-and-policy/up03.sql b/schema/crdb/physical-disk-state-and-policy/up03.sql new file mode 100644 index 0000000000..d3dcd714bd --- /dev/null +++ b/schema/crdb/physical-disk-state-and-policy/up03.sql @@ -0,0 +1,5 @@ +ALTER TABLE omicron.public.physical_disk + ADD COLUMN IF NOT EXISTS disk_policy omicron.public.physical_disk_policy + NOT NULL DEFAULT 'in_service', + ADD COLUMN IF NOT EXISTS disk_state omicron.public.physical_disk_state + NOT NULL DEFAULT 'active'; diff --git a/schema/crdb/physical-disk-state-and-policy/up04.sql b/schema/crdb/physical-disk-state-and-policy/up04.sql new file mode 100644 index 0000000000..a455c59dc3 --- /dev/null +++ b/schema/crdb/physical-disk-state-and-policy/up04.sql @@ -0,0 +1,3 @@ +ALTER TABLE omicron.public.physical_disk + ALTER COLUMN disk_policy DROP DEFAULT, + ALTER COLUMN disk_state DROP DEFAULT; From 7efe8389617deb91181c6435f8b3fbbee45e4535 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Fri, 29 Mar 2024 08:05:45 -0700 Subject: [PATCH 014/334] add way to edit and upload blueprints (#5345) --- Cargo.lock | 13 +- Cargo.toml | 1 + dev-tools/omdb/src/bin/omdb/db.rs | 156 +----------- dev-tools/omdb/src/bin/omdb/nexus.rs | 34 +++ dev-tools/omdb/tests/successes.out | 4 +- dev-tools/reconfigurator-cli/Cargo.toml | 10 +- dev-tools/reconfigurator-cli/src/main.rs | 236 ++++++++++++------ .../reconfigurator-cli/tests/config.test.toml | 1 + .../reconfigurator-cli/tests/test_basic.rs | 231 +++++++++++++++++ nexus/reconfigurator/preparation/Cargo.toml | 3 + nexus/reconfigurator/preparation/src/lib.rs | 161 ++++++++++++ nexus/src/app/deployment.rs | 9 + nexus/src/internal_api/http_entrypoints.rs | 23 ++ openapi/nexus-internal.json | 28 +++ 14 files changed, 684 insertions(+), 226 deletions(-) create mode 120000 dev-tools/reconfigurator-cli/tests/config.test.toml diff --git a/Cargo.lock b/Cargo.lock index d1df69b608..5fc65c2a10 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4840,8 +4840,11 @@ dependencies = [ name = "nexus-reconfigurator-preparation" version = "0.1.0" dependencies = [ + "anyhow", + "futures", "illumos-utils", "nexus-db-model", + "nexus-db-queries", "nexus-types", "omicron-common", "omicron-workspace-hack", @@ -7426,6 +7429,7 @@ name = "reconfigurator-cli" version = "0.1.0" dependencies = [ "anyhow", + "assert_matches", "camino", "camino-tempfile", "clap 4.5.1", @@ -7434,22 +7438,29 @@ dependencies = [ "expectorate", "humantime", "indexmap 2.2.5", + "nexus-client", + "nexus-db-queries", "nexus-reconfigurator-execution", 
"nexus-reconfigurator-planning", + "nexus-reconfigurator-preparation", + "nexus-test-utils", + "nexus-test-utils-macros", "nexus-types", "omicron-common", + "omicron-nexus", "omicron-rpaths", "omicron-test-utils", "omicron-workspace-hack", "pq-sys", "reedline", - "regex", + "serde", "serde_json", "slog", "slog-error-chain", "subprocess", "swrite", "tabled", + "tokio", "uuid 1.7.0", ] diff --git a/Cargo.toml b/Cargo.toml index a384c8bed6..2cfe265ca6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -299,6 +299,7 @@ num = { version = "0.4.1", default-features = false, features = [ "libm" ] } omicron-common = { path = "common" } omicron-gateway = { path = "gateway" } omicron-nexus = { path = "nexus" } +omicron-omdb = { path = "dev-tools/omdb" } omicron-package = { path = "package" } omicron-rpaths = { path = "rpaths" } omicron-sled-agent = { path = "sled-agent" } diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index e1e71ff3d1..30473fccd4 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -36,8 +36,6 @@ use diesel::JoinOnDsl; use diesel::NullableExpressionMethods; use diesel::OptionalExtension; use diesel::TextExpressionMethods; -use dropshot::PaginationOrder; -use futures::StreamExt; use gateway_client::types::SpType; use ipnetwork::IpNetwork; use nexus_config::PostgresConfigWithUrl; @@ -73,30 +71,23 @@ use nexus_db_queries::db; use nexus_db_queries::db::datastore::read_only_resources_associated_with_volume; use nexus_db_queries::db::datastore::CrucibleTargets; use nexus_db_queries::db::datastore::DataStoreConnection; -use nexus_db_queries::db::datastore::DataStoreDnsTest; -use nexus_db_queries::db::datastore::DataStoreInventoryTest; -use nexus_db_queries::db::datastore::Discoverability; use nexus_db_queries::db::datastore::InstanceAndActiveVmm; use nexus_db_queries::db::identity::Asset; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::ServiceKind; use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_queries::db::DataStore; -use nexus_reconfigurator_preparation::policy_from_db; use nexus_types::deployment::Blueprint; use nexus_types::deployment::OmicronZoneType; -use nexus_types::deployment::UnstableReconfiguratorState; use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsRecord; use nexus_types::internal_api::params::Srv; use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; use nexus_types::inventory::RotPageWhich; -use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; use omicron_common::api::external::InstanceState; -use omicron_common::api::external::LookupType; use omicron_common::api::external::MacAddr; use sled_agent_client::types::VolumeConstructionRequest; use std::borrow::Cow; @@ -522,7 +513,6 @@ impl DbArgs { cmd_db_reconfigurator_save( &opctx, &datastore, - &self.fetch_opts, reconfig_save_args, ) .await @@ -3297,151 +3287,15 @@ impl LongStringFormatter { async fn cmd_db_reconfigurator_save( opctx: &OpContext, datastore: &DataStore, - fetch_opts: &DbFetchOptions, reconfig_save_args: &ReconfiguratorSaveArgs, ) -> Result<(), anyhow::Error> { // See Nexus::blueprint_planning_context(). - eprint!("assembling policy ... 
"); - let sled_rows = datastore - .sled_list_all_batched(opctx) - .await - .context("listing sleds")?; - let zpool_rows = datastore - .zpool_list_all_external_batched(opctx) - .await - .context("listing zpools")?; - let ip_pool_range_rows = { - let (authz_service_ip_pool, _) = datastore - .ip_pools_service_lookup(opctx) - .await - .context("fetching IP services pool")?; - datastore - .ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool) - .await - .context("listing services IP pool ranges")? - }; - - let policy = policy_from_db( - &sled_rows, - &zpool_rows, - &ip_pool_range_rows, - NEXUS_REDUNDANCY, + eprint!("assembling reconfigurator state ... "); + let state = nexus_reconfigurator_preparation::reconfigurator_state_load( + opctx, datastore, ) - .context("assembling policy")?; - eprintln!("done."); - - eprint!("loading inventory collections ... "); - let collection_ids = datastore - .inventory_collections() - .await - .context("listing collections")?; - let collections = futures::stream::iter(collection_ids) - .filter_map(|id| async move { - let read = datastore - .inventory_collection_read(opctx, id) - .await - .with_context(|| format!("reading collection {}", id)); - if let Err(error) = &read { - eprintln!("warning: {}", error); - } - read.ok() - }) - .collect::>() - .await; - eprintln!("done."); - - eprint!("loading blueprints ... "); - let limit = fetch_opts.fetch_limit; - let pagparams = DataPageParams { - marker: None, - direction: PaginationOrder::Ascending, - limit, - }; - let blueprint_ids = datastore - .blueprints_list(opctx, &pagparams) - .await - .context("listing blueprints")?; - check_limit(&blueprint_ids, limit, || "listing blueprint ids"); - let blueprints = futures::stream::iter(blueprint_ids) - .filter_map(|bpm| async move { - let blueprint_id = bpm.id; - let read = datastore - .blueprint_read( - opctx, - &nexus_db_queries::authz::Blueprint::new( - nexus_db_queries::authz::FLEET, - blueprint_id, - LookupType::ById(blueprint_id), - ), - ) - .await - .with_context(|| format!("reading blueprint {}", blueprint_id)); - if let Err(error) = &read { - eprintln!("warning: {}", error); - } - read.ok() - }) - .collect::>() - .await; - eprintln!("done."); - - // It's also useful to include information about any DNS generations - // mentioned in any blueprints. - let blueprints_list = &blueprints; - let fetch_dns_group = |dns_group: DnsGroup| async move { - let latest_version = datastore - .dns_group_latest_version(&opctx, dns_group) - .await - .with_context(|| { - format!("reading latest {:?} version", dns_group) - })?; - let dns_generations_needed: BTreeSet<_> = blueprints_list - .iter() - .map(|blueprint| match dns_group { - DnsGroup::Internal => blueprint.internal_dns_version, - DnsGroup::External => blueprint.external_dns_version, - }) - .chain(std::iter::once(*latest_version.version)) - .collect(); - let mut rv = BTreeMap::new(); - for gen in dns_generations_needed { - let config = datastore - .dns_config_read_version(&opctx, dns_group, gen) - .await - .with_context(|| { - format!("reading {:?} DNS version {}", dns_group, gen) - })?; - rv.insert(gen, config); - } - - Ok::, anyhow::Error>(rv) - }; - - let internal_dns = fetch_dns_group(DnsGroup::Internal).await?; - let external_dns = fetch_dns_group(DnsGroup::External).await?; - let silo_names = datastore - .silo_list_all_batched(&opctx, Discoverability::All) - .await - .context("listing all Silos")? 
- .into_iter() - .map(|s| s.name().clone()) - .collect(); - let external_dns_zone_names = datastore - .dns_zones_list_all(&opctx, DnsGroup::External) - .await - .context("listing external DNS zone names")? - .into_iter() - .map(|dns_zone| dns_zone.zone_name) - .collect(); - let state = UnstableReconfiguratorState { - policy, - collections, - blueprints, - internal_dns, - external_dns, - silo_names, - external_dns_zone_names, - }; + .await?; + eprintln!("done"); let output_path = &reconfig_save_args.output_file; let file = std::fs::OpenOptions::new() diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index d3d539cb2c..bdcfe0cdc4 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -9,6 +9,7 @@ use crate::db::DbUrlOptions; use crate::Omdb; use anyhow::bail; use anyhow::Context; +use camino::Utf8PathBuf; use chrono::DateTime; use chrono::SecondsFormat; use chrono::Utc; @@ -23,6 +24,7 @@ use nexus_client::types::LastResult; use nexus_client::types::SledSelector; use nexus_client::types::UninitializedSledId; use nexus_db_queries::db::lookup::LookupPath; +use nexus_types::deployment::Blueprint; use nexus_types::inventory::BaseboardId; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; @@ -94,6 +96,8 @@ enum BlueprintsCommands { GenerateFromCollection(CollectionIdArgs), /// Generate a new blueprint Regenerate, + /// Import a blueprint + Import(BlueprintImportArgs), } #[derive(Debug, Args)] @@ -156,6 +160,12 @@ enum BlueprintTargetSetEnabled { Inherit, } +#[derive(Debug, Args)] +struct BlueprintImportArgs { + /// path to a file containing a JSON-serialized blueprint + input: Utf8PathBuf, +} + #[derive(Debug, Args)] struct SledsArgs { #[command(subcommand)] @@ -301,6 +311,12 @@ impl NexusArgs { ) .await } + NexusCommands::Blueprints(BlueprintsArgs { + command: BlueprintsCommands::Import(args), + }) => { + let token = omdb.check_allow_destructive()?; + cmd_nexus_blueprints_import(&client, token, args).await + } NexusCommands::Sleds(SledsArgs { command: SledsCommands::ListUninitialized, @@ -1091,6 +1107,24 @@ async fn cmd_nexus_blueprints_regenerate( Ok(()) } +async fn cmd_nexus_blueprints_import( + client: &nexus_client::Client, + _destruction_token: DestructiveOperationToken, + args: &BlueprintImportArgs, +) -> Result<(), anyhow::Error> { + let input_path = &args.input; + let contents = std::fs::read_to_string(input_path) + .with_context(|| format!("open {:?}", input_path))?; + let blueprint: Blueprint = serde_json::from_str(&contents) + .with_context(|| format!("read {:?}", input_path))?; + client + .blueprint_import(&blueprint) + .await + .with_context(|| format!("upload {:?}", input_path))?; + eprintln!("uploaded new blueprint {}", blueprint.id); + Ok(()) +} + /// Runs `omdb nexus sleds list-uninitialized` async fn cmd_nexus_sleds_list_uninitialized( client: &nexus_client::Client, diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 8876a293a5..6e25f7b3a3 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -66,9 +66,7 @@ stdout: stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () -assembling policy ... done. -loading inventory collections ... done. -loading blueprints ... done. +assembling reconfigurator state ... 
done wrote ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-instances"] diff --git a/dev-tools/reconfigurator-cli/Cargo.toml b/dev-tools/reconfigurator-cli/Cargo.toml index c8d32513c9..cae07ec9b6 100644 --- a/dev-tools/reconfigurator-cli/Cargo.toml +++ b/dev-tools/reconfigurator-cli/Cargo.toml @@ -9,6 +9,7 @@ omicron-rpaths.workspace = true [dependencies] anyhow.workspace = true +assert_matches.workspace = true camino.workspace = true clap.workspace = true dns-service-client.workspace = true @@ -33,9 +34,16 @@ omicron-workspace-hack.workspace = true [dev-dependencies] camino-tempfile.workspace = true expectorate.workspace = true +nexus-client.workspace = true +nexus-db-queries.workspace = true +nexus-reconfigurator-preparation.workspace = true +nexus-test-utils.workspace = true +nexus-test-utils-macros.workspace = true +omicron-nexus.workspace = true omicron-test-utils.workspace = true -regex.workspace = true +serde.workspace = true subprocess.workspace = true +tokio.workspace = true # Disable doc builds by default for our binaries to work around issue # rust-lang/cargo#8373. These docs would not be very useful anyway. diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 358873db44..cef5c3c63f 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -15,6 +15,7 @@ use indexmap::IndexMap; use nexus_reconfigurator_execution::blueprint_external_dns_config; use nexus_reconfigurator_execution::blueprint_internal_dns_config; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; +use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; use nexus_reconfigurator_planning::planner::Planner; use nexus_reconfigurator_planning::system::{ SledBuilder, SledHwInventory, SystemDescription, @@ -68,6 +69,31 @@ struct ReconfiguratorSim { log: slog::Logger, } +impl ReconfiguratorSim { + fn blueprint_lookup(&self, id: Uuid) -> Result<&Blueprint, anyhow::Error> { + self.blueprints + .get(&id) + .ok_or_else(|| anyhow!("no such blueprint: {}", id)) + } + + fn blueprint_insert_new(&mut self, blueprint: Blueprint) { + let previous = self.blueprints.insert(blueprint.id, blueprint); + assert!(previous.is_none()); + } + + fn blueprint_insert_loaded( + &mut self, + blueprint: Blueprint, + ) -> Result<(), anyhow::Error> { + let entry = self.blueprints.entry(blueprint.id); + if let indexmap::map::Entry::Occupied(_) = &entry { + return Err(anyhow!("blueprint already exists: {}", blueprint.id)); + } + let _ = entry.or_insert(blueprint); + Ok(()) + } +} + /// interactive REPL for exploring the planner #[derive(Parser, Debug)] struct CmdReconfiguratorSim { @@ -200,6 +226,7 @@ fn process_entry(sim: &mut ReconfiguratorSim, entry: String) -> LoopResult { Commands::BlueprintFromInventory(args) => { cmd_blueprint_from_inventory(sim, args) } + Commands::BlueprintEdit(args) => cmd_blueprint_edit(sim, args), Commands::BlueprintPlan(args) => cmd_blueprint_plan(sim, args), Commands::BlueprintShow(args) => cmd_blueprint_show(sim, args), Commands::BlueprintDiff(args) => cmd_blueprint_diff(sim, args), @@ -207,6 +234,7 @@ fn process_entry(sim: &mut ReconfiguratorSim, entry: String) -> LoopResult { Commands::BlueprintDiffInventory(args) => { cmd_blueprint_diff_inventory(sim, args) } + Commands::BlueprintSave(args) => cmd_blueprint_save(sim, args), Commands::Show => cmd_show(sim), Commands::Set(args) => cmd_set(sim, args), Commands::Load(args) => cmd_load(sim, args), @@ 
-259,6 +287,8 @@ enum Commands { BlueprintFromInventory(InventoryArgs), /// run planner to generate a new blueprint BlueprintPlan(BlueprintPlanArgs), + /// edit contents of a blueprint directly + BlueprintEdit(BlueprintEditArgs), /// show details about a blueprint BlueprintShow(BlueprintArgs), /// show differences between two blueprints @@ -267,6 +297,8 @@ enum Commands { BlueprintDiffDns(BlueprintDiffDnsArgs), /// show differences between a blueprint and an inventory collection BlueprintDiffInventory(BlueprintDiffInventoryArgs), + /// write one blueprint to a file + BlueprintSave(BlueprintSaveArgs), /// show system properties Show, @@ -314,6 +346,29 @@ struct BlueprintPlanArgs { collection_id: Uuid, } +#[derive(Debug, Args)] +struct BlueprintEditArgs { + /// id of the blueprint to edit + blueprint_id: Uuid, + /// "creator" field for the new blueprint + #[arg(long)] + creator: Option, + /// "comment" field for the new blueprint + #[arg(long)] + comment: Option, + #[command(subcommand)] + edit_command: BlueprintEditCommands, +} + +#[derive(Debug, Subcommand)] +enum BlueprintEditCommands { + /// add a Nexus instance to a particular sled + AddNexus { + /// sled on which to deploy the new instance + sled_id: Uuid, + }, +} + #[derive(Debug, Args)] struct BlueprintArgs { /// id of the blueprint @@ -344,6 +399,14 @@ struct BlueprintDiffInventoryArgs { blueprint_id: Uuid, } +#[derive(Debug, Args)] +struct BlueprintSaveArgs { + /// id of the blueprint + blueprint_id: Uuid, + /// output file + filename: Utf8PathBuf, +} + #[derive(Debug, Args)] struct BlueprintDiffArgs { /// id of the first blueprint @@ -577,7 +640,7 @@ fn cmd_blueprint_from_inventory( "generated blueprint {} from inventory collection {}", blueprint.id, collection_id ); - sim.blueprints.insert(blueprint.id, blueprint); + sim.blueprint_insert_new(blueprint); Ok(Some(rv)) } @@ -587,10 +650,7 @@ fn cmd_blueprint_plan( ) -> anyhow::Result> { let parent_blueprint_id = args.parent_blueprint_id; let collection_id = args.collection_id; - let parent_blueprint = sim - .blueprints - .get(&parent_blueprint_id) - .ok_or_else(|| anyhow!("no such blueprint: {}", parent_blueprint_id))?; + let parent_blueprint = sim.blueprint_lookup(parent_blueprint_id)?; let collection = sim .collections .get(&collection_id) @@ -638,7 +698,49 @@ fn cmd_blueprint_plan( "generated blueprint {} based on parent blueprint {}", blueprint.id, parent_blueprint_id, ); - sim.blueprints.insert(blueprint.id, blueprint); + sim.blueprint_insert_new(blueprint); + Ok(Some(rv)) +} + +fn cmd_blueprint_edit( + sim: &mut ReconfiguratorSim, + args: BlueprintEditArgs, +) -> anyhow::Result> { + let blueprint_id = args.blueprint_id; + let blueprint = sim.blueprint_lookup(blueprint_id)?; + let creator = args.creator.as_deref().unwrap_or("reconfigurator-cli"); + let policy = sim.system.to_policy().context("assembling policy")?; + let mut builder = BlueprintBuilder::new_based_on( + &sim.log, + &blueprint, + blueprint.internal_dns_version, + blueprint.external_dns_version, + &policy, + creator, + ) + .context("creating blueprint builder")?; + + if let Some(comment) = args.comment { + builder.comment(comment); + } + + let label = match args.edit_command { + BlueprintEditCommands::AddNexus { sled_id } => { + let current = builder.sled_num_nexus_zones(sled_id); + let added = builder + .sled_ensure_zone_multiple_nexus(sled_id, current + 1) + .context("failed to add Nexus zone")?; + assert_matches::assert_matches!(added, EnsureMultiple::Added(1)); + format!("added Nexus zone to sled {}", sled_id) + 
} + }; + + let new_blueprint = builder.build(); + let rv = format!( + "blueprint {} created from blueprint {}: {}", + new_blueprint.id, blueprint_id, label + ); + sim.blueprint_insert_new(new_blueprint); Ok(Some(rv)) } @@ -646,10 +748,7 @@ fn cmd_blueprint_show( sim: &mut ReconfiguratorSim, args: BlueprintArgs, ) -> anyhow::Result> { - let blueprint = sim - .blueprints - .get(&args.blueprint_id) - .ok_or_else(|| anyhow!("no such blueprint: {}", args.blueprint_id))?; + let blueprint = sim.blueprint_lookup(args.blueprint_id)?; Ok(Some(format!("{}", blueprint.display()))) } @@ -660,14 +759,8 @@ fn cmd_blueprint_diff( let mut rv = String::new(); let blueprint1_id = args.blueprint1_id; let blueprint2_id = args.blueprint2_id; - let blueprint1 = sim - .blueprints - .get(&blueprint1_id) - .ok_or_else(|| anyhow!("no such blueprint: {}", blueprint1_id))?; - let blueprint2 = sim - .blueprints - .get(&blueprint2_id) - .ok_or_else(|| anyhow!("no such blueprint: {}", blueprint2_id))?; + let blueprint1 = sim.blueprint_lookup(blueprint1_id)?; + let blueprint2 = sim.blueprint_lookup(blueprint2_id)?; let sled_diff = blueprint2 .diff_since_blueprint(&blueprint1) @@ -742,10 +835,7 @@ fn cmd_blueprint_diff_dns( let dns_group = args.dns_group; let dns_version = Generation::from(args.dns_version); let blueprint_id = args.blueprint_id; - let blueprint = sim - .blueprints - .get(&blueprint_id) - .ok_or_else(|| anyhow!("no such blueprint: {}", blueprint_id))?; + let blueprint = sim.blueprint_lookup(blueprint_id)?; let existing_dns_config = match dns_group { CliDnsGroup::Internal => sim.internal_dns.get(&dns_version), @@ -792,17 +882,28 @@ fn cmd_blueprint_diff_inventory( let collection = sim.collections.get(&collection_id).ok_or_else(|| { anyhow!("no such inventory collection: {}", collection_id) })?; - let blueprint = sim - .blueprints - .get(&blueprint_id) - .ok_or_else(|| anyhow!("no such blueprint: {}", blueprint_id))?; - + let blueprint = sim.blueprint_lookup(blueprint_id)?; let diff = blueprint .diff_since_collection(&collection) .context("failed to diff blueprint from inventory collection")?; Ok(Some(diff.display().to_string())) } +fn cmd_blueprint_save( + sim: &mut ReconfiguratorSim, + args: BlueprintSaveArgs, +) -> anyhow::Result> { + let blueprint_id = args.blueprint_id; + let blueprint = sim.blueprint_lookup(blueprint_id)?; + + let output_path = &args.filename; + let output_str = serde_json::to_string_pretty(&blueprint) + .context("serializing blueprint")?; + std::fs::write(&output_path, &output_str) + .with_context(|| format!("write {:?}", output_path))?; + Ok(Some(format!("saved blueprint {} to {:?}", blueprint_id, output_path))) +} + fn cmd_save( sim: &mut ReconfiguratorSim, args: SaveArgs, @@ -819,14 +920,10 @@ fn cmd_save( }; let output_path = &args.filename; - let outfile = std::fs::OpenOptions::new() - .create_new(true) - .write(true) - .open(output_path) - .with_context(|| format!("open {:?}", output_path))?; - serde_json::to_writer_pretty(&outfile, &saved) - .with_context(|| format!("writing to {:?}", output_path)) - .unwrap_or_else(|e| panic!("{:#}", e)); + let output_str = + serde_json::to_string_pretty(&saved).context("serializing state")?; + std::fs::write(&output_path, &output_str) + .with_context(|| format!("write {:?}", output_path))?; Ok(Some(format!( "saved policy, collections, and blueprints to {:?}", output_path @@ -907,7 +1004,8 @@ fn read_file( ) -> anyhow::Result { let file = std::fs::File::open(input_path) .with_context(|| format!("open {:?}", input_path))?; - 
serde_json::from_reader(file) + let bufread = std::io::BufReader::new(file); + serde_json::from_reader(bufread) .with_context(|| format!("read {:?}", input_path)) } @@ -983,30 +1081,21 @@ fn cmd_load( continue; }; - let inventory_sp = match &inventory_sled_agent.baseboard_id { - Some(baseboard_id) => { - let inv_sp = primary_collection - .sps - .get(baseboard_id) - .ok_or_else(|| { - anyhow!( - "error: load sled {}: missing SP inventory", - sled_id - ) - })?; - let inv_rot = primary_collection - .rots - .get(baseboard_id) - .ok_or_else(|| { - anyhow!( - "error: load sled {}: missing RoT inventory", - sled_id - ) - })?; - Some(SledHwInventory { baseboard_id, sp: inv_sp, rot: inv_rot }) - } - None => None, - }; + let inventory_sp = inventory_sled_agent.baseboard_id.as_ref().and_then( + |baseboard_id| { + let inv_sp = primary_collection.sps.get(baseboard_id); + let inv_rot = primary_collection.rots.get(baseboard_id); + if let (Some(inv_sp), Some(inv_rot)) = (inv_sp, inv_rot) { + Some(SledHwInventory { + baseboard_id: &baseboard_id, + sp: inv_sp, + rot: inv_rot, + }) + } else { + None + } + }, + ); let result = sim.system.sled_full( sled_id, @@ -1038,19 +1127,26 @@ fn cmd_load( } for blueprint in loaded.blueprints { - if sim.blueprints.contains_key(&blueprint.id) { - swriteln!( - s, - "blueprint {}: skipped (one with the \ - same id is already loaded)", - blueprint.id - ); - } else { - swriteln!(s, "blueprint {} loaded", blueprint.id); - sim.blueprints.insert(blueprint.id, blueprint); + let blueprint_id = blueprint.id; + match sim.blueprint_insert_loaded(blueprint) { + Ok(_) => { + swriteln!(s, "blueprint {} loaded", blueprint_id); + } + Err(error) => { + swriteln!( + s, + "blueprint {}: skipped ({:#})", + blueprint_id, + error + ); + } } } + let ranges = format!("{:?}", loaded.policy.service_ip_pool_ranges); + sim.system.service_ip_pool_ranges(loaded.policy.service_ip_pool_ranges); + swriteln!(s, "loaded service IP pool ranges: {:?}", ranges); + sim.internal_dns = loaded.internal_dns; sim.external_dns = loaded.external_dns; sim.silo_names = loaded.silo_names; diff --git a/dev-tools/reconfigurator-cli/tests/config.test.toml b/dev-tools/reconfigurator-cli/tests/config.test.toml new file mode 120000 index 0000000000..6050ca47dd --- /dev/null +++ b/dev-tools/reconfigurator-cli/tests/config.test.toml @@ -0,0 +1 @@ +../../../nexus/tests/config.test.toml \ No newline at end of file diff --git a/dev-tools/reconfigurator-cli/tests/test_basic.rs b/dev-tools/reconfigurator-cli/tests/test_basic.rs index 6048aece1b..19522bace6 100644 --- a/dev-tools/reconfigurator-cli/tests/test_basic.rs +++ b/dev-tools/reconfigurator-cli/tests/test_basic.rs @@ -2,14 +2,34 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
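+// For orientation: the blueprint round-trip test below drives reconfigurator-cli with small scripts along these lines (paths and ids elided): `load <state-file> <collection-id>`, `blueprint-edit <blueprint-id> add-nexus <sled-id>`, `save <state-file>`, and then `blueprint-save <blueprint-id> <output-file>`.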
+use anyhow::Context; +use camino::Utf8Path; use expectorate::assert_contents; +use nexus_db_queries::authn; +use nexus_db_queries::authz; +use nexus_db_queries::context::OpContext; +use nexus_test_utils::SLED_AGENT_UUID; +use nexus_test_utils_macros::nexus_test; +use nexus_types::deployment::Blueprint; +use nexus_types::deployment::UnstableReconfiguratorState; +use omicron_common::api::external::Error; +use omicron_test_utils::dev::poll::wait_for_condition; +use omicron_test_utils::dev::poll::CondCheckError; use omicron_test_utils::dev::test_cmds::assert_exit_code; use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::redact_variable; use omicron_test_utils::dev::test_cmds::run_command; use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; +use slog::debug; +use std::io::BufReader; +use std::io::BufWriter; use std::path::PathBuf; +use std::sync::Arc; +use std::time::Duration; use subprocess::Exec; +use swrite::swriteln; +use swrite::SWrite; +use uuid::Uuid; fn path_to_cli() -> PathBuf { path_to_executable(env!("CARGO_BIN_EXE_reconfigurator-cli")) @@ -25,3 +45,214 @@ fn test_basic() { assert_contents("tests/output/cmd-stdout", &stdout_text); assert_contents("tests/output/cmd-stderr", &stderr_text); } + +type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + +// Tests a round trip of blueprint editing: start with the blueprint that's +// present in a running system, fetch it with the rest of the reconfigurator +// state, load it into reconfigurator-cli, edit it, save that to a file, then +// import it back. +#[nexus_test] +async fn test_blueprint_edit(cptestctx: &ControlPlaneTestContext) { + // Setup + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let log = &cptestctx.logctx.log; + let opctx = OpContext::for_background( + log.clone(), + Arc::new(authz::Authz::new(log)), + authn::Context::internal_api(), + datastore.clone(), + ); + let tmpdir = camino_tempfile::tempdir().expect("failed to create tmpdir"); + // Save the path and prevent the temporary directory from being cleaned up + // automatically. We want to preserve the contents if this test fails. + let tmpdir_path = tmpdir.into_path(); + let saved_state1_path = tmpdir_path.join("reconfigurator-state1.json"); + let saved_state2_path = tmpdir_path.join("reconfigurator-state2.json"); + let script1_path = tmpdir_path.join("cmds1"); + let script2_path = tmpdir_path.join("cmds2"); + let new_blueprint_path = tmpdir_path.join("new_blueprint.json"); + + println!("temporary directory: {}", tmpdir_path); + + // Wait until Nexus has successfully completed an inventory collection. + // We don't need it directly but we want it to be present in the saved + // reconfigurator state. + let collection = wait_for_condition( + || async { + let result = + datastore.inventory_get_latest_collection(&opctx).await; + let log_result = match &result { + Ok(Some(_)) => Ok("found"), + Ok(None) => Ok("not found"), + Err(error) => Err(error), + }; + debug!( + log, + "attempt to fetch latest inventory collection"; + "result" => ?log_result, + ); + + match result { + Ok(None) => Err(CondCheckError::NotYet), + Ok(Some(c)) => Ok(c), + Err(Error::ServiceUnavailable { ..
}) => { + Err(CondCheckError::NotYet) + } + Err(error) => Err(CondCheckError::Failed(error)), + } + }, + &Duration::from_millis(50), + &Duration::from_secs(30), + ) + .await + .expect("took too long to find first inventory collection"); + + // Assemble state that we can load into reconfigurator-cli. + let state1 = nexus_reconfigurator_preparation::reconfigurator_state_load( + &opctx, datastore, + ) + .await + .expect("failed to assemble reconfigurator state"); + + // Smoke check the initial state. + let sled_id: Uuid = SLED_AGENT_UUID.parse().unwrap(); + assert!(state1.policy.sleds.contains_key(&sled_id)); + assert!(!state1.policy.service_ip_pool_ranges.is_empty()); + assert!(!state1.silo_names.is_empty()); + assert!(!state1.external_dns_zone_names.is_empty()); + // We waited for the first inventory collection already. + assert!(state1.collections.iter().any(|c| c.id == collection.id)); + assert!(!state1.collections.is_empty()); + // Test suite setup establishes the initial blueprint. + assert!(!state1.blueprints.is_empty()); + // Setup requires that internal and external DNS be configured so we should + // have at least the current DNS generations here. + assert!(!state1.internal_dns.is_empty()); + assert!(!state1.external_dns.is_empty()); + + // unwrap: we checked above that this list was non-empty. + let blueprint = state1.blueprints.iter().next().unwrap(); + + // Write a reconfigurator-cli script to load the file, edit the + // blueprint, and save the entire state to a new file. + let mut s = String::new(); + swriteln!(s, "load {} {}", saved_state1_path, collection.id); + swriteln!(s, "blueprint-edit {} add-nexus {}", blueprint.id, sled_id); + swriteln!(s, "save {}", saved_state2_path); + std::fs::write(&script1_path, &s) + .with_context(|| format!("write {}", &script1_path)) + .unwrap(); + + // Run this reconfigurator-cli invocation. + write_json(&saved_state1_path, &state1).unwrap(); + let exec = Exec::cmd(path_to_cli()).arg(&script1_path); + let (exit_status, _, stderr_text) = run_command(exec); + assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); + + // Load the new file and find the new blueprint name. + let state2: UnstableReconfiguratorState = + read_json(&saved_state2_path).unwrap(); + assert_eq!(state2.blueprints.len(), state1.blueprints.len() + 1); + let new_blueprint = state2.blueprints.into_iter().rev().next().unwrap(); + assert_ne!(new_blueprint.id, blueprint.id); + + // While we're at it, smoke check the new blueprint. + assert_eq!(new_blueprint.parent_blueprint_id, Some(blueprint.id)); + assert_eq!(new_blueprint.creator, "reconfigurator-cli"); + + // Now run reconfigurator-cli again just to save the new blueprint. This is + // a little unfortunate but it's hard to avoid if we want to test that + // blueprint-save works. + let mut s = String::new(); + swriteln!(s, "load {} {}", saved_state2_path, collection.id); + swriteln!(s, "blueprint-save {} {}", new_blueprint.id, new_blueprint_path); + std::fs::write(&script2_path, &s) + .with_context(|| format!("write {}", &script2_path)) + .unwrap(); + let exec = Exec::cmd(path_to_cli()).arg(&script2_path); + let (exit_status, _, stderr_text) = run_command(exec); + assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); + + // Load the blueprint we just wrote. + let new_blueprint2: Blueprint = read_json(&new_blueprint_path).unwrap(); + assert_eq!(new_blueprint, new_blueprint2); + + // Import the new blueprint. 
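+ // The import goes through Nexus's internal API via the generated nexus-client; the blueprint is then read back with blueprint_view and set as the (disabled) target below.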
+ let nexus_internal_url = + format!("http://{}/", cptestctx.internal_client.bind_address); + let nexus_client = + nexus_client::Client::new(&nexus_internal_url, log.clone()); + nexus_client + .blueprint_import(&new_blueprint) + .await + .expect("failed to import new blueprint"); + + let found_blueprint = nexus_client + .blueprint_view(&new_blueprint.id) + .await + .expect("failed to find imported blueprint in Nexus") + .into_inner(); + assert_eq!(found_blueprint, new_blueprint2); + + // Set the blueprint as the (disabled) target. + nexus_client + .blueprint_target_set(&nexus_client::types::BlueprintTargetSet { + target_id: new_blueprint.id, + enabled: false, + }) + .await + .context("setting target blueprint") + .unwrap(); + + // Read that back. + let target = nexus_client + .blueprint_target_view() + .await + .context("fetching target blueprint") + .unwrap(); + assert_eq!(target.target_id, new_blueprint.id); + + // Now clean up the temporary directory. + for path in [ + saved_state1_path, + saved_state2_path, + script1_path, + script2_path, + new_blueprint_path, + ] { + std::fs::remove_file(&path) + .with_context(|| format!("remove {}", path)) + .unwrap(); + } + + std::fs::remove_dir(&tmpdir_path) + .with_context(|| format!("remove {}", tmpdir_path)) + .unwrap(); +} + +fn read_json serde::Deserialize<'a>>( + path: &Utf8Path, +) -> Result { + let file = std::fs::File::open(path) + .with_context(|| format!("open {:?}", path))?; + let bufread = BufReader::new(file); + serde_json::from_reader(bufread).with_context(|| format!("read {:?}", path)) +} + +fn write_json( + path: &Utf8Path, + obj: &T, +) -> Result<(), anyhow::Error> { + let file = std::fs::OpenOptions::new() + .write(true) + .create(true) + .open(path) + .with_context(|| format!("open {:?}", path))?; + let bufwrite = BufWriter::new(file); + serde_json::to_writer_pretty(bufwrite, obj) + .with_context(|| format!("write {:?}", path))?; + Ok(()) +} diff --git a/nexus/reconfigurator/preparation/Cargo.toml b/nexus/reconfigurator/preparation/Cargo.toml index f95f9c4afe..44538ecb03 100644 --- a/nexus/reconfigurator/preparation/Cargo.toml +++ b/nexus/reconfigurator/preparation/Cargo.toml @@ -4,8 +4,11 @@ version = "0.1.0" edition = "2021" [dependencies] +anyhow.workspace = true +futures.workspace = true illumos-utils.workspace = true nexus-db-model.workspace = true +nexus-db-queries.workspace = true nexus-types.workspace = true omicron-common.workspace = true diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 77d4532023..46f71e5834 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -4,14 +4,31 @@ //! 
Common facilities for assembling inputs to the planner
+use anyhow::Context;
+use futures::StreamExt;
+use nexus_db_model::DnsGroup;
+use nexus_db_queries::context::OpContext;
+use nexus_db_queries::db::datastore::DataStoreDnsTest;
+use nexus_db_queries::db::datastore::DataStoreInventoryTest;
+use nexus_db_queries::db::datastore::Discoverability;
+use nexus_db_queries::db::datastore::SQL_BATCH_SIZE;
+use nexus_db_queries::db::pagination::Paginator;
+use nexus_db_queries::db::DataStore;
+use nexus_types::deployment::Blueprint;
+use nexus_types::deployment::BlueprintMetadata;
use nexus_types::deployment::Policy;
use nexus_types::deployment::SledResources;
+use nexus_types::deployment::UnstableReconfiguratorState;
use nexus_types::deployment::ZpoolName;
use nexus_types::identity::Asset;
+use nexus_types::identity::Resource;
+use nexus_types::inventory::Collection;
use omicron_common::address::IpRange;
use omicron_common::address::Ipv6Subnet;
+use omicron_common::address::NEXUS_REDUNDANCY;
use omicron_common::address::SLED_PREFIX;
use omicron_common::api::external::Error;
+use omicron_common::api::external::LookupType;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::str::FromStr;
@@ -72,3 +89,147 @@ pub fn policy_from_db(
Ok(Policy { sleds, service_ip_pool_ranges, target_nexus_zone_count })
}
+
+/// Loads state for import into `reconfigurator-cli`
+///
+/// This is only to be used in omdb or tests.
+pub async fn reconfigurator_state_load(
+ opctx: &OpContext,
+ datastore: &DataStore,
+) -> Result<UnstableReconfiguratorState, anyhow::Error> {
+ opctx.check_complex_operations_allowed()?;
+ let sled_rows = datastore
+ .sled_list_all_batched(opctx)
+ .await
+ .context("listing sleds")?;
+ let zpool_rows = datastore
+ .zpool_list_all_external_batched(opctx)
+ .await
+ .context("listing zpools")?;
+ let ip_pool_range_rows = {
+ let (authz_service_ip_pool, _) = datastore
+ .ip_pools_service_lookup(opctx)
+ .await
+ .context("fetching IP services pool")?;
+ datastore
+ .ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool)
+ .await
+ .context("listing services IP pool ranges")?
+ };
+
+ let policy = policy_from_db(
+ &sled_rows,
+ &zpool_rows,
+ &ip_pool_range_rows,
+ NEXUS_REDUNDANCY,
+ )
+ .context("assembling policy")?;
+
+ let collection_ids = datastore
+ .inventory_collections()
+ .await
+ .context("listing collections")?;
+ let collections = futures::stream::iter(collection_ids)
+ .filter_map(|id| async move {
+ let read = datastore
+ .inventory_collection_read(opctx, id)
+ .await
+ .with_context(|| format!("reading collection {}", id));
+ // It's not necessarily a problem if we failed to read a collection.
+ // They can be removed since we fetched the list.
+ read.ok()
+ })
+ .collect::<Vec<Collection>>()
+ .await;
+
+ let mut blueprint_ids = Vec::new();
+ let mut paginator = Paginator::new(SQL_BATCH_SIZE);
+ while let Some(p) = paginator.next() {
+ let batch = datastore
+ .blueprints_list(opctx, &p.current_pagparams())
+ .await
+ .context("listing blueprints")?;
+ paginator =
+ p.found_batch(&blueprint_ids, &|b: &BlueprintMetadata| b.id);
+ blueprint_ids.extend(batch.into_iter());
+ }
+
+ let blueprints = futures::stream::iter(blueprint_ids)
+ .filter_map(|bpm| async move {
+ let blueprint_id = bpm.id;
+ let read = datastore
+ .blueprint_read(
+ opctx,
+ &nexus_db_queries::authz::Blueprint::new(
+ nexus_db_queries::authz::FLEET,
+ blueprint_id,
+ LookupType::ById(blueprint_id),
+ ),
+ )
+ .await
+ .with_context(|| format!("reading blueprint {}", blueprint_id));
+ // It's not necessarily a problem if we failed to read a blueprint.
+ // They can be removed since we fetched the list.
+ read.ok()
+ })
+ .collect::<Vec<Blueprint>>()
+ .await;
+
+ // It's also useful to include information about any DNS generations
+ // mentioned in any blueprints.
+ let blueprints_list = &blueprints;
+ let fetch_dns_group = |dns_group: DnsGroup| async move {
+ let latest_version = datastore
+ .dns_group_latest_version(&opctx, dns_group)
+ .await
+ .with_context(|| {
+ format!("reading latest {:?} version", dns_group)
+ })?;
+ let dns_generations_needed: BTreeSet<_> = blueprints_list
+ .iter()
+ .map(|blueprint| match dns_group {
+ DnsGroup::Internal => blueprint.internal_dns_version,
+ DnsGroup::External => blueprint.external_dns_version,
+ })
+ .chain(std::iter::once(*latest_version.version))
+ .collect();
+ let mut rv = BTreeMap::new();
+ for gen in dns_generations_needed {
+ let config = datastore
+ .dns_config_read_version(&opctx, dns_group, gen)
+ .await
+ .with_context(|| {
+ format!("reading {:?} DNS version {}", dns_group, gen)
+ })?;
+ rv.insert(gen, config);
+ }
+
+ Ok::<BTreeMap<_, _>, anyhow::Error>(rv)
+ };
+
+ let internal_dns = fetch_dns_group(DnsGroup::Internal).await?;
+ let external_dns = fetch_dns_group(DnsGroup::External).await?;
+ let silo_names = datastore
+ .silo_list_all_batched(&opctx, Discoverability::All)
+ .await
+ .context("listing all Silos")?
+ .into_iter()
+ .map(|s| s.name().clone())
+ .collect();
+ let external_dns_zone_names = datastore
+ .dns_zones_list_all(&opctx, DnsGroup::External)
+ .await
+ .context("listing external DNS zone names")?
+ .into_iter()
+ .map(|dns_zone| dns_zone.zone_name)
+ .collect();
+ Ok(UnstableReconfiguratorState {
+ policy,
+ collections,
+ blueprints,
+ internal_dns,
+ external_dns,
+ silo_names,
+ external_dns_zone_names,
+ })
+}
diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs
index 48ed844f12..b38508d74c 100644
--- a/nexus/src/app/deployment.rs
+++ b/nexus/src/app/deployment.rs
@@ -271,4 +271,13 @@ impl super::Nexus {
self.blueprint_add(&opctx, &blueprint).await?;
Ok(blueprint)
}
+
+ pub async fn blueprint_import(
+ &self,
+ opctx: &OpContext,
+ blueprint: Blueprint,
+ ) -> Result<(), Error> {
+ let _ = self.blueprint_add(&opctx, &blueprint).await?;
+ Ok(())
+ }
}
diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs
index 6d2484c19d..0ce0a204f5 100644
--- a/nexus/src/internal_api/http_entrypoints.rs
+++ b/nexus/src/internal_api/http_entrypoints.rs
@@ -104,6 +104,7 @@ pub(crate) fn internal_api() -> NexusApiDescription {
api.register(blueprint_target_set_enabled)?;
api.register(blueprint_generate_from_collection)?;
api.register(blueprint_regenerate)?;
+ api.register(blueprint_import)?;
api.register(sled_list_uninitialized)?;
api.register(sled_add)?;
@@ -1004,6 +1005,28 @@ async fn blueprint_regenerate(
apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
}
+/// Imports a client-provided blueprint
+///
+/// This is intended for development and support, not end users or operators.
+#[endpoint {
+ method = POST,
+ path = "/deployment/blueprints/import",
+}]
+async fn blueprint_import(
+ rqctx: RequestContext<Arc<ServerContext>>,
+ blueprint: TypedBody<Blueprint>,
+) -> Result<HttpResponseUpdatedNoContent, HttpError> {
+ let apictx = rqctx.context();
+ let handler = async {
+ let opctx = crate::context::op_context_for_internal_api(&rqctx).await;
+ let nexus = &apictx.nexus;
+ let blueprint = blueprint.into_inner();
+ nexus.blueprint_import(&opctx, blueprint).await?;
+ Ok(HttpResponseUpdatedNoContent())
+ };
+ apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
/// List uninitialized sleds
#[endpoint {
method = GET,
diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json
index fee389dfdc..28d82976c7 100644
--- a/openapi/nexus-internal.json
+++ b/openapi/nexus-internal.json
@@ -489,6 +489,34 @@
}
}
},
+ "/deployment/blueprints/import": {
+ "post": {
+ "summary": "Imports a client-provided blueprint",
+ "description": "This is intended for development and support, not end users or operators.",
+ "operationId": "blueprint_import",
+ "requestBody": {
+ "content": {
+ "application/json": {
+ "schema": {
+ "$ref": "#/components/schemas/Blueprint"
+ }
+ }
+ },
+ "required": true
+ },
+ "responses": {
+ "204": {
+ "description": "resource updated"
+ },
+ "4XX": {
+ "$ref": "#/components/responses/Error"
+ },
+ "5XX": {
+ "$ref": "#/components/responses/Error"
+ }
+ }
+ }
+ },
"/deployment/blueprints/regenerate": {
"post": {
"summary": "Generates a new blueprint for the current system, re-evaluating anything",
From 6e19854b088e676d11de7aca60f59c995374fefd Mon Sep 17 00:00:00 2001
From: David Crespo
Date: Fri, 29 Mar 2024 12:14:05 -0500
Subject: [PATCH 015/334] Bump web console (fix for quick draw mcgraws on ip pools list) (#5350)
https://github.com/oxidecomputer/console/compare/c0dd895e...156c082c
* [156c082c](https://github.com/oxidecomputer/console/commit/156c082c) oxidecomputer/console#2110
* [cfdb3aae](https://github.com/oxidecomputer/console/commit/cfdb3aae) oxidecomputer/console#2106
*
[63d8156f](https://github.com/oxidecomputer/console/commit/63d8156f) Revert "Change all uses of RHF `` to `useController` (oxidecomputer/console#2102)" * [44c646f9](https://github.com/oxidecomputer/console/commit/44c646f9) update API client gen script to use tsx directly * [f5245ab4](https://github.com/oxidecomputer/console/commit/f5245ab4) oxidecomputer/console#2104 * [e2a7bcd7](https://github.com/oxidecomputer/console/commit/e2a7bcd7) oxidecomputer/console#2102 * [56a00488](https://github.com/oxidecomputer/console/commit/56a00488) oxidecomputer/console#2101 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 33d87ded32..d09192f353 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="c0dd895eb5f1bfe7f4824a09998cd0b34594e253" -SHA2="c66ec4a376b011e000cf8396d7dc43b2f044cb173ff91585357de267ccee9398" +COMMIT="156c082cdb21231ab95ef7475de199ecb7a96dc3" +SHA2="bf768008b6958e19b8d8cef25710b4ff64eef1e59bb3bedb27bb6bf33459a78b" From 04be445d1f576479e2a966b758f02067a7b6ee10 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Fri, 29 Mar 2024 13:52:40 -0700 Subject: [PATCH 016/334] Update Propolis and Crucible (#5352) Propolis changes: Add `IntrPin::import_state` and migrate LPC UART pin states (#669) Attempt to set WCE for raw file backends Fix clippy/lint nits for rust 1.77.0 Crucible changes: Correctly (and robustly) count bytes (#1237) test-replay.sh fix name of DTrace script (#1235) BlockReq -> BlockOp (#1234) Simplify `BlockReq` (#1218) DTrace, cmon, cleanup, retry downstairs connections at 10 seconds. (#1231) Remove `MAX_ACTIVE_COUNT` flow control system (#1217) Crucible changes that were in Omicron but not in Propolis before this commit. 
Return *410 Gone* if volume is inactive (#1232) Update Rust crate opentelemetry to 0.22.0 (#1224) Update Rust crate base64 to 0.22.0 (#1222) Update Rust crate async-recursion to 1.1.0 (#1221) Minor cleanups to extent implementations (#1230) Update Rust crate http to 0.2.12 (#1220) Update Rust crate reedline to 0.30.0 (#1227) Update Rust crate rayon to 1.9.0 (#1226) Update Rust crate nix to 0.28 (#1223) Update Rust crate async-trait to 0.1.78 (#1219) Various buffer optimizations (#1211) Add low-level test for message encoding (#1214) Don't let df failures ruin the buildomat tests (#1213) Activate the NBD server's psuedo file (#1209) --------- Co-authored-by: Alan Hanson --- Cargo.lock | 26 +++++++++++++------------- Cargo.toml | 12 ++++++------ package-manifest.toml | 12 ++++++------ 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5fc65c2a10..160c8aacf1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -491,9 +491,9 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712#fdf0585c6a227a7cfbee4a61a36938c3d77e4712" +source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" dependencies = [ - "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712)", + "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", "libc", "strum 0.26.1", ] @@ -510,7 +510,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712#fdf0585c6a227a7cfbee4a61a36938c3d77e4712" +source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" dependencies = [ "libc", "strum 0.26.1", @@ -1418,7 +1418,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=16f16478f4af1502b25ddcd79d307b3f116f13f6#16f16478f4af1502b25ddcd79d307b3f116f13f6" +source = "git+https://github.com/oxidecomputer/crucible?rev=09bcfa6b9201f75891a5413928bb088cc150d319#09bcfa6b9201f75891a5413928bb088cc150d319" dependencies = [ "anyhow", "chrono", @@ -1434,7 +1434,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=16f16478f4af1502b25ddcd79d307b3f116f13f6#16f16478f4af1502b25ddcd79d307b3f116f13f6" +source = "git+https://github.com/oxidecomputer/crucible?rev=09bcfa6b9201f75891a5413928bb088cc150d319#09bcfa6b9201f75891a5413928bb088cc150d319" dependencies = [ "anyhow", "chrono", @@ -1451,7 +1451,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=16f16478f4af1502b25ddcd79d307b3f116f13f6#16f16478f4af1502b25ddcd79d307b3f116f13f6" +source = "git+https://github.com/oxidecomputer/crucible?rev=09bcfa6b9201f75891a5413928bb088cc150d319#09bcfa6b9201f75891a5413928bb088cc150d319" dependencies = [ "crucible-workspace-hack", "libc", @@ -3539,7 +3539,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712)", + "bhyve_api 0.0.0 
(git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", "byteorder", "camino", "camino-tempfile", @@ -5459,7 +5459,7 @@ dependencies = [ "pq-sys", "pretty_assertions", "progenitor-client", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", "rand 0.8.5", "rcgen", "ref-cast", @@ -5671,7 +5671,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "pretty_assertions", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", "propolis-mock-server", "rand 0.8.5", "rcgen", @@ -7080,7 +7080,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712#fdf0585c6a227a7cfbee4a61a36938c3d77e4712" +source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" dependencies = [ "async-trait", "base64 0.21.7", @@ -7101,7 +7101,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712#fdf0585c6a227a7cfbee4a61a36938c3d77e4712" +source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" dependencies = [ "anyhow", "atty", @@ -7111,7 +7111,7 @@ dependencies = [ "futures", "hyper 0.14.28", "progenitor", - "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712)", + "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", "rand 0.8.5", "reqwest", "schemars", @@ -7152,7 +7152,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=fdf0585c6a227a7cfbee4a61a36938c3d77e4712#fdf0585c6a227a7cfbee4a61a36938c3d77e4712" +source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index 2cfe265ca6..3237cc79bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -197,9 +197,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "16f16478f4af1502b25ddcd79d307b3f116f13f6" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "16f16478f4af1502b25ddcd79d307b3f116f13f6" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "16f16478f4af1502b25ddcd79d307b3f116f13f6" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "09bcfa6b9201f75891a5413928bb088cc150d319" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "09bcfa6b9201f75891a5413928bb088cc150d319" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = 
"09bcfa6b9201f75891a5413928bb088cc150d319" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.3" @@ -340,9 +340,9 @@ prettyplease = { version = "0.2.16", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "fdf0585c6a227a7cfbee4a61a36938c3d77e4712" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "fdf0585c6a227a7cfbee4a61a36938c3d77e4712" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "fdf0585c6a227a7cfbee4a61a36938c3d77e4712" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "9b2deee64874b315427962b1c7fccceef99436b2" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "9b2deee64874b315427962b1c7fccceef99436b2" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "9b2deee64874b315427962b1c7fccceef99436b2" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index 0987280906..d2f4924ffe 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -492,10 +492,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "16f16478f4af1502b25ddcd79d307b3f116f13f6" +source.commit = "09bcfa6b9201f75891a5413928bb088cc150d319" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "ce186a1a1243ea618755ae341844795cff0ce2c1415c6bd770360b3330dc664b" +source.sha256 = "32a0cc78b436679ed9966564e5a7c0214d67f56c4a5fbac0a5b9507d99752b15" output.type = "zone" output.intermediate_only = true @@ -504,10 +504,10 @@ service_name = "crucible_pantry_prebuilt" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "16f16478f4af1502b25ddcd79d307b3f116f13f6" +source.commit = "09bcfa6b9201f75891a5413928bb088cc150d319" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "8920622255fa0317ce312d0127c94b8fef647a85be4c8abaf861be560fe43194" +source.sha256 = "99028aaac8c879e4855296ce0bde826ceb8f73504fadf0ded7674dcf45fb0446" output.type = "zone" output.intermediate_only = true @@ -519,10 +519,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "fdf0585c6a227a7cfbee4a61a36938c3d77e4712" +source.commit = "9b2deee64874b315427962b1c7fccceef99436b2" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "f07720e9041907f9285432251c82c0c7502bf1be9dd4df1ba6abe8f7462c2e9e" +source.sha256 = "b32be7167e0c10ebad874de011a752edcbf936cf55abdaddef7f40025beb9b6a" output.type = "zone" [package.mg-ddm-gz] From 5f153c2b509ea3625d1ed8a6ac511b7f25be92e1 Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Fri, 29 Mar 2024 21:10:01 +0000 Subject: [PATCH 017/334] Update dendrite to eeb1944 (#5353) Updated dendrite to commit eeb1944. 
Co-authored-by: reflector[bot] <130185838+reflector[bot]@users.noreply.github.com> --- package-manifest.toml | 12 ++++++------ tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 6 +++--- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index d2f4924ffe..2d9d272525 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -595,8 +595,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "c2a9f29f70b1e05d891c713997577be53826e1bb" -source.sha256 = "1405185cc9645b3a6f1bf82d2ffd89f17505e4e625795cd0beb58f043fa7fd8a" +source.commit = "eeb194461a4b863dae25a933541b1a4fb8efe14d" +source.sha256 = "79c32441d7d5328a06e92e7d9c410805e9d8be9d78d59ce1ed6c3e0fba93198d" output.type = "zone" output.intermediate_only = true @@ -620,8 +620,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "c2a9f29f70b1e05d891c713997577be53826e1bb" -source.sha256 = "2124ffd76bb80bcb7063862a1516da3d805b1c062fe2339b95e4656355a55fd9" +source.commit = "eeb194461a4b863dae25a933541b1a4fb8efe14d" +source.sha256 = "4e77c3aea01be77be440bf30a7f6d48fbeb97b3ecbc72c431eeeca217356d487" output.type = "zone" output.intermediate_only = true @@ -638,8 +638,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "c2a9f29f70b1e05d891c713997577be53826e1bb" -source.sha256 = "3e89ff18a1209b82caafce91db42dec9f9f8d0fcaacbb1a8cfe7d1c0b0966700" +source.commit = "eeb194461a4b863dae25a933541b1a4fb8efe14d" +source.sha256 = "89c9212991656d8aee799c30a6bb63105a6a45e45b396f6dd56d43cf4c294e11" output.type = "zone" output.intermediate_only = true diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 14221a70ab..2d459e6c3a 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="c2a9f29f70b1e05d891c713997577be53826e1bb" +COMMIT="eeb194461a4b863dae25a933541b1a4fb8efe14d" SHA2="50eff6d9f986b7b1af5970d11d8d01b812de37269731c6c691a244b3fdae82ae" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 74147127b1..8bae48c3aa 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="1405185cc9645b3a6f1bf82d2ffd89f17505e4e625795cd0beb58f043fa7fd8a" -CIDL_SHA256_LINUX_DPD="db3e419cf4381607482c55230c64c6afa80388a26db8efa48e437ebc8b82526a" -CIDL_SHA256_LINUX_SWADM="1de82f6b7419aa0e6e0632def69d5b370a9cb3ac2261527fce96840eb5af5b8d" +CIDL_SHA256_ILLUMOS="79c32441d7d5328a06e92e7d9c410805e9d8be9d78d59ce1ed6c3e0fba93198d" +CIDL_SHA256_LINUX_DPD="c56a5754996bdce4cf4142829a80f050563c5cab8c30a05b9e56b8d85723d0f5" +CIDL_SHA256_LINUX_SWADM="54042fb53e304bfade94ea7ca1b41c62c86bf48c32ca355b2c09dd6067ccb53b" From 17510a64780b86733b39300cfea9946f9623f0dd Mon Sep 17 00:00:00 2001 From: bnaecker Date: Fri, 29 Mar 2024 16:41:43 -0700 Subject: [PATCH 018/334] Add basic Oximeter query language (#5273) - Add basic grammar for an Oximeter query language. Includes support for numeric, string, boolean, UUID, timestamp, IP address, and duration literals. Queries are constructed in a pipeline of "table operations", each of which operates on a set of timeseries and produces another set. 
- Implement temporal alignment, currently supporting one method that generates output samples from the mean of the inputs over the alignment period. - Add basic subquery support, for fetching multiple timeseries and joining them - Implement filtering on fields and timestamps, both in the DB as much as possible, and the query pipeline; and implement filtering on data values in code. - Implement group-by support, where we can currently reduce values within a group by summing or computing the mean. - Add public Nexus API endpoints for listing timeseries schema, and running an OxQL query. Both are currently restricted to fleet readers, until a more thorough authz process is fleshed out. - This also reorganizes the internals of the `oximeter_db::client` module, which were starting to get too unwieldy and littered with conditional compilation directives. --- Cargo.lock | 32 + Cargo.toml | 6 +- nexus/src/app/metrics.rs | 85 +- nexus/src/external_api/http_entrypoints.rs | 52 + nexus/tests/integration_tests/endpoints.rs | 31 + nexus/tests/integration_tests/metrics.rs | 25 + nexus/tests/output/nexus_tags.txt | 2 + nexus/types/src/external_api/params.rs | 7 + openapi/nexus.json | 797 ++++++- oximeter/db/Cargo.toml | 64 +- oximeter/db/src/bin/{oxdb.rs => oxdb/main.rs} | 311 +-- oximeter/db/src/bin/oxdb/oxql.rs | 333 +++ oximeter/db/src/bin/oxdb/sql.rs | 298 +++ oximeter/db/src/client/dbwrite.rs | 266 +++ oximeter/db/src/{client.rs => client/mod.rs} | 596 +---- oximeter/db/src/client/oxql.rs | 1281 +++++++++++ oximeter/db/src/client/query_summary.rs | 123 + oximeter/db/src/client/sql.rs | 104 + oximeter/db/src/lib.rs | 25 +- oximeter/db/src/model.rs | 19 +- oximeter/db/src/oxql/ast/cmp.rs | 76 + oximeter/db/src/oxql/ast/grammar.rs | 1334 +++++++++++ oximeter/db/src/oxql/ast/ident.rs | 25 + oximeter/db/src/oxql/ast/literal.rs | 384 ++++ oximeter/db/src/oxql/ast/logical_op.rs | 41 + oximeter/db/src/oxql/ast/mod.rs | 152 ++ oximeter/db/src/oxql/ast/table_ops/align.rs | 753 ++++++ oximeter/db/src/oxql/ast/table_ops/filter.rs | 1283 +++++++++++ oximeter/db/src/oxql/ast/table_ops/get.rs | 15 + .../db/src/oxql/ast/table_ops/group_by.rs | 746 ++++++ oximeter/db/src/oxql/ast/table_ops/join.rs | 385 ++++ oximeter/db/src/oxql/ast/table_ops/mod.rs | 76 + oximeter/db/src/oxql/mod.rs | 39 + oximeter/db/src/oxql/point.rs | 2040 +++++++++++++++++ oximeter/db/src/oxql/query/mod.rs | 837 +++++++ oximeter/db/src/oxql/table.rs | 293 +++ oximeter/db/src/query.rs | 45 +- oximeter/db/src/sql/mod.rs | 26 + oximeter/oximeter/src/types.rs | 28 +- workspace-hack/Cargo.toml | 2 + 40 files changed, 12177 insertions(+), 860 deletions(-) rename oximeter/db/src/bin/{oxdb.rs => oxdb/main.rs} (50%) create mode 100644 oximeter/db/src/bin/oxdb/oxql.rs create mode 100644 oximeter/db/src/bin/oxdb/sql.rs create mode 100644 oximeter/db/src/client/dbwrite.rs rename oximeter/db/src/{client.rs => client/mod.rs} (88%) create mode 100644 oximeter/db/src/client/oxql.rs create mode 100644 oximeter/db/src/client/query_summary.rs create mode 100644 oximeter/db/src/client/sql.rs create mode 100644 oximeter/db/src/oxql/ast/cmp.rs create mode 100644 oximeter/db/src/oxql/ast/grammar.rs create mode 100644 oximeter/db/src/oxql/ast/ident.rs create mode 100644 oximeter/db/src/oxql/ast/literal.rs create mode 100644 oximeter/db/src/oxql/ast/logical_op.rs create mode 100644 oximeter/db/src/oxql/ast/mod.rs create mode 100644 oximeter/db/src/oxql/ast/table_ops/align.rs create mode 100644 oximeter/db/src/oxql/ast/table_ops/filter.rs create mode 100644 
oximeter/db/src/oxql/ast/table_ops/get.rs create mode 100644 oximeter/db/src/oxql/ast/table_ops/group_by.rs create mode 100644 oximeter/db/src/oxql/ast/table_ops/join.rs create mode 100644 oximeter/db/src/oxql/ast/table_ops/mod.rs create mode 100644 oximeter/db/src/oxql/mod.rs create mode 100644 oximeter/db/src/oxql/point.rs create mode 100644 oximeter/db/src/oxql/query/mod.rs create mode 100644 oximeter/db/src/oxql/table.rs diff --git a/Cargo.lock b/Cargo.lock index 160c8aacf1..4548d0a3d7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5822,6 +5822,7 @@ dependencies = [ "num-traits", "once_cell", "openapiv3", + "peg-runtime", "pem-rfc7468", "petgraph", "postgres-types", @@ -6189,22 +6190,26 @@ name = "oximeter-db" version = "0.1.0" dependencies = [ "anyhow", + "async-recursion", "async-trait", "bcs", "bytes", "camino", "chrono", "clap 4.5.1", + "crossterm", "dropshot", "expectorate", "futures", "highway", "indexmap 2.2.5", "itertools 0.12.1", + "num", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", "oximeter", + "peg", "reedline", "regex", "reqwest", @@ -6511,6 +6516,33 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099" +[[package]] +name = "peg" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "400bcab7d219c38abf8bd7cc2054eb9bbbd4312d66f6a5557d572a203f646f61" +dependencies = [ + "peg-macros", + "peg-runtime", +] + +[[package]] +name = "peg-macros" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46e61cce859b76d19090f62da50a9fe92bab7c2a5f09e183763559a2ac392c90" +dependencies = [ + "peg-runtime", + "proc-macro2", + "quote", +] + +[[package]] +name = "peg-runtime" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36bae92c60fa2398ce4678b98b2c4b5a7c61099961ca1fa305aec04a9ad28922" + [[package]] name = "pem" version = "3.0.2" diff --git a/Cargo.toml b/Cargo.toml index 3237cc79bd..0d66583a82 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -257,7 +257,6 @@ ipcc = { path = "ipcc" } ipnet = "2.9" itertools = "0.12.1" internet-checksum = "0.2" -ipcc-key-value = { path = "ipcc-key-value" } ipnetwork = { version = "0.20", features = ["schemars"] } ispf = { git = "https://github.com/oxidecomputer/ispf" } key-manager = { path = "key-manager" } @@ -313,7 +312,6 @@ openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10"
openssl-sys = "0.9"
-openssl-probe = "0.1.5"
opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "7ee353a470ea59529ee1b34729681da887aa88ce" }
oso = "0.27"
owo-colors = "4.0.0"
@@ -330,9 +328,7 @@ partial-io = { version = "0.5.4", features = ["proptest1", "tokio1"] }
parse-size = "1.0.0"
paste = "1.0.14"
percent-encoding = "2.3.1"
+peg = "0.8.2"
pem = "3.0"
petgraph = "0.6.4"
postgres-protocol = "0.6.6"
@@ -368,7 +367,6 @@ schemars = "0.8.16"
secrecy = "0.8.0"
semver = { version = "1.0.22", features = ["std", "serde"] }
serde = { version = "1.0", default-features = false, features = [ "derive", "rc" ] }
-serde_derive = "1.0"
serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" }
serde_json = "1.0.114"
serde_path_to_error = "0.1.16"
@@ -394,12 +392,12 @@ slog-envlogger = "2.2"
slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] }
slog-term = "2.9"
smf = "0.2"
-snafu = "0.7"
socket2 = { version = "0.5", features = ["all"] }
sp-sim = { path = "sp-sim" }
sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" }
sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" }
sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" }
+sqlformat = "0.2.3"
sqlparser = { version = "0.43.1", features = [ "visitor" ] }
static_assertions = "1.1.0"
# Please do not change the Steno version to a Git dependency. It makes it
diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs
index 94fb232892..3728a3bdc1 100644
--- a/nexus/src/app/metrics.rs
+++ b/nexus/src/app/metrics.rs
@@ -13,7 +13,9 @@ use nexus_db_queries::{
db::{fixed_data::FLEET_ID, lookup},
};
use omicron_common::api::external::{Error, InternalContext};
-use oximeter_db::Measurement;
+use oximeter_db::{
+ oxql, Measurement, TimeseriesSchema, TimeseriesSchemaPaginationParams,
+};
use std::num::NonZeroU32;
impl super::Nexus {
@@ -96,4 +98,85 @@ impl super::Nexus {
)
.await
}
+
+ /// List available timeseries schema.
+ pub(crate) async fn timeseries_schema_list(
+ &self,
+ opctx: &OpContext,
+ pagination: &TimeseriesSchemaPaginationParams,
+ limit: NonZeroU32,
+ ) -> Result<dropshot::ResultsPage<TimeseriesSchema>, Error> {
+ // Must be a fleet user to list timeseries schema.
+ //
+ // TODO-security: We need to figure out how to implement proper security
+ // checks here, letting less-privileged users fetch data for the
+ // resources they have access to.
+ opctx.authorize(authz::Action::Read, &authz::FLEET).await?;
+ self.timeseries_client
+ .get()
+ .await
+ .map_err(|e| {
+ Error::internal_error(&format!(
+ "Cannot access timeseries DB: {}",
+ e
+ ))
+ })?
+ .timeseries_schema_list(&pagination.page, limit)
+ .await
+ .map_err(|e| match e {
+ oximeter_db::Error::DatabaseUnavailable(_) => {
+ Error::ServiceUnavailable {
+ internal_message: e.to_string(),
+ }
+ }
+ _ => Error::InternalError { internal_message: e.to_string() },
+ })
+ }
+
+ /// Run an OxQL query against the timeseries database.
+ pub(crate) async fn timeseries_query(
+ &self,
+ opctx: &OpContext,
+ query: impl AsRef<str>,
+ ) -> Result<Vec<oxql::Table>, Error> {
+ // Must be a fleet user to run timeseries queries.
+ //
+ // TODO-security: We need to figure out how to implement proper security
+ // checks here, letting less-privileged users fetch data for the
+ // resources they have access to.
+ opctx.authorize(authz::Action::Read, &authz::FLEET).await?;
+ self.timeseries_client
+ .get()
+ .await
+ .map_err(|e| {
+ Error::internal_error(&format!(
+ "Cannot access timeseries DB: {}",
+ e
+ ))
+ })?
+ .oxql_query(query)
+ .await
+ .map(|result| {
+ // TODO-observability: The query method returns information
+ // about the duration of the OxQL query and the database
+ // resource usage for each contained SQL query. We should
+ // publish this as a timeseries itself, so that we can track
+ // improvements to query processing.
+ //
+ // For now, simply return the tables alone.
+ result.tables
+ })
+ .map_err(|e| match e {
+ oximeter_db::Error::DatabaseUnavailable(_) => {
+ Error::ServiceUnavailable {
+ internal_message: e.to_string(),
+ }
+ }
+ oximeter_db::Error::Oxql(_)
+ | oximeter_db::Error::TimeseriesNotFound(_) => {
+ Error::invalid_request(e.to_string())
+ }
+ _ => Error::InternalError { internal_message: e.to_string() },
+ })
+ }
}
diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs
index 6fa530b49d..a570cd60c4 100644
--- a/nexus/src/external_api/http_entrypoints.rs
+++ b/nexus/src/external_api/http_entrypoints.rs
@@ -321,6 +321,8 @@ pub(crate) fn external_api() -> NexusApiDescription {
api.register(system_metric)?;
api.register(silo_metric)?;
+ api.register(timeseries_schema_list)?;
+ api.register(timeseries_query)?;
api.register(system_update_put_repository)?;
api.register(system_update_get_repository)?;
@@ -5626,6 +5628,56 @@ async fn silo_metric(
apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
}
+/// List available timeseries schema.
+#[endpoint {
+ method = GET,
+ path = "/v1/timeseries/schema",
+ tags = ["metrics"],
+}]
+async fn timeseries_schema_list(
+ rqctx: RequestContext<Arc<ServerContext>>,
+ pag_params: Query<TimeseriesSchemaPaginationParams>,
+) -> Result<HttpResponseOk<ResultsPage<TimeseriesSchema>>, HttpError>
+{
+ let apictx = rqctx.context();
+ let handler = async {
+ let nexus = &apictx.nexus;
+ let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+ let pagination = pag_params.into_inner();
+ let limit = rqctx.page_limit(&pagination)?;
+ nexus
+ .timeseries_schema_list(&opctx, &pagination, limit)
+ .await
+ .map(HttpResponseOk)
+ .map_err(HttpError::from)
+ };
+ apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
+/// Run a timeseries query, written OxQL.
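+///
+/// An OxQL query is a pipeline of table operations separated by `|`. As a
+/// purely illustrative sketch (the filter and alignment stages below are
+/// hypothetical and not taken from this change's tests), a query might look
+/// like:
+///
+/// ```text
+/// get http_service:request_latency_histogram
+///     | filter route == "/v1/disks"
+///     | align mean_within(1m)
+/// ```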
+#[endpoint {
+ method = POST,
+ path = "/v1/timeseries/query",
+ tags = ["metrics"],
+}]
+async fn timeseries_query(
+ rqctx: RequestContext<Arc<ServerContext>>,
+ body: TypedBody<params::TimeseriesQuery>,
+) -> Result<HttpResponseOk<Vec<oxql::Table>>, HttpError> {
+ let apictx = rqctx.context();
+ let handler = async {
+ let nexus = &apictx.nexus;
+ let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
+ let query = body.into_inner().query;
+ nexus
+ .timeseries_query(&opctx, &query)
+ .await
+ .map(HttpResponseOk)
+ .map_err(HttpError::from)
+ };
+ apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
+}
+
// Updates
/// Upload TUF repository
diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs
index 1003722723..02ab1385e3 100644
--- a/nexus/tests/integration_tests/endpoints.rs
+++ b/nexus/tests/integration_tests/endpoints.rs
@@ -848,6 +848,17 @@ pub static DEMO_SILO_METRICS_URL: Lazy<String> = Lazy::new(|| {
)
});
+pub static TIMESERIES_LIST_URL: Lazy<String> =
+ Lazy::new(|| String::from("/v1/timeseries/schema"));
+
+pub static TIMESERIES_QUERY_URL: Lazy<String> =
+ Lazy::new(|| String::from("/v1/timeseries/query"));
+
+pub static DEMO_TIMESERIES_QUERY: Lazy<params::TimeseriesQuery> =
+ Lazy::new(|| params::TimeseriesQuery {
+ query: String::from("get http_service:request_latency_histogram"),
+ });
+
// Users
pub static DEMO_USER_CREATE: Lazy<params::UserCreate> =
Lazy::new(|| params::UserCreate {
@@ -2023,6 +2034,26 @@ pub static VERIFY_ENDPOINTS: Lazy<Vec<VerifyEndpoint>> = Lazy::new(|| {
],
},
+ VerifyEndpoint {
+ url: &TIMESERIES_LIST_URL,
+ visibility: Visibility::Public,
+ unprivileged_access: UnprivilegedAccess::None,
+ allowed_methods: vec![
+ AllowedMethod::Get,
+ ],
+ },
+
+ VerifyEndpoint {
+ url: &TIMESERIES_QUERY_URL,
+ visibility: Visibility::Public,
+ unprivileged_access: UnprivilegedAccess::None,
+ allowed_methods: vec![
+ AllowedMethod::Post(
+ serde_json::to_value(&*DEMO_TIMESERIES_QUERY).unwrap()
+ ),
+ ],
+ },
+
/* Silo identity providers */
VerifyEndpoint {
diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs
index 73f11ce49a..c96cf9b0fb 100644
--- a/nexus/tests/integration_tests/metrics.rs
+++ b/nexus/tests/integration_tests/metrics.rs
@@ -16,6 +16,7 @@ use nexus_test_utils::ControlPlaneTestContext;
use nexus_test_utils_macros::nexus_test;
use oximeter::types::Datum;
use oximeter::types::Measurement;
+use oximeter::TimeseriesSchema;
use uuid::Uuid;
pub async fn query_for_metrics(
@@ -238,3 +239,27 @@ async fn test_metrics(
// project 1 unaffected by project 2's resources
assert_silo_metrics(&cptestctx, Some(project1_id), GIB, 4, GIB).await;
}
+
+/// Test that we can correctly list some timeseries schema.
+#[nexus_test]
+async fn test_timeseries_schema_list(
+ cptestctx: &ControlPlaneTestContext<omicron_nexus::Server>,
+) {
+ // We should be able to fetch the list of timeseries, and it should include
+ // Nexus's HTTP latency distribution. This is defined in Nexus itself, and
+ // should always exist after we've registered as a producer and start
+ // producing data. Force a collection to ensure that happens.
+ cptestctx.server.register_as_producer().await; + cptestctx.oximeter.force_collect().await; + let client = &cptestctx.external_client; + let url = "/v1/timeseries/schema"; + let schema = + objects_list_page_authz::(client, &url).await; + schema + .items + .iter() + .find(|sc| { + sc.timeseries_name == "http_service:request_latency_histogram" + }) + .expect("Failed to find HTTP request latency histogram schema"); +} diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 91d2504a57..3e40e8293d 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -73,6 +73,8 @@ login_saml POST /login/{silo_name}/saml/{provi API operations found with tag "metrics" OPERATION ID METHOD URL PATH silo_metric GET /v1/metrics/{metric_name} +timeseries_query POST /v1/timeseries/query +timeseries_schema_list GET /v1/timeseries/schema API operations found with tag "policy" OPERATION ID METHOD URL PATH diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 1ba373ff56..3829484a27 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -2055,3 +2055,10 @@ pub struct ProbeListSelector { /// A name or id to use when selecting a probe. pub name_or_id: Option, } + +/// A timeseries query string, written in the Oximeter query language. +#[derive(Deserialize, JsonSchema, Serialize)] +pub struct TimeseriesQuery { + /// A timeseries query string, written in the Oximeter query language. + pub query: String, +} diff --git a/openapi/nexus.json b/openapi/nexus.json index 3cc991126d..e7e4c1d31c 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -7929,6 +7929,99 @@ } } }, + "/v1/timeseries/query": { + "post": { + "tags": [ + "metrics" + ], + "summary": "Run a timeseries query, written OxQL.", + "operationId": "timeseries_query", + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TimeseriesQuery" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "title": "Array_of_Table", + "type": "array", + "items": { + "$ref": "#/components/schemas/Table" + } + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/timeseries/schema": { + "get": { + "tags": [ + "metrics" + ], + "summary": "List available timeseries schema.", + "operationId": "timeseries_schema_list", + "parameters": [ + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TimeseriesSchemaResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/v1/users": { "get": { "tags": [ @@ -11917,6 +12010,56 @@ } ] }, + "Distributiondouble": { + "description": "A distribution is a sequence of bins and counts in those bins.", + 
"type": "object", + "properties": { + "bins": { + "type": "array", + "items": { + "type": "number", + "format": "double" + } + }, + "counts": { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + } + }, + "required": [ + "bins", + "counts" + ] + }, + "Distributionint64": { + "description": "A distribution is a sequence of bins and counts in those bins.", + "type": "object", + "properties": { + "bins": { + "type": "array", + "items": { + "type": "integer", + "format": "int64" + } + }, + "counts": { + "type": "array", + "items": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + } + }, + "required": [ + "bins", + "counts" + ] + }, "EphemeralIpCreate": { "description": "Parameters for creating an ephemeral IP address for an instance.", "type": "object", @@ -12080,33 +12223,314 @@ } }, "required": [ - "floating_ip", - "type" + "floating_ip", + "type" + ] + } + ] + }, + "ExternalIpResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/ExternalIp" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, + "FieldSchema": { + "description": "The name and type information for a field of a timeseries schema.", + "type": "object", + "properties": { + "field_type": { + "$ref": "#/components/schemas/FieldType" + }, + "name": { + "type": "string" + }, + "source": { + "$ref": "#/components/schemas/FieldSource" + } + }, + "required": [ + "field_type", + "name", + "source" + ] + }, + "FieldSource": { + "description": "The source from which a field is derived, the target or metric.", + "type": "string", + "enum": [ + "target", + "metric" + ] + }, + "FieldType": { + "description": "The `FieldType` identifies the data type of a target or metric field.", + "type": "string", + "enum": [ + "string", + "i8", + "u8", + "i16", + "u16", + "i32", + "u32", + "i64", + "u64", + "ip_addr", + "uuid", + "bool" + ] + }, + "FieldValue": { + "description": "The `FieldValue` contains the value of a target or metric field.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "string" + ] + }, + "value": { + "type": "string" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i8" + ] + }, + "value": { + "type": "integer", + "format": "int8" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "u8" + ] + }, + "value": { + "type": "integer", + "format": "uint8", + "minimum": 0 + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i16" + ] + }, + "value": { + "type": "integer", + "format": "int16" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "u16" + ] + }, + "value": { + "type": "integer", + "format": "uint16", + "minimum": 0 + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i32" + ] + }, + "value": { + "type": "integer", + "format": "int32" + } + }, + "required": [ + "type", 
+ "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "u32" + ] + }, + "value": { + "type": "integer", + "format": "uint32", + "minimum": 0 + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "i64" + ] + }, + "value": { + "type": "integer", + "format": "int64" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "u64" + ] + }, + "value": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ip_addr" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "uuid" + ] + }, + "value": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "bool" + ] + }, + "value": { + "type": "boolean" + } + }, + "required": [ + "type", + "value" ] } ] }, - "ExternalIpResultsPage": { - "description": "A single page of results", - "type": "object", - "properties": { - "items": { - "description": "list of items on this page of results", - "type": "array", - "items": { - "$ref": "#/components/schemas/ExternalIp" - } - }, - "next_page": { - "nullable": true, - "description": "token used to fetch the next page of results (if any)", - "type": "string" - } - }, - "required": [ - "items" - ] - }, "FinalizeDisk": { "description": "Parameters for finalizing a disk", "type": "object", @@ -14279,6 +14703,32 @@ "items" ] }, + "MetricType": { + "description": "The type of the metric itself, indicating what its values represent.", + "oneOf": [ + { + "description": "The value represents an instantaneous measurement in time.", + "type": "string", + "enum": [ + "gauge" + ] + }, + { + "description": "The value represents a difference between two points in time.", + "type": "string", + "enum": [ + "delta" + ] + }, + { + "description": "The value represents an accumulation between two points in time.", + "type": "string", + "enum": [ + "cumulative" + ] + } + ] + }, "MissingDatum": { "type": "object", "properties": { @@ -14614,6 +15064,37 @@ "ok" ] }, + "Points": { + "description": "Timepoints and values for one timeseries.", + "type": "object", + "properties": { + "start_times": { + "nullable": true, + "type": "array", + "items": { + "type": "string", + "format": "date-time" + } + }, + "timestamps": { + "type": "array", + "items": { + "type": "string", + "format": "date-time" + } + }, + "values": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Values" + } + } + }, + "required": [ + "timestamps", + "values" + ] + }, "Probe": { "description": "Identity-related metadata that's included in nearly all public API objects", "type": "object", @@ -16965,6 +17446,113 @@ "vlan_id" ] }, + "Table": { + "description": "A table represents one or more timeseries with the same schema.\n\nA table is the result of an OxQL query. 
It contains a name, usually the name of the timeseries schema from which the data is derived, and any number of timeseries, which contain the actual data.", + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "timeseries": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/Timeseries" + } + } + }, + "required": [ + "name", + "timeseries" + ] + }, + "Timeseries": { + "description": "A timeseries contains a timestamped set of values from one source.\n\nThis includes the typed key-value pairs that uniquely identify it, and the set of timestamps and data values from it.", + "type": "object", + "properties": { + "fields": { + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/FieldValue" + } + }, + "points": { + "$ref": "#/components/schemas/Points" + } + }, + "required": [ + "fields", + "points" + ] + }, + "TimeseriesName": { + "title": "The name of a timeseries", + "description": "Names are constructed by concatenating the target and metric names with ':'. Target and metric names must be lowercase alphanumeric characters with '_' separating words.", + "type": "string", + "pattern": "^(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*):(([a-z]+[a-z0-9]*)(_([a-z0-9]+))*)$" + }, + "TimeseriesQuery": { + "description": "A timeseries query string, written in the Oximeter query language.", + "type": "object", + "properties": { + "query": { + "description": "A timeseries query string, written in the Oximeter query language.", + "type": "string" + } + }, + "required": [ + "query" + ] + }, + "TimeseriesSchema": { + "description": "The schema for a timeseries.\n\nThis includes the name of the timeseries, as well as the datum type of its metric and the schema for each field.", + "type": "object", + "properties": { + "created": { + "type": "string", + "format": "date-time" + }, + "datum_type": { + "$ref": "#/components/schemas/DatumType" + }, + "field_schema": { + "type": "array", + "items": { + "$ref": "#/components/schemas/FieldSchema" + }, + "uniqueItems": true + }, + "timeseries_name": { + "$ref": "#/components/schemas/TimeseriesName" + } + }, + "required": [ + "created", + "datum_type", + "field_schema", + "timeseries_name" + ] + }, + "TimeseriesSchemaResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/TimeseriesSchema" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "UninitializedSled": { "description": "A sled that has not been added to an initialized rack yet", "type": "object", @@ -17246,6 +17834,169 @@ "provisioned" ] }, + "ValueArray": { + "description": "List of data values for one timeseries.\n\nEach element is an option, where `None` represents a missing sample.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "integer" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "type": "integer", + "format": "int64" + } + } + }, + "required": [ + "type", + "values" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "double" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "type": "number", + "format": "double" + } + } + }, + "required": [ + "type", + "values" + ] + }, + { 
+ "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "boolean" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "type": "boolean" + } + } + }, + "required": [ + "type", + "values" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "string" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "type": "string" + } + } + }, + "required": [ + "type", + "values" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "integer_distribution" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Distributionint64" + } + ] + } + } + }, + "required": [ + "type", + "values" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "double_distribution" + ] + }, + "values": { + "type": "array", + "items": { + "nullable": true, + "allOf": [ + { + "$ref": "#/components/schemas/Distributiondouble" + } + ] + } + } + }, + "required": [ + "type", + "values" + ] + } + ] + }, + "Values": { + "description": "A single list of values, for one dimension of a timeseries.", + "type": "object", + "properties": { + "metric_type": { + "$ref": "#/components/schemas/MetricType" + }, + "values": { + "$ref": "#/components/schemas/ValueArray" + } + }, + "required": [ + "metric_type", + "values" + ] + }, "VirtualResourceCounts": { "description": "A collection of resource counts used to describe capacity and utilization", "type": "object", diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index c4ee44acb6..88a2ab8a89 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -7,6 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true +async-recursion = "1.0.5" async-trait.workspace = true bcs.workspace = true camino.workspace = true @@ -15,21 +16,16 @@ clap.workspace = true dropshot.workspace = true futures.workspace = true highway.workspace = true -indexmap.workspace = true omicron-common.workspace = true omicron-workspace-hack.workspace = true oximeter.workspace = true -reedline.workspace = true regex.workspace = true -rustyline.workspace = true serde.workspace = true serde_json.workspace = true slog.workspace = true slog-async.workspace = true +slog-dtrace.workspace = true slog-term.workspace = true -sqlparser.workspace = true -sqlformat = "0.2.3" -tabled.workspace = true thiserror.workspace = true usdt.workspace = true uuid.workspace = true @@ -38,26 +34,82 @@ uuid.workspace = true workspace = true features = [ "serde" ] +[dependencies.crossterm] +workspace = true +optional = true + +[dependencies.indexmap] +workspace = true +optional = true + +[dependencies.num] +workspace = true +optional = true + +[dependencies.peg] +workspace = true +optional = true + +[dependencies.reedline] +workspace = true +optional = true + [dependencies.reqwest] workspace = true features = [ "json" ] +[dependencies.rustyline] +workspace = true +optional = true + [dependencies.schemars] workspace = true features = [ "uuid1", "bytes", "chrono" ] +[dependencies.sqlformat] +workspace = true +optional = true + +[dependencies.sqlparser] +workspace = true +optional = true + [dependencies.tokio] workspace = true features = [ "rt-multi-thread", "macros" ] +[dependencies.tabled] +workspace = true +optional = true + [dev-dependencies] expectorate.workspace = true +indexmap.workspace = true itertools.workspace = true omicron-test-utils.workspace = 
true slog-dtrace.workspace = true +sqlparser.workspace = true strum.workspace = true tempfile.workspace = true +[features] +default = [ "oxql", "sql" ] +sql = [ + "dep:indexmap", + "dep:reedline", + "dep:rustyline", + "dep:sqlformat", + "dep:sqlparser", + "dep:tabled" +] +oxql = [ + "dep:crossterm", + "dep:num", + "dep:peg", + "dep:reedline", + "dep:tabled", +] + [[bin]] name = "oxdb" doc = false diff --git a/oximeter/db/src/bin/oxdb.rs b/oximeter/db/src/bin/oxdb/main.rs similarity index 50% rename from oximeter/db/src/bin/oxdb.rs rename to oximeter/db/src/bin/oxdb/main.rs index 02a8054da0..ca11dd18a3 100644 --- a/oximeter/db/src/bin/oxdb.rs +++ b/oximeter/db/src/bin/oxdb/main.rs @@ -4,31 +4,27 @@ //! Tool for developing against the Oximeter timeseries database, populating data and querying. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use anyhow::{bail, Context}; use chrono::{DateTime, Utc}; use clap::{Args, Parser}; -use dropshot::EmptyScanParams; -use dropshot::WhichPage; use oximeter::{ types::{Cumulative, Sample}, Metric, Target, }; -use oximeter_db::sql::function_allow_list; -use oximeter_db::QueryMetadata; -use oximeter_db::QueryResult; -use oximeter_db::Table; use oximeter_db::{query, Client, DbWrite}; -use reedline::DefaultPrompt; -use reedline::DefaultPromptSegment; -use reedline::Reedline; -use reedline::Signal; use slog::{debug, info, o, Drain, Level, Logger}; use std::net::IpAddr; use std::net::SocketAddr; use uuid::Uuid; +#[cfg(feature = "sql")] +mod sql; + +#[cfg(feature = "oxql")] +mod oxql; + // Samples are inserted in chunks of this size, to avoid large allocations when inserting huge // numbers of timeseries. const INSERT_CHUNK_SIZE: usize = 100_000; @@ -151,9 +147,17 @@ enum Subcommand { }, /// Enter a SQL shell for interactive querying. + #[cfg(feature = "sql")] Sql { #[clap(flatten)] - opts: ShellOptions, + opts: crate::sql::ShellOptions, + }, + + /// Enter the Oximeter Query Language shell for interactive querying. + #[cfg(feature = "oxql")] + Oxql { + #[clap(flatten)] + opts: crate::oxql::ShellOptions, }, } @@ -312,281 +316,6 @@ async fn query( Ok(()) } -fn print_basic_commands() { - println!("Basic commands:"); - println!(" \\?, \\h, help - Print this help"); - println!(" \\q, quit, exit, ^D - Exit the shell"); - println!(" \\l - List tables"); - println!(" \\d
- Describe a table"); - println!( - " \\f - List or describe ClickHouse SQL functions" - ); - println!(); - println!("Or try entering a SQL `SELECT` statement"); -} - -async fn list_virtual_tables(client: &Client) -> anyhow::Result<()> { - let mut page = WhichPage::First(EmptyScanParams {}); - let limit = 100.try_into().unwrap(); - loop { - let results = client.timeseries_schema_list(&page, limit).await?; - for schema in results.items.iter() { - println!("{}", schema.timeseries_name); - } - if results.next_page.is_some() { - if let Some(last) = results.items.last() { - page = WhichPage::Next(last.timeseries_name.clone()); - } else { - return Ok(()); - } - } else { - return Ok(()); - } - } -} - -async fn describe_virtual_table( - client: &Client, - table: &str, -) -> anyhow::Result<()> { - match table.parse() { - Err(_) => println!("Invalid timeseries name: {table}"), - Ok(name) => { - if let Some(schema) = client.schema_for_timeseries(&name).await? { - let mut cols = - Vec::with_capacity(schema.field_schema.len() + 2); - let mut types = cols.clone(); - for field in schema.field_schema.iter() { - cols.push(field.name.clone()); - types.push(field.field_type.to_string()); - } - cols.push("timestamp".into()); - types.push("DateTime64".into()); - - if schema.datum_type.is_histogram() { - cols.push("start_time".into()); - types.push("DateTime64".into()); - - cols.push("bins".into()); - types.push(format!( - "Array[{}]", - schema - .datum_type - .to_string() - .strip_prefix("Histogram") - .unwrap() - .to_lowercase(), - )); - - cols.push("counts".into()); - types.push("Array[u64]".into()); - } else if schema.datum_type.is_cumulative() { - cols.push("start_time".into()); - types.push("DateTime64".into()); - cols.push("datum".into()); - types.push(schema.datum_type.to_string()); - } else { - cols.push("datum".into()); - types.push(schema.datum_type.to_string()); - } - - let mut builder = tabled::builder::Builder::default(); - builder.push_record(cols); // first record is the header - builder.push_record(types); - println!( - "{}", - builder.build().with(tabled::settings::Style::psql()) - ); - } else { - println!("No such timeseries: {table}"); - } - } - } - Ok(()) -} - -#[derive(Clone, Debug, Args)] -struct ShellOptions { - /// Print query metadata. - #[clap(long = "metadata")] - print_metadata: bool, - /// Print the original SQL query. - #[clap(long = "original")] - print_original_query: bool, - /// Print the rewritten SQL query that is actually run on the DB. - #[clap(long = "rewritten")] - print_rewritten_query: bool, - /// Print the transformed query, but do not run it. 
- #[clap(long)] - transform: Option, -} - -impl Default for ShellOptions { - fn default() -> Self { - Self { - print_metadata: true, - print_original_query: false, - print_rewritten_query: false, - transform: None, - } - } -} - -fn list_supported_functions() { - println!("Subset of ClickHouse SQL functions currently supported"); - println!( - "See https://clickhouse.com/docs/en/sql-reference/functions for more" - ); - println!(); - for func in function_allow_list().iter() { - println!(" {func}"); - } -} - -fn show_supported_function(name: &str) { - if let Some(func) = function_allow_list().iter().find(|f| f.name == name) { - println!("{}", func.name); - println!(" {}", func.usage); - println!(" {}", func.description); - } else { - println!("No supported function '{name}'"); - } -} - -fn print_sql_query(query: &str) { - println!( - "{}", - sqlformat::format( - &query, - &sqlformat::QueryParams::None, - sqlformat::FormatOptions { uppercase: true, ..Default::default() } - ) - ); - println!(); -} - -fn print_query_metadata(table: &Table, metadata: &QueryMetadata) { - println!("Metadata"); - println!(" Query ID: {}", metadata.id); - println!(" Result rows: {}", table.rows.len()); - println!(" Time: {:?}", metadata.elapsed); - println!(" Read: {}\n", metadata.summary.read); -} - -async fn sql_shell( - address: IpAddr, - port: u16, - log: Logger, - opts: ShellOptions, -) -> anyhow::Result<()> { - let client = make_client(address, port, &log).await?; - - // A workaround to ensure the client has all available timeseries when the - // shell starts. - let dummy = "foo:bar".parse().unwrap(); - let _ = client.schema_for_timeseries(&dummy).await; - - // Possibly just transform the query, but do not execute it. - if let Some(query) = &opts.transform { - let transformed = client.transform_query(query).await?; - println!( - "{}", - sqlformat::format( - &transformed, - &sqlformat::QueryParams::None, - sqlformat::FormatOptions { - uppercase: true, - ..Default::default() - } - ) - ); - return Ok(()); - } - - let mut ed = Reedline::create(); - let prompt = DefaultPrompt::new( - DefaultPromptSegment::Basic("0x".to_string()), - DefaultPromptSegment::Empty, - ); - println!("Oximeter SQL shell"); - println!(); - print_basic_commands(); - loop { - let sig = ed.read_line(&prompt); - match sig { - Ok(Signal::Success(buf)) => { - let cmd = buf.as_str().trim(); - match cmd { - "" => continue, - "\\?" 
| "\\h" | "help" => print_basic_commands(), - "\\q" | "quit" | "exit" => return Ok(()), - "\\l" | "\\d" => list_virtual_tables(&client).await?, - _ => { - if let Some(table_name) = cmd.strip_prefix("\\d") { - if table_name.is_empty() { - list_virtual_tables(&client).await?; - } else { - describe_virtual_table( - &client, - table_name.trim().trim_end_matches(';'), - ) - .await?; - } - } else if let Some(func_name) = cmd.strip_prefix("\\f") - { - if func_name.is_empty() { - list_supported_functions(); - } else { - show_supported_function( - func_name.trim().trim_end_matches(';'), - ); - } - } else { - match client.query(&buf).await { - Err(e) => println!("Query failed: {e:#?}"), - Ok(QueryResult { - original_query, - rewritten_query, - metadata, - table, - }) => { - println!(); - let mut builder = - tabled::builder::Builder::default(); - builder.push_record(&table.column_names); // first record is the header - for row in table.rows.iter() { - builder.push_record( - row.iter().map(ToString::to_string), - ); - } - if opts.print_original_query { - print_sql_query(&original_query); - } - if opts.print_rewritten_query { - print_sql_query(&rewritten_query); - } - println!( - "{}\n", - builder.build().with( - tabled::settings::Style::psql() - ) - ); - if opts.print_metadata { - print_query_metadata(&table, &metadata); - } - } - } - } - } - } - } - Ok(Signal::CtrlD) => return Ok(()), - Ok(Signal::CtrlC) => continue, - err => println!("err: {err:?}"), - } - } -} - #[tokio::main] async fn main() -> anyhow::Result<()> { usdt::register_probes().context("Failed to register USDT probes")?; @@ -598,6 +327,7 @@ async fn main() -> anyhow::Result<()> { .filter_level(args.log_level) .fuse(); let drain = slog_async::Async::new(drain).build().fuse(); + let drain = slog_dtrace::with_drain(drain).0.fuse(); let log = Logger::root(drain, o!("component" => "oxdb")); match args.cmd { Subcommand::Describe => describe_data(), @@ -636,8 +366,13 @@ async fn main() -> anyhow::Result<()> { ) .await?; } + #[cfg(feature = "sql")] Subcommand::Sql { opts } => { - sql_shell(args.address, args.port, log, opts).await? + crate::sql::sql_shell(args.address, args.port, log, opts).await? + } + #[cfg(feature = "oxql")] + Subcommand::Oxql { opts } => { + crate::oxql::oxql_shell(args.address, args.port, log, opts).await? } } Ok(()) diff --git a/oximeter/db/src/bin/oxdb/oxql.rs b/oximeter/db/src/bin/oxdb/oxql.rs new file mode 100644 index 0000000000..54e40afa15 --- /dev/null +++ b/oximeter/db/src/bin/oxdb/oxql.rs @@ -0,0 +1,333 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL shell. + +// Copyright 2024 Oxide Computer + +use crate::make_client; +use clap::Args; +use crossterm::style::Stylize; +use dropshot::EmptyScanParams; +use dropshot::WhichPage; +use oximeter_db::oxql::query::special_idents; +use oximeter_db::oxql::Table; +use oximeter_db::Client; +use oximeter_db::OxqlResult; +use reedline::DefaultPrompt; +use reedline::DefaultPromptSegment; +use reedline::Reedline; +use reedline::Signal; +use slog::Logger; +use std::net::IpAddr; + +#[derive(Clone, Debug, Args)] +pub struct ShellOptions { + /// Print summaries of each SQL query run against the database. + #[clap(long = "summaries")] + print_summaries: bool, + /// Print the total elapsed query duration. + #[clap(long = "elapsed")] + print_elapsed: bool, +} + +// Print help for the basic OxQL commands. 
+fn print_basic_commands() { + println!("Basic commands:"); + println!(" \\?, \\h, help - Print this help"); + println!(" \\q, quit, exit, ^D - Exit the shell"); + println!(" \\l - List timeseries"); + println!(" \\d - Describe a timeseries"); + println!(" \\ql [] - Get OxQL help about an operation"); + println!(); + println!("Or try entering an OxQL `get` query"); +} + +// Print high-level information about OxQL. +fn print_general_oxql_help() { + const HELP: &str = r#"Oximeter Query Language + +The Oximeter Query Language (OxQL) implements queries as +as sequence of operations. Each of these takes zero or more +timeseries as inputs, and produces zero or more timeseries +as outputs. Operations are chained together with the pipe +operator, "|". + +All queries start with a `get` operation, which selects a +timeseries from the database, by name. For example: + +`get physical_data_link:bytes_received` + +The supported timeseries operations are: + +- get: Select a timeseries by name +- filter: Filter timeseries by field or sample values +- group_by: Group timeseries by fields, applying a reducer. +- join: Join two or more timeseries together + +Run `\ql ` to get specific help about that operation. + "#; + println!("{HELP}"); +} + +// Print help for a specific OxQL operation. +fn print_oxql_operation_help(op: &str) { + match op { + "get" => { + const HELP: &str = r#"get "); + +Get instances of a timeseries by name"#; + println!("{HELP}"); + } + "filter" => { + const HELP: &str = r#"filter "); + +Filter timeseries based on their attributes. + can be a logical combination of filtering +\"atoms\", such as `field_foo > 0`. Expressions +may use any of the usual comparison operators, and +can be nested and combined with && or ||. + +Expressions must refer to the name of a field +for a timeseries at this time, and must compare +against literals. For example, `some_field > 0` +is supported, but `some_field > other_field` is not."#; + println!("{HELP}"); + } + "group_by" => { + const HELP: &str = r#"group_by [, ... ] +group_by [, ... ], + +Group timeseries by the named fields, optionally +specifying a reducer to use when aggregating the +timeseries within each group. If no reducer is +specified, `mean` is used, averaging the values +within each group. + +Current supported reducers: + - mean + - sum"#; + println!("{HELP}"); + } + "join" => { + const HELP: &str = r#"join + +Combine 2 or more tables by peforming a natural +inner join, matching up those with fields of the +same value. Currently, joining does not take into +account the timestamps, and does not align the outputs +directly."#; + println!("{HELP}"); + } + _ => eprintln!("unrecognized OxQL operation: '{op}'"), + } +} + +// List the known timeseries. +async fn list_timeseries(client: &Client) -> anyhow::Result<()> { + let mut page = WhichPage::First(EmptyScanParams {}); + let limit = 100.try_into().unwrap(); + loop { + let results = client.timeseries_schema_list(&page, limit).await?; + for schema in results.items.iter() { + println!("{}", schema.timeseries_name); + } + if results.next_page.is_some() { + if let Some(last) = results.items.last() { + page = WhichPage::Next(last.timeseries_name.clone()); + } else { + return Ok(()); + } + } else { + return Ok(()); + } + } +} + +// Describe a single timeseries. 
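+//
+// Looks up the schema for the named timeseries and prints a psql-style table
+// with one column per field, followed by the timestamp and data columns that
+// match the datum type (a plain datum, or bins/counts for histograms, with a
+// start_time column for cumulative and histogram types).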
+async fn describe_timeseries( + client: &Client, + timeseries: &str, +) -> anyhow::Result<()> { + match timeseries.parse() { + Err(_) => eprintln!( + "Invalid timeseries name '{timeseries}, \ + use \\l to list available timeseries by name + " + ), + Ok(name) => { + if let Some(schema) = client.schema_for_timeseries(&name).await? { + let mut cols = + Vec::with_capacity(schema.field_schema.len() + 2); + let mut types = cols.clone(); + for field in schema.field_schema.iter() { + cols.push(field.name.clone()); + types.push(field.field_type.to_string()); + } + cols.push(special_idents::TIMESTAMP.into()); + types.push(special_idents::DATETIME64.into()); + + if schema.datum_type.is_histogram() { + cols.push(special_idents::START_TIME.into()); + types.push(special_idents::DATETIME64.into()); + + cols.push(special_idents::BINS.into()); + types.push( + special_idents::array_type_name_from_histogram_type( + schema.datum_type, + ) + .unwrap(), + ); + + cols.push(special_idents::COUNTS.into()); + types.push(special_idents::ARRAYU64.into()); + } else if schema.datum_type.is_cumulative() { + cols.push(special_idents::START_TIME.into()); + types.push(special_idents::DATETIME64.into()); + cols.push(special_idents::DATUM.into()); + types.push(schema.datum_type.to_string()); + } else { + cols.push(special_idents::DATUM.into()); + types.push(schema.datum_type.to_string()); + } + + let mut builder = tabled::builder::Builder::default(); + builder.push_record(cols); // first record is the header + builder.push_record(types); + println!( + "{}", + builder.build().with(tabled::settings::Style::psql()) + ); + } else { + eprintln!("No such timeseries: {timeseries}"); + } + } + } + Ok(()) +} + +/// Run the OxQL shell. +pub async fn oxql_shell( + address: IpAddr, + port: u16, + log: Logger, + opts: ShellOptions, +) -> anyhow::Result<()> { + let client = make_client(address, port, &log).await?; + + // A workaround to ensure the client has all available timeseries when the + // shell starts. + let dummy = "foo:bar".parse().unwrap(); + let _ = client.schema_for_timeseries(&dummy).await; + + // Create the line-editor. + let mut ed = Reedline::create(); + let prompt = DefaultPrompt::new( + DefaultPromptSegment::Basic("0x".to_string()), + DefaultPromptSegment::Empty, + ); + println!("Oximeter Query Language shell"); + println!(); + print_basic_commands(); + loop { + let sig = ed.read_line(&prompt); + match sig { + Ok(Signal::Success(buf)) => { + let cmd = buf.as_str().trim(); + match cmd { + "" => continue, + "\\?" 
| "\\h" | "help" => print_basic_commands(), + "\\q" | "quit" | "exit" => return Ok(()), + "\\l" | "\\d" => list_timeseries(&client).await?, + _ => { + if let Some(timeseries_name) = cmd.strip_prefix("\\d") { + if timeseries_name.is_empty() { + list_timeseries(&client).await?; + } else { + describe_timeseries( + &client, + timeseries_name + .trim() + .trim_end_matches(';'), + ) + .await?; + } + } else if let Some(stmt) = cmd.strip_prefix("\\ql") { + let stmt = stmt.trim(); + if stmt.is_empty() { + print_general_oxql_help(); + } else { + print_oxql_operation_help(stmt); + } + } else { + match client + .oxql_query(cmd.trim().trim_end_matches(';')) + .await + { + Ok(result) => { + print_query_summary( + &result, + opts.print_elapsed, + opts.print_summaries, + ); + print_tables(&result.tables); + } + Err(e) => { + eprintln!("{}", "Error".underlined().red()); + eprintln!("{e}"); + } + } + } + } + } + } + Ok(Signal::CtrlD) => return Ok(()), + Ok(Signal::CtrlC) => continue, + err => eprintln!("err: {err:?}"), + } + } +} + +fn print_query_summary( + result: &OxqlResult, + print_elapsed: bool, + print_summaries: bool, +) { + if !print_elapsed && !print_summaries { + return; + } + println!("{}", "Query summary".underlined().bold()); + println!(" {}: {}", "ID".bold(), result.query_id); + if print_elapsed { + println!(" {}: {:?}\n", "Total duration".bold(), result.total_duration); + } + if print_summaries { + println!(" {}:", "SQL queries".bold()); + for summary in result.query_summaries.iter() { + println!(" {}: {}", "ID".bold(), summary.id); + println!(" {}: {:?}", "Duration".bold(), summary.elapsed); + println!(" {}: {}", "Read".bold(), summary.io_summary.read); + println!(); + } + } +} + +fn print_tables(tables: &[Table]) { + for table in tables.iter() { + println!(); + println!("{}", table.name().underlined().bold()); + for timeseries in table.iter() { + if timeseries.points.is_empty() { + continue; + } + println!(); + for (name, value) in timeseries.fields.iter() { + println!(" {}: {}", name.as_str().bold(), value); + } + for point in timeseries.points.iter_points() { + println!(" {point}"); + } + } + } +} diff --git a/oximeter/db/src/bin/oxdb/sql.rs b/oximeter/db/src/bin/oxdb/sql.rs new file mode 100644 index 0000000000..d50a60f4d7 --- /dev/null +++ b/oximeter/db/src/bin/oxdb/sql.rs @@ -0,0 +1,298 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! SQL shell subcommand for `oxdb`. + +// Copyright 2024 Oxide Computer Company + +use crate::make_client; +use clap::Args; +use dropshot::EmptyScanParams; +use dropshot::WhichPage; +use oximeter_db::sql::function_allow_list; +use oximeter_db::sql::QueryResult; +use oximeter_db::sql::Table; +use oximeter_db::Client; +use oximeter_db::QuerySummary; +use reedline::DefaultPrompt; +use reedline::DefaultPromptSegment; +use reedline::Reedline; +use reedline::Signal; +use slog::Logger; +use std::net::IpAddr; + +fn print_basic_commands() { + println!("Basic commands:"); + println!(" \\?, \\h, help - Print this help"); + println!(" \\q, quit, exit, ^D - Exit the shell"); + println!(" \\l - List tables"); + println!(" \\d
- Describe a table"); + println!( + " \\f - List or describe ClickHouse SQL functions" + ); + println!(); + println!("Or try entering a SQL `SELECT` statement"); +} + +async fn list_virtual_tables(client: &Client) -> anyhow::Result<()> { + let mut page = WhichPage::First(EmptyScanParams {}); + let limit = 100.try_into().unwrap(); + loop { + let results = client.timeseries_schema_list(&page, limit).await?; + for schema in results.items.iter() { + println!("{}", schema.timeseries_name); + } + if results.next_page.is_some() { + if let Some(last) = results.items.last() { + page = WhichPage::Next(last.timeseries_name.clone()); + } else { + return Ok(()); + } + } else { + return Ok(()); + } + } +} + +async fn describe_virtual_table( + client: &Client, + table: &str, +) -> anyhow::Result<()> { + match table.parse() { + Err(_) => println!("Invalid timeseries name: {table}"), + Ok(name) => { + if let Some(schema) = client.schema_for_timeseries(&name).await? { + let mut cols = + Vec::with_capacity(schema.field_schema.len() + 2); + let mut types = cols.clone(); + for field in schema.field_schema.iter() { + cols.push(field.name.clone()); + types.push(field.field_type.to_string()); + } + cols.push("timestamp".into()); + types.push("DateTime64".into()); + + if schema.datum_type.is_histogram() { + cols.push("start_time".into()); + types.push("DateTime64".into()); + + cols.push("bins".into()); + types.push(format!( + "Array[{}]", + schema + .datum_type + .to_string() + .strip_prefix("Histogram") + .unwrap() + .to_lowercase(), + )); + + cols.push("counts".into()); + types.push("Array[u64]".into()); + } else if schema.datum_type.is_cumulative() { + cols.push("start_time".into()); + types.push("DateTime64".into()); + cols.push("datum".into()); + types.push(schema.datum_type.to_string()); + } else { + cols.push("datum".into()); + types.push(schema.datum_type.to_string()); + } + + let mut builder = tabled::builder::Builder::default(); + builder.push_record(cols); // first record is the header + builder.push_record(types); + println!( + "{}", + builder.build().with(tabled::settings::Style::psql()) + ); + } else { + println!("No such timeseries: {table}"); + } + } + } + Ok(()) +} + +#[derive(Clone, Debug, Args)] +pub struct ShellOptions { + /// Print query metadata. + #[clap(long = "metadata")] + print_metadata: bool, + /// Print the original SQL query. + #[clap(long = "original")] + print_original_query: bool, + /// Print the rewritten SQL query that is actually run on the DB. + #[clap(long = "rewritten")] + print_rewritten_query: bool, + /// Print the transformed query, but do not run it. 
+ #[clap(long)] + transform: Option, +} + +impl Default for ShellOptions { + fn default() -> Self { + Self { + print_metadata: true, + print_original_query: false, + print_rewritten_query: false, + transform: None, + } + } +} + +fn list_supported_functions() { + println!("Subset of ClickHouse SQL functions currently supported"); + println!( + "See https://clickhouse.com/docs/en/sql-reference/functions for more" + ); + println!(); + for func in function_allow_list().iter() { + println!(" {func}"); + } +} + +fn show_supported_function(name: &str) { + if let Some(func) = function_allow_list().iter().find(|f| f.name == name) { + println!("{}", func.name); + println!(" {}", func.usage); + println!(" {}", func.description); + } else { + println!("No supported function '{name}'"); + } +} + +fn print_sql_query(query: &str) { + println!( + "{}", + sqlformat::format( + &query, + &sqlformat::QueryParams::None, + sqlformat::FormatOptions { uppercase: true, ..Default::default() } + ) + ); + println!(); +} + +fn print_query_summary(table: &Table, summary: &QuerySummary) { + println!("Summary"); + println!(" Query ID: {}", summary.id); + println!(" Result rows: {}", table.rows.len()); + println!(" Time: {:?}", summary.elapsed); + println!(" Read: {}\n", summary.io_summary.read); +} + +pub async fn sql_shell( + address: IpAddr, + port: u16, + log: Logger, + opts: ShellOptions, +) -> anyhow::Result<()> { + let client = make_client(address, port, &log).await?; + + // A workaround to ensure the client has all available timeseries when the + // shell starts. + let dummy = "foo:bar".parse().unwrap(); + let _ = client.schema_for_timeseries(&dummy).await; + + // Possibly just transform the query, but do not execute it. + if let Some(query) = &opts.transform { + let transformed = client.transform_query(query).await?; + println!( + "{}", + sqlformat::format( + &transformed, + &sqlformat::QueryParams::None, + sqlformat::FormatOptions { + uppercase: true, + ..Default::default() + } + ) + ); + return Ok(()); + } + + let mut ed = Reedline::create(); + let prompt = DefaultPrompt::new( + DefaultPromptSegment::Basic("0x".to_string()), + DefaultPromptSegment::Empty, + ); + println!("Oximeter SQL shell"); + println!(); + print_basic_commands(); + loop { + let sig = ed.read_line(&prompt); + match sig { + Ok(Signal::Success(buf)) => { + let cmd = buf.as_str().trim(); + match cmd { + "" => continue, + "\\?" 
| "\\h" | "help" => print_basic_commands(), + "\\q" | "quit" | "exit" => return Ok(()), + "\\l" | "\\d" => list_virtual_tables(&client).await?, + _ => { + if let Some(table_name) = cmd.strip_prefix("\\d") { + if table_name.is_empty() { + list_virtual_tables(&client).await?; + } else { + describe_virtual_table( + &client, + table_name.trim().trim_end_matches(';'), + ) + .await?; + } + } else if let Some(func_name) = cmd.strip_prefix("\\f") + { + if func_name.is_empty() { + list_supported_functions(); + } else { + show_supported_function( + func_name.trim().trim_end_matches(';'), + ); + } + } else { + match client.query(&buf).await { + Err(e) => println!("Query failed: {e:#?}"), + Ok(QueryResult { + original_query, + rewritten_query, + summary, + table, + }) => { + println!(); + let mut builder = + tabled::builder::Builder::default(); + builder.push_record(&table.column_names); // first record is the header + for row in table.rows.iter() { + builder.push_record( + row.iter().map(ToString::to_string), + ); + } + if opts.print_original_query { + print_sql_query(&original_query); + } + if opts.print_rewritten_query { + print_sql_query(&rewritten_query); + } + println!( + "{}\n", + builder.build().with( + tabled::settings::Style::psql() + ) + ); + if opts.print_metadata { + print_query_summary(&table, &summary); + } + } + } + } + } + } + } + Ok(Signal::CtrlD) => return Ok(()), + Ok(Signal::CtrlC) => continue, + err => eprintln!("err: {err:?}"), + } + } +} diff --git a/oximeter/db/src/client/dbwrite.rs b/oximeter/db/src/client/dbwrite.rs new file mode 100644 index 0000000000..f21880f314 --- /dev/null +++ b/oximeter/db/src/client/dbwrite.rs @@ -0,0 +1,266 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Implementation of client methods that write to the ClickHouse database. + +// Copyright 2024 Oxide Computer Company + +use crate::client::Client; +use crate::model; +use crate::Error; +use oximeter::Sample; +use oximeter::TimeseriesName; +use slog::debug; +use std::collections::BTreeMap; +use std::collections::BTreeSet; + +#[derive(Debug)] +pub(super) struct UnrolledSampleRows { + /// The timeseries schema rows, keyed by timeseries name. + pub new_schema: BTreeMap, + /// The rows to insert in all the other tables, keyed by the table name. + pub rows: BTreeMap>, +} + +/// A trait allowing a [`Client`] to write data into the timeseries database. +/// +/// The vanilla [`Client`] object allows users to query the timeseries database, returning +/// timeseries samples corresponding to various filtering criteria. This trait segregates the +/// methods required for _writing_ new data into the database, and is intended only for use by the +/// `oximeter-collector` crate. +#[async_trait::async_trait] +pub trait DbWrite { + /// Insert the given samples into the database. + async fn insert_samples(&self, samples: &[Sample]) -> Result<(), Error>; + + /// Initialize the replicated telemetry database, creating tables as needed. + async fn init_replicated_db(&self) -> Result<(), Error>; + + /// Initialize a single node telemetry database, creating tables as needed. + async fn init_single_node_db(&self) -> Result<(), Error>; + + /// Wipe the ClickHouse database entirely from a single node set up. + async fn wipe_single_node_db(&self) -> Result<(), Error>; + + /// Wipe the ClickHouse database entirely from a replicated set up. 
+ async fn wipe_replicated_db(&self) -> Result<(), Error>; +} + +#[async_trait::async_trait] +impl DbWrite for Client { + /// Insert the given samples into the database. + async fn insert_samples(&self, samples: &[Sample]) -> Result<(), Error> { + debug!(self.log, "unrolling {} total samples", samples.len()); + let UnrolledSampleRows { new_schema, rows } = + self.unroll_samples(samples).await; + self.save_new_schema_or_remove(new_schema).await?; + self.insert_unrolled_samples(rows).await + } + + /// Initialize the replicated telemetry database, creating tables as needed. + async fn init_replicated_db(&self) -> Result<(), Error> { + debug!(self.log, "initializing ClickHouse database"); + self.run_many_sql_statements(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/schema/replicated/db-init.sql" + ))) + .await + } + + /// Wipe the ClickHouse database entirely from a replicated set up. + async fn wipe_replicated_db(&self) -> Result<(), Error> { + debug!(self.log, "wiping ClickHouse database"); + self.run_many_sql_statements(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/schema/replicated/db-wipe.sql" + ))) + .await + } + + /// Initialize a single node telemetry database, creating tables as needed. + async fn init_single_node_db(&self) -> Result<(), Error> { + debug!(self.log, "initializing ClickHouse database"); + self.run_many_sql_statements(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/schema/single-node/db-init.sql" + ))) + .await + } + + /// Wipe the ClickHouse database entirely from a single node set up. + async fn wipe_single_node_db(&self) -> Result<(), Error> { + debug!(self.log, "wiping ClickHouse database"); + self.run_many_sql_statements(include_str!(concat!( + env!("CARGO_MANIFEST_DIR"), + "/schema/single-node/db-wipe.sql" + ))) + .await + } +} + +impl Client { + // Unroll each sample into its consituent rows, after verifying the schema. + // + // Note that this also inserts the schema into the internal cache, if it + // does not already exist there. + pub(super) async fn unroll_samples( + &self, + samples: &[Sample], + ) -> UnrolledSampleRows { + let mut seen_timeseries = BTreeSet::new(); + let mut rows = BTreeMap::new(); + let mut new_schema = BTreeMap::new(); + + for sample in samples.iter() { + match self.verify_or_cache_sample_schema(sample).await { + Err(_) => { + // Skip the sample, but otherwise do nothing. The error is logged in the above + // call. + continue; + } + Ok(None) => {} + Ok(Some((name, schema))) => { + debug!( + self.log, + "new timeseries schema"; + "timeseries_name" => %name, + "schema" => %schema + ); + new_schema.insert(name, schema); + } + } + + // Key on both the timeseries name and key, as timeseries may actually share keys. + let key = ( + sample.timeseries_name.as_str(), + crate::timeseries_key(sample), + ); + if !seen_timeseries.contains(&key) { + for (table_name, table_rows) in model::unroll_field_rows(sample) + { + rows.entry(table_name) + .or_insert_with(Vec::new) + .extend(table_rows); + } + } + + let (table_name, measurement_row) = + model::unroll_measurement_row(sample); + + rows.entry(table_name) + .or_insert_with(Vec::new) + .push(measurement_row); + + seen_timeseries.insert(key); + } + + UnrolledSampleRows { new_schema, rows } + } + + // Insert unrolled sample rows into the corresponding tables. 
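+    //
+    // Each map entry associates a table name with its JSON-encoded rows; each
+    // table is written with a single `INSERT ... FORMAT JSONEachRow`
+    // statement, and a failure on one table aborts the remaining inserts.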
+ async fn insert_unrolled_samples( + &self, + rows: BTreeMap>, + ) -> Result<(), Error> { + for (table_name, rows) in rows { + let body = format!( + "INSERT INTO {table_name} FORMAT JSONEachRow\n{row_data}\n", + table_name = table_name, + row_data = rows.join("\n") + ); + // TODO-robustness We've verified the schema, so this is likely a transient failure. + // But we may want to check the actual error condition, and, if possible, continue + // inserting any remaining data. + self.execute(body).await?; + debug!( + self.log, + "inserted rows into table"; + "n_rows" => rows.len(), + "table_name" => table_name, + ); + } + + // TODO-correctness We'd like to return all errors to clients here, and there may be as + // many as one per sample. It's not clear how to structure this in a way that's useful. + Ok(()) + } + + // Save new schema to the database, or remove them from the cache on + // failure. + // + // This attempts to insert the provided schema into the timeseries schema + // table. If that fails, those schema are _also_ removed from the internal + // cache. + // + // TODO-robustness There's still a race possible here. If two distinct clients receive new + // but conflicting schema, they will both try to insert those at some point into the schema + // tables. It's not clear how to handle this, since ClickHouse provides no transactions. + // This is unlikely to happen at this point, because the design is such that there will be + // a single `oximeter` instance, which has one client object, connected to a single + // ClickHouse server. But once we start replicating data, the window within which the race + // can occur is much larger, since it includes the time it takes ClickHouse to replicate + // data between nodes. + // + // NOTE: This is an issue even in the case where the schema don't conflict. Two clients may + // receive a sample with a new schema, and both would then try to insert that schema. + pub(super) async fn save_new_schema_or_remove( + &self, + new_schema: BTreeMap, + ) -> Result<(), Error> { + if !new_schema.is_empty() { + debug!( + self.log, + "inserting {} new timeseries schema", + new_schema.len() + ); + const APPROX_ROW_SIZE: usize = 64; + let mut body = String::with_capacity( + APPROX_ROW_SIZE + APPROX_ROW_SIZE * new_schema.len(), + ); + body.push_str("INSERT INTO "); + body.push_str(crate::DATABASE_NAME); + body.push_str(".timeseries_schema FORMAT JSONEachRow\n"); + for row_data in new_schema.values() { + body.push_str(row_data); + body.push('\n'); + } + + // Try to insert the schema. + // + // If this fails, be sure to remove the schema we've added from the + // internal cache. Since we check the internal cache first for + // schema, if we fail here but _don't_ remove the schema, we'll + // never end up inserting the schema, but we will insert samples. + if let Err(e) = self.execute(body).await { + debug!( + self.log, + "failed to insert new schema, removing from cache"; + "error" => ?e, + ); + let mut schema = self.schema.lock().await; + for name in new_schema.keys() { + schema + .remove(name) + .expect("New schema should have been cached"); + } + return Err(e); + } + } + Ok(()) + } + + // Run one or more SQL statements. + // + // This is intended to be used for the methods which run SQL from one of the + // SQL files in the crate, e.g., the DB initialization or update files. 
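+    //
+    // Statements are split on ';' and empty statements are skipped, so this
+    // is only suitable for the trusted SQL files shipped with the crate, not
+    // for arbitrary input that may contain ';' inside string literals.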
+ async fn run_many_sql_statements( + &self, + sql: impl AsRef, + ) -> Result<(), Error> { + for stmt in sql.as_ref().split(';').filter(|s| !s.trim().is_empty()) { + self.execute(stmt).await?; + } + Ok(()) + } +} diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client/mod.rs similarity index 88% rename from oximeter/db/src/client.rs rename to oximeter/db/src/client/mod.rs index abea11aa06..e92518ae08 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client/mod.rs @@ -4,11 +4,19 @@ //! Rust client to ClickHouse database -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company +pub(crate) mod dbwrite; +#[cfg(any(feature = "oxql", test))] +pub(crate) mod oxql; +pub(crate) mod query_summary; +#[cfg(any(feature = "sql", test))] +mod sql; + +pub use self::dbwrite::DbWrite; +use crate::client::query_summary::QuerySummary; use crate::model; use crate::query; -use crate::sql::RestrictedQuery; use crate::Error; use crate::Metric; use crate::Target; @@ -18,16 +26,13 @@ use crate::TimeseriesName; use crate::TimeseriesPageSelector; use crate::TimeseriesScanParams; use crate::TimeseriesSchema; -use async_trait::async_trait; use dropshot::EmptyScanParams; use dropshot::PaginationOrder; use dropshot::ResultsPage; use dropshot::WhichPage; -use indexmap::IndexMap; use oximeter::types::Sample; use regex::Regex; use regex::RegexBuilder; -use reqwest::header::HeaderMap; use slog::debug; use slog::error; use slog::info; @@ -44,7 +49,6 @@ use std::ops::Bound; use std::path::Path; use std::path::PathBuf; use std::sync::OnceLock; -use std::time::Duration; use std::time::Instant; use tokio::fs; use tokio::sync::Mutex; @@ -56,139 +60,11 @@ const CLICKHOUSE_DB_VERSION_MISSING: &'static str = #[usdt::provider(provider = "clickhouse_client")] mod probes { - fn query__start(_: &usdt::UniqueId, sql: &str) {} - fn query__done(_: &usdt::UniqueId) {} -} - -/// A count of bytes / rows accessed during a query. -#[derive(Clone, Copy, Debug)] -pub struct IoCount { - pub bytes: u64, - pub rows: u64, -} - -impl std::fmt::Display for IoCount { - fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - write!(f, "{} rows ({} bytes)", self.rows, self.bytes) - } -} - -/// Summary of the I/O and duration of a query. -#[derive(Clone, Copy, Debug, serde::Deserialize)] -#[serde(try_from = "serde_json::Value")] -pub struct QuerySummary { - /// The bytes and rows read by the query. - pub read: IoCount, - /// The bytes and rows written by the query. - pub written: IoCount, -} - -impl TryFrom for QuerySummary { - type Error = Error; - - fn try_from(j: serde_json::Value) -> Result { - use serde_json::Map; - use serde_json::Value; - use std::str::FromStr; - - let Value::Object(map) = j else { - return Err(Error::Database(String::from( - "Expected a JSON object for a metadata summary", - ))); - }; + /// Fires when a SQL query begins, with the query string. 
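+    ///
+    /// The `UniqueId` correlates this probe with the matching
+    /// `sql__query__done` probe fired when the same query completes.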
+ fn sql__query__start(_: &usdt::UniqueId, sql: &str) {} - fn unpack_summary_value( - map: &Map, - key: &str, - ) -> Result - where - T: FromStr, - ::Err: std::error::Error, - { - let value = map.get(key).ok_or_else(|| { - Error::MissingHeaderKey { key: key.to_string() } - })?; - let Value::String(v) = value else { - return Err(Error::BadMetadata { - key: key.to_string(), - msg: String::from("Expected a string value"), - }); - }; - v.parse::().map_err(|e| Error::BadMetadata { - key: key.to_string(), - msg: e.to_string(), - }) - } - let rows_read: u64 = unpack_summary_value(&map, "read_rows")?; - let bytes_read: u64 = unpack_summary_value(&map, "read_bytes")?; - let rows_written: u64 = unpack_summary_value(&map, "written_rows")?; - let bytes_written: u64 = unpack_summary_value(&map, "written_bytes")?; - Ok(Self { - read: IoCount { bytes: bytes_read, rows: rows_read }, - written: IoCount { bytes: bytes_written, rows: rows_written }, - }) - } -} - -/// Basic metadata about the resource usage of a single SQL query. -#[derive(Clone, Copy, Debug)] -pub struct QueryMetadata { - /// The database-assigned query ID. - pub id: Uuid, - /// The total duration of the query (network plus execution). - pub elapsed: Duration, - /// Summary of the data read and written. - pub summary: QuerySummary, -} - -impl QueryMetadata { - fn from_headers( - elapsed: Duration, - headers: &HeaderMap, - ) -> Result { - fn get_header<'a>( - map: &'a HeaderMap, - key: &'a str, - ) -> Result<&'a str, Error> { - let hdr = map.get(key).ok_or_else(|| Error::MissingHeaderKey { - key: key.to_string(), - })?; - std::str::from_utf8(hdr.as_bytes()) - .map_err(|err| Error::Database(err.to_string())) - } - let summary = - serde_json::from_str(get_header(headers, "X-ClickHouse-Summary")?) - .map_err(|err| Error::Database(err.to_string()))?; - let id = get_header(headers, "X-ClickHouse-Query-Id")? - .parse() - .map_err(|err: uuid::Error| Error::Database(err.to_string()))?; - Ok(Self { id, elapsed, summary }) - } -} - -/// A tabular result from a SQL query against a timeseries. -#[derive(Clone, Debug, Default, serde::Serialize)] -pub struct Table { - /// The name of each column in the result set. - pub column_names: Vec, - /// The rows of the result set, one per column. - pub rows: Vec>, -} - -/// The full result of running a SQL query against a timeseries. -#[derive(Clone, Debug)] -pub struct QueryResult { - /// The query as written by the client. - pub original_query: String, - /// The rewritten query, run against the JOINed representation of the - /// timeseries. - /// - /// This is the query that is actually run in the database itself. - pub rewritten_query: String, - /// Metadata about the resource usage of the query. - pub metadata: QueryMetadata, - /// The result of the query, with column names and rows. - pub table: Table, + /// Fires when a SQL query ends, either in success or failure. + fn sql__query__done(_: &usdt::UniqueId) {} } /// A `Client` to the ClickHouse metrics database. @@ -229,76 +105,6 @@ impl Client { Ok(()) } - /// Transform a SQL query against a timeseries, but do not execute it. - pub async fn transform_query( - &self, - query: impl AsRef, - ) -> Result { - let restricted = RestrictedQuery::new(query.as_ref())?; - restricted.to_oximeter_sql(&*self.schema.lock().await) - } - - /// Run a SQL query against a timeseries. 
- pub async fn query( - &self, - query: impl AsRef, - ) -> Result { - let original_query = query.as_ref().trim_end_matches(';'); - let ox_sql = self.transform_query(original_query).await?; - let rewritten = format!("{ox_sql} FORMAT JSONEachRow"); - debug!( - self.log, - "rewrote restricted query"; - "original_sql" => &original_query, - "rewritten_sql" => &rewritten, - ); - let request = self - .client - .post(&self.url) - .query(&[ - ("output_format_json_quote_64bit_integers", "0"), - ("database", crate::DATABASE_NAME), - ]) - .body(rewritten.clone()); - let query_start = Instant::now(); - let response = handle_db_response( - request - .send() - .await - .map_err(|err| Error::DatabaseUnavailable(err.to_string()))?, - ) - .await?; - let metadata = QueryMetadata::from_headers( - query_start.elapsed(), - response.headers(), - )?; - let text = response.text().await.unwrap(); - let mut table = Table::default(); - for line in text.lines() { - let row = - serde_json::from_str::>( - line.trim(), - ) - .unwrap(); - if table.column_names.is_empty() { - table.column_names.extend(row.keys().cloned()) - } else { - assert!(table - .column_names - .iter() - .zip(row.keys()) - .all(|(k1, k2)| k1 == k2)); - } - table.rows.push(row.into_values().collect()); - } - Ok(QueryResult { - original_query: original_query.to_string(), - rewritten_query: rewritten, - metadata, - table, - }) - } - /// Select timeseries from criteria on the fields and start/end timestamps. pub async fn select_timeseries_with( &self, @@ -348,6 +154,7 @@ impl Client { Some(field_query) => { self.select_matching_timeseries_info(&field_query, &schema) .await? + .1 } None => BTreeMap::new(), }; @@ -367,6 +174,7 @@ impl Client { } } + /// Return a page of timeseries schema from the database. pub async fn list_timeseries( &self, page: &WhichPage, @@ -401,6 +209,7 @@ impl Client { Some(field_query) => { self.select_matching_timeseries_info(&field_query, &schema) .await? 
+ .1 } None => BTreeMap::new(), }; @@ -445,6 +254,7 @@ impl Client { concat!( "SELECT * ", "FROM {}.timeseries_schema ", + "ORDER BY timeseries_name ", "LIMIT {} ", "FORMAT JSONEachRow;", ), @@ -457,6 +267,7 @@ impl Client { concat!( "SELECT * FROM {}.timeseries_schema ", "WHERE timeseries_name > '{}' ", + "ORDER BY timeseries_name ", "LIMIT {} ", "FORMAT JSONEachRow;", ), @@ -466,7 +277,7 @@ impl Client { ) } }; - let body = self.execute_with_body(sql).await?; + let body = self.execute_with_body(sql).await?.1; let schema = body .lines() .map(|line| { @@ -848,14 +659,14 @@ impl Client { ); let version = match self.execute_with_body(sql).await { - Ok(body) if body.is_empty() => { + Ok((_, body)) if body.is_empty() => { warn!( self.log, "no version in database (treated as 'version 0')" ); 0 } - Ok(body) => body.trim().parse::().map_err(|err| { + Ok((_, body)) => body.trim().parse::().map_err(|err| { Error::Database(format!("Cannot read version: {err}")) })?, Err(Error::Database(err)) @@ -895,14 +706,13 @@ impl Client { "INSERT INTO {db_name}.version (*) VALUES ({version}, now());", db_name = crate::DATABASE_NAME, ); - self.execute_with_body(sql).await?; - Ok(()) + self.execute(sql).await } /// Verifies if instance is part of oximeter_cluster pub async fn is_oximeter_cluster(&self) -> Result { let sql = "SHOW CLUSTERS FORMAT JSONEachRow;"; - let res = self.execute_with_body(sql).await?; + let res = self.execute_with_body(sql).await?.1; Ok(res.contains("oximeter_cluster")) } @@ -972,8 +782,9 @@ impl Client { &self, field_query: &str, schema: &TimeseriesSchema, - ) -> Result, Error> { - let body = self.execute_with_body(field_query).await?; + ) -> Result<(QuerySummary, BTreeMap), Error> + { + let (summary, body) = self.execute_with_body(field_query).await?; let mut results = BTreeMap::new(); for line in body.lines() { let row: model::FieldSelectRow = serde_json::from_str(line) @@ -982,7 +793,7 @@ impl Client { model::parse_field_select_row(&row, schema); results.insert(id, (target, metric)); } - Ok(results) + Ok((summary, results)) } // Given information returned from `select_matching_timeseries_info`, select the actual @@ -996,7 +807,8 @@ impl Client { let mut timeseries_by_key = BTreeMap::new(); let keys = info.keys().copied().collect::>(); let measurement_query = query.measurement_query(&keys); - for line in self.execute_with_body(&measurement_query).await?.lines() { + for line in self.execute_with_body(&measurement_query).await?.1.lines() + { let (key, measurement) = model::parse_measurement_from_row(line, schema.datum_type); let timeseries = timeseries_by_key.entry(key).or_insert_with( @@ -1032,7 +844,10 @@ impl Client { // Execute a generic SQL statement, awaiting the response as text // // TODO-robustness This currently does no validation of the statement. - async fn execute_with_body(&self, sql: S) -> Result + async fn execute_with_body( + &self, + sql: S, + ) -> Result<(QuerySummary, String), Error> where S: AsRef, { @@ -1042,24 +857,50 @@ impl Client { "executing SQL query"; "sql" => &sql, ); + + // Run the SQL query itself. + // + // This code gets a bit convoluted, so that we can fire the USDT probe + // in all situations, even when the various fallible operations + // complete. let id = usdt::UniqueId::new(); - probes::query__start!(|| (&id, &sql)); - let response = handle_db_response( - self.client - .post(&self.url) - // See regression test `test_unquoted_64bit_integers` for details. 
- .query(&[("output_format_json_quote_64bit_integers", "0")]) - .body(sql) - .send() - .await - .map_err(|err| Error::DatabaseUnavailable(err.to_string()))?, - ) - .await? - .text() - .await - .map_err(|err| Error::Database(err.to_string())); - probes::query__done!(|| (&id)); - response + probes::sql__query__start!(|| (&id, &sql)); + let start = Instant::now(); + + // Submit the SQL request itself. + let response = self + .client + .post(&self.url) + .query(&[("output_format_json_quote_64bit_integers", "0")]) + .body(sql) + .send() + .await + .map_err(|err| { + probes::sql__query__done!(|| (&id)); + Error::DatabaseUnavailable(err.to_string()) + })?; + + // Convert the HTTP response into a database response. + let response = handle_db_response(response).await.map_err(|err| { + probes::sql__query__done!(|| (&id)); + err + })?; + + // Extract the query summary, measuring resource usage and duration. + let summary = + QuerySummary::from_headers(start.elapsed(), response.headers()) + .map_err(|err| { + probes::sql__query__done!(|| (&id)); + err + })?; + + // Extract the actual text of the response. + let text = response.text().await.map_err(|err| { + probes::sql__query__done!(|| (&id)); + Error::Database(err.to_string()) + })?; + probes::sql__query__done!(|| (&id)); + Ok((summary, text)) } // Get timeseries schema from the database. @@ -1095,7 +936,7 @@ impl Client { ) } }; - let body = self.execute_with_body(sql).await?; + let body = self.execute_with_body(sql).await?.1; if body.is_empty() { trace!(self.log, "no new timeseries schema in database"); } else { @@ -1113,167 +954,6 @@ impl Client { } Ok(()) } - - // Unroll each sample into its consituent rows, after verifying the schema. - // - // Note that this also inserts the schema into the internal cache, if it - // does not already exist there. - async fn unroll_samples(&self, samples: &[Sample]) -> UnrolledSampleRows { - let mut seen_timeseries = BTreeSet::new(); - let mut rows = BTreeMap::new(); - let mut new_schema = BTreeMap::new(); - - for sample in samples.iter() { - match self.verify_or_cache_sample_schema(sample).await { - Err(_) => { - // Skip the sample, but otherwise do nothing. The error is logged in the above - // call. - continue; - } - Ok(None) => {} - Ok(Some((name, schema))) => { - debug!( - self.log, - "new timeseries schema"; - "timeseries_name" => %name, - "schema" => %schema - ); - new_schema.insert(name, schema); - } - } - - // Key on both the timeseries name and key, as timeseries may actually share keys. - let key = ( - sample.timeseries_name.as_str(), - crate::timeseries_key(&sample), - ); - if !seen_timeseries.contains(&key) { - for (table_name, table_rows) in model::unroll_field_rows(sample) - { - rows.entry(table_name) - .or_insert_with(Vec::new) - .extend(table_rows); - } - } - - let (table_name, measurement_row) = - model::unroll_measurement_row(sample); - - rows.entry(table_name) - .or_insert_with(Vec::new) - .push(measurement_row); - - seen_timeseries.insert(key); - } - - UnrolledSampleRows { new_schema, rows } - } - - // Save new schema to the database, or remove them from the cache on - // failure. - // - // This attempts to insert the provided schema into the timeseries schema - // table. If that fails, those schema are _also_ removed from the internal - // cache. - // - // TODO-robustness There's still a race possible here. If two distinct clients receive new - // but conflicting schema, they will both try to insert those at some point into the schema - // tables. 
It's not clear how to handle this, since ClickHouse provides no transactions. - // This is unlikely to happen at this point, because the design is such that there will be - // a single `oximeter` instance, which has one client object, connected to a single - // ClickHouse server. But once we start replicating data, the window within which the race - // can occur is much larger, since it includes the time it takes ClickHouse to replicate - // data between nodes. - // - // NOTE: This is an issue even in the case where the schema don't conflict. Two clients may - // receive a sample with a new schema, and both would then try to insert that schema. - async fn save_new_schema_or_remove( - &self, - new_schema: BTreeMap, - ) -> Result<(), Error> { - if !new_schema.is_empty() { - debug!( - self.log, - "inserting {} new timeseries schema", - new_schema.len() - ); - const APPROX_ROW_SIZE: usize = 64; - let mut body = String::with_capacity( - APPROX_ROW_SIZE + APPROX_ROW_SIZE * new_schema.len(), - ); - body.push_str("INSERT INTO "); - body.push_str(crate::DATABASE_NAME); - body.push_str(".timeseries_schema FORMAT JSONEachRow\n"); - for row_data in new_schema.values() { - body.push_str(row_data); - body.push_str("\n"); - } - - // Try to insert the schema. - // - // If this fails, be sure to remove the schema we've added from the - // internal cache. Since we check the internal cache first for - // schema, if we fail here but _don't_ remove the schema, we'll - // never end up inserting the schema, but we will insert samples. - if let Err(e) = self.execute(body).await { - debug!( - self.log, - "failed to insert new schema, removing from cache"; - "error" => ?e, - ); - let mut schema = self.schema.lock().await; - for name in new_schema.keys() { - schema - .remove(name) - .expect("New schema should have been cached"); - } - return Err(e); - } - } - Ok(()) - } - - // Insert unrolled sample rows into the corresponding tables. - async fn insert_unrolled_samples( - &self, - rows: BTreeMap>, - ) -> Result<(), Error> { - for (table_name, rows) in rows { - let body = format!( - "INSERT INTO {table_name} FORMAT JSONEachRow\n{row_data}\n", - table_name = table_name, - row_data = rows.join("\n") - ); - // TODO-robustness We've verified the schema, so this is likely a transient failure. - // But we may want to check the actual error condition, and, if possible, continue - // inserting any remaining data. - self.execute(body).await?; - debug!( - self.log, - "inserted rows into table"; - "n_rows" => rows.len(), - "table_name" => table_name, - ); - } - - // TODO-correctness We'd like to return all errors to clients here, and there may be as - // many as one per sample. It's not clear how to structure this in a way that's useful. - Ok(()) - } - - // Run one or more SQL statements. - // - // This is intended to be used for the methods which run SQL from one of the - // SQL files in the crate, e.g., the DB initialization or update files. - async fn run_many_sql_statements( - &self, - sql: impl AsRef, - ) -> Result<(), Error> { - for stmt in sql.as_ref().split(';').filter(|s| !s.trim().is_empty()) { - self.execute(stmt).await?; - } - Ok(()) - } } // A regex used to validate supported schema updates. @@ -1297,87 +977,6 @@ fn schema_validation_regex() -> &'static Regex { .expect("Invalid regex") }) } - -#[derive(Debug)] -struct UnrolledSampleRows { - // The timeseries schema rows, keyed by timeseries name. - new_schema: BTreeMap, - // The rows to insert in all the other tables, keyed by the table name. 
- rows: BTreeMap>, -} - -/// A trait allowing a [`Client`] to write data into the timeseries database. -/// -/// The vanilla [`Client`] object allows users to query the timeseries database, returning -/// timeseries samples corresponding to various filtering criteria. This trait segregates the -/// methods required for _writing_ new data into the database, and is intended only for use by the -/// `oximeter-collector` crate. -#[async_trait] -pub trait DbWrite { - /// Insert the given samples into the database. - async fn insert_samples(&self, samples: &[Sample]) -> Result<(), Error>; - - /// Initialize the replicated telemetry database, creating tables as needed. - async fn init_replicated_db(&self) -> Result<(), Error>; - - /// Initialize a single node telemetry database, creating tables as needed. - async fn init_single_node_db(&self) -> Result<(), Error>; - - /// Wipe the ClickHouse database entirely from a single node set up. - async fn wipe_single_node_db(&self) -> Result<(), Error>; - - /// Wipe the ClickHouse database entirely from a replicated set up. - async fn wipe_replicated_db(&self) -> Result<(), Error>; -} - -#[async_trait] -impl DbWrite for Client { - /// Insert the given samples into the database. - async fn insert_samples(&self, samples: &[Sample]) -> Result<(), Error> { - debug!(self.log, "unrolling {} total samples", samples.len()); - let UnrolledSampleRows { new_schema, rows } = - self.unroll_samples(samples).await; - self.save_new_schema_or_remove(new_schema).await?; - self.insert_unrolled_samples(rows).await - } - - /// Initialize the replicated telemetry database, creating tables as needed. - async fn init_replicated_db(&self) -> Result<(), Error> { - debug!(self.log, "initializing ClickHouse database"); - self.run_many_sql_statements(include_str!( - "../schema/replicated/db-init.sql" - )) - .await - } - - /// Wipe the ClickHouse database entirely from a replicated set up. - async fn wipe_replicated_db(&self) -> Result<(), Error> { - debug!(self.log, "wiping ClickHouse database"); - self.run_many_sql_statements(include_str!( - "../schema/replicated/db-wipe.sql" - )) - .await - } - - /// Initialize a single node telemetry database, creating tables as needed. - async fn init_single_node_db(&self) -> Result<(), Error> { - debug!(self.log, "initializing ClickHouse database"); - self.run_many_sql_statements(include_str!( - "../schema/single-node/db-init.sql" - )) - .await - } - - /// Wipe the ClickHouse database entirely from a single node set up. - async fn wipe_single_node_db(&self) -> Result<(), Error> { - debug!(self.log, "wiping ClickHouse database"); - self.run_many_sql_statements(include_str!( - "../schema/single-node/db-wipe.sql" - )) - .await - } -} - // Return Ok if the response indicates success, otherwise return either the reqwest::Error, if this // is a client-side error, or the body of the actual error retrieved from ClickHouse if the error // was generated there. 
@@ -1397,6 +996,7 @@ async fn handle_db_response( #[cfg(test)] mod tests { + use super::dbwrite::UnrolledSampleRows; use super::*; use crate::model::OXIMETER_VERSION; use crate::query; @@ -1933,7 +1533,7 @@ mod tests { let mut result = String::from(""); let tries = 5; for _ in 0..tries { - result = client_2.execute_with_body(sql.clone()).await.unwrap(); + result = client_2.execute_with_body(sql.clone()).await.unwrap().1; if !result.contains("oximeter") { sleep(Duration::from_secs(1)).await; continue; @@ -1948,21 +1548,21 @@ mod tests { let sql = String::from( "INSERT INTO oximeter.measurements_string (datum) VALUES ('hiya');", ); - let result = client_2.execute_with_body(sql.clone()).await.unwrap(); + let result = client_2.execute_with_body(sql.clone()).await.unwrap().1; info!(log, "Inserted datum to client #2"; "sql" => sql, "result" => result); // Make sure replicas are synched let sql = String::from( "SYSTEM SYNC REPLICA oximeter.measurements_string_local;", ); - let result = client_1.execute_with_body(sql.clone()).await.unwrap(); + let result = client_1.execute_with_body(sql.clone()).await.unwrap().1; info!(log, "Synced replicas via client #1"; "sql" => sql, "result" => result); // Make sure data exists in the other replica let sql = String::from( "SELECT * FROM oximeter.measurements_string FORMAT JSONEachRow;", ); - let result = client_1.execute_with_body(sql.clone()).await.unwrap(); + let result = client_1.execute_with_body(sql.clone()).await.unwrap().1; info!(log, "Retrieved values via client #1"; "sql" => sql, "result" => result.clone()); assert!(result.contains("hiya")); @@ -2124,7 +1724,7 @@ mod tests { let sql = String::from( "SELECT * FROM oximeter.timeseries_schema FORMAT JSONEachRow;", ); - let result = client.execute_with_body(sql).await.unwrap(); + let result = client.execute_with_body(sql).await.unwrap().1; let schema = result .lines() .map(|line| { @@ -2253,7 +1853,8 @@ mod tests { table )) .await - .unwrap(); + .unwrap() + .1; let actual_count = body.lines().next().unwrap().trim().parse::().expect( "Expected a count of the number of rows from ClickHouse", @@ -2301,7 +1902,8 @@ mod tests { "SELECT toUInt64(1) AS foo FORMAT JSONEachRow;".to_string(), ) .await - .unwrap(); + .unwrap() + .1; let json: Value = serde_json::from_str(&output).unwrap(); assert_eq!(json["foo"], Value::Number(1u64.into())); @@ -3167,7 +2769,8 @@ mod tests { let body = client .execute_with_body(select_sql) .await - .expect("Failed to select field row"); + .expect("Failed to select field row") + .1; let actual_row: serde_json::Value = serde_json::from_str(&body) .expect("Failed to parse field row JSON"); println!("{actual_row:?}"); @@ -3507,7 +3110,8 @@ mod tests { let body = client .execute_with_body(select_sql) .await - .expect("Failed to select measurement row"); + .expect("Failed to select measurement row") + .1; let (_, actual_row) = crate::model::parse_measurement_from_row( &body, measurement.datum_type(), @@ -3528,6 +3132,7 @@ mod tests { ) .await .expect("Failed to SELECT from database") + .1 .lines() .count() } @@ -3749,7 +3354,7 @@ mod tests { // one. let response = client.execute_with_body( "SELECT COUNT() FROM oximeter.timeseries_schema FORMAT JSONEachRow; - ").await.unwrap(); + ").await.unwrap().1; assert_eq!(response.lines().count(), 1, "Expected exactly 1 schema"); assert_eq!(client.schema.lock().await.len(), 1); @@ -3766,7 +3371,7 @@ mod tests { // only the one schema. 
let response = client.execute_with_body( "SELECT COUNT() FROM oximeter.timeseries_schema FORMAT JSONEachRow; - ").await.unwrap(); + ").await.unwrap().1; assert_eq!( response.lines().count(), 1, @@ -3804,7 +3409,7 @@ mod tests { crate::DATABASE_NAME, crate::model::DbDatumType::from(ty), ); - let res = client.execute_with_body(sql).await.unwrap(); + let res = client.execute_with_body(sql).await.unwrap().1; let count = res.trim().parse::().unwrap(); assert_eq!(count, 0); } @@ -4099,7 +3704,8 @@ mod tests { " )) .await - .unwrap(); + .unwrap() + .1; let mut lines = body.lines(); assert_eq!(lines.next().unwrap(), "\"col0\",\"UInt8\""); assert_eq!(lines.next().unwrap(), "\"col1\",\"UInt16\""); @@ -4319,7 +3925,8 @@ mod tests { " )) .await - .unwrap(); + .unwrap() + .1; let mut lines = body.lines(); assert_eq!(lines.next().unwrap(), "\"col0\",\"UInt8\""); assert_eq!(lines.next().unwrap(), "\"col1\",\"UInt16\""); @@ -4480,7 +4087,7 @@ mod tests { crate::DATABASE_NAME, crate::model::DbFieldType::from(ty), ); - let res = client.execute_with_body(sql).await.unwrap(); + let res = client.execute_with_body(sql).await.unwrap().1; let count = res.trim().parse::().unwrap(); assert_eq!(count, 0); } @@ -4488,6 +4095,7 @@ mod tests { logctx.cleanup_successful(); } + #[cfg(any(feature = "sql", test))] #[tokio::test] async fn test_sql_query_output() { let logctx = test_setup_log("test_sql_query_output"); diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs new file mode 100644 index 0000000000..9da4abd007 --- /dev/null +++ b/oximeter/db/src/client/oxql.rs @@ -0,0 +1,1281 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Client methods for running OxQL queries against the timeseries database. + +// Copyright 2024 Oxide Computer Company + +use super::query_summary::QuerySummary; +use crate::client::Client; +use crate::model; +use crate::oxql; +use crate::oxql::ast::table_ops::filter; +use crate::oxql::ast::table_ops::filter::Filter; +use crate::query::field_table_name; +use crate::Error; +use crate::Metric; +use crate::Target; +use crate::TimeseriesKey; +use oximeter::TimeseriesSchema; +use slog::debug; +use slog::trace; +use slog::Logger; +use std::collections::BTreeMap; +use std::time::Duration; +use std::time::Instant; +use uuid::Uuid; + +#[usdt::provider(provider = "clickhouse_client")] +mod probes { + /// Fires when an OxQL query starts, with the query ID and string. + fn oxql__query__start(_: &usdt::UniqueId, _: &Uuid, query: &str) {} + + /// Fires when an OxQL query ends, either in success or failure. + fn oxql__query__done(_: &usdt::UniqueId, _: &Uuid) {} + + /// Fires when an OxQL table operation starts, with the query ID and details + /// of the operation itself. + fn oxql__table__op__start(_: &usdt::UniqueId, _: &Uuid, op: &str) {} + + /// Fires when an OxQL table operation ends. + fn oxql__table__op__done(_: &usdt::UniqueId, _: &Uuid) {} +} + +/// The full result of an OxQL query. +#[derive(Clone, Debug)] +pub struct OxqlResult { + /// A query ID assigned to this OxQL query. + pub query_id: Uuid, + + /// The total duration of the OxQL query. + /// + /// This includes the time to run SQL queries against the database, and the + /// internal processing for each transformation in the query pipeline. + pub total_duration: Duration, + + /// The summary for each SQL query run against the ClickHouse database. 
+ /// + /// Each OxQL query translates into many calls to ClickHouse. We fetch the + /// fields; count the number of samples; and finally fetch the samples + /// themselves. In the future, more may be needed as well. + /// + /// This returns a list of summaries, one for each SQL query that was run. + /// It includes the ClickHouse-assigned query ID for correlation and looking + /// up in the logs. + pub query_summaries: Vec, + + /// The list of OxQL tables returned from the query. + pub tables: Vec, +} + +/// The maximum number of data values fetched from the database for an OxQL +/// query. +// +// The `Client::oxql_query()` API is currently unpaginated. It's also not clear +// _how_ to paginate it. The objects contributing to the size of the returned +// value, the actual data points, are nested several layers deep, inside the +// `Timeseries` and `Table`s. A page size is supposed to refer to the top-level +// object, so we'd need to flatten this hierarchy for that to work. That's +// undesirable because it will lead to a huge amount of duplication of the table +// / timeseries-level information, once for each point. +// +// Also, since we cannot use a cursor-based pagination, we're stuck with +// limit-offset. That means we may need to run substantially all of the query, +// just to know how to retrieve the next page, sidestepping one of the main +// goals of pagination (to limit resource usage). +// +// Note that it's also hard or impossible to _predict_ how much data a query +// will use. We need to count the number of rows in the database, for example, +// _and also_ understand how table operations might change that size. For +// example, alignment is allowed to upsample the data (within limits), so the +// number of rows in the database are not the only factor. +// +// This limit here is a crude attempt to limit just the raw data fetched from +// ClickHouse itself. For any OxQL query, we may retrieve many measurements from +// the database. Each time we do so, we increment a counter, and compare it to +// this. If we exceed it, the whole query fails. +pub const MAX_DATABASE_ROWS: u64 = 1_000_000; + +// When running an OxQL query, we may need to separately run several field +// queries, to get the consistent keys independently for a range of time. +// +// This type stores the predicates used to generate the keys, and the keys +// consistent with it. +struct ConsistentKeyGroup { + predicates: Option, + consistent_keys: BTreeMap, +} + +impl Client { + /// Run a OxQL query. + pub async fn oxql_query( + &self, + query: impl AsRef, + ) -> Result { + // TODO-security: Need a way to implement authz checks for things like + // viewing resources in another project or silo. + // + // I think one way to do that is look at the predicates and make sure + // they refer to things the user has access to. Another is to add some + // implicit predicates here, indicating the subset of fields that the + // query should be able to access. + // + // This probably means we'll need to parse the query in Nexus, so that + // we can attach the other filters ourselves. + // + // See https://github.com/oxidecomputer/omicron/issues/5298. 
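+ //
+ // Illustrative note (an editorial addition, not part of the original
+ // change): the input here is a full OxQL pipeline as a string. Using the
+ // `some_target:some_metric` timeseries defined in this module's tests, a
+ // typical query might look like:
+ //
+ //     get some_target:some_metric | filter timestamp > @now() - 1m && foo == 1
+ //
+ // The parser below turns that into an AST; the rest of this method
+ // resolves the schema, fetches the consistent keys and samples, and
+ // applies the table operations.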
+ let query = query.as_ref(); + let parsed_query = oxql::Query::new(query)?; + let query_id = Uuid::new_v4(); + let query_log = + self.log.new(slog::o!("query_id" => query_id.to_string())); + debug!( + query_log, + "parsed OxQL query"; + "query" => query, + "parsed_query" => ?parsed_query, + ); + let id = usdt::UniqueId::new(); + probes::oxql__query__start!(|| (&id, &query_id, query)); + let mut total_rows_fetched = 0; + let result = self + .run_oxql_query( + &query_log, + query_id, + parsed_query, + &mut total_rows_fetched, + None, + ) + .await; + probes::oxql__query__done!(|| (&id, &query_id)); + result + } + + /// Rewrite the predicates from an OxQL query so that they apply only to the + /// field tables. + fn rewrite_predicate_for_fields( + schema: &TimeseriesSchema, + preds: &filter::Filter, + ) -> Result, Error> { + // Walk the set of predicates, keeping those which apply to this schema. + match &preds.expr { + filter::FilterExpr::Simple(inner) => { + // If the predicate names a field in this timeseries schema, + // return that predicate printed as a string. If not, we return + // None. + let Some(field_schema) = + schema.schema_for_field(inner.ident.as_str()) + else { + return Ok(None); + }; + if !inner.value_type_is_compatible_with_field( + field_schema.field_type, + ) { + return Err(Error::from(anyhow::anyhow!( + "Expression for field {} is not compatible with \ + its type {}", + field_schema.name, + field_schema.field_type, + ))); + } + Ok(Some(inner.as_db_safe_string())) + } + filter::FilterExpr::Compound(inner) => { + let left_pred = + Self::rewrite_predicate_for_fields(schema, &inner.left)?; + let right_pred = + Self::rewrite_predicate_for_fields(schema, &inner.right)?; + let out = match (left_pred, right_pred) { + (Some(left), Some(right)) => Some(format!( + "{}({left}, {right})", + inner.op.as_db_function_name() + )), + (Some(single), None) | (None, Some(single)) => Some(single), + (None, None) => None, + }; + Ok(out) + } + } + } + + /// Rewrite the predicates from an OxQL query so that they apply only to the + /// measurement table. + fn rewrite_predicate_for_measurements( + schema: &TimeseriesSchema, + preds: &oxql::ast::table_ops::filter::Filter, + ) -> Result, Error> { + // Walk the set of predicates, keeping those which apply to this schema. + match &preds.expr { + filter::FilterExpr::Simple(inner) => { + // The relevant columns on which we filter depend on the datum + // type of the timeseries. All timeseries support "timestamp". + let ident = inner.ident.as_str(); + if ident == "timestamp" { + if matches!( + inner.value, + oxql::ast::literal::Literal::Timestamp(_) + ) { + return Ok(Some(inner.as_db_safe_string())); + } + return Err(Error::from(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + ))); + } + + // We do not currently support filtering in the database on + // values, only the `timestamp` and possibly `start_time` (if + // the metric is cumulative). + if ident == "start_time" { + if !schema.datum_type.is_cumulative() { + return Err(Error::from(anyhow::anyhow!( + "Start time can only be compared if the metric \ + is cumulative, but found one of type {}", + schema.datum_type, + ))); + } + if matches!( + inner.value, + oxql::ast::literal::Literal::Timestamp(_) + ) { + return Ok(Some(inner.as_db_safe_string())); + } + return Err(Error::from(anyhow::anyhow!( + "Literal cannot be compared with a timestamp" + ))); + } + + // We'll delegate to the actual table op to filter on any of the + // data columns. 
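+ // For example (an illustrative note, not from the original comment): a
+ // predicate on the `datum` column is never rewritten into SQL; returning
+ // `None` here leaves it to the in-memory filter table operation that runs
+ // after the samples have been fetched.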
+ Ok(None) + } + filter::FilterExpr::Compound(inner) => { + let left_pred = Self::rewrite_predicate_for_measurements( + schema, + &inner.left, + )?; + let right_pred = Self::rewrite_predicate_for_measurements( + schema, + &inner.right, + )?; + let out = match (left_pred, right_pred) { + (Some(left), Some(right)) => Some(format!( + "{}({left}, {right})", + inner.op.as_db_function_name() + )), + (Some(single), None) | (None, Some(single)) => Some(single), + (None, None) => None, + }; + Ok(out) + } + } + } + + // Run one query. + // + // If the query is flat, run it directly. If it's nested, run each of them; + // concatenate the results; and then apply all the remaining + // transformations. + #[async_recursion::async_recursion] + async fn run_oxql_query( + &self, + query_log: &Logger, + query_id: Uuid, + query: oxql::Query, + total_rows_fetched: &mut u64, + outer_predicates: Option, + ) -> Result { + let split = query.split(); + if let oxql::ast::SplitQuery::Nested { subqueries, transformations } = + split + { + trace!( + query_log, + "OxQL query contains subqueries, running recursively" + ); + // Create the new set of outer predicates to pass in to the + // subquery, by merging the previous outer predicates with those of + // the transformation portion of this nested query. + let new_outer_predicates = + query.coalesced_predicates(outer_predicates.clone()); + + // Run each subquery recursively, and extend the results + // accordingly. + let mut query_summaries = Vec::with_capacity(subqueries.len()); + let mut tables = Vec::with_capacity(subqueries.len()); + let query_start = Instant::now(); + for subq in subqueries.into_iter() { + let res = self + .run_oxql_query( + query_log, + query_id, + subq, + total_rows_fetched, + new_outer_predicates.clone(), + ) + .await?; + query_summaries.extend(res.query_summaries); + tables.extend(res.tables); + } + for tr in transformations.into_iter() { + trace!( + query_log, + "applying query transformation"; + "transformation" => ?tr, + ); + let id = usdt::UniqueId::new(); + probes::oxql__table__op__start!(|| ( + &id, + &query_id, + format!("{tr:?}") + )); + let new_tables = tr.apply(&tables, query.end_time()); + probes::oxql__table__op__done!(|| (&id, &query_id)); + tables = new_tables?; + } + let result = OxqlResult { + query_id, + total_duration: query_start.elapsed(), + query_summaries, + tables, + }; + return Ok(result); + } + + // This is a flat query, let's just run it directly. First step is + // getting the schema itself. + let query_start = Instant::now(); + let oxql::ast::SplitQuery::Flat(query) = split else { + unreachable!(); + }; + let name = query.timeseries_name(); + let Some(schema) = self.schema_for_timeseries(name).await? else { + return Err(Error::TimeseriesNotFound(name.to_string())); + }; + debug!( + query_log, + "running flat OxQL query"; + "query" => ?query, + "timeseries_name" => %name, + ); + + // Fetch the consistent fields (including keys) for this timeseries, + // including filtering them based on the predicates in the query + // that apply to this timeseries in particular. We also need to merge + // them in with the predicates passed in from a possible outer query. + let preds = query.coalesced_predicates(outer_predicates.clone()); + debug!( + query_log, + "coalesced predicates from flat query"; + "outer_predicates" => ?&outer_predicates, + "coalesced" => ?&preds, + ); + + // We generally run a few SQL queries for each OxQL query: + // + // - Some number of queries to fetch the timeseries keys that are + // consistent with it. 
+ // - Fetch the consistent samples. + // + // Note that there are often 2 or more queries needed for the first + // case. In particular, there is one query required for each independent + // time range in the query (including when a time range isn't + // specified). + // + // For example, consider the filter operation: + // + // ``` + // filter some_predicate || (timestamp > @now() - 1m && other_predicate) + // ``` + // + // That is, we return all timepoints for things where `some_predicate` + // is true, and only the last minute for those satisfying + // `other_predicate`. If we simply drop the timestamp filter, and run + // the two predicates conjoined, we would erroneously return only the + // last minute for everything, including those satisfying + // `some_predicate`. + // + // So instead, we need to run one query for each of those, fetch the + // keys associated with it, and then independently select the + // measurements satisfying both the time range and key-consistency + // constraints. Thankfully that can be done in one query, albeit a + // complicated one. + // + // Convert any outer predicates to DNF, and split into disjoint key + // groups for the measurement queries. + let disjoint_predicates = if let Some(preds) = preds.as_ref() { + let simplified = preds.simplify_to_dnf()?; + debug!( + query_log, + "simplified filtering predicates to disjunctive normal form"; + "original" => %preds, + "DNF" => %simplified, + ); + simplified + .flatten_disjunctions() + .into_iter() + .map(Option::Some) + .collect() + } else { + // There are no outer predicates, so we have 1 disjoint key group, + // with no predicates. + vec![None] + }; + + // Run each query group indepdendently, keeping the predicates and the + // timeseries keys corresponding to it. + let mut consistent_key_groups = + Vec::with_capacity(1 + disjoint_predicates.len()); + let mut query_summaries = + Vec::with_capacity(1 + disjoint_predicates.len()); + for predicates in disjoint_predicates.into_iter() { + debug!( + query_log, + "running disjoint query predicate"; + "predicate" => predicates.as_ref().map(|s| s.to_string()).unwrap_or("none".into()), + ); + let all_fields_query = + self.all_fields_query(&schema, predicates.as_ref())?; + let (summary, consistent_keys) = self + .select_matching_timeseries_info(&all_fields_query, &schema) + .await?; + debug!( + query_log, + "fetched information for matching timeseries keys"; + "n_keys" => consistent_keys.len(), + ); + query_summaries.push(summary); + + // If there are no consistent keys, move to the next independent + // query chunk. + if consistent_keys.is_empty() { + continue; + } + + // Push the disjoint filter itself, plus the keys consistent with + // it. + consistent_key_groups + .push(ConsistentKeyGroup { predicates, consistent_keys }); + } + + // If there are no consistent keys _at all_, we can just return an empty + // table. + if consistent_key_groups.is_empty() { + let result = OxqlResult { + query_id, + total_duration: query_start.elapsed(), + query_summaries, + tables: vec![oxql::Table::new(schema.timeseries_name.as_str())], + }; + return Ok(result); + } + + // Fetch the consistent measurements for this timeseries, by key group. + // + // We'll keep track of all the measurements for this timeseries schema, + // organized by timeseries key. That's because we fetch all consistent + // samples at once, so we get many concrete _timeseries_ in the returned + // response, even though they're all from the same schema. 
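+ //
+ // Each entry in `consistent_key_groups` corresponds to one disjunct from
+ // the DNF split above, carrying its own predicates and the keys consistent
+ // with them. `select_matching_samples` folds the whole set into a single
+ // measurement query (see `measurements_query` below), joining the groups
+ // with OR.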
+ let (summary, timeseries_by_key) = self + .select_matching_samples( + query_log, + &schema, + &consistent_key_groups, + total_rows_fetched, + ) + .await?; + query_summaries.push(summary); + + // At this point, let's construct a set of tables and run the results + // through the transformation pipeline. + let mut tables = vec![oxql::Table::from_timeseries( + schema.timeseries_name.as_str(), + timeseries_by_key.into_values(), + )?]; + + let transformations = query.transformations(); + debug!( + query_log, + "constructed OxQL table, starting transformation pipeline"; + "name" => tables[0].name(), + "n_timeseries" => tables[0].n_timeseries(), + "n_transformations" => transformations.len(), + ); + for tr in transformations { + trace!( + query_log, + "applying query transformation"; + "transformation" => ?tr, + ); + let id = usdt::UniqueId::new(); + probes::oxql__table__op__start!(|| ( + &id, + &query_id, + format!("{tr:?}") + )); + let new_tables = tr.apply(&tables, query.end_time()); + probes::oxql__table__op__done!(|| (&id, &query_id)); + tables = new_tables?; + } + let result = OxqlResult { + query_id, + total_duration: query_start.elapsed(), + query_summaries, + tables, + }; + Ok(result) + } + + // Select samples matching the set of predicates and consistent keys. + // + // Note that this also implements the conversion from cumulative to gauge + // samples, depending on how data was requested. + async fn select_matching_samples( + &self, + query_log: &Logger, + schema: &TimeseriesSchema, + consistent_key_groups: &[ConsistentKeyGroup], + total_rows_fetched: &mut u64, + ) -> Result<(QuerySummary, BTreeMap), Error> + { + // We'll create timeseries for each key on the fly. To enable computing + // deltas, we need to track the last measurement we've seen as well. + let mut measurements_by_key: BTreeMap<_, Vec<_>> = BTreeMap::new(); + let measurements_query = self.measurements_query( + schema, + consistent_key_groups, + total_rows_fetched, + )?; + let mut n_measurements: u64 = 0; + let (summary, body) = + self.execute_with_body(&measurements_query).await?; + for line in body.lines() { + let (key, measurement) = + model::parse_measurement_from_row(line, schema.datum_type); + measurements_by_key.entry(key).or_default().push(measurement); + n_measurements += 1; + } + debug!( + query_log, + "fetched measurements for OxQL query"; + "n_keys" => measurements_by_key.len(), + "n_measurements" => n_measurements, + ); + + // At this point, we need to check that we're still within our maximum + // result size. The measurement query we issued limited the returned + // result to 1 more than the remainder on our allotment. So if we get + // exactly that limit, we know that there are more rows than we can + // allow. We don't know how many more, but we don't care, and we fail + // the query regardless. + update_total_rows_and_check( + query_log, + total_rows_fetched, + n_measurements, + )?; + + // At this point, we no longer care about the consistent_key groups. We + // throw away the predicates that distinguished them, and merge the + // timeseries information together. + let info = consistent_key_groups + .iter() + .map(|group| group.consistent_keys.clone()) + .reduce(|mut acc, current| { + acc.extend(current); + acc + }) + .expect("Should have at least one key-group for every query"); + + // Remove the last measurement, returning just the keys and timeseries. 
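+ //
+ // Concretely: convert each key's list of measurements into an OxQL
+ // timeseries, turning cumulative data into delta points and passing gauge
+ // data through unchanged, paired with the target/metric fields looked up
+ // above.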
+ let mut out = BTreeMap::new(); + for (key, measurements) in measurements_by_key.into_iter() { + // Constuct a new timeseries, from the target/metric info. + let (target, metric) = info.get(&key).unwrap(); + let mut timeseries = oxql::Timeseries::new( + target + .fields + .iter() + .chain(metric.fields.iter()) + .map(|field| (field.name.clone(), field.value.clone())), + oxql::point::DataType::try_from(schema.datum_type)?, + if schema.datum_type.is_cumulative() { + oxql::point::MetricType::Delta + } else { + oxql::point::MetricType::Gauge + }, + )?; + + // Covert its oximeter measurements into OxQL data types. + let points = if schema.datum_type.is_cumulative() { + oxql::point::Points::delta_from_cumulative(&measurements)? + } else { + oxql::point::Points::gauge_from_gauge(&measurements)? + }; + timeseries.points = points; + debug!( + query_log, + "inserted new OxQL timeseries"; + "key" => key, + "metric_type" => ?timeseries.points.metric_type(), + "n_points" => timeseries.points.len(), + ); + out.insert(key, timeseries); + } + Ok((summary, out)) + } + + fn measurements_query( + &self, + schema: &TimeseriesSchema, + consistent_key_groups: &[ConsistentKeyGroup], + total_rows_fetched: &mut u64, + ) -> Result { + use std::fmt::Write; + + // Build the base query, which just selects the timeseries by name based + // on the datum type. + let mut query = self.measurements_query_raw(schema.datum_type); + query.push_str(" WHERE timeseries_name = '"); + write!(query, "{}", schema.timeseries_name).unwrap(); + query.push('\''); + + // Filter down the fields to those which apply to the data itself, which + // includes the timestamps and data values. The supported fields here + // depend on the datum type. + // + // We join all the consistent key groups with OR, which mirrors how they + // were split originally. + let all_predicates = consistent_key_groups + .iter() + .map(|group| { + // Write out the predicates on the measurements themselves, + // which really refers to the timestamps (and possibly start + // times). + let maybe_predicates = group + .predicates + .as_ref() + .map(|preds| { + Self::rewrite_predicate_for_measurements(schema, preds) + }) + .transpose()? + .flatten(); + + // Push the predicate that selects the timeseries keys, which + // are unique to this group. + let maybe_key_set = if group.consistent_keys.len() > 0 { + let mut chunk = String::from("timeseries_key IN ("); + let keys = group + .consistent_keys + .keys() + .map(ToString::to_string) + .collect::>() + .join(","); + chunk.push_str(&keys); + chunk.push(')'); + Some(chunk) + } else { + None + }; + + let chunk = match (maybe_predicates, maybe_key_set) { + (Some(preds), None) => preds, + (None, Some(key_set)) => key_set, + (Some(preds), Some(key_set)) => { + format!("({preds} AND {key_set})") + } + (None, None) => String::new(), + }; + Ok(chunk) + }) + .collect::, Error>>()? + .join(" OR "); + if !all_predicates.is_empty() { + query.push_str(" AND ("); + query.push_str(&all_predicates); + query.push(')'); + } + + // Always impose a strong order on these fields. + // + // The tables are all sorted by: + // + // - timeseries_name + // - timeseries_key + // - start_time, if present + // - timestamp + // + // We care most about the timestamp ordering, since that is assumed (and + // asserted) by downstream table operations. We use the full sort order + // of the table, however, to make things the most efficient. 
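+ //
+ // As a hedged sketch (names and formatting are illustrative, not verbatim
+ // output), a query for a gauge timeseries with string data ends up shaped
+ // roughly like:
+ //
+ //     SELECT timeseries_key, timestamp, datum
+ //     FROM oximeter.measurements_string
+ //     WHERE timeseries_name = '...' AND (<per-group predicates>)
+ //     ORDER BY timeseries_key, timestamp
+ //     LIMIT <remaining row budget + 1>
+ //     FORMAT JSONEachRow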
+ query.push_str(" ORDER BY timeseries_key"); + if schema.datum_type.is_cumulative() { + query.push_str(", start_time"); + } + query.push_str(", timestamp"); + + // Push a limit clause, which restricts the number of records we could + // return. + // + // This is used to ensure that we never go above the limit in + // `MAX_RESULT_SIZE`. That restricts the _total_ number of rows we want + // to retch from the database. So we set our limit to be one more than + // the remainder on our allotment. If we get exactly as many as we set + // in the limit, then we fail the query because there are more rows that + // _would_ be returned. We don't know how many more, but there is at + // least 1 that pushes us over the limit. This prevents tricky + // TOCTOU-like bugs where we need to check the limit twice, and improves + // performance, since we don't return much more than we could possibly + // handle. + let remainder = MAX_DATABASE_ROWS - *total_rows_fetched; + query.push_str(" LIMIT "); + write!(query, "{}", remainder + 1).unwrap(); + + // Finally, use JSON format. + query.push_str(" FORMAT "); + query.push_str(crate::DATABASE_SELECT_FORMAT); + Ok(query) + } + + fn measurements_query_raw( + &self, + datum_type: oximeter::DatumType, + ) -> String { + let value_columns = if datum_type.is_histogram() { + "timeseries_key, start_time, timestamp, bins, counts" + } else if datum_type.is_cumulative() { + "timeseries_key, start_time, timestamp, datum" + } else { + "timeseries_key, timestamp, datum" + }; + format!( + "SELECT {} \ + FROM {}.{}", + value_columns, + crate::DATABASE_NAME, + crate::query::measurement_table_name(datum_type), + ) + } + + fn all_fields_query( + &self, + schema: &TimeseriesSchema, + preds: Option<&oxql::ast::table_ops::filter::Filter>, + ) -> Result { + // Filter down the fields to those which apply to this timeseries + // itself, and rewrite as a DB-safe WHERE clause. + let preds_for_fields = preds + .map(|p| Self::rewrite_predicate_for_fields(schema, p)) + .transpose()? + .flatten(); + let (already_has_where, mut query) = self.all_fields_query_raw(schema); + if let Some(preds) = preds_for_fields { + // If the raw field has only a single select query, then we've + // already added a "WHERE" clause. Simply tack these predicates onto + // that one. + if already_has_where { + query.push_str(" AND "); + } else { + query.push_str(" WHERE "); + } + query.push_str(&preds); + } + query.push_str(" FORMAT "); + query.push_str(crate::DATABASE_SELECT_FORMAT); + Ok(query) + } + + fn all_fields_query_raw( + &self, + schema: &TimeseriesSchema, + ) -> (bool, String) { + match schema.field_schema.len() { + 0 => unreachable!(), + 1 => { + let field_schema = schema.field_schema.first().unwrap(); + ( + true, + format!( + "SELECT DISTINCT timeseries_key, field_value AS {field_name} \ + FROM {db_name}.{field_table} \ + WHERE \ + timeseries_name = '{timeseries_name}' AND \ + field_name = '{field_name}'", + field_name = field_schema.name, + db_name = crate::DATABASE_NAME, + field_table = field_table_name(field_schema.field_type), + timeseries_name = schema.timeseries_name, + ) + ) + } + _ => { + let mut top_level_columns = + Vec::with_capacity(schema.field_schema.len()); + let mut field_subqueries = + Vec::with_capacity(schema.field_schema.len()); + + // Select each field value, aliasing it to its field name. 
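+ // A hedged sketch of the final query for a two-field timeseries with
+ // fields `name` and `index` (subquery bodies elided; exact table names are
+ // illustrative):
+ //
+ //     SELECT filter_on_name.timeseries_key AS timeseries_key,
+ //            filter_on_name.field_value AS name,
+ //            filter_on_index.field_value AS index
+ //     FROM (SELECT DISTINCT ...) AS filter_on_name
+ //     INNER JOIN (SELECT DISTINCT ...) AS filter_on_index
+ //         ON filter_on_index.timeseries_key = filter_on_name.timeseries_key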
+ for field_schema in schema.field_schema.iter() { + top_level_columns.push(format!( + "filter_on_{}.field_value AS {}", + field_schema.name, field_schema.name + )); + field_subqueries.push(( + format!( + "SELECT DISTINCT timeseries_key, field_value \ + FROM {db_name}.{field_table} \ + WHERE \ + timeseries_name = '{timeseries_name}' AND \ + field_name = '{field_name}' \ + ", + db_name = crate::DATABASE_NAME, + field_table = + field_table_name(field_schema.field_type), + timeseries_name = schema.timeseries_name, + field_name = field_schema.name, + ), + format!("filter_on_{}", field_schema.name), + )); + } + + // Write the top-level select statement, starting by selecting + // the timeseries key from the first field schema. + let mut out = format!( + "SELECT {}.timeseries_key AS timeseries_key, {} FROM ", + field_subqueries[0].1, + top_level_columns.join(", "), + ); + + // Then add all the subqueries selecting each field. + // + // We need to add these, along with their aliases. The first + // such subquery has no join conditions, but the later ones all + // refer to the previous via: + // + // `ON .timeseries_key = .timeseries_key` + for (i, (subq, alias)) in field_subqueries.iter().enumerate() { + // Push the subquery itself, aliased. + out.push('('); + out.push_str(subq); + out.push_str(") AS "); + out.push_str(alias); + + // Push the join conditions. + if i > 0 { + let previous_alias = &field_subqueries[i - 1].1; + out.push_str(" ON "); + out.push_str(alias); + out.push_str(".timeseries_key = "); + out.push_str(previous_alias); + out.push_str(".timeseries_key"); + } + + // Push the "INNER JOIN" expression itself, for all but the + // last subquery. + if i < field_subqueries.len() - 1 { + out.push_str(" INNER JOIN "); + } + } + (false, out) + } + } + } +} + +// Helper to update the number of total rows fetched so far, and check it's +// still under the limit. +fn update_total_rows_and_check( + query_log: &Logger, + total_rows_fetched: &mut u64, + count: u64, +) -> Result<(), Error> { + *total_rows_fetched += count; + if *total_rows_fetched > MAX_DATABASE_ROWS { + return Err(Error::from(anyhow::anyhow!( + "Query requires fetching more than the \ + current limit of {} data points from the \ + timeseries database", + MAX_DATABASE_ROWS, + ))); + } + trace!( + query_log, + "verified OxQL measurement query returns few enough results"; + "n_new_measurements" => count, + "n_total" => *total_rows_fetched, + "limit" => MAX_DATABASE_ROWS, + ); + Ok(()) +} + +#[cfg(test)] +mod tests { + use chrono::{DateTime, Utc}; + use dropshot::test_util::LogContext; + use omicron_test_utils::dev::clickhouse::ClickHouseInstance; + use omicron_test_utils::dev::test_setup_log; + use oximeter::Sample; + use oximeter::{types::Cumulative, FieldValue}; + use std::collections::BTreeMap; + use std::time::Duration; + + use crate::{ + oxql::{point::Points, Table, Timeseries}, + Client, DbWrite, + }; + + #[derive( + Clone, Debug, Eq, PartialEq, PartialOrd, Ord, oximeter::Target, + )] + struct SomeTarget { + name: String, + index: u32, + } + + #[derive(Clone, Debug, oximeter::Metric)] + struct SomeMetric { + foo: i32, + datum: Cumulative, + } + + #[derive(Clone, Debug)] + #[allow(dead_code)] + struct TestData { + targets: Vec, + // Note that we really want all the samples per metric _field_, not the + // full metric. That would give us a 1-element sample array for each. 
+ samples_by_timeseries: BTreeMap<(SomeTarget, i32), Vec>, + first_timestamp: DateTime, + } + + struct TestContext { + logctx: LogContext, + clickhouse: ClickHouseInstance, + client: Client, + test_data: TestData, + } + + impl TestContext { + async fn cleanup_successful(mut self) { + self.clickhouse + .cleanup() + .await + .expect("Failed to cleanup ClickHouse server"); + self.logctx.cleanup_successful(); + } + } + + const N_SAMPLES_PER_TIMESERIES: usize = 16; + const SAMPLE_INTERVAL: Duration = Duration::from_secs(1); + const SHIFT: Duration = Duration::from_secs(1); + + fn format_timestamp(t: DateTime) -> String { + format!("{}", t.format("%Y-%m-%dT%H:%M:%S.%f")) + } + + fn generate_test_samples() -> TestData { + // We'll test with 4 different targets, each with two values for its + // fields. + let mut targets = Vec::with_capacity(4); + let names = &["first-target", "second-target"]; + let indices = 1..3; + for (name, index) in itertools::iproduct!(names, indices) { + let target = SomeTarget { name: name.to_string(), index }; + targets.push(target); + } + + // Create a start time for all samples. + // + // IMPORTANT: There is a TTL of 30 days on all data currently. I would + // love this to be a fixed, well-known start time, to make tests easier, + // but that's in conflict with the TTL. Instead, we'll use midnight on + // the current day, and then store it in the test data context. + let first_timestamp = + Utc::now().date_naive().and_hms_opt(0, 0, 0).unwrap().and_utc(); + + // For simplicity, we'll also assume all the cumulative measurements + // start at the first timestamp as well. + let datum = Cumulative::with_start_time(first_timestamp, 0); + + // We'll create two separate metrics, with 16 samples each. + let foos = [-1, 1]; + let mut samples_by_timeseries = BTreeMap::new(); + let mut timeseries_index = 0; + for target in targets.iter() { + for foo in foos.iter() { + // Shift this timeseries relative to the others, to ensure we + // have some different timestamps. + let timeseries_start = + first_timestamp + timeseries_index * SHIFT; + + // Create the first metric, starting from a count of 0. + let mut metric = SomeMetric { foo: *foo, datum }; + + // Create all the samples,, incrementing the datum and sample + // time. 
+ for i in 0..N_SAMPLES_PER_TIMESERIES { + let sample_time = + timeseries_start + SAMPLE_INTERVAL * i as u32; + let sample = Sample::new_with_timestamp( + sample_time, + target, + &metric, + ) + .unwrap(); + samples_by_timeseries + .entry((target.clone(), *foo)) + .or_insert_with(|| { + Vec::with_capacity(N_SAMPLES_PER_TIMESERIES) + }) + .push(sample); + metric.datum += 1; + } + timeseries_index += 1; + } + } + TestData { targets, samples_by_timeseries, first_timestamp } + } + + async fn setup_oxql_test(name: &str) -> TestContext { + let logctx = test_setup_log(name); + let db = ClickHouseInstance::new_single_node(&logctx, 0) + .await + .expect("Failed to start ClickHouse"); + let client = Client::new(db.address, &logctx.log); + client + .init_single_node_db() + .await + .expect("Failed to init single-node oximeter database"); + let test_data = generate_test_samples(); + let samples: Vec<_> = test_data + .samples_by_timeseries + .values() + .flatten() + .cloned() + .collect(); + client + .insert_samples(&samples) + .await + .expect("Failed to insert test data"); + TestContext { logctx, clickhouse: db, client, test_data } + } + + #[tokio::test] + async fn test_get_entire_table() { + let ctx = setup_oxql_test("test_get_entire_table").await; + let query = "get some_target:some_metric"; + let result = ctx + .client + .oxql_query(query) + .await + .expect("failed to run OxQL query"); + assert_eq!(result.tables.len(), 1, "Should be exactly 1 table"); + let table = result.tables.get(0).unwrap(); + assert_eq!( + table.n_timeseries(), + ctx.test_data.samples_by_timeseries.len(), + "Should have fetched every timeseries" + ); + assert!( + table.iter().all(|t| t.points.len() == N_SAMPLES_PER_TIMESERIES), + "Should have fetched all points for all timeseries" + ); + + // Let's build the expected point array, from each timeseries we + // inserted. + let mut matched_timeseries = 0; + for ((target, foo), samples) in + ctx.test_data.samples_by_timeseries.iter() + { + let measurements: Vec<_> = + samples.iter().map(|s| s.measurement.clone()).collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect( + "failed to create expected points from inserted measurements", + ); + let expected_timeseries = + find_timeseries_in_table(&table, target, foo) + .expect("Table did not contain an expected timeseries"); + assert_eq!( + expected_timeseries.points, expected_points, + "Did not reconstruct the correct points for this timeseries" + ); + matched_timeseries += 1; + } + assert_eq!(matched_timeseries, table.len()); + assert_eq!( + matched_timeseries, + ctx.test_data.samples_by_timeseries.len() + ); + + ctx.cleanup_successful().await; + } + + #[tokio::test] + async fn test_get_one_timeseries() { + let ctx = setup_oxql_test("test_get_one_timeseries").await; + + // Specify exactly one timeseries we _want_ to fetch, by picking the + // first timeseries we inserted. 
+ let ((expected_target, expected_foo), expected_samples) = + ctx.test_data.samples_by_timeseries.first_key_value().unwrap(); + let query = format!( + "get some_target:some_metric | filter {}", + exact_filter_for(expected_target, *expected_foo) + ); + let result = ctx + .client + .oxql_query(&query) + .await + .expect("failed to run OxQL query"); + assert_eq!(result.tables.len(), 1, "Should be exactly 1 table"); + let table = result.tables.get(0).unwrap(); + assert_eq!( + table.n_timeseries(), + 1, + "Should have fetched exactly the target timeseries" + ); + assert!( + table.iter().all(|t| t.points.len() == N_SAMPLES_PER_TIMESERIES), + "Should have fetched all points for all timeseries" + ); + + let expected_timeseries = + find_timeseries_in_table(&table, expected_target, expected_foo) + .expect("Table did not contain expected timeseries"); + let measurements: Vec<_> = + expected_samples.iter().map(|s| s.measurement.clone()).collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect("failed to build expected points from measurements"); + assert_eq!( + expected_points, expected_timeseries.points, + "Did not reconstruct the correct points for the one \ + timeseries the query fetched" + ); + + ctx.cleanup_successful().await; + } + + // In this test, we'll fetch the entire history of one timeseries, and only + // the last few samples of another. + // + // This checks that we correctly do complex logical operations that require + // fetching different sets of fields at different times. + #[tokio::test] + async fn test_get_entire_timeseries_and_part_of_another() { + usdt::register_probes().unwrap(); + let ctx = + setup_oxql_test("test_get_entire_timeseries_and_part_of_another") + .await; + + let mut it = ctx.test_data.samples_by_timeseries.iter(); + let (entire, only_part) = (it.next().unwrap(), it.next().unwrap()); + + let entire_filter = exact_filter_for(&entire.0 .0, entire.0 .1); + let only_part_filter = + exact_filter_for(&only_part.0 .0, only_part.0 .1); + let start_timestamp = only_part.1[6].measurement.timestamp(); + let only_part_timestamp_filter = format_timestamp(start_timestamp); + + let query = format!( + "get some_target:some_metric | filter ({}) || (timestamp >= @{} && {})", + entire_filter, + only_part_timestamp_filter, + only_part_filter, + ); + let result = ctx + .client + .oxql_query(&query) + .await + .expect("failed to run OxQL query"); + assert_eq!(result.tables.len(), 1, "Should be exactly 1 table"); + let table = result.tables.get(0).unwrap(); + assert_eq!( + table.n_timeseries(), + 2, + "Should have fetched exactly the two target timeseries" + ); + + // Check that we fetched the entire timeseries for the first one. + let expected_timeseries = + find_timeseries_in_table(table, &entire.0 .0, &entire.0 .1) + .expect("failed to fetch all of the first timeseries"); + let measurements: Vec<_> = + entire.1.iter().map(|s| s.measurement.clone()).collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect("failed to build expected points"); + assert_eq!( + expected_timeseries.points, expected_points, + "Did not collect the entire set of points for the first timeseries", + ); + + // And that we only get the last portion of the second timeseries. 
+ let expected_timeseries = + find_timeseries_in_table(table, &only_part.0 .0, &only_part.0 .1) + .expect("failed to fetch part of the second timeseries"); + let measurements: Vec<_> = only_part + .1 + .iter() + .filter_map(|sample| { + let meas = &sample.measurement; + if meas.timestamp() >= start_timestamp { + Some(meas.clone()) + } else { + None + } + }) + .collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect("failed to build expected points"); + assert_eq!( + expected_timeseries.points, expected_points, + "Did not collect the last few points for the second timeseries", + ); + + ctx.cleanup_successful().await; + } + + // Return an OxQL filter item that will exactly select the provided + // timeseries by its target / metric. + fn exact_filter_for(target: &SomeTarget, foo: i32) -> String { + format!( + "name == '{}' && index == {} && foo == {}", + target.name, target.index, foo, + ) + } + + // Given a table from an OxQL query, look up the timeseries for the inserted + // target / metric, if it exists + fn find_timeseries_in_table<'a>( + table: &'a Table, + target: &'a SomeTarget, + foo: &'a i32, + ) -> Option<&'a Timeseries> { + for timeseries in table.iter() { + let fields = ×eries.fields; + + // Look up each field in turn, and compare it. + let FieldValue::String(val) = fields.get("name")? else { + unreachable!(); + }; + if val != &target.name { + continue; + } + let FieldValue::U32(val) = fields.get("index")? else { + unreachable!(); + }; + if val != &target.index { + continue; + } + let FieldValue::I32(val) = fields.get("foo")? else { + unreachable!(); + }; + if val != foo { + continue; + } + + // We done matched it. + return Some(timeseries); + } + None + } +} diff --git a/oximeter/db/src/client/query_summary.rs b/oximeter/db/src/client/query_summary.rs new file mode 100644 index 0000000000..b00a11c38e --- /dev/null +++ b/oximeter/db/src/client/query_summary.rs @@ -0,0 +1,123 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types representing summaries of queries against the timeseries database. + +// Copyright 2024 Oxide Computer Company + +use crate::Error; +use reqwest::header::HeaderMap; +use schemars::JsonSchema; +use serde::{Deserialize, Serialize}; +use std::time::Duration; +use uuid::Uuid; + +/// A count of bytes / rows accessed during a query. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct IoCount { + /// The number of bytes accessed. + pub bytes: u64, + /// The number of rows accessed. + pub rows: u64, +} + +impl std::fmt::Display for IoCount { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(f, "{} rows ({} bytes)", self.rows, self.bytes) + } +} + +/// Summary of the I/O resources used by a query. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +#[serde(try_from = "serde_json::Value")] +pub struct IoSummary { + /// The bytes and rows read by the query. + pub read: IoCount, + /// The bytes and rows written by the query. 
+ pub written: IoCount, +} + +impl TryFrom for IoSummary { + type Error = Error; + + fn try_from(j: serde_json::Value) -> Result { + use serde_json::Map; + use serde_json::Value; + use std::str::FromStr; + + let Value::Object(map) = j else { + return Err(Error::Database(String::from( + "Expected a JSON object for a metadata summary", + ))); + }; + + fn unpack_summary_value( + map: &Map, + key: &str, + ) -> Result + where + T: FromStr, + ::Err: std::error::Error, + { + let value = map.get(key).ok_or_else(|| { + Error::MissingHeaderKey { key: key.to_string() } + })?; + let Value::String(v) = value else { + return Err(Error::BadMetadata { + key: key.to_string(), + msg: String::from("Expected a string value"), + }); + }; + v.parse::().map_err(|e| Error::BadMetadata { + key: key.to_string(), + msg: e.to_string(), + }) + } + let rows_read: u64 = unpack_summary_value(&map, "read_rows")?; + let bytes_read: u64 = unpack_summary_value(&map, "read_bytes")?; + let rows_written: u64 = unpack_summary_value(&map, "written_rows")?; + let bytes_written: u64 = unpack_summary_value(&map, "written_bytes")?; + Ok(Self { + read: IoCount { bytes: bytes_read, rows: rows_read }, + written: IoCount { bytes: bytes_written, rows: rows_written }, + }) + } +} + +/// Basic metadata about the resource usage of a single SQL query. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct QuerySummary { + /// The database-assigned query ID. + pub id: Uuid, + /// The total duration of the query (network plus execution). + pub elapsed: Duration, + /// Summary of the data read and written. + pub io_summary: IoSummary, +} + +impl QuerySummary { + /// Construct a SQL query summary from the headers received from the DB. + pub(crate) fn from_headers( + elapsed: Duration, + headers: &HeaderMap, + ) -> Result { + fn get_header<'a>( + map: &'a HeaderMap, + key: &'a str, + ) -> Result<&'a str, Error> { + let hdr = map.get(key).ok_or_else(|| Error::MissingHeaderKey { + key: key.to_string(), + })?; + std::str::from_utf8(hdr.as_bytes()) + .map_err(|err| Error::Database(err.to_string())) + } + let summary = + serde_json::from_str(get_header(headers, "X-ClickHouse-Summary")?) + .map_err(|err| Error::Database(err.to_string()))?; + let id = get_header(headers, "X-ClickHouse-Query-Id")? + .parse() + .map_err(|err: uuid::Error| Error::Database(err.to_string()))?; + Ok(Self { id, elapsed, io_summary: summary }) + } +} diff --git a/oximeter/db/src/client/sql.rs b/oximeter/db/src/client/sql.rs new file mode 100644 index 0000000000..236faa7aa4 --- /dev/null +++ b/oximeter/db/src/client/sql.rs @@ -0,0 +1,104 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Client methods for running SQL queries againts timeseries themselves. +//! +//! This implements a prototype system for creating "virtual tables" from each +//! timeseries, letting us run SQL queries directly against them. These tables +//! are constructed via huge joins, which effectively reconstruct the entire +//! history of samples as received from the producers. Each row is the original +//! sample. This denormalization comes at a big cost, both in cycles and memory +//! usage, since we need to build the entire join in ClickHouse and send it all +//! to the client for deserialization. +//! +//! Thus this prototype is very useful for development, running analyses on +//! small datasets. 
It's less helpful on real deployments, where the size of +//! data makes this approach prohibitive. + +// Copyright 2024 Oxide Computer Company + +use super::query_summary::QuerySummary; +pub use crate::sql::RestrictedQuery; +use crate::Error; +use crate::{ + client::Client, + sql::{QueryResult, Table}, +}; +pub use indexmap::IndexMap; +use slog::debug; +pub use std::time::Instant; + +impl Client { + /// Transform a SQL query against a timeseries, but do not execute it. + pub async fn transform_query( + &self, + query: impl AsRef, + ) -> Result { + let restricted = RestrictedQuery::new(query.as_ref())?; + restricted.to_oximeter_sql(&*self.schema.lock().await) + } + + /// Run a SQL query against a timeseries. + pub async fn query( + &self, + query: impl AsRef, + ) -> Result { + use crate::client::handle_db_response; + + let original_query = query.as_ref().trim_end_matches(';'); + let ox_sql = self.transform_query(original_query).await?; + let rewritten = format!("{ox_sql} FORMAT JSONEachRow"); + debug!( + self.log, + "rewrote restricted query"; + "original_sql" => &original_query, + "rewritten_sql" => &rewritten, + ); + let request = self + .client + .post(&self.url) + .query(&[ + ("output_format_json_quote_64bit_integers", "0"), + ("database", crate::DATABASE_NAME), + ]) + .body(rewritten.clone()); + let query_start = Instant::now(); + let response = handle_db_response( + request + .send() + .await + .map_err(|err| Error::DatabaseUnavailable(err.to_string()))?, + ) + .await?; + let summary = QuerySummary::from_headers( + query_start.elapsed(), + response.headers(), + )?; + let text = response.text().await.unwrap(); + let mut table = Table::default(); + for line in text.lines() { + let row = + serde_json::from_str::>( + line.trim(), + ) + .unwrap(); + if table.column_names.is_empty() { + table.column_names.extend(row.keys().cloned()) + } else { + assert!(table + .column_names + .iter() + .zip(row.keys()) + .all(|(k1, k2)| k1 == k2)); + } + table.rows.push(row.into_values().collect()); + } + Ok(QueryResult { + original_query: original_query.to_string(), + rewritten_query: rewritten, + summary, + table, + }) + } +} diff --git a/oximeter/db/src/lib.rs b/oximeter/db/src/lib.rs index 24f7d8c2d0..642612b8db 100644 --- a/oximeter/db/src/lib.rs +++ b/oximeter/db/src/lib.rs @@ -4,7 +4,7 @@ //! Tools for interacting with the control plane telemetry database. 
-// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company use crate::query::StringFieldSelector; use chrono::DateTime; @@ -32,14 +32,17 @@ use thiserror::Error; mod client; pub mod model; +#[cfg(feature = "oxql")] +pub mod oxql; pub mod query; +#[cfg(any(feature = "sql", test))] pub mod sql; +#[cfg(feature = "oxql")] +pub use client::oxql::OxqlResult; +pub use client::query_summary::QuerySummary; pub use client::Client; pub use client::DbWrite; -pub use client::QueryMetadata; -pub use client::QueryResult; -pub use client::Table; pub use model::OXIMETER_VERSION; #[derive(Debug, Error)] @@ -58,7 +61,7 @@ pub enum Error { BadMetadata { key: String, msg: String }, /// An error interacting with the telemetry database - #[error("Error interacting with telemetry database")] + #[error("Error interacting with telemetry database: {0}")] Database(String), /// A schema provided when collecting samples did not match the expected schema @@ -134,8 +137,20 @@ pub enum Error { #[error("Schema update versions must be sequential without gaps")] NonSequentialSchemaVersions, + #[cfg(any(feature = "sql", test))] #[error("SQL error")] Sql(#[from] sql::Error), + + #[cfg(any(feature = "oxql", test))] + #[error(transparent)] + Oxql(oxql::Error), +} + +#[cfg(any(feature = "oxql", test))] +impl From for Error { + fn from(e: crate::oxql::Error) -> Self { + Error::Oxql(e) + } } impl From for TimeseriesSchema { diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index b1b45eabc4..414ad25ba7 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -1600,30 +1600,23 @@ pub(crate) fn parse_field_select_row( ) -> (TimeseriesKey, Target, Metric) { assert_eq!( row.fields.len(), - 2 * schema.field_schema.len(), - "Expected pairs of (field_name, field_value) from the field query" + schema.field_schema.len(), + "Expected the same number of fields in each row as the schema itself", ); let (target_name, metric_name) = schema.component_names(); let mut target_fields = Vec::new(); let mut metric_fields = Vec::new(); - let mut actual_fields = row.fields.values(); + let mut actual_fields = row.fields.iter(); for _ in 0..schema.field_schema.len() { // Extract the field name from the row and find a matching expected field. 
- let actual_field_name = actual_fields + let (actual_field_name, actual_field_value) = actual_fields .next() .expect("Missing a field name from a field select query"); - let name = actual_field_name - .as_str() - .expect("Expected a string field name") - .to_string(); - let expected_field = schema.schema_for_field(&name).expect( + let expected_field = schema.schema_for_field(actual_field_name).expect( "Found field with name that is not part of the timeseries schema", ); // Parse the field value as the expected type - let actual_field_value = actual_fields - .next() - .expect("Missing a field value from a field select query"); let value = match expected_field.field_type { FieldType::Bool => { FieldValue::Bool(bool::from(DbBool::from( @@ -1726,7 +1719,7 @@ pub(crate) fn parse_field_select_row( ) } }; - let field = Field { name, value }; + let field = Field { name: actual_field_name.to_string(), value }; match expected_field.source { FieldSource::Target => target_fields.push(field), FieldSource::Metric => metric_fields.push(field), diff --git a/oximeter/db/src/oxql/ast/cmp.rs b/oximeter/db/src/oxql/ast/cmp.rs new file mode 100644 index 0000000000..ea33056c1f --- /dev/null +++ b/oximeter/db/src/oxql/ast/cmp.rs @@ -0,0 +1,76 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing comparison operators + +// Copyright 2024 Oxide Computer Company + +use std::fmt; + +/// Comparison operators. +// TODO-completeness: Operators for other types, like IP containment ('<<'). +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Comparison { + /// Equality comparison. + Eq, + /// Inequality comparison. + Ne, + /// Greater-than comparison + Gt, + /// Greater-than or equals comparison + Ge, + /// Lesser-than comparison + Lt, + /// Lesser-than or equals comparison + Le, + /// Regular expression pattern matching. + Like, +} + +impl Comparison { + // Return the _function name_ of the comparison that is safe for use in + // ClickHouse. + // + // Note that we're always using the functional form for these comparisons, + // even when they have obvious operators. E.g., we return `"equals"` for the + // `Comparison::Eq` rather than `"=="`. + // + // This is to normalize the different comparisons we support, which do not + // all have operator formats. `Comparison::Like` is the best example, but we + // may also want to support things like IP address containment. While DBs + // like PostgreSQL have the `<<` operator for that, ClickHouse supports only + // the function `isIPAddressInRange()`. + // + // One consequence of this is that the caller needs to wrap the argument in + // parentheses manually. 
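+ //
+ // For example (illustrative): an OxQL equality comparison such as
+ // `name == 'foo'` is rendered through the `equals` function, and the
+ // regex-match operator (`~=`, i.e. `Comparison::Like`) through
+ // ClickHouse's `match`, rather than the operator forms shown by the
+ // `Display` impl further down.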
+ pub(crate) fn as_db_function_name(&self) -> &'static str { + match self { + Comparison::Eq => "equals", + Comparison::Ne => "notEquals", + Comparison::Gt => "greater", + Comparison::Ge => "greaterOrEquals", + Comparison::Lt => "less", + Comparison::Le => "lessOrEquals", + Comparison::Like => "match", + } + } +} + +impl fmt::Display for Comparison { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match self { + Comparison::Eq => "==", + Comparison::Ne => "!=", + Comparison::Gt => ">", + Comparison::Ge => ">=", + Comparison::Lt => "<", + Comparison::Le => "<=", + Comparison::Like => "~=", + } + ) + } +} diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs new file mode 100644 index 0000000000..00a0e6e0fe --- /dev/null +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -0,0 +1,1334 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Grammar for the Oximeter Query Language (OxQL). + +// Copyright 2024 Oxide Computer + +peg::parser! { + pub grammar query_parser() for str { + use crate::oxql::ast::cmp::Comparison; + use crate::oxql::ast::table_ops::align::Align; + use crate::oxql::ast::table_ops::align::AlignmentMethod; + use crate::oxql::ast::table_ops::filter::SimpleFilter; + use crate::oxql::ast::table_ops::filter::FilterExpr; + use crate::oxql::ast::table_ops::filter::Filter; + use crate::oxql::ast::table_ops::filter::CompoundFilter; + use crate::oxql::ast::table_ops::get::Get; + use crate::oxql::ast::table_ops::group_by::GroupBy; + use crate::oxql::ast::ident::Ident; + use crate::oxql::ast::literal::Literal; + use crate::oxql::ast::logical_op::LogicalOp; + use crate::oxql::ast::Query; + use crate::oxql::ast::table_ops::join::Join; + use crate::oxql::ast::table_ops::GroupedTableOp; + use crate::oxql::ast::table_ops::BasicTableOp; + use crate::oxql::ast::table_ops::TableOp; + use crate::oxql::ast::table_ops::group_by::Reducer; + use crate::oxql::ast::literal::duration_consts; + use oximeter::TimeseriesName; + use std::time::Duration; + use uuid::Uuid; + use chrono::Utc; + use chrono::DateTime; + use chrono::NaiveDateTime; + use chrono::NaiveDate; + use chrono::NaiveTime; + use std::net::IpAddr; + use std::net::Ipv4Addr; + use std::net::Ipv6Addr; + + rule _ = quiet!{[' ' | '\n' | '\t']+} / expected!("whitespace") + + // Parse boolean literals. + rule true_literal() -> bool = "true" { true } + rule false_literal() -> bool = "false" { false } + pub(super) rule boolean_literal_impl() -> bool + = quiet! { true_literal() / false_literal() } / expected!("boolean literal") + + pub rule boolean_literal() -> Literal + = b:boolean_literal_impl() { Literal::Boolean(b) } + + // Parse duration literals. 
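+ //
+ // For example, `5s`, `500ms`, and `2h` are all valid duration literals
+ // (5 seconds, 500 milliseconds, and 2 hours); the full set of unit
+ // suffixes is documented on `duration_literal` below.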
+ rule year() -> Duration
+ = "Y" { duration_consts::YEAR }
+ rule month() -> Duration
+ = "M" { duration_consts::MONTH }
+ rule week() -> Duration
+ = "w" { duration_consts::WEEK }
+ rule day() -> Duration
+ = "d" { duration_consts::DAY }
+ rule hour() -> Duration
+ = "h" { duration_consts::HOUR }
+ rule minute() -> Duration
+ = "m" { duration_consts::MINUTE }
+ rule second() -> Duration
+ = "s" { duration_consts::SECOND }
+ rule millisecond() -> Duration
+ = "ms" { duration_consts::MILLISECOND }
+ rule microsecond() -> Duration
+ = "us" { duration_consts::MICROSECOND }
+ rule nanosecond() -> Duration
+ = "ns" { duration_consts::NANOSECOND }
+ pub(super) rule duration_literal_impl() -> Duration
+ = count:integer_literal_impl() base:(
+ year() /
+ month() /
+ week() / day() /
+ hour() /
+ millisecond() /
+ minute() /
+ second() /
+ microsecond() /
+ nanosecond()
+ )
+ {?
+ // NOTE: This count is the factor by which we multiply the base
+ // unit. So it counts the number of nanos, millis, or days, etc. It
+ // does not limit the total duration itself.
+ let Ok(count) = u32::try_from(count) else {
+ return Err("invalid count for duration literal");
+ };
+ base.checked_mul(count).ok_or("overflowed duration literal")
+ }
+
+ /// Parse a literal duration from a string.
+ ///
+ /// Durations are written as a positive integer multiple of a base time
+ /// unit. For example, `7s` is interpreted as 7 seconds. Supported units
+ /// are:
+ ///
+ /// - 'Y': an approximate year, 365 days
+ /// - 'M': an approximate month, 30 days
+ /// - 'w': an approximate week, 7 days
+ /// - 'd': a day, 24 hours
+ /// - 'h': an hour, 3600 seconds
+ /// - 'm': a minute, 60 seconds
+ /// - 's': seconds
+ /// - 'ms': milliseconds
+ /// - 'us': microseconds
+ /// - 'ns': nanoseconds
+ pub rule duration_literal() -> Literal
+ = d:duration_literal_impl() { Literal::Duration(d) }
+
+ /// Parse a literal timestamp.
+ ///
+ /// Timestamps are literals prefixed with `@`. They can be in one of
+ /// several formats:
+ ///
+ /// - YYYY-MM-DD
+ /// - HH:MM:SS[.f]
+ /// - RFC 3339, `YYYY-MM-DDTHH:MM:SS.f`
+ /// - The literal `now()`, possibly with some simple offset expression,
+ /// such as `now() - 5m`. The offset must be a duration.
+ ///
+ /// All timestamps are in UTC.
+ pub rule timestamp_literal() -> Literal
+ = t:timestamp_literal_impl() { Literal::Timestamp(t) }
+
+ rule timestamp_literal_impl() -> DateTime<Utc>
+ = timestamp_string()
+ / now_timestamp()
+
+ pub(super) rule timestamp_string() -> DateTime<Utc>
+ = "@" s:$(['0'..='9' | '-' | 'T' | ':' | '.']+)
+ {?
+ if let Ok(t) = NaiveDate::parse_from_str(s, "%F") {
+ return Ok(t.and_hms_opt(0, 0, 0).unwrap().and_utc());
+ }
+ if let Ok(t) = NaiveTime::parse_from_str(s, "%H:%M:%S%.f") {
+ return Ok(NaiveDateTime::new(Utc::now().date_naive(), t).and_utc());
+ }
+ if let Ok(t) = NaiveDateTime::parse_from_str(s, "%Y-%m-%dT%H:%M:%S%.f") {
+ return Ok(t.and_utc());
+ }
+ Err("a recognized timestamp format")
+ }
+
+ rule now_offset() -> (bool, Duration)
+ = _? sign:['+' | '-'] _? dur:duration_literal_impl()
+ {
+ let negative = matches!(sign, '-');
+ (negative, dur)
+ }
+
+ pub(super) rule now_timestamp() -> DateTime<Utc>
+ = "@now()" maybe_offset:now_offset()?
+ { + let now = Utc::now(); + if let Some((negative, offset)) = maybe_offset { + if negative { + now - offset + } else { + now + offset + } + } else { + now + } + } + + /// Parse an IP address literal, either IPv4 or IPv6 + pub rule ip_literal() -> Literal + = ip:ipv4_literal() { Literal::IpAddr(IpAddr::V4(ip)) } + / ip:ipv6_literal() { Literal::IpAddr(IpAddr::V6(ip)) } + + pub(super) rule ipv4_literal() -> Ipv4Addr + = "\"" s:$((['0'..='9']*<1,3>)**<4> ".") "\"" + {? + s.parse().map_err(|_| "an IPv4 address") + } + + pub(super) rule ipv6_literal() -> Ipv6Addr + = "\"" s:$(['a'..='f' | '0'..='9' | ':']+) "\"" + {? + s.parse().map_err(|_| "an IPv6 address") + } + + rule dashed_uuid_literal() -> Uuid + = s:$( + "\"" + ['a'..='f' | '0'..='9']*<8> "-" + ['a'..='f' | '0'..='9']*<4> "-" + ['a'..='f' | '0'..='9']*<4> "-" + ['a'..='f' | '0'..='9']*<4> "-" + ['a'..='f' | '0'..='9']*<12> + "\"" + ) {? + let Some(middle) = s.get(1..37) else { + return Err("invalid UUID literal"); + }; + middle.parse().or(Err("invalid UUID literal")) + } + rule undashed_uuid_literal() -> Uuid + = s:$("\"" ['a'..='f' | '0'..='9']*<32> "\"") {? + let Some(middle) = s.get(1..33) else { + return Err("invalid UUID literal"); + }; + middle.parse().or(Err("invalid UUID literal")) + } + pub(super) rule uuid_literal_impl() -> Uuid + = dashed_uuid_literal() / undashed_uuid_literal() + + /// Parse UUID literals. + /// + /// UUIDs should be quoted with `"` and can include or omit dashes + /// between the segments. Both of the following are equivalent. + /// + /// "fc59ab26-f1d8-44ca-abbc-dd8f61321433" + /// "fc59ab26f1d844caabbcdd8f61321433" + pub rule uuid_literal() -> Literal + = id:uuid_literal_impl() { Literal::Uuid(id) } + + // Parse string literals. + rule any_but_single_quote() -> String + = s:$([^'\'']*) + {? + recognize_escape_sequences(s).ok_or("invalid single quoted string") + } + + rule any_but_double_quote() -> String + = s:$([^'"']*) + {? + recognize_escape_sequences(s).ok_or("invalid double quoted string") + } + + rule single_quoted_string() -> String + = "'" s:any_but_single_quote() "'" { s } + + rule double_quoted_string() -> String + = "\"" s:any_but_double_quote() "\"" { s } + + pub(super) rule string_literal_impl() -> String + = single_quoted_string() / double_quoted_string() + + /// Parse a string literal, either single- or double-quoted. + /// + /// Parsing string literals is pretty tricky, but we add several + /// constraints to simplify things. First strings must be quoted, either + /// with single- or double-quotes. E.g., the strings `"this"` and + /// `'this'` parse the same way. + /// + /// We require that the string not _contain_ its quote-style, so there + /// can't be any embedded single-quotes in a single-quoted string, or + /// double-quotes in a double-quoted string. Each quote-style may contain + /// the quote from the other style. + /// + /// We support the following common escape sequences: + /// + /// ```ignore + /// \n + /// \r + /// \t + /// \\ + /// \0 + /// ``` + /// + /// Beyond this, any valid Unicode code point, written in the usual Rust + /// style, is supported. For example, `\u{1234}` is accepted and mapped + /// to `ሴ` upon parsing. This also allows users to write both quote + /// styles if required, by writing them as their Unicode escape + /// sequences. For example, this string: + /// + /// ```ignore + /// "this string has \u{22} in it" + /// ``` + /// + /// Will be parsed as `this string has " in it`. 
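+ ///
+ /// As one further (illustrative) example, the single-quoted literal
+ /// `'line1\nline2'` parses to a two-line string, since `\n` is among the
+ /// supported escape sequences.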
+ pub rule string_literal() -> Literal + = s:string_literal_impl() { Literal::String(s) } + + pub(super) rule integer_literal_impl() -> i128 + = n:$("-"? ['0'..='9']+ !['e' | 'E' | '.']) + {? + let Ok(x) = n.parse() else { + return Err("integer literal"); + }; + if x < i128::from(i64::MIN) { + Err("negative overflow") + } else if x > i128::from(u64::MAX) { + Err("positive overflow") + } else { + Ok(x) + } + } + + /// Parse integer literals. + pub rule integer_literal() -> Literal + = n:integer_literal_impl() { Literal::Integer(n) } + + // We're being a bit lazy here, since the rule expression isn't exactly + // right. But we rely on calling `f64`'s `FromStr` implementation to + // actually verify the values can be parsed. + pub(super) rule double_literal_impl() -> f64 + = n:$("-"? ['0'..='9']* "."? ['0'..='9']* (['e' | 'E'] "-"? ['0'..='9']+)*) {? + n.parse().or(Err("double literal")) + } + + // Parse double literals. + pub rule double_literal() -> Literal + = d:double_literal_impl() { Literal::Double(d) } + + /// Parse a literal. + /// + /// Literals are typed, with support for bools, durations, integers and + /// doubles, UUIDs, and general strings. See the rules for each type of + /// literal for details on supported formats. + pub rule literal() -> Literal + = lit:( + boolean_literal() / + duration_literal() / + integer_literal() / + double_literal() / + uuid_literal() / + ip_literal() / + string_literal() / + timestamp_literal() + ) + { + lit + } + + /// Parse a logical operator. + pub(super) rule logical_op_impl() -> LogicalOp + = "||" { LogicalOp::Or} + / "&&" { LogicalOp::And } + / "^" { LogicalOp::Xor } + + + // NOTES: + // + // The rules below are all used to parse a filtering expression. This + // turns out to be surprisingly complicated to express succinctly in + // `peg`, but there are a few tricks. First, it's important that we do + // not try to parse negation ("!") inside the filtering atoms -- it's a + // higher-level concept, and not part of the atom itself. + // + // Second, it's not clear how to use `peg`'s precendence macro to + // correctly describe the precedence. Things are recursive, but we + // choose to define that in the rules themselves, rather than explicitly + // with precedence levels. This is common in PEG definitions, and the + // main trick is force things _not_ to be left-recursive, and use two + // rules tried in sequence. The `factor` rule is a good example of this. + // + // Another example is the logical OR / AND / XOR parsing. We start with + // OR, which is the lowest precedence, and move to the others in + // sequence. Each is defined as parsing either the "thing itself", e.g., + // `foo || bar` for the OR rule; or the rule with next-higher + // precedence. + // + // IMPORTANT: The #[cache] directives on the rules below are _critical_ + // to avoiding wildly exponential runtime with nested expressions. + + /// Parse a logical negation + pub rule not() = "!" + + /// A factor is a logically negated expression, or a primary expression. + #[cache] + pub rule factor() -> Filter + = not() _? factor:factor() + { + Filter { + negated: !factor.negated, + expr: factor.expr + } + } + / p:primary() { p } + + /// A primary expression is either a comparison "atom", e.g., `foo == + /// "bar"`, or a grouping around a sequence of such things. + #[cache] + pub rule primary() -> Filter + = atom:comparison_atom() + {? 
+ if matches!(atom.cmp, Comparison::Like) && !matches!(atom.value, Literal::String(_)) { + Err("~= comparison is only supported for string literals") + } else { + Ok(Filter { negated: false, expr: FilterExpr::Simple(atom) }) + } + } + / "(" _? or:logical_or_expr() _? ")" { or } + + /// A comparison atom is a base-case for all this recursion. + /// + /// It specifies a single comparison between an identifier and a value, + /// using a specific comparison operator. For example, this parses `foo + /// == "bar"`. + pub rule comparison_atom() -> SimpleFilter + = ident:ident() _? cmp:comparison() _? value:literal() + { + SimpleFilter { ident, cmp, value } + } + + /// Two filtering expressions combined with a logical OR. + /// + /// An OR expression is two logical ANDs joined with "||", or just a + /// bare logical AND expression. + #[cache] + pub rule logical_or_expr() -> Filter + = left:logical_and_expr() _? "||" _? right:logical_or_expr() + { + let compound = CompoundFilter { + left: Box::new(left), + op: LogicalOp::Or, + right: Box::new(right), + }; + Filter { negated: false, expr: FilterExpr::Compound(compound) } + } + / logical_and_expr() + + /// Two filtering expressions combined with a logical AND. + /// + /// A logical AND expression is two logical XORs joined with "&&", or + /// just a bare logical XOR expression. + #[cache] + pub rule logical_and_expr() -> Filter + = left:logical_xor_expr() _? "&&" _? right:logical_and_expr() + { + let compound = CompoundFilter { + left: Box::new(left), + op: LogicalOp::And, + right: Box::new(right), + }; + Filter { negated: false, expr: FilterExpr::Compound(compound) } + } + / logical_xor_expr() + + /// Two filtering expressions combined with a logical XOR. + /// + /// A logical XOR expression is two logical XORs joined with "^ or + /// just a bare factor. Note that this either hits the base case, if + /// `factor` is actually an atom, or recurses again if its a logical OR + /// expression. + /// + /// Note that this is the highest-precedence logical operator. + #[cache] + pub rule logical_xor_expr() -> Filter + = left:factor() _? "^" _? right:logical_xor_expr() + { + let compound = CompoundFilter { + left: Box::new(left), + op: LogicalOp::Xor, + right: Box::new(right), + }; + Filter { negated: false, expr: FilterExpr::Compound(compound) } + } + / factor:factor() { factor } + + /// Parse the _logical expression_ part of a `filter` table operation. + pub rule filter_expr() -> Filter = logical_or_expr() + + /// Parse a "filter" table operation. + pub rule filter() -> Filter + = "filter" _ expr:filter_expr() _? + { + expr + } + + pub(super) rule ident_impl() -> &'input str + = quiet!{ inner:$(['a'..='z']+ ['a'..='z' | '0'..='9']* ("_" ['a'..='z' | '0'..='9']+)*) } / + expected!("A valid identifier") + + /// Parse an identifier, usually a column name. + pub rule ident() -> Ident + = inner:ident_impl() { Ident(inner.to_string()) } + + pub(super) rule comparison() -> Comparison + = "==" { Comparison::Eq } + / "!=" { Comparison::Ne } + / ">=" { Comparison::Ge } + / ">" { Comparison::Gt } + / "<=" { Comparison::Le } + / "<" { Comparison::Lt } + / "~=" { Comparison::Like } + + pub rule timeseries_name() -> TimeseriesName + = target_name:ident_impl() ":" metric_name:ident_impl() + {? + format!("{target_name}:{metric_name}") + .try_into() + .map_err(|_| "invalid timeseries name") + } + + rule get_delim() = quiet!{ _? "," _? } + + /// Parse a "get" table operation. 
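+ ///
+ /// For example, `get foo:bar` names a single timeseries, while
+ /// `get foo:bar, baz:quux` names several, separated by commas.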
+ pub rule get() -> Vec + = "get" _ names:(timeseries_name() **<1,> get_delim()) + { + names.into_iter().map(|t| Get { timeseries_name: t }).collect() + } + + /// Parse a reducing operation by name. + pub rule reducer() -> Reducer + = "mean" { Reducer::Mean } + / "sum" { Reducer::Sum } + / expected!("a reducer name") + + rule ws_with_comma() = _? "," _? + pub rule group_by() -> GroupBy + = "group_by" + _ + "[" _? identifiers:(ident() ** ws_with_comma()) ","? _? "]" + reducer:("," _? red:reducer() { red })? + { + GroupBy { + identifiers, + reducer: reducer.unwrap_or_default(), + } + } + + /// Parse a `join` table operation. + pub rule join() = "join" {} + + pub(super) rule alignment_method() -> AlignmentMethod + = "interpolate" { AlignmentMethod::Interpolate } + / "mean_within" { AlignmentMethod::MeanWithin } + + /// Parse an alignment table operation. + pub rule align() -> Align + = "align" _ method:alignment_method() "(" period:duration_literal_impl() ")" + { + Align { method, period } + } + + pub(super) rule basic_table_op() -> TableOp + = g:"get" _ t:timeseries_name() { TableOp::Basic(BasicTableOp::Get(t)) } + / f:filter() { TableOp::Basic(BasicTableOp::Filter(f)) } + / g:group_by() { TableOp::Basic(BasicTableOp::GroupBy(g)) } + / join() { TableOp::Basic(BasicTableOp::Join(Join)) } + / a:align() { TableOp::Basic(BasicTableOp::Align(a)) } + + pub(super) rule grouped_table_op() -> TableOp + = "{" _? ops:(query() ++ grouped_table_op_delim()) _? "}" + { + TableOp::Grouped(GroupedTableOp { ops }) + } + + /// Parse a top-level OxQL query. + /// + /// Queries always start with a "get" operation, and may be followed by + /// any number of other timeseries transformations + pub rule query() -> Query + = ops:(basic_table_op() / grouped_table_op()) ++ query_delim() + {? + let query = Query { ops }; + if query.all_gets_at_query_start() { + Ok(query) + } else { + Err("every subquery must start with a `get` operation") + } + } + + rule grouped_table_op_delim() = quiet!{ _? ";" _? } + rule query_delim() = quiet!{ _? "|" _? } + } +} + +// Recognize escape sequences and convert them into the intended Unicode point +// they represent. +// +// For example, the string containing ASCII "abcd" is returned unchanged. +// +// The string containing "\u{1234}" is returned as the string "ሴ". Note that the +// Unicode bytes must be enclosed in {}, and can have length 1-6. +// +// If the string contains an invalid escape sequence, such as "\uFFFF", or a +// control code, such as `\u07`, `None` is returned. +// +// Note that the main goal of this method is to _unescape_ relevant sequences. +// We will get queries that may contain escaped sequences, like `\\\n`, which +// this method will unescape to `\n`. +fn recognize_escape_sequences(s: &str) -> Option { + let mut out = String::with_capacity(s.len()); + + let mut chars = s.chars().peekable(); + while let Some(ch) = chars.next() { + match ch { + '\\' => { + let Some(next_ch) = chars.next() else { + // Escape at the end of the string + return None; + }; + match next_ch { + 'n' => out.push('\n'), + 'r' => out.push('\r'), + 't' => out.push('\t'), + '\\' => out.push('\\'), + '0' => out.push('\0'), + 'u' => { + // We need this to be delimited by {}, and between 1 and + // 6 characters long. + if !matches!(chars.next(), Some('{')) { + return None; + } + + let mut digits = String::with_capacity(6); + let mut found_closing_brace = false; + while !found_closing_brace && digits.len() < 7 { + // Take the next value, if it's a hex digit or the + // closing brace. 
+ let Some(next) = chars.next_if(|ch| { + ch.is_ascii_hexdigit() || *ch == '}' + }) else { + break; + }; + if next.is_ascii_hexdigit() { + digits.push(next); + continue; + } + found_closing_brace = true; + } + if !found_closing_brace { + return None; + } + let val = u32::from_str_radix(&digits, 16).ok()?; + let decoded = char::from_u32(val)?; + out.push(decoded) + } + _ => return None, + } + } + _ => out.push(ch), + } + } + Some(out) +} + +#[cfg(test)] +mod tests { + use super::query_parser; + use crate::oxql::ast::cmp::Comparison; + use crate::oxql::ast::grammar::recognize_escape_sequences; + use crate::oxql::ast::ident::Ident; + use crate::oxql::ast::literal::Literal; + use crate::oxql::ast::logical_op::LogicalOp; + use crate::oxql::ast::table_ops::align::Align; + use crate::oxql::ast::table_ops::align::AlignmentMethod; + use crate::oxql::ast::table_ops::filter::CompoundFilter; + use crate::oxql::ast::table_ops::filter::Filter; + use crate::oxql::ast::table_ops::filter::FilterExpr; + use crate::oxql::ast::table_ops::filter::SimpleFilter; + use crate::oxql::ast::table_ops::group_by::Reducer; + use chrono::DateTime; + use chrono::NaiveDate; + use chrono::NaiveDateTime; + use chrono::NaiveTime; + use chrono::TimeZone; + use chrono::Utc; + use std::net::IpAddr; + use std::net::Ipv4Addr; + use std::net::Ipv6Addr; + use std::time::Duration; + use uuid::Uuid; + + #[test] + fn test_boolean_literal() { + assert_eq!(query_parser::boolean_literal_impl("true").unwrap(), true); + assert_eq!(query_parser::boolean_literal_impl("false").unwrap(), false); + } + + #[test] + fn test_duration_literal() { + for (as_str, dur) in [ + ("7Y", Duration::from_secs(60 * 60 * 24 * 365 * 7)), + ("7M", Duration::from_secs(60 * 60 * 24 * 30 * 7)), + ("7w", Duration::from_secs(60 * 60 * 24 * 7 * 7)), + ("7d", Duration::from_secs(60 * 60 * 24 * 7)), + ("7h", Duration::from_secs(60 * 60 * 7)), + ("7m", Duration::from_secs(60 * 7)), + ("7s", Duration::from_secs(7)), + ("7ms", Duration::from_millis(7)), + ("7us", Duration::from_micros(7)), + ("7ns", Duration::from_nanos(7)), + ] { + assert_eq!( + query_parser::duration_literal_impl(as_str).unwrap(), + dur + ); + } + + assert!(query_parser::duration_literal_impl("-1m").is_err()); + let too_big: i64 = u32::MAX as i64 + 1; + assert!(query_parser::duration_literal_impl(&format!("{too_big}s")) + .is_err()); + } + + #[test] + fn test_uuid_literal() { + const ID: Uuid = uuid::uuid!("9f8900bd-886d-4988-b623-95b7fda36d23"); + let as_string = format!("\"{}\"", ID); + assert_eq!(query_parser::uuid_literal_impl(&as_string).unwrap(), ID); + let without_dashes = as_string.replace('-', ""); + assert_eq!( + query_parser::uuid_literal_impl(&without_dashes).unwrap(), + ID + ); + + assert!(query_parser::uuid_literal_impl( + &as_string[1..as_string.len() - 2] + ) + .is_err()); + assert!(query_parser::uuid_literal_impl( + &without_dashes[1..without_dashes.len() - 2] + ) + .is_err()); + } + + #[test] + fn test_integer_literal() { + assert_eq!(query_parser::integer_literal_impl("1").unwrap(), 1); + assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); + assert_eq!(query_parser::integer_literal_impl("-1").unwrap(), -1); + + assert!(query_parser::integer_literal_impl("-1.0").is_err()); + assert!(query_parser::integer_literal_impl("-1.").is_err()); + assert!(query_parser::integer_literal_impl("1e3").is_err()); + } + + #[test] + fn test_double_literal() { + assert_eq!(query_parser::double_literal_impl("1.0").unwrap(), 1.0); + assert_eq!(query_parser::double_literal_impl("-1.0").unwrap(), 
-1.0); + assert_eq!(query_parser::double_literal_impl("1.").unwrap(), 1.0); + assert_eq!(query_parser::double_literal_impl("-1.").unwrap(), -1.0); + assert_eq!(query_parser::double_literal_impl(".5").unwrap(), 0.5); + assert_eq!(query_parser::double_literal_impl("-.5").unwrap(), -0.5); + assert_eq!(query_parser::double_literal_impl("1e3").unwrap(), 1e3); + assert_eq!(query_parser::double_literal_impl("-1e3").unwrap(), -1e3); + assert_eq!(query_parser::double_literal_impl("-1e-3").unwrap(), -1e-3); + assert_eq!( + query_parser::double_literal_impl("0.5e-3").unwrap(), + 0.5e-3 + ); + + assert!(query_parser::double_literal_impl("-.e4").is_err()); + assert!(query_parser::double_literal_impl("-.e-4").is_err()); + assert!(query_parser::double_literal_impl("1e").is_err()); + } + + #[test] + fn test_recognize_escape_sequences_with_none() { + for each in ["", "abc", "$%("] { + assert_eq!(recognize_escape_sequences(each).unwrap(), each); + } + } + + #[test] + fn test_recognize_escape_sequence_with_valid_unicode_sequence() { + // Welp, let's just test every possible code point. + for x in 0..=0x10FFFF { + let expected = char::from_u32(x); + let as_hex = format!("{x:0x}"); + let sequence = format!("\\u{{{as_hex}}}"); + let recognized = recognize_escape_sequences(&sequence) + .map(|s| s.chars().next().unwrap()); + assert_eq!( + expected, recognized, + "did not correctly recognized Unicode escape sequence" + ); + } + } + + #[test] + fn test_recognize_escape_sequences_with_invalid_unicode_sequence() { + for each in [ + r#"\uFFFF"#, // Valid, but not using {} delimiters + r#"\u{}"#, // Not enough characters. + r#"\u{12345678}"#, // Too many characters + r#"\u{ZZZZ}"#, // Not hex digits + r#"\u{d800}"#, // A surrogate code point, not valid. + r#"\u{1234"#, // Valid, but missing closing brace. 
+ ] { + println!("{each}"); + assert!(recognize_escape_sequences(each).is_none()); + } + } + + #[test] + fn test_recognize_escape_sequences_with_valid_escape_sequence() { + for (as_str, expected) in [ + (r#"\n"#, '\n'), + (r#"\r"#, '\r'), + (r#"\t"#, '\t'), + (r#"\0"#, '\0'), + (r#"\\"#, '\\'), + ] { + let recognized = recognize_escape_sequences(as_str).unwrap(); + assert_eq!(recognized.chars().next().unwrap(), expected); + } + } + + #[test] + fn test_single_quoted_string_literal() { + for (input, expected) in [ + ("''", String::new()), + ("'simple'", String::from("simple")), + ("'袈►♖'", String::from("袈►♖")), + (r#"'escapes \n handled'"#, String::from("escapes \n handled")), + (r#"'may contain " in it'"#, String::from("may contain \" in it")), + ( + r#"'may contain "\u{1234}" in it'"#, + String::from("may contain \"ሴ\" in it"), + ), + ] { + assert_eq!( + query_parser::string_literal_impl(input).unwrap(), + expected + ); + } + assert!(query_parser::string_literal_impl(r#"' cannot have ' in it'"#) + .is_err()); + } + + #[test] + fn test_double_quoted_string_literal() { + for (input, expected) in [ + ("\"\"", String::new()), + ("\"simple\"", String::from("simple")), + ("\"袈►♖\"", String::from("袈►♖")), + (r#""escapes \n handled""#, String::from("escapes \n handled")), + (r#""may contain ' in it""#, String::from("may contain ' in it")), + ( + r#""may contain '\u{1234}' in it""#, + String::from("may contain 'ሴ' in it"), + ), + ] { + assert_eq!( + query_parser::string_literal_impl(input).unwrap(), + expected + ); + } + + assert!(query_parser::string_literal_impl(r#"" cannot have " in it""#) + .is_err()); + } + + #[test] + fn test_comparison() { + for (as_str, cmp) in [ + ("==", Comparison::Eq), + ("!=", Comparison::Ne), + (">=", Comparison::Ge), + (">", Comparison::Gt), + ("<=", Comparison::Le), + ("<", Comparison::Lt), + ("~=", Comparison::Like), + ] { + assert_eq!(query_parser::comparison(as_str).unwrap(), cmp); + } + } + + #[test] + fn test_filter_expr_single_simple_expression() { + let expr = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + assert_eq!(query_parser::filter_expr("a == true").unwrap(), expr); + assert_eq!(query_parser::filter_expr("(a == true)").unwrap(), expr); + + assert!(query_parser::filter_expr("(a == true").is_err()); + } + + #[test] + fn test_filter_expr_single_negated_simple_expression() { + let expr = Filter { + negated: true, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Gt, + value: Literal::Double(1.0), + }), + }; + assert_eq!(query_parser::filter_expr("!(a > 1.)").unwrap(), expr,); + + assert!(query_parser::filter_expr("!(a > 1.0").is_err()); + } + + #[test] + fn test_filter_expr_two_simple_filter_expressions() { + let left = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + + for op in [LogicalOp::And, LogicalOp::Or] { + let expected = left.merge(&left, op); + // Match with either parenthesized. 
+ let as_str = format!("a == true {op} (a == true)"); + assert_eq!(query_parser::filter_expr(&as_str).unwrap(), expected); + let as_str = format!("(a == true) {op} a == true"); + assert_eq!(query_parser::filter_expr(&as_str).unwrap(), expected); + let as_str = format!("(a == true) {op} (a == true)"); + assert_eq!(query_parser::filter_expr(&as_str).unwrap(), expected); + } + } + + #[test] + fn test_filter_expr_operator_precedence() { + // We'll combine the following simple expression in a number of + // different sequences, to check that we correctly group by operator + // precedence. + let atom = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + let as_str = "a == true || a == true && a == true ^ a == true"; + let parsed = query_parser::filter_expr(as_str).unwrap(); + assert_eq!( + parsed.to_string(), + "((a == true) || ((a == true) && ((a == true) ^ (a == true))))" + ); + + // This should bind most tighty from right to left: XOR, then AND, then + // OR. Since we're destructuring from out to in, though, we check in the + // opposite order, weakest to strongest, or left to right. + // + // Start with OR, which should bind the most weakly. + assert!(!parsed.negated); + let FilterExpr::Compound(CompoundFilter { left, op, right }) = + parsed.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::Or); + assert_eq!(atom, *left); + + // && should bind next-most tightly + let FilterExpr::Compound(CompoundFilter { left, op, right }) = + right.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::And); + assert_eq!(atom, *left); + + // Followed by XOR, the tightest binding operator. + let FilterExpr::Compound(CompoundFilter { left, op, right }) = + right.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::Xor); + assert_eq!(atom, *left); + assert_eq!(atom, *right); + } + + #[test] + fn test_filter_expr_overridden_precedence() { + // Similar to above, we'll test with a single atom, and group in a + // number of ways. + let atom = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + let as_str = "(a == true || a == true) && a == true"; + let parsed = query_parser::filter_expr(as_str).unwrap(); + + // Now, || should bind more tightly, so we should have (a && b) at the + // top-level, where b is the test atom. We're comparing the atom at the + // _right_ now with the original expressions. + assert!(!parsed.negated); + let FilterExpr::Compound(CompoundFilter { left, op, right }) = + parsed.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::And); + assert_eq!(atom, *right); + + // Destructure the LHS and check it. 
+ let FilterExpr::Compound(CompoundFilter { left, op, right }) = + left.expr + else { + unreachable!(); + }; + assert!(!left.negated); + assert!(!right.negated); + assert_eq!(op, LogicalOp::Or); + assert_eq!(atom, *left); + assert_eq!(atom, *right); + } + + #[test] + fn test_negated_filter_expr() { + let left = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".into()), + cmp: Comparison::Eq, + value: Literal::Boolean(true), + }), + }; + let right = left.negate(); + let top = left.merge(&right, LogicalOp::Xor).negate(); + let as_str = "!(a == true ^ !(a == true))"; + let parsed = query_parser::filter_expr(as_str).unwrap(); + assert_eq!(top, parsed); + } + + #[test] + fn test_filter_table_op() { + for expr in [ + "filter field == 0", + "filter baz == 'quux'", + "filter other_field != 'yes'", + "filter id != \"45c937fb-5e99-4a86-a95b-22bf30bf1507\"", + "filter (foo == 'bar') || ((yes != \"no\") && !(maybe > 'so'))", + ] { + let parsed = query_parser::filter(expr).unwrap_or_else(|_| { + panic!("failed to parse query: '{}'", expr) + }); + println!("{parsed:#?}"); + } + } + + #[test] + fn test_get_table_op() { + for expr in [ + "get foo:bar", + "get target_name:metric_name", + "get target_name_0:metric_name000", + ] { + let parsed = query_parser::get(expr).unwrap_or_else(|_| { + panic!("failed to parse get expr: '{}'", expr) + }); + println!("{parsed:#?}"); + } + + assert!(query_parser::get("get foo").is_err()); + assert!(query_parser::get("get foo:").is_err()); + assert!(query_parser::get("get :bar").is_err()); + assert!(query_parser::get("get 0:0").is_err()); + } + + #[test] + fn test_ident() { + for id in ["foo", "foo0", "foo_0_1_2"] { + query_parser::ident(id) + .unwrap_or_else(|_| panic!("failed to identifier: '{id}'")); + } + + for id in ["0foo", "0", "A", "", "%", "foo_"] { + query_parser::ident(id).expect_err(&format!( + "should not have parsed as identifier: '{}'", + id + )); + } + } + + #[test] + fn test_group_by() { + for q in [ + "group_by []", + "group_by [baz]", + "group_by [baz,]", + "group_by [baz,another_field]", + "group_by [baz,another_field,]", + ] { + let parsed = query_parser::group_by(q) + .unwrap_or_else(|_| panic!("failed to parse group_by: '{q}'")); + println!("{parsed:#?}"); + } + } + + #[test] + fn test_query() { + for q in [ + "get foo:bar", + "get foo:bar | group_by []", + "get foo:bar | group_by [baz]", + "get foo:bar | filter baz == 'quuz'", + "get foo:bar | filter (some == 0) && (id == false || a == -1.0)", + "get foo:bar | group_by [baz] | filter baz == 'yo'", + "{ get foo:bar | filter x == 0; get x:y } | join", + "{ get foo:bar ; get x:y } | join | filter baz == 0", + "get foo:bar | align interpolate(10s)", + ] { + let parsed = query_parser::query(q) + .unwrap_or_else(|_| panic!("failed to parse query: '{q}'")); + println!("{parsed:#?}"); + } + } + + #[test] + fn test_reducer() { + assert_eq!(query_parser::reducer("mean").unwrap(), Reducer::Mean); + assert!(query_parser::reducer("foo").is_err()); + } + + #[test] + fn test_parse_literal_timestamp_string() { + assert_eq!( + query_parser::timestamp_string("@2020-01-01").unwrap(), + Utc.with_ymd_and_hms(2020, 1, 1, 0, 0, 0).unwrap(), + ); + assert_eq!( + query_parser::timestamp_string("@01:01:01").unwrap().time(), + NaiveTime::from_hms_opt(1, 1, 1).unwrap(), + ); + assert_eq!( + query_parser::timestamp_string("@01:01:01.123456").unwrap().time(), + NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap(), + ); + assert_eq!( + 
query_parser::timestamp_string("@2020-01-01T01:01:01.123456") + .unwrap(), + NaiveDateTime::new( + NaiveDate::from_ymd_opt(2020, 1, 1).unwrap(), + NaiveTime::from_hms_micro_opt(1, 1, 1, 123456).unwrap(), + ) + .and_utc(), + ); + } + + #[test] + fn test_parse_ipv4_literal() { + let check = |s: &str, addr: IpAddr| { + let Literal::IpAddr(ip) = query_parser::ip_literal(s).unwrap() + else { + panic!("expected '{}' to be parsed into {}", s, addr); + }; + assert_eq!(ip, addr); + }; + check("\"100.100.100.100\"", Ipv4Addr::new(100, 100, 100, 100).into()); + check("\"1.2.3.4\"", Ipv4Addr::new(1, 2, 3, 4).into()); + check("\"0.0.0.0\"", Ipv4Addr::UNSPECIFIED.into()); + + assert!(query_parser::ip_literal("\"abcd\"").is_err()); + assert!(query_parser::ip_literal("\"1.1.1.\"").is_err()); + assert!(query_parser::ip_literal("\"1.1.1.1.1.1\"").is_err()); + assert!(query_parser::ip_literal("\"2555.1.1.1\"").is_err()); + assert!(query_parser::ip_literal("1.2.3.4").is_err()); // no quotes + } + + #[test] + fn test_parse_ipv6_literal() { + let check = |s: &str, addr: IpAddr| { + let Literal::IpAddr(ip) = query_parser::ip_literal(s).unwrap() + else { + panic!("expected '{}' to be parsed into {}", s, addr); + }; + assert_eq!(ip, addr); + }; + + // IPv6 is nuts, let's just check a few common patterns. + check("\"::1\"", Ipv6Addr::LOCALHOST.into()); + check("\"::\"", Ipv6Addr::UNSPECIFIED.into()); + check("\"fd00::1\"", Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1).into()); + check( + "\"fd00:1:2:3:4:5:6:7\"", + Ipv6Addr::new(0xfd00, 1, 2, 3, 4, 5, 6, 7).into(), + ); + + // Don't currently support IPv6-mapped IPv4 addresses + assert!(query_parser::ip_literal("\"::ffff:127.0.0.1\"").is_err()); + + // Other obviously bad patterns. + assert!(query_parser::ip_literal("\"1\"").is_err()); + assert!(query_parser::ip_literal("\":1::1::1\"").is_err()); + assert!(query_parser::ip_literal("\"::g\"").is_err()); + assert!(query_parser::ip_literal("\":::\"").is_err()); + assert!(query_parser::ip_literal("::1").is_err()); // no quotes + } + + #[test] + fn test_query_starts_with_get() { + assert!(query_parser::query("{ get a:b }") + .unwrap() + .all_gets_at_query_start()); + assert!(query_parser::query("{ get a:b; get a:b } | join") + .unwrap() + .all_gets_at_query_start()); + assert!(query_parser::query( + "{ { get a:b ; get a:b } | join; get c:d } | join" + ) + .unwrap() + .all_gets_at_query_start()); + + assert!(query_parser::query("{ get a:b; filter foo == 0 }").is_err()); + assert!(query_parser::query("{ get a:b; filter foo == 0 }").is_err()); + assert!(query_parser::query("get a:b | get a:b").is_err()); + } + + #[test] + fn test_now_with_offset() { + fn check(expr: &str, expected: DateTime) { + // Rough but still-useful bound in microseconds. 
+ const MAX_DIFF_IN_MICROS: i64 = 1000; + let d = query_parser::now_timestamp(expr).unwrap(); + let now = Utc::now(); + let micros = d.timestamp_micros() - expected.timestamp_micros(); + assert!( + micros.abs() <= MAX_DIFF_IN_MICROS, + "Expected `{}` to be within {}us of {}, but it is {}us away", + expr, + MAX_DIFF_IN_MICROS, + now, + micros, + ); + } + check("@now() - 5m", Utc::now() - Duration::from_secs(60 * 5)); + check("@now() + 5m", Utc::now() + Duration::from_secs(60 * 5)); + check("@now() - 5s", Utc::now() - Duration::from_secs(5)); + check("@now() + 5s", Utc::now() + Duration::from_secs(5)); + check("@now() - 1d", Utc::now() - Duration::from_secs(60 * 60 * 24)); + check("@now() + 1d", Utc::now() + Duration::from_secs(60 * 60 * 24)); + } + + #[test] + fn test_like_only_available_for_strings() { + assert!(query_parser::filter_expr("foo ~= 0").is_err()); + assert!(query_parser::filter_expr("foo ~= \"something\"").is_ok()); + } + + #[test] + fn test_align_table_op() { + assert_eq!( + query_parser::align("align interpolate(1m)").unwrap(), + Align { + method: AlignmentMethod::Interpolate, + period: Duration::from_secs(60) + } + ); + assert_eq!( + query_parser::align("align mean_within(100s)").unwrap(), + Align { + method: AlignmentMethod::MeanWithin, + period: Duration::from_secs(100) + } + ); + + assert!(query_parser::align("align whatever(100s)").is_err()); + assert!(query_parser::align("align interpolate('foo')").is_err()); + } + + #[test] + fn test_complicated_logical_combinations() { + let parsed = + query_parser::logical_or_expr("a == 'b' ^ !(c == 0) && d == false") + .unwrap(); + + // Build up this expected expression from its components. + let left = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::String("b".into()), + }), + }; + let middle = Filter { + negated: true, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("c".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + let right = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("d".to_string()), + cmp: Comparison::Eq, + value: Literal::Boolean(false), + }), + }; + + // The left and right are bound most tightly, by the XOR operator. + let xor = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op: LogicalOp::Xor, + right: Box::new(middle), + }), + }; + + // And then those two together are joined with the AND. + let expected = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(xor), + op: LogicalOp::And, + right: Box::new(right), + }), + }; + assert_eq!(parsed, expected); + } + + #[test] + fn test_multiple_negation() { + let negated = + query_parser::filter_expr("(a == 0) || !!!(a == 0 && a == 0)") + .unwrap(); + let expected = + query_parser::filter_expr("(a == 0) || !(a == 0 && a == 0)") + .unwrap(); + assert_eq!(negated, expected, "Failed to handle multiple negations"); + } +} diff --git a/oximeter/db/src/oxql/ast/ident.rs b/oximeter/db/src/oxql/ast/ident.rs new file mode 100644 index 0000000000..6fb2dab85a --- /dev/null +++ b/oximeter/db/src/oxql/ast/ident.rs @@ -0,0 +1,25 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! OxQL identifiers, such as column names. 
+ +// Copyright 2024 Oxide Computer Company + +use std::fmt; + +/// An identifier, such as a column or function name. +#[derive(Clone, Debug, PartialEq)] +pub struct Ident(pub(in crate::oxql) String); + +impl Ident { + pub fn as_str(&self) -> &str { + self.0.as_str() + } +} + +impl fmt::Display for Ident { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.0) + } +} diff --git a/oximeter/db/src/oxql/ast/literal.rs b/oximeter/db/src/oxql/ast/literal.rs new file mode 100644 index 0000000000..33f3d81485 --- /dev/null +++ b/oximeter/db/src/oxql/ast/literal.rs @@ -0,0 +1,384 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! AST node for literal values. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::cmp::Comparison; +use crate::oxql::Error; +use anyhow::Context; +use chrono::DateTime; +use chrono::Utc; +use oximeter::FieldType; +use oximeter::FieldValue; +use regex::Regex; +use std::fmt; +use std::net::IpAddr; +use std::time::Duration; +use uuid::Uuid; + +/// A literal value. +#[derive(Clone, Debug, PartialEq)] +pub enum Literal { + // TODO-performance: An i128 here is a bit gratuitous. + Integer(i128), + Double(f64), + String(String), + Boolean(bool), + Uuid(Uuid), + Duration(Duration), + Timestamp(DateTime), + IpAddr(IpAddr), +} + +impl Literal { + // Format the literal as a safe, typed string for ClickHouse. + pub(crate) fn as_db_safe_string(&self) -> String { + match self { + Literal::Integer(inner) => format!("{inner}"), + Literal::Double(inner) => format!("{inner}"), + Literal::String(inner) => format!("'{inner}'"), + Literal::Boolean(inner) => format!("{inner}"), + Literal::Uuid(inner) => format!("'{inner}'"), + Literal::Duration(inner) => { + let (count, interval) = duration_to_db_interval(inner); + format!("INTERVAL {} {}", count, interval) + } + Literal::Timestamp(inner) => { + format!("'{}'", inner.format(crate::DATABASE_TIMESTAMP_FORMAT)) + } + Literal::IpAddr(inner) => { + // NOTE: We store all IP addresses in ClickHouse as IPv6, with + // IPv4 addresses mapped to that. To run a comparison against a + // literal in Rust, we can use the value directly, since we + // decode it an convert to the right type during + // deserialization. But to compare in the DB itself, we need to + // do that with an IPv4-mapped IPv6 address. + // + // Helpfully, ClickHouse's `toIPv6` function takes a string of + // either family, and maps IPv4 into the IPv6 space, if needed. + format!("toIPv6('{inner}')") + } + } + } + + // Return true if this literal can be compared to a field of the provided + // type. + pub(crate) fn is_compatible_with_field( + &self, + field_type: FieldType, + ) -> bool { + match self { + Literal::Integer(_) => matches!( + field_type, + FieldType::U8 + | FieldType::I8 + | FieldType::U16 + | FieldType::I16 + | FieldType::U32 + | FieldType::I32 + | FieldType::U64 + | FieldType::I64 + ), + Literal::Double(_) => false, + Literal::String(_) => matches!(field_type, FieldType::String), + Literal::Boolean(_) => matches!(field_type, FieldType::Bool), + Literal::Uuid(_) => matches!(field_type, FieldType::Uuid), + Literal::Duration(_) => false, + Literal::Timestamp(_) => false, + Literal::IpAddr(_) => matches!(field_type, FieldType::IpAddr), + } + } + + /// Apply the comparison op between self and the provided field. 
+ ///
+ /// Return None if the comparison cannot be applied, either because the type
+ /// is not compatible or the comparison doesn't make sense.
+ pub(crate) fn compare_field(
+ &self,
+ value: &FieldValue,
+ cmp: Comparison,
+ ) -> Result<Option<bool>, Error> {
+ anyhow::ensure!(
+ self.is_compatible_with_field(value.field_type()),
+ "Field value of type {} cannot be compared to \
+ the value in this filter",
+ value.field_type(),
+ );
+ macro_rules! generate_cmp_match {
+ ($lhs:ident, $rhs:ident) => {
+ match cmp {
+ Comparison::Eq => Ok(Some($lhs == $rhs)),
+ Comparison::Ne => Ok(Some($lhs != $rhs)),
+ Comparison::Gt => Ok(Some($lhs > $rhs)),
+ Comparison::Ge => Ok(Some($lhs >= $rhs)),
+ Comparison::Lt => Ok(Some($lhs < $rhs)),
+ Comparison::Le => Ok(Some($lhs <= $rhs)),
+ Comparison::Like => Ok(None),
+ }
+ };
+ }
+ // Filter expressions are currently written as `<ident> <cmp> <value>`.
+ // That means the literal stored in `self` is the RHS of the comparison,
+ // and the field value passed in is the LHS.
+ match (value, self) {
+ (FieldValue::Bool(lhs), Literal::Boolean(rhs)) => {
+ generate_cmp_match!(rhs, lhs)
+ }
+ (FieldValue::String(lhs), Literal::String(rhs)) => match cmp {
+ Comparison::Eq => Ok(Some(lhs == rhs)),
+ Comparison::Ne => Ok(Some(lhs != rhs)),
+ Comparison::Gt => Ok(Some(lhs > rhs)),
+ Comparison::Ge => Ok(Some(lhs >= rhs)),
+ Comparison::Lt => Ok(Some(lhs < rhs)),
+ Comparison::Le => Ok(Some(lhs <= rhs)),
+ Comparison::Like => {
+ let re = Regex::new(rhs).context(
+ "failed to create regex for string matching",
+ )?;
+ Ok(Some(re.is_match(lhs)))
+ }
+ },
+ (FieldValue::IpAddr(lhs), Literal::IpAddr(rhs)) => {
+ generate_cmp_match!(rhs, lhs)
+ }
+ (FieldValue::Uuid(lhs), Literal::Uuid(rhs)) => {
+ generate_cmp_match!(rhs, lhs)
+ }
+ (FieldValue::U8(lhs), Literal::Integer(rhs)) => {
+ let lhs = i128::from(*lhs);
+ let rhs = *rhs;
+ generate_cmp_match!(lhs, rhs)
+ }
+ (FieldValue::I8(lhs), Literal::Integer(rhs)) => {
+ let lhs = i128::from(*lhs);
+ let rhs = *rhs;
+ generate_cmp_match!(lhs, rhs)
+ }
+ (FieldValue::U16(lhs), Literal::Integer(rhs)) => {
+ let lhs = i128::from(*lhs);
+ let rhs = *rhs;
+ generate_cmp_match!(lhs, rhs)
+ }
+ (FieldValue::I16(lhs), Literal::Integer(rhs)) => {
+ let lhs = i128::from(*lhs);
+ let rhs = *rhs;
+ generate_cmp_match!(lhs, rhs)
+ }
+ (FieldValue::U32(lhs), Literal::Integer(rhs)) => {
+ let lhs = i128::from(*lhs);
+ let rhs = *rhs;
+ generate_cmp_match!(lhs, rhs)
+ }
+ (FieldValue::I32(lhs), Literal::Integer(rhs)) => {
+ let lhs = i128::from(*lhs);
+ let rhs = *rhs;
+ generate_cmp_match!(lhs, rhs)
+ }
+ (FieldValue::U64(lhs), Literal::Integer(rhs)) => {
+ let lhs = i128::from(*lhs);
+ let rhs = *rhs;
+ generate_cmp_match!(lhs, rhs)
+ }
+ (FieldValue::I64(lhs), Literal::Integer(rhs)) => {
+ let lhs = i128::from(*lhs);
+ let rhs = *rhs;
+ generate_cmp_match!(lhs, rhs)
+ }
+ (_, _) => unreachable!(),
+ }
+ }
+}
+
+/// Duration constants used for interpreting duration literals.
+///
+/// Many of the values here are **approximate**. For example, a "year" is always
+/// 365 24-hour periods, regardless of leap years, the current time, or any
+/// other context.
+pub(crate) mod duration_consts {
+ use std::time::Duration;
+
+ /// Approximately 1 year, 365 24-hour periods.
+ pub const YEAR: Duration = Duration::from_secs(60 * 60 * 24 * 365);
+
+ /// Approximately 1 month, 30 24-hour periods.
+ pub const MONTH: Duration = Duration::from_secs(60 * 60 * 24 * 30);
+
+ /// Approximately 1 week, 7 24-hour periods.
+ pub const WEEK: Duration = Duration::from_secs(60 * 60 * 24 * 7); + + /// One day, equal to 24 hours. + pub const DAY: Duration = Duration::from_secs(60 * 60 * 24); + + /// An hour, exactly 3600 seconds. + pub const HOUR: Duration = Duration::from_secs(60 * 60); + + /// A minute, exactly 60 seconds. + pub const MINUTE: Duration = Duration::from_secs(60); + + /// One second. + pub const SECOND: Duration = Duration::from_secs(1); + + /// One millisecond, a thousandth of a second. + pub const MILLISECOND: Duration = Duration::from_millis(1); + + /// One microsecond, a millionth of a second. + pub const MICROSECOND: Duration = Duration::from_micros(1); + + /// One nanosecond, a billionth of a second. + pub const NANOSECOND: Duration = Duration::from_nanos(1); +} + +// Convert a duration into an appropriate interval for a database query. +// +// This converts the provided duration into the largest interval type for which +// the value is an integer. For example: +// +// `1us` -> (1, "MICROSECOND"), +// `3.4s` -> (3400, "MILLISECOND") +fn duration_to_db_interval(dur: &Duration) -> (u64, &'static str) { + fn as_whole_multiple(dur: &Duration, base: &Duration) -> Option { + let d = dur.as_nanos(); + let base = base.as_nanos(); + if d % base == 0 { + Some(u64::try_from(d / base).unwrap()) + } else { + None + } + } + use duration_consts::*; + const INTERVALS: [(Duration, &str); 10] = [ + (YEAR, "YEAR"), + (MONTH, "MONTH"), + (WEEK, "WEEK"), + (DAY, "DAY"), + (HOUR, "HOUR"), + (MINUTE, "MINUTE"), + (SECOND, "SECOND"), + (MILLISECOND, "MILLISECOND"), + (MICROSECOND, "MICROSECOND"), + (NANOSECOND, "NANOSECOND"), + ]; + for (base, interval) in &INTERVALS { + if let Some(count) = as_whole_multiple(dur, base) { + return (count, interval); + } + } + + // Durations must be a whole number of nanoseconds, so we will never fall + // past the last interval in the array above. 
+ unreachable!(); +} + +impl fmt::Display for Literal { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Literal::Integer(inner) => write!(f, "{inner}"), + Literal::Double(inner) => write!(f, "{inner}"), + Literal::String(inner) => write!(f, "{inner:?}"), + Literal::Boolean(inner) => write!(f, "{inner}"), + Literal::Uuid(inner) => write!(f, "\"{inner}\""), + Literal::Duration(inner) => write!(f, "{inner:?}"), + Literal::Timestamp(inner) => write!(f, "@{inner}"), + Literal::IpAddr(inner) => write!(f, "{inner}"), + } + } +} + +#[cfg(test)] +mod tests { + use super::duration_consts::*; + use super::duration_to_db_interval; + use super::Literal; + use crate::oxql::ast::cmp::Comparison; + use oximeter::FieldValue; + + #[test] + fn test_duration_to_db_interval() { + for base in [1_u32, 2, 3] { + let b = u64::from(base); + assert_eq!(duration_to_db_interval(&(base * YEAR)), (b, "YEAR")); + assert_eq!(duration_to_db_interval(&(base * MONTH)), (b, "MONTH")); + assert_eq!(duration_to_db_interval(&(base * WEEK)), (b, "WEEK")); + assert_eq!(duration_to_db_interval(&(base * DAY)), (b, "DAY")); + assert_eq!(duration_to_db_interval(&(base * HOUR)), (b, "HOUR")); + assert_eq!( + duration_to_db_interval(&(base * MINUTE)), + (b, "MINUTE") + ); + assert_eq!( + duration_to_db_interval(&(base * SECOND)), + (b, "SECOND") + ); + assert_eq!( + duration_to_db_interval(&(base * MILLISECOND)), + (b, "MILLISECOND") + ); + assert_eq!( + duration_to_db_interval(&(base * MICROSECOND)), + (b, "MICROSECOND") + ); + assert_eq!( + duration_to_db_interval(&(base * NANOSECOND)), + (b, "NANOSECOND") + ); + } + assert_eq!(duration_to_db_interval(&(YEAR / 2)), (4380, "HOUR")); + assert_eq!(duration_to_db_interval(&(HOUR / 60)), (1, "MINUTE")); + assert_eq!(duration_to_db_interval(&(HOUR / 10)), (6, "MINUTE")); + assert_eq!(duration_to_db_interval(&(HOUR / 12)), (5, "MINUTE")); + assert_eq!(duration_to_db_interval(&(HOUR / 120)), (30, "SECOND")); + assert_eq!(duration_to_db_interval(&(MINUTE / 2)), (30, "SECOND")); + assert_eq!(duration_to_db_interval(&(MINUTE / 10)), (6, "SECOND")); + assert_eq!( + duration_to_db_interval(&MINUTE.mul_f64(1.5)), + (90, "SECOND") + ); + assert_eq!( + duration_to_db_interval(&MICROSECOND.mul_f64(1.5)), + (1500, "NANOSECOND") + ); + assert_eq!( + duration_to_db_interval(&(YEAR + NANOSECOND)), + (31536000000000001, "NANOSECOND") + ); + } + + #[test] + fn test_literal_compare_field() { + let value = FieldValue::I64(3); + let lit = Literal::Integer(4); + + // The literal comparison would be written like: `field >= 4` where + // `field` has a value of 3 here. So the comparison is false. + assert_eq!( + lit.compare_field(&value, Comparison::Ge).unwrap(), + Some(false) + ); + + // Reversing this, we should have true. + assert_eq!( + lit.compare_field(&value, Comparison::Lt).unwrap(), + Some(true) + ); + + // It should not be equal. 
+ assert_eq!( + lit.compare_field(&value, Comparison::Eq).unwrap(), + Some(false) + ); + assert_eq!( + lit.compare_field(&value, Comparison::Ne).unwrap(), + Some(true) + ); + } + + #[test] + fn test_literal_compare_field_wrong_type() { + let value = FieldValue::String(String::from("foo")); + let lit = Literal::Integer(4); + assert!(lit.compare_field(&value, Comparison::Eq).is_err()); + } +} diff --git a/oximeter/db/src/oxql/ast/logical_op.rs b/oximeter/db/src/oxql/ast/logical_op.rs new file mode 100644 index 0000000000..60fc5d134f --- /dev/null +++ b/oximeter/db/src/oxql/ast/logical_op.rs @@ -0,0 +1,41 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing logical operators. + +// Copyright 2024 Oxide Computer Company + +use std::fmt; + +/// Logical operators. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum LogicalOp { + And, + Or, + Xor, +} + +impl LogicalOp { + pub(crate) fn as_db_function_name(&self) -> &'static str { + match self { + LogicalOp::And => "and", + LogicalOp::Or => "or", + LogicalOp::Xor => "xor", + } + } +} + +impl fmt::Display for LogicalOp { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!( + f, + "{}", + match self { + LogicalOp::And => "&&", + LogicalOp::Or => "||", + LogicalOp::Xor => "^", + } + ) + } +} diff --git a/oximeter/db/src/oxql/ast/mod.rs b/oximeter/db/src/oxql/ast/mod.rs new file mode 100644 index 0000000000..7037b74a7f --- /dev/null +++ b/oximeter/db/src/oxql/ast/mod.rs @@ -0,0 +1,152 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! AST for the Oximeter Query Language. + +// Copyright 2024 Oxide Computer Company + +use chrono::DateTime; +use chrono::Utc; +use oximeter::TimeseriesName; + +use self::table_ops::BasicTableOp; +use self::table_ops::GroupedTableOp; +use self::table_ops::TableOp; +pub mod cmp; +pub(super) mod grammar; +pub mod ident; +pub mod literal; +pub mod logical_op; +pub mod table_ops; + +/// An OxQL query. +#[derive(Clone, Debug, PartialEq)] +pub struct Query { + ops: Vec, +} + +impl Query { + // Return the first operation in the query, which is always a form of `get`. + fn first_op(&self) -> &TableOp { + self.ops.first().expect("Should have parsed at least 1 operation") + } + + pub(crate) fn timeseries_name(&self) -> &TimeseriesName { + match self.first_op() { + TableOp::Basic(BasicTableOp::Get(n)) => n, + TableOp::Basic(_) => unreachable!(), + TableOp::Grouped(GroupedTableOp { ops }) => { + ops.first().unwrap().timeseries_name() + } + } + } + + // Check that this query (and any subqueries) start with a get table op, and + // that there are no following get operations. I.e., we have: + // + // get ... | + // { get .. } | + // { get .. ; get .. } | + pub(crate) fn all_gets_at_query_start(&self) -> bool { + fn all_gets_at_query_start(ops: &[TableOp]) -> bool { + let (head, tail) = ops.split_at(1); + match &head[0] { + // If the head is a get, check that there are no following get + // operations. + TableOp::Basic(BasicTableOp::Get(_)) => { + !tail.iter().any(|op| { + matches!(op, TableOp::Basic(BasicTableOp::Get(_))) + }) + } + // Cannot start with any other basic op. + TableOp::Basic(_) => false, + // Recurse for grouped ops. 
+ TableOp::Grouped(GroupedTableOp { ops }) => { + ops.iter().all(Query::all_gets_at_query_start) + } + } + } + all_gets_at_query_start(&self.ops) + } + + // Return the non-get table transformations. + pub(crate) fn transformations(&self) -> &[TableOp] { + &self.ops[1..] + } + + // Split the query into either: + // + // - a list of nested queries and the remaining table ops in self, or + // - the flat query contained in self. + pub(crate) fn split(&self, query_end_time: DateTime) -> SplitQuery { + match &self.ops[0] { + TableOp::Basic(BasicTableOp::Get(_)) => { + SplitQuery::Flat(crate::oxql::Query { + parsed: self.clone(), + end_time: query_end_time, + }) + } + TableOp::Basic(_) => unreachable!(), + TableOp::Grouped(GroupedTableOp { ops }) => SplitQuery::Nested { + subqueries: ops + .iter() + .cloned() + .map(|parsed| crate::oxql::Query { + parsed, + end_time: query_end_time, + }) + .collect(), + transformations: self.ops[1..].to_vec(), + }, + } + } + + // Return the last referenced timestamp in the query, if any. + pub(crate) fn query_end_time(&self) -> Option> { + match &self.ops[0] { + TableOp::Basic(BasicTableOp::Get(_)) => self + .transformations() + .iter() + .filter_map(|op| { + let TableOp::Basic(BasicTableOp::Filter(filter)) = op + else { + return None; + }; + filter.last_timestamp() + }) + .max(), + TableOp::Basic(_) => unreachable!(), + TableOp::Grouped(GroupedTableOp { ops }) => { + let grouped_max = + ops.iter().filter_map(Self::query_end_time).max(); + let op_max = self + .transformations() + .iter() + .filter_map(|op| { + let TableOp::Basic(BasicTableOp::Filter(filter)) = op + else { + return None; + }; + filter.last_timestamp() + }) + .max(); + grouped_max.max(op_max) + } + } + } +} + +// Either a flat query or one with nested subqueries. +// +// OxQL supports subqueries. Though they can be nested, they must always be at +// the front of a query. This represents either a query that is flat, _or_ that +// prefix of subqueries and the following transformations. +#[derive(Clone, Debug, PartialEq)] +pub(crate) enum SplitQuery { + Flat(crate::oxql::Query), + Nested { + subqueries: Vec, + transformations: Vec, + }, +} diff --git a/oximeter/db/src/oxql/ast/table_ops/align.rs b/oximeter/db/src/oxql/ast/table_ops/align.rs new file mode 100644 index 0000000000..cf54ebc312 --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/align.rs @@ -0,0 +1,753 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing timeseries alignment operations. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::point::DataType; +use crate::oxql::point::MetricType; +use crate::oxql::point::Points; +use crate::oxql::point::ValueArray; +use crate::oxql::point::Values; +use crate::oxql::query::Alignment; +use crate::oxql::Error; +use crate::oxql::Table; +use crate::oxql::Timeseries; +use anyhow::Context; +use chrono::DateTime; +use chrono::TimeDelta; +use chrono::Utc; +use std::time::Duration; + +// The maximum factor by which an alignment operation may upsample data. +// +// This is a crude way to limit the size of a query result. We do not currently +// paginate the results of OxQL queries, so we need to find other ways to avoid +// DOS attacks due to large query results. +// +// While we also apply limits on the total number of samples fetched from the +// ClickHouse database, this alone is insufficient. 
For example, suppose we have +// two samples, spaced 1 second apart, which are then passed to an alignment +// table operation with a period of 1 nanosecond. Now you have a billion points! +// +// To prevent this, we restrict the total amount by which any alignment +// operation can upsample the data. Another way to think of it is that this +// limits the ratio between the requested period and the largest interval +// between timestamps in the data. +const MAX_UPSAMPLING_RATIO: u128 = 10; + +fn verify_max_upsampling_ratio( + timestamps: &[DateTime], + period: &Duration, +) -> Result<(), Error> { + let period = period.as_nanos(); + let max = MAX_UPSAMPLING_RATIO * period; + for (t1, t0) in timestamps.iter().skip(1).zip(timestamps.iter()) { + let Some(nanos) = t1.signed_duration_since(t0).num_nanoseconds() else { + anyhow::bail!("Overflow computing timestamp delta"); + }; + assert!(nanos > 0, "Timestamps should be sorted"); + let nanos = nanos as u128; + anyhow::ensure!( + nanos <= max, + "A table alignment operation may not upsample data by \ + more than a factor of {MAX_UPSAMPLING_RATIO}" + ); + } + Ok(()) +} + +/// An `align` table operation, used to produce data at well-defined periods. +/// +/// Alignment is important for any kind of aggregation. Data is actually +/// produced at variable intervals, under the control of the producer itself. +/// This means that in general, two timeseries that are related (say, the same +/// schema) may have data samples at slightly different timestamps. +/// +/// Alignment is used to produce data at the defined timestamps, so that samples +/// from multiple timeseries may be combined or correlated in meaningful ways. +#[derive(Clone, Debug, PartialEq)] +pub struct Align { + /// The alignment method, used to describe how data over the input period + /// is used to generate an output sample. + pub method: AlignmentMethod, + // TODO-completeness. We'd like to separate the concept of the period, the + // interval on which data is produced by this alignment, and the input + // window, the range of time in the past over which data is considered to + // produce the output values. + // + // For example, we might want to produce a moving average, by considering + // the last 1h of data, and produce an output value every 10m. Each of those + // output values would share 50m of data with the points on either side. + // + // For now, we'll enforce that the output period and input window are the + // same. + pub period: Duration, +} + +impl Align { + // Apply the alignment function to the set of tables. + pub(crate) fn apply( + &self, + tables: &[Table], + query_end: &DateTime, + ) -> Result, Error> { + match self.method { + AlignmentMethod::Interpolate => tables + .iter() + .map(|table| align_interpolate(table, query_end, &self.period)) + .collect(), + AlignmentMethod::MeanWithin => tables + .iter() + .map(|table| align_mean_within(table, query_end, &self.period)) + .collect(), + } + } +} + +/// An alignment method. +#[derive(Clone, Debug, PartialEq)] +pub enum AlignmentMethod { + /// Alignment is done by interpolating the output data at the specified + /// period. + Interpolate, + /// Alignment is done by computing the mean of the output data within the + /// specified period. + MeanWithin, +} + +// Align the timeseries in a table by computing the average within each output +// period. 
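Before the full implementation below, here is a minimal standalone sketch of that idea on plain `f64` seconds. The function name, types, and sample values are illustrative only and are not part of this module:

```rust
// A rough sketch only: walk backwards from the query end in steps of
// `period`, and average the gauge samples that fall in each window.
// Assumes `timestamps` is non-empty and sorted ascending.
fn mean_within_windows(
    timestamps: &[f64], // seconds
    values: &[f64],
    query_end: f64,
    period: f64,
) -> Vec<(f64, Option<f64>)> {
    let mut out = Vec::new();
    let mut window_end = query_end;
    while window_end >= timestamps[0] {
        let window_start = window_end - period;
        let in_window: Vec<f64> = timestamps
            .iter()
            .zip(values)
            .filter(|(t, _)| **t > window_start && **t <= window_end)
            .map(|(_, v)| *v)
            .collect();
        let mean = if in_window.is_empty() {
            None
        } else {
            Some(in_window.iter().sum::<f64>() / in_window.len() as f64)
        };
        // Each output point is stamped with the end of its window.
        out.push((window_end, mean));
        window_end -= period;
    }
    out.reverse(); // emit windows in ascending time order
    out
}

fn main() {
    // Samples at t = 1, 2, 3, 4 and a 2-second period ending at t = 4.
    let windows = mean_within_windows(
        &[1.0, 2.0, 3.0, 4.0],
        &[10.0, 20.0, 30.0, 40.0],
        4.0,
        2.0,
    );
    assert_eq!(windows, vec![(2.0, Some(15.0)), (4.0, Some(35.0))]);
}
```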
+fn align_mean_within( + table: &Table, + query_end: &DateTime, + period: &Duration, +) -> Result { + let mut output_table = Table::new(table.name()); + for timeseries in table.iter() { + let points = ×eries.points; + anyhow::ensure!( + points.dimensionality() == 1, + "Aligning multidimensional timeseries is not yet supported" + ); + let data_type = points.data_types().next().unwrap(); + anyhow::ensure!( + data_type.is_numeric(), + "Alignment by mean requires numeric data type, not {}", + data_type + ); + let metric_type = points.metric_type().unwrap(); + anyhow::ensure!( + matches!(metric_type, MetricType::Gauge | MetricType::Delta), + "Alignment by mean requires a gauge or delta metric, not {}", + metric_type, + ); + verify_max_upsampling_ratio(&points.timestamps, &period)?; + + // Always convert the output to doubles, when computing the mean. The + // output is always a gauge, so we do not need the start times of the + // input either. + // + // IMPORTANT: We compute the mean in the loop below from the back of the + // array (latest timestamp) to the front (earliest timestamp). They are + // appended to these arrays here in that _reversed_ order. These arrays + // are flipped before pushing them onto the timeseries at the end of the + // loop below. + let mut output_values = Vec::with_capacity(points.len()); + let mut output_timestamps = Vec::with_capacity(points.len()); + + // Convert the input to doubles now, so the tight loop below does less + // conversion / matching inside. + let input_points = match points.values(0).unwrap() { + ValueArray::Integer(values) => values + .iter() + .map(|maybe_int| maybe_int.map(|int| int as f64)) + .collect(), + ValueArray::Double(values) => values.clone(), + _ => unreachable!(), + }; + + // Alignment works as follows: + // + // - Start at the end of the timestamp array, working our way backwards + // in time. + // - Create the output timestamp from the current step. + // - Find all points in the input array that are within the alignment + // period. + // - Compute the mean of those. + let period_ = + TimeDelta::from_std(*period).context("time delta out of range")?; + let first_timestamp = points.timestamps[0]; + let mut ix: u32 = 0; + loop { + // Compute the next output timestamp, by shifting the query end time + // by the period and the index. + let time_offset = TimeDelta::from_std(ix * *period) + .context("time delta out of range")?; + let output_time = query_end + .checked_sub_signed(time_offset) + .context("overflow computing next output timestamp")?; + let window_start = output_time + .checked_sub_signed(period_) + .context("overflow computing next output window start")?; + + // The output time is before any of the data in the input array, + // we're done. It's OK for the _start time_ to be before any input + // timestamps. + if output_time < first_timestamp { + break; + } + + // Aggregate all values within this time window. + // + // This works a bit differently for gauge timeseries and deltas. + // Gauges are simpler, so let's consider them first. A point is + // "within" the window if the timestamp is within the window. Every + // point is either completely within or completely without the + // window, so we just add the values. + // + // Deltas have a start time, which makes things a bit more + // complicated. In that case, a point can overlap _partially_ with + // the output time window, and we'd like to take that partial + // overlap into account. 
To do that, we find relevant values which + // have either a start time or timestamp within the output window. + // We compute the fraction of overlap with the window, which is in + // [0.0, 1.0], and multiply the value by that fraction. One can + // think of this as a dot-product between the interval-overlap array + // and the value array, divided by the 1-norm, or number of nonzero + // entries. + let output_value = if matches!(metric_type, MetricType::Gauge) { + mean_gauge_value_in_window( + &points.timestamps, + &input_points, + window_start, + output_time, + ) + } else { + mean_delta_value_in_window( + points.start_times.as_ref().unwrap(), + &points.timestamps, + &input_points, + window_start, + output_time, + ) + }; + output_values.push(output_value); + + // In any case, we push the window's end time and increment to the + // next period. + output_timestamps.push(output_time); + ix += 1; + } + + // We've accumulated our input values into the output arrays, but in + // reverse order. Flip them and push onto the existing table, as a gauge + // timeseries. + let mut new_timeseries = Timeseries::new( + timeseries.fields.clone().into_iter(), + DataType::Double, + MetricType::Gauge, + ) + .unwrap(); + let values = + ValueArray::Double(output_values.into_iter().rev().collect()); + let timestamps = output_timestamps.into_iter().rev().collect(); + let values = Values { values, metric_type: MetricType::Gauge }; + new_timeseries.points = + Points { start_times: None, timestamps, values: vec![values] }; + new_timeseries.alignment = + Some(Alignment { end_time: *query_end, period: *period }); + output_table.insert(new_timeseries).unwrap(); + } + Ok(output_table) +} + +// Given an interval start and end, and a window start and end, compute the +// fraction of the _interval_ that the time window represents. +fn fraction_overlap_with_window( + interval_start: DateTime, + interval_end: DateTime, + window_start: DateTime, + window_end: DateTime, +) -> f64 { + assert!(interval_start < interval_end); + assert!(window_start < window_end); + let end = window_end.min(interval_end); + let start = window_start.max(interval_start); + let contained_size = (end - start).num_nanoseconds().unwrap() as f64; + if contained_size < 0.0 { + return 0.0; + } + let interval_size = + (interval_end - interval_start).num_nanoseconds().unwrap() as f64; + let fraction = contained_size / interval_size; + assert!(fraction >= 0.0); + assert!(fraction <= 1.0); + fraction +} + +// For a delta metric, compute the mean of points falling within the provided +// window. +// +// This uses both the start and end times when considering each point. Each +// point's value is weighted by the faction of overlap with the window. +fn mean_delta_value_in_window( + start_times: &[DateTime], + timestamps: &[DateTime], + input_points: &[Option], + window_start: DateTime, + window_end: DateTime, +) -> Option { + // We can find the indices where the timestamp and start times separately + // overlap the window of interest. Then any interval is potentially of + // interest if _either_ its start time or timestamp is within the window. + // + // Since the start times are <= the timestamps, we can take the min of those + // two to get the first point that overlaps at all, and the max to get the + // last. 
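The weighting above relies on the overlap fraction computed by `fraction_overlap_with_window`. As a standalone sketch on plain `f64` seconds (the helper name and numbers below are made up for illustration):

```rust
// Clamp the window to the interval, then divide by the interval's length.
fn fraction_overlap(interval: (f64, f64), window: (f64, f64)) -> f64 {
    let start = window.0.max(interval.0);
    let end = window.1.min(interval.1);
    (end - start).max(0.0) / (interval.1 - interval.0)
}

fn main() {
    // A window covering the second half of a 4-second interval.
    assert_eq!(fraction_overlap((0.0, 4.0), (2.0, 6.0)), 0.5);
    // Disjoint ranges contribute nothing.
    assert_eq!(fraction_overlap((0.0, 1.0), (2.0, 3.0)), 0.0);
}
```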
+ let first_timestamp = timestamps.partition_point(|t| t <= &window_start); + let last_timestamp = timestamps.partition_point(|t| t <= &window_end); + let first_start_time = start_times.partition_point(|t| t <= &window_start); + let last_start_time = start_times.partition_point(|t| t <= &window_end); + let first_index = first_timestamp.min(first_start_time); + let last_index = last_timestamp.max(last_start_time); + + // Detect the possible case where the interval is entirely before or + // entirely after the window. + if first_index == last_index { + let t = *timestamps.get(first_timestamp)?; + let s = *start_times.get(first_timestamp)?; + if t < window_start || s > window_end { + return None; + } + let Some(val) = input_points[first_timestamp] else { + return None; + }; + let fraction = fraction_overlap_with_window( + start_times[first_start_time], + timestamps[first_timestamp], + window_start, + window_end, + ); + return Some(fraction * val); + } + + // Compute the overlap for all points which have some overlap. + let starts = &start_times[first_index..last_index]; + let times = ×tamps[first_index..last_index]; + let vals = &input_points[first_index..last_index]; + let iter = starts + .into_iter() + .copied() + .zip(times.into_iter().copied()) + .zip(vals.into_iter().copied()); + let count = (last_timestamp - first_timestamp).max(1) as f64; + let mut maybe_sum = None; + for it in iter.filter_map(|((start, time), maybe_val)| { + let Some(val) = maybe_val else { + return None; + }; + let fraction = + fraction_overlap_with_window(start, time, window_start, window_end); + Some(fraction * val) + }) { + *maybe_sum.get_or_insert(0.0) += it; + } + maybe_sum.map(|sum| sum / count) +} + +// For a gauge metric, compute the mean of points falling within the provided +// window. +fn mean_gauge_value_in_window( + timestamps: &[DateTime], + input_points: &[Option], + window_start: DateTime, + window_end: DateTime, +) -> Option { + // Find the position of the window start and end in the sorted + // array of input timestamps. The `partition_point()` method accepts + // a closure, which partitions the input into a prefix where the + // closure evaluates to true, and a suffix where it's false. It + // returns the first element in the suffix. + // + // So the first closure returns true for all timestamps we want to + // exclude, which are those up to and including the window start time. + // So we get the index of the first point strictly later than the + // window start. + // + // The second closure returns true for all points up to and + // including the output time as well. + let start_index = timestamps.partition_point(|t| t <= &window_start); + let output_index = timestamps.partition_point(|t| t <= &window_end); + assert!(output_index >= start_index); + + // Accumulate the values over this set of indices. + // + // If there are really zero points in this time interval, we add + // a missing value. 
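The window bounds above come from `slice::partition_point`, which returns the index of the first element for which the predicate is false. A tiny standalone illustration with made-up integer timestamps:

```rust
fn main() {
    let timestamps = [10, 20, 30, 40];
    let (window_start, window_end) = (15, 35);
    // Index of the first timestamp strictly after the window start.
    let start_index = timestamps.partition_point(|t| *t <= window_start);
    // Index of the first timestamp strictly after the window end.
    let end_index = timestamps.partition_point(|t| *t <= window_end);
    assert_eq!((start_index, end_index), (1, 3));
    // The points at indices 1 and 2 (20 and 30) fall inside the window.
    assert_eq!(&timestamps[start_index..end_index], &[20, 30]);
}
```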
+ if start_index != output_index { + let mut maybe_sum = None; + for it in input_points[start_index..output_index] + .iter() + .filter_map(|x| x.as_ref().copied()) + { + *maybe_sum.get_or_insert(0.0) += it; + } + maybe_sum.map(|output_value| { + output_value / (output_index - start_index) as f64 + }) + } else { + None + } +} + +fn align_interpolate( + _table: &Table, + _query_end: &DateTime, + _period: &Duration, +) -> Result { + anyhow::bail!("Alignment with interpolation not yet implemented") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_fraction_overlap_with_window() { + let now = Utc::now(); + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start; + let interval_end = window_end; + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 1.0 + ); + + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start; + let interval_end = now - Duration::from_secs_f64(0.5); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 1.0, + "This interval is aligned with the start time \ + of the window, and contained entirely within it, \ + so the fraction should be 1.0", + ); + + // If we reverse the window and interval, then the interval entirely + // contains the window, which is 50% of the interval. + let (window_start, window_end, interval_start, interval_end) = + (interval_start, interval_end, window_start, window_end); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 0.5, + "The window is entirely contained within the interval, \ + and covers 50% of it", + ); + + // If the interval is entirely contained in the window, we should have + // the entire interval as our fraction. + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start + Duration::from_secs_f64(0.25); + let interval_end = window_start + Duration::from_secs_f64(0.5); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 1.0, + "The interval is entirely contained within the window", + ); + + // This is aligned at the right with the window end. + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start + Duration::from_secs_f64(0.25); + let interval_end = window_end; + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 1.0, + "The interval is aligned at right with the window, and \ + entirely contained within it, so the fraction should still \ + be 1.0", + ); + + // But if we reverse it again, the fraction should reveal itself. + let (window_start, window_end, interval_start, interval_end) = + (interval_start, interval_end, window_start, window_end); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 0.75, + "The window represents 75% of the interval", + ); + + // This interval does not overlap at all, to the left. 
+ let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start - Duration::from_secs(2); + let interval_end = window_start - Duration::from_secs(1); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 0.0, + ); + + // This interval does not overlap at all, to the right. + let window_start = now - Duration::from_secs(1); + let window_end = now; + let interval_start = window_start + Duration::from_secs(1); + let interval_end = window_start + Duration::from_secs(2); + assert_eq!( + fraction_overlap_with_window( + interval_start, + interval_end, + window_start, + window_end, + ), + 0.0, + ); + } + + #[test] + fn test_mean_delta_value_in_window() { + let now = Utc::now(); + let start_times = &[ + now - Duration::from_secs(4), + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now - Duration::from_secs(1), + ]; + let timestamps = &[ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now - Duration::from_secs(1), + now, + ]; + let input_points = &[Some(0.0), Some(1.0), Some(2.0), Some(3.0)]; + + let window_start = now - Duration::from_secs_f64(0.5); + let window_end = now; + let mean = mean_delta_value_in_window( + start_times, + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This should overlap the last interval"); + assert_eq!( + mean, + input_points.last().unwrap().unwrap() / 2.0, + "This overlaps the last interval by half", + ); + } + + #[test] + fn test_mean_gauge_value_in_window() { + let now = Utc::now(); + let timestamps = &[ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now - Duration::from_secs(1), + now, + ]; + let input_points = &[Some(0.0), Some(1.0), Some(2.0), Some(3.0)]; + + let window_start = now - Duration::from_secs(4); + let window_end = now - Duration::from_secs(3); + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This window should overlap the first timestamp"); + assert_eq!( + mean, 0.0, + "This window should overlap the first timestamp, so the \ + mean value should be the mean of the first point only" + ); + + let window_start = now - Duration::from_secs(4); + let window_end = now - Duration::from_secs(2); + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This window should overlap the first two timestamps"); + assert_eq!( + mean, 0.5, + "This window should overlap the first two timestamps, so the \ + mean value should be the mean of the first two points" + ); + + let window_start = now - Duration::from_secs(3); + let window_end = now - Duration::from_secs(2); + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This window should overlap the second timestamps"); + assert_eq!( + mean, 1.0, + "This window should overlap the second timestamp, so the \ + mean value should be the mean of the second point only." 
+ ); + + let window_start = now - Duration::from_secs(4); + let window_end = *timestamps.last().unwrap(); + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .expect("This window should overlap the all timestamps"); + assert_eq!( + mean, + input_points.iter().map(|x| x.unwrap()).sum::() + / input_points.len() as f64, + "This window should overlap the all timestamps, so the \ + mean value should be the mean of all points", + ); + + let window_start = now - Duration::from_secs(3); + let window_end = now - Duration::from_secs_f64(2.5); + assert!( + mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ) + .is_none(), + "This window should overlap none of the points" + ); + } + + #[test] + fn test_verify_max_upsampling_ratio() { + // We'll use a 1 second period, and ensure that we allow downsampling, + // and upsampling up to the max factor. That's 1/10th of a second, + // currently. + let now = Utc::now(); + let timestamps = &[now - Duration::from_secs(1), now]; + + // All values within the threshold. + for period in [ + Duration::from_secs_f64(0.5), + Duration::from_secs(10), + Duration::from_millis(100), + ] { + assert!(verify_max_upsampling_ratio(timestamps, &period).is_ok()); + } + + // Just below the threshold. + assert!(verify_max_upsampling_ratio( + timestamps, + &Duration::from_millis(99), + ) + .is_err()); + + // Sanity check for way below the threshold. + assert!(verify_max_upsampling_ratio( + timestamps, + &Duration::from_nanos(1), + ) + .is_err()); + + // Arrays where we can't compute an interval are fine. + assert!(verify_max_upsampling_ratio( + ×tamps[..1], + &Duration::from_nanos(1), + ) + .is_ok()); + assert!( + verify_max_upsampling_ratio(&[], &Duration::from_nanos(1),).is_ok() + ); + } + + #[test] + fn test_mean_delta_does_not_modify_missing_values() { + let now = Utc::now(); + let start_times = + &[now - Duration::from_secs(2), now - Duration::from_secs(1)]; + let timestamps = &[now - Duration::from_secs(1), now]; + let input_points = &[Some(1.0), None]; + let window_start = now - Duration::from_secs(1); + let window_end = now; + let mean = mean_delta_value_in_window( + start_times, + timestamps, + input_points, + window_start, + window_end, + ); + assert!( + mean.is_none(), + "This time window contains only a None value, which should not be \ + included in the sum" + ); + } + + #[test] + fn test_mean_gauge_does_not_modify_missing_values() { + let now = Utc::now(); + let timestamps = &[now - Duration::from_secs(1), now]; + let input_points = &[Some(1.0), None]; + let window_start = now - Duration::from_secs(1); + let window_end = now; + let mean = mean_gauge_value_in_window( + timestamps, + input_points, + window_start, + window_end, + ); + assert!( + mean.is_none(), + "This time window contains only a None value, which should not be \ + included in the sum" + ); + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs new file mode 100644 index 0000000000..e97673c8f8 --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -0,0 +1,1283 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing filtering table operations. 
+ +// Copyright 2024 Oxide Computer Company + +use crate::oxql::ast::cmp::Comparison; +use crate::oxql::ast::ident::Ident; +use crate::oxql::ast::literal::Literal; +use crate::oxql::ast::logical_op::LogicalOp; +use crate::oxql::point::DataType; +use crate::oxql::point::MetricType; +use crate::oxql::point::Points; +use crate::oxql::point::ValueArray; +use crate::oxql::query::special_idents; +use crate::oxql::Error; +use crate::oxql::Table; +use crate::oxql::Timeseries; +use anyhow::Context; +use chrono::DateTime; +use chrono::Utc; +use oximeter::FieldType; +use oximeter::FieldValue; +use regex::Regex; +use std::collections::BTreeSet; +use std::fmt; + +/// An AST node for the `filter` table operation. +/// +/// This can be a simple operation like `foo == "bar"` or a more complex +/// expression, such as: `filter hostname == "foo" || (hostname == "bar" +/// && id == "baz")`. +#[derive(Clone, Debug, PartialEq)] +pub struct Filter { + /// True if the whole expression is negated. + pub negated: bool, + /// The contained filtering expression, which may contain many expressions + /// joined by logical operators. + pub expr: FilterExpr, +} + +impl fmt::Display for Filter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}({})", if self.negated { "!" } else { "" }, self.expr,) + } +} + +impl core::str::FromStr for Filter { + type Err = Error; + fn from_str(s: &str) -> Result { + crate::oxql::ast::grammar::query_parser::filter_expr(s) + .map_err(|e| anyhow::anyhow!("invalid filter expression: {e}")) + } +} + +// A crude limit on expression complexity, governing how many times we +// iteratively apply a DNF simplification before bailing out. +const EXPR_COMPLEXITY_ITERATIVE_LIMIT: usize = 32; + +// A crude limit on expression complexity, governing how many times we +// recurisvely apply a DNF simplification before bailing out. +const EXPR_COMPLEXITY_RECURSIVE_LIMIT: usize = 32; + +impl Filter { + /// Return the negation of this filter. + pub fn negate(&self) -> Filter { + Self { negated: !self.negated, ..self.clone() } + } + + /// Split the filter at top-level disjunctions. + /// + /// This is likely only useful after simplifying to DNF with + /// `simplify_to_dnf()`. + pub fn flatten_disjunctions(&self) -> Vec { + let mut out = vec![]; + self.flatten_disjunctions_inner(&mut out); + out + } + + fn flatten_disjunctions_inner(&self, dis: &mut Vec) { + // Recursion is only needed if this is an OR expression. In that case, + // we split the left and push it, and then recurse on the right. + // + // Note that we don't need left-recursion because the parser is strictly + // non-left-recursive. + if let FilterExpr::Compound(CompoundFilter { + left, + op: LogicalOp::Or, + right, + }) = &self.expr + { + dis.push(*left.clone()); + right.flatten_disjunctions_inner(dis); + } else { + // It's not an OR expression, or it is a simple filter expression. + // In either case, just push it directly, withouth recursing. + dis.push(self.clone()); + } + } + + /// Simplfy a filter expression to disjunctive normal form (DNF). + /// + /// Disjunctive normal form is one of a few canonical ways of writing a + /// boolean expression. It simplifies to a disjunction of conjunctions, + /// i.e., only has terms like `(a && b) || (c && d) || ...`. + /// + /// This method exists for the purposes of creating _independent_ pieces of + /// a filtering expression, each of which can be used to generate a new SQL + /// query run against ClickHouse. This is critical to support complicated + /// OxQL queries. 
Consider: + /// + /// ```ignore + /// get some_timeseries + /// | filter (foo == "bar") || (timestamp > @now() - 1m && foo == "baz") + /// ``` + /// + /// This requires fetching part of one timeseries, and all of another. One + /// cannot run this as a conjunction on the fields and then a query on the + /// measurements. It must be run in such a way to get the sets of keys + /// consistent with each term in the disjunction _independently_, so that + /// one can apply the timestamp filter to only the correct one. + /// + /// We use this method to generate the DNF, a form with only disjunctions of + /// conjunctions. That is, it's not possible to further distribute + /// conjunctions over disjunctions. + /// + /// Each disjunction is then a separate query against the fields table, where + /// we keep track of the keys in each. Each set of predicates and consistent + /// keys is then used later to fetch the measurements. + /// + /// # Notes + /// + /// There is a huge academic literature on this topic, part of the study of + /// formal languages and other areas theoretical computer science. These + /// references are mostly pretty dense and formal, though a few are really + /// useful. This [paper](https://www.researchgate.net/publication/220154187_A_Survey_of_Strategies_in_Program_Transformation_Systems) + /// is a good and accessible survey to the idea of translation systems -- + /// it's mostly focused on programming languages and compilers, but Figures + /// 7-9 in particular are about DNF. + /// + /// As usual, the Wikipedia page is a reasonable overview as well, + /// [here](https://en.wikipedia.org/wiki/Disjunctive_normal_form). We're + /// using the "syntactic" DNF conversion algorithm, essentially. This + /// involves a recursive application of + /// [de Morgan's rules](https://en.wikipedia.org/wiki/De_Morgan%27s_laws), + /// [involution / double-negation](https://en.wikipedia.org/wiki/Involution_(mathematics)), + /// distributivity of [Boolean operators](https://en.wikipedia.org/wiki/Boolean_algebra#Monotone_laws), + /// etc. + pub fn simplify_to_dnf(&self) -> Result { + self.simplify_to_dnf_impl(0) + } + + fn simplify_to_dnf_impl(&self, level: usize) -> Result { + anyhow::ensure!( + level < EXPR_COMPLEXITY_RECURSIVE_LIMIT, + "Maximum recursion level exceeded trying to simplify \ + logical expression to disjunctive normal form" + ); + let mut out = self.simplify_to_dnf_inner(level)?; + if &out == self { + return Ok(out); + } + // Continually apply simplifications as long as able. + // + // This makes me really nervous, so I'm adding an escape hatch that we + // only allow a few iterations. If we've not simplified within that, + // we'll just declare the expression too complicated to handle. + for _ in 0..EXPR_COMPLEXITY_ITERATIVE_LIMIT { + let out_ = out.simplify_to_dnf_inner(level)?; + if out_ == out { + return Ok(out_); + } + out = out_; + } + anyhow::bail!("Logical expression is too complicated to simplify") + } + + fn simplify_to_dnf_inner(&self, level: usize) -> Result { + let new = self.expr.simplify_to_dnf(level)?; + + // This matches the rule: + // + // !!x -> x + if self.negated && new.negated && new.is_simple() { + return Ok(new.negate()); + } + + // These two blocks match de Morgan's rules, which distribute a negation + // down and swap the logical operator. 
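The two rewrite blocks that follow rely on de Morgan's laws. As a quick sanity check, purely illustrative and not part of this module, both identities can be verified by brute force over all boolean assignments:

```rust
fn main() {
    for x in [false, true] {
        for y in [false, true] {
            // !(x && y)  ->  !x || !y
            assert_eq!(!(x && y), !x || !y);
            // !(x || y)  ->  !x && !y
            assert_eq!(!(x || y), !x && !y);
        }
    }
}
```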
+ if self.negated { + // This matches one of de Morgan's rules: + // + // !(x && y) -> !x || !y + if let FilterExpr::Compound(CompoundFilter { + left: x, + op: LogicalOp::And, + right: y, + }) = &new.expr + { + let expr = FilterExpr::Compound(CompoundFilter { + left: Box::new(x.negate()), + op: LogicalOp::Or, + right: Box::new(y.negate()), + }); + return Ok(Filter { negated: false, expr }); + } + + // This matches the other of de Morgan's rules: + // + // !(x || y) -> !x && !y + if let FilterExpr::Compound(CompoundFilter { + left: x, + op: LogicalOp::And, + right: y, + }) = &new.expr + { + let expr = FilterExpr::Compound(CompoundFilter { + left: Box::new(x.negate()), + op: LogicalOp::Or, + right: Box::new(y.negate()), + }); + return Ok(Filter { negated: false, expr }); + } + } + + // Nothing else to do, just return ourself, though we do need to make + // sure we copy the negation from self as well. + Ok(Self { negated: self.negated, ..new }) + } + + // Merge this filter with another one, using the provided operator. + pub(crate) fn merge(&self, other: &Filter, op: LogicalOp) -> Self { + Self { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(self.clone()), + op, + right: Box::new(other.clone()), + }), + } + } + + // Apply the filter to the provided field. + // + // This returns `Ok(None)` if the filter doesn't apply. It returns `Ok(x)` + // if the filter does apply, where `x` is the logical application of the + // filter to the field. `true` means "keep this field", which is analogous + // to the `Iterator::filter()` method's signature. + // + // If the filter does apply, but is incompatible or incomparable, return an + // error. + fn filter_field( + &self, + name: &str, + value: &FieldValue, + ) -> Result, Error> { + let result = match &self.expr { + FilterExpr::Simple(inner) => inner.filter_field(name, value), + FilterExpr::Compound(inner) => inner.filter_field(name, value), + }; + result.map(|maybe_keep| maybe_keep.map(|keep| self.negated ^ keep)) + } + + // Apply the filter to the provided points. + fn filter_points(&self, points: &Points) -> Result { + let to_keep = self.filter_points_inner(points)?; + points.filter(to_keep) + } + + // Inner implementation of filtering points. + // + // Returns an array of bools, where true indicates the point should be kept. + fn filter_points_inner(&self, points: &Points) -> Result, Error> { + match &self.expr { + FilterExpr::Simple(inner) => { + inner.filter_points(self.negated, points) + } + FilterExpr::Compound(inner) => { + inner.filter_points(self.negated, points) + } + } + } + + // Apply the filtering table operation. + pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { + anyhow::ensure!( + tables.len() >= 1, + "Filtering operations require at least one table", + ); + let mut output_tables = Vec::with_capacity(tables.len()); + // Ensure that all the identifiers in this filter apply to the + // input timeseries. We can do this once at the beginning, because all + // the timeseries in a table have the same set of fields. + let first_timeseries = tables[0] + .iter() + .next() + .context("Table contains no timeseries to filter")?; + let ident_names = self.ident_names(); + + // There are extra, implied names that depend on the data type of the + // timeseries itself, check those as well. 
+ let extras = implicit_field_names(first_timeseries); + let not_valid = ident_names + .iter() + .filter(|&&name| { + !(first_timeseries.fields.contains_key(name) + || extras.contains(name)) + }) + .collect::>(); + anyhow::ensure!( + not_valid.is_empty(), + "The filter expression contains identifiers that are not \ + valid for its input timeseries. Invalid identifiers: {:?}, \ + timeseries fields: {:?}", + not_valid, + ident_names.union(&extras), + ); + + // Filter each input table in succession. + for table in tables.iter() { + let mut timeseries = Vec::with_capacity(table.len()); + 'timeseries: for input in table.iter() { + // If the filter restricts any of the fields, remove this + // timeseries altogether. + for (name, value) in input.fields.iter() { + if let Some(false) = self.filter_field(name, value)? { + continue 'timeseries; + } + } + + // Apply the filter to the data points as well. + let points = self.filter_points(&input.points)?; + + // Similar to above, if the filter removes all data points in + // the timeseries, let's remove the timeseries altogether. + if points.is_empty() { + continue; + } + timeseries.push(Timeseries { + fields: input.fields.clone(), + points, + alignment: input.alignment, + }) + } + output_tables.push(Table::from_timeseries( + table.name(), + timeseries.into_iter(), + )?); + } + Ok(output_tables) + } + + // Return the last referenced timestamp by this filter, if any. + // + // This is the maximum timestamp, before which any filtered point must lie. + // This is used to determine the query end time. + pub(crate) fn last_timestamp(&self) -> Option> { + match &self.expr { + FilterExpr::Simple(inner) => inner.last_timestamp(), + FilterExpr::Compound(inner) => inner.last_timestamp(), + } + } + + // Return the name of all identifiers listed in this filter. + fn ident_names(&self) -> BTreeSet<&str> { + match &self.expr { + FilterExpr::Simple(inner) => { + let mut out = BTreeSet::new(); + out.insert(inner.ident.as_str()); + out + } + FilterExpr::Compound(inner) => { + let mut all = inner.left.ident_names(); + all.extend(inner.right.ident_names()); + all + } + } + } + + fn is_xor(&self) -> bool { + self.is_op(LogicalOp::Xor) + } + + fn is_op(&self, expected_op: LogicalOp) -> bool { + let FilterExpr::Compound(CompoundFilter { op, .. }) = &self.expr else { + return false; + }; + op == &expected_op + } + + // If this is an XOR, rewrite it to a disjunction of conjunctions. + // + // If it is not, return a clone of self. + fn rewrite_xor_to_disjunction(&self) -> Self { + let self_ = self.clone(); + if !self.is_xor() { + return self_; + } + let Filter { + negated, + expr: FilterExpr::Compound(CompoundFilter { left, right, .. }), + } = self_ + else { + unreachable!(); + }; + let left_ = CompoundFilter { + left: left.clone(), + op: LogicalOp::And, + right: Box::new(right.negate()), + }; + let right_ = CompoundFilter { + left: Box::new(left.negate()), + op: LogicalOp::And, + right, + }; + let expr = CompoundFilter { + left: Box::new(left_.to_filter()), + op: LogicalOp::Or, + right: Box::new(right_.to_filter()), + }; + Filter { negated, expr: FilterExpr::Compound(expr) } + } + + fn is_simple(&self) -> bool { + matches!(self.expr, FilterExpr::Simple(_)) + } +} + +/// Return the names of the implicit fields / columns that a filter can apply +/// to, based on the metric types of the contained data points. +fn implicit_field_names( + first_timeseries: &Timeseries, +) -> BTreeSet<&'static str> { + let mut out = BTreeSet::new(); + + // Everything has a timestamp! 
+ out.insert(special_idents::TIMESTAMP); + let type_info = first_timeseries + .points + .metric_types() + .zip(first_timeseries.points.data_types()); + for (metric_type, data_type) in type_info { + match (metric_type, data_type) { + // Scalar gauges. + ( + MetricType::Gauge, + DataType::Integer + | DataType::Boolean + | DataType::Double + | DataType::String, + ) => { + out.insert(special_idents::DATUM); + } + // Histogram gauges. + ( + MetricType::Gauge, + DataType::IntegerDistribution | DataType::DoubleDistribution, + ) => { + out.insert(special_idents::BINS); + out.insert(special_idents::COUNTS); + } + // Scalars, either delta or cumulatives. + ( + MetricType::Delta | MetricType::Cumulative, + DataType::Integer | DataType::Double, + ) => { + out.insert(special_idents::DATUM); + out.insert(special_idents::START_TIME); + } + // Histograms, either delta or cumulative. + ( + MetricType::Delta | MetricType::Cumulative, + DataType::IntegerDistribution | DataType::DoubleDistribution, + ) => { + out.insert(special_idents::BINS); + out.insert(special_idents::COUNTS); + out.insert(special_idents::START_TIME); + } + // Impossible combinations + ( + MetricType::Delta | MetricType::Cumulative, + DataType::Boolean | DataType::String, + ) => unreachable!(), + } + } + out +} + +/// A filtering expression, used in the `filter` table operation. +#[derive(Clone, Debug, PartialEq)] +pub enum FilterExpr { + /// A single logical expression, e.g., `foo == "bar"`. + Simple(SimpleFilter), + /// Two logical expressions, e.g., `foo == "bar" || yes == false` + Compound(CompoundFilter), +} + +impl FilterExpr { + fn to_filter(&self) -> Filter { + Filter { negated: false, expr: self.clone() } + } + + fn simplify_to_dnf(&self, level: usize) -> Result { + match self { + FilterExpr::Simple(_) => Ok(self.to_filter()), + FilterExpr::Compound(CompoundFilter { left, op, right }) => { + // Apply recursively first. 
+ let left = left.simplify_to_dnf_impl(level + 1)?; + let right = right.simplify_to_dnf_impl(level + 1)?; + + // This matches the rule: + // + // (x || y) && z -> (x && z) || (y && z) + if let ( + FilterExpr::Compound(CompoundFilter { + left: x, + op: LogicalOp::Or, + right: y, + }), + LogicalOp::And, + FilterExpr::Simple(z), + ) = (&left.expr, op, &right.expr) + { + let left_ = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: x.clone(), + op: LogicalOp::And, + right: Box::new(z.to_filter()), + }), + }; + let right_ = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: y.clone(), + op: LogicalOp::And, + right: Box::new(z.to_filter()), + }), + }; + return Ok(Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left_), + op: LogicalOp::Or, + right: Box::new(right_), + }), + }); + } + + // This matches the rule: + // + // z && (x || y) -> (z && x) || (z && y) + if let ( + FilterExpr::Simple(z), + LogicalOp::And, + FilterExpr::Compound(CompoundFilter { + left: x, + op: LogicalOp::Or, + right: y, + }), + ) = (&left.expr, op, &right.expr) + { + let left_ = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(z.to_filter()), + op: LogicalOp::And, + right: x.clone(), + }), + }; + let right_ = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(z.to_filter()), + op: LogicalOp::And, + right: y.clone(), + }), + }; + return Ok(Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left_), + op: LogicalOp::Or, + right: Box::new(right_), + }), + }); + } + + // Lastly, simplify an XOR to its logical equivalent, which is + // in DNF. + let out = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left), + op: *op, + right: Box::new(right), + }), + }; + Ok(out.rewrite_xor_to_disjunction()) + } + } + } +} + +impl fmt::Display for FilterExpr { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FilterExpr::Simple(inner) => write!(f, "{inner}"), + FilterExpr::Compound(inner) => write!(f, "{inner}"), + } + } +} + +/// Two filter expressions joined by a logical operator. +#[derive(Clone, Debug, PartialEq)] +pub struct CompoundFilter { + /// The left subexpression. + pub left: Box, + /// The logical operator joining the two expressions. + pub op: LogicalOp, + /// The right subexpression. + pub right: Box, +} + +impl fmt::Display for CompoundFilter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.left, self.op, self.right,) + } +} + +impl CompoundFilter { + fn to_filter(&self) -> Filter { + Filter { negated: false, expr: FilterExpr::Compound(self.clone()) } + } + + // Apply the filter to the provided field. + fn filter_field( + &self, + name: &str, + value: &FieldValue, + ) -> Result, Error> { + let left = self.left.filter_field(name, value)?; + let right = self.right.filter_field(name, value)?; + match (left, right) { + (None, None) => Ok(None), + (Some(x), None) | (None, Some(x)) => Ok(Some(x)), + (Some(left), Some(right)) => match self.op { + LogicalOp::And => Ok(Some(left && right)), + LogicalOp::Or => Ok(Some(left || right)), + LogicalOp::Xor => Ok(Some(left ^ right)), + }, + } + } + + // Apply the filter to the provided points. 
+ fn filter_points( + &self, + negated: bool, + points: &Points, + ) -> Result, Error> { + let mut left = self.left.filter_points_inner(points)?; + let right = self.right.filter_points_inner(points)?; + match self.op { + LogicalOp::And => { + for i in 0..left.len() { + left[i] = negated ^ (left[i] & right[i]); + } + } + LogicalOp::Or => { + for i in 0..left.len() { + left[i] = negated ^ (left[i] | right[i]); + } + } + LogicalOp::Xor => { + for i in 0..left.len() { + left[i] = negated ^ (left[i] ^ right[i]); + } + } + } + Ok(left) + } + + fn last_timestamp(&self) -> Option> { + let left = self.left.last_timestamp(); + let right = self.right.last_timestamp(); + match (left, right) { + (None, None) => None, + (Some(single), None) | (None, Some(single)) => Some(single), + (Some(left), Some(right)) => Some(left.max(right)), + } + } +} + +/// A simple filter expression, comparing an identifier to a value. +#[derive(Clone, Debug, PartialEq)] +pub struct SimpleFilter { + /// The identifier being compared. + pub ident: Ident, + /// The comparison operator. + pub cmp: Comparison, + /// The value to compare the identifier against. + pub value: Literal, +} + +impl fmt::Display for SimpleFilter { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} {} {}", self.ident, self.cmp, self.value,) + } +} + +impl SimpleFilter { + fn to_filter(&self) -> Filter { + Filter { negated: false, expr: FilterExpr::Simple(self.clone()) } + } + + // Apply this filter to the provided field. + // + // If the field name does not match the identifier in `self`, return + // `Ok(None)`, since this filter does not apply to the provided field. + // + // If the name matches and the type of `self` is compatible, return `Ok(x)` + // where `x` is the logical application of the filter to the field. + // + // If the field matches the name, but the type is not compatible, return an + // error. + fn filter_field( + &self, + name: &str, + value: &FieldValue, + ) -> Result, Error> { + // If the name does not match, this filter does not apply, and so we do not + // filter the field. + if self.ident.as_str() != name { + return Ok(None); + } + self.value.compare_field(value, self.cmp) + } + + pub(crate) fn value_type_is_compatible_with_field( + &self, + field_type: FieldType, + ) -> bool { + self.value.is_compatible_with_field(field_type) + } + + /// Return the expression as a string that can be applied safely in the + /// database. + pub(crate) fn as_db_safe_string(&self) -> String { + let expr = self.value.as_db_safe_string(); + let fn_name = self.cmp.as_db_function_name(); + format!("{}({}, {})", fn_name, self.ident, expr) + } + + // Returns an array of bools, where true indicates the point should be kept. 
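Both the compound and simple cases build per-point keep/drop masks and fold in any outer negation with XOR. A small standalone sketch of that pattern, with a made-up helper name and data:

```rust
// Combine two keep/drop masks pointwise with `&&`, then apply an outer
// negation to every point with XOR.
fn combine_and(negated: bool, left: &[bool], right: &[bool]) -> Vec<bool> {
    left.iter().zip(right).map(|(l, r)| negated ^ (*l && *r)).collect()
}

fn main() {
    let left = [true, true, false];
    let right = [true, false, false];
    assert_eq!(combine_and(false, &left, &right), vec![true, false, false]);
    // Negation flips every point of the combined mask.
    assert_eq!(combine_and(true, &left, &right), vec![false, true, true]);
}
```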
+ fn filter_points( + &self, + negated: bool, + points: &Points, + ) -> Result, Error> { + let ident = self.ident.as_str(); + if ident == "timestamp" { + self.filter_points_by_timestamp(negated, &points.timestamps) + } else if ident == "datum" { + anyhow::ensure!( + points.dimensionality() == 1, + "Filtering multidimensional values by datum is not yet supported" + ); + self.filter_points_by_datum(negated, points.values(0).unwrap()) + } else { + Ok(vec![!negated; points.len()]) + } + } + + fn filter_points_by_timestamp( + &self, + negated: bool, + timestamps: &[DateTime], + ) -> Result, Error> { + let Literal::Timestamp(timestamp) = &self.value else { + anyhow::bail!( + "Cannot compare non-timestamp filter against a timestamp" + ); + }; + match self.cmp { + Comparison::Eq => Ok(timestamps + .iter() + .map(|t| negated ^ (t == timestamp)) + .collect()), + Comparison::Ne => Ok(timestamps + .iter() + .map(|t| negated ^ (t != timestamp)) + .collect()), + Comparison::Gt => Ok(timestamps + .iter() + .map(|t| negated ^ (t > timestamp)) + .collect()), + Comparison::Ge => Ok(timestamps + .iter() + .map(|t| negated ^ (t >= timestamp)) + .collect()), + Comparison::Lt => Ok(timestamps + .iter() + .map(|t| negated ^ (t < timestamp)) + .collect()), + Comparison::Le => Ok(timestamps + .iter() + .map(|t| negated ^ (t <= timestamp)) + .collect()), + Comparison::Like => unreachable!(), + } + } + + fn filter_points_by_datum( + &self, + negated: bool, + values: &ValueArray, + ) -> Result, Error> { + match (&self.value, values) { + (Literal::Integer(int), ValueArray::Integer(ints)) => { + match self.cmp { + Comparison::Eq => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) == *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ne => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) != *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Gt => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) > *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ge => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) >= *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Lt => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) < *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Le => Ok(ints + .iter() + .map(|maybe_int| { + maybe_int + .map(|i| negated ^ (i128::from(i) <= *int)) + .unwrap_or(false) + }) + .collect()), + Comparison::Like => unreachable!(), + } + } + (Literal::Double(double), ValueArray::Double(doubles)) => { + match self.cmp { + Comparison::Eq => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d == *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ne => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d != *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Gt => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d > *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ge => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d >= *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Lt => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| negated ^ (d < *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Le => Ok(doubles + .iter() + .map(|maybe_double| { + maybe_double + .map(|d| 
negated ^ (d <= *double)) + .unwrap_or(false) + }) + .collect()), + Comparison::Like => unreachable!(), + } + } + (Literal::String(string), ValueArray::String(strings)) => { + let string = string.as_str(); + match self.cmp { + Comparison::Eq => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s == string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ne => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s != string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Gt => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s > string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ge => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s >= string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Lt => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s < string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Le => Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ (s <= string)) + .unwrap_or(false) + }) + .collect()), + Comparison::Like => { + let re = Regex::new(string)?; + Ok(strings + .iter() + .map(|maybe_string| { + maybe_string + .as_deref() + .map(|s| negated ^ re.is_match(s)) + .unwrap_or(false) + }) + .collect()) + } + } + } + (Literal::Boolean(boolean), ValueArray::Boolean(booleans)) => { + match self.cmp { + Comparison::Eq => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b == *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Ne => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b != *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Gt => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b & !(*boolean))) + .unwrap_or(false) + }) + .collect()), + Comparison::Ge => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b >= *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Lt => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (!b & *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Le => Ok(booleans + .iter() + .map(|maybe_boolean| { + maybe_boolean + .map(|b| negated ^ (b <= *boolean)) + .unwrap_or(false) + }) + .collect()), + Comparison::Like => unreachable!(), + } + } + (_, _) => { + let lit_type = match &self.value { + Literal::Uuid(_) => "UUID", + Literal::Duration(_) => "duration", + Literal::Timestamp(_) => "timestamp", + Literal::IpAddr(_) => "IP address", + Literal::Integer(_) => "integer", + Literal::Double(_) => "double", + Literal::String(_) => "string", + Literal::Boolean(_) => "boolean", + }; + anyhow::bail!( + "Cannot compare {} literal against values of type {}", + lit_type, + values.data_type(), + ) + } + } + } + + fn last_timestamp(&self) -> Option> { + if self.ident.as_str() == "timestamp" + && matches!( + self.cmp, + Comparison::Lt | Comparison::Le | Comparison::Eq + ) + { + let Literal::Timestamp(t) = self.value else { + return None; + }; + Some(t) + } else { + None + } + } +} + +#[cfg(test)] +mod tests { + use crate::oxql::ast::grammar::query_parser; + use crate::oxql::ast::logical_op::LogicalOp; + use crate::oxql::point::MetricType; + use crate::oxql::point::Points; + use crate::oxql::point::ValueArray; + use 
crate::oxql::point::Values; + use chrono::Utc; + use oximeter::FieldValue; + use std::time::Duration; + use uuid::Uuid; + + #[test] + fn test_atom_filter_double_points() { + let start_times = None; + let timestamps = + vec![Utc::now(), Utc::now() + Duration::from_secs(1000)]; + let values = vec![Values { + values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + let points = Points { start_times, timestamps, values }; + + // This filter should remove the first point based on its timestamp. + let t = Utc::now() + Duration::from_secs(10); + let q = + format!("filter timestamp > @{}", t.format("%Y-%m-%dT%H:%M:%S")); + let filter = query_parser::filter(q.as_str()).unwrap(); + let out = filter.filter_points(&points).unwrap(); + assert!(out.len() == 1); + assert_eq!( + out.values(0).unwrap().as_double().unwrap()[0], + points.values(0).unwrap().as_double().unwrap()[1], + ); + + // And this one the second point based on the datum + let filter = query_parser::filter("filter datum < 1.0").unwrap(); + let out = filter.filter_points(&points).unwrap(); + assert!(out.len() == 1); + assert_eq!( + out.values(0).unwrap().as_double().unwrap()[0], + points.values(0).unwrap().as_double().unwrap()[0], + ); + } + + #[test] + fn test_atom_filter_points_wrong_type() { + let start_times = None; + let timestamps = + vec![Utc::now(), Utc::now() + Duration::from_secs(1000)]; + let values = vec![Values { + values: ValueArray::Double(vec![Some(0.0), Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + let points = Points { start_times, timestamps, values }; + + let filter = + query_parser::filter("filter datum < \"something\"").unwrap(); + assert!(filter.filter_points(&points).is_err()); + } + + #[test] + fn test_all_ident_names() { + let f = query_parser::filter("filter timestamp > @now() && datum < 1") + .unwrap(); + assert_eq!( + f.ident_names(), + ["datum", "timestamp"].into_iter().collect() + ); + + let f = query_parser::filter( + "filter timestamp > @now() - 1m && timestamp < @now()", + ) + .unwrap(); + let idents = f.ident_names(); + assert_eq!(idents.len(), 1); + assert_eq!(idents.iter().next().unwrap(), &"timestamp"); + } + + #[test] + #[allow(clippy::impossible_comparisons)] + fn test_filter_field_logic() { + for op in [LogicalOp::And, LogicalOp::Or, LogicalOp::Xor] { + let s = format!("filter (x > 10) {op} (x < 0)"); + let filter = query_parser::filter(&s).unwrap(); + let cases = &[11, 10, 5, 0, -1]; + for &val in cases.iter() { + let pass = match op { + LogicalOp::And => (val > 10) && (val < 0), + LogicalOp::Or => (val > 10) || (val < 0), + LogicalOp::Xor => (val > 10) ^ (val < 0), + }; + let result = filter + .filter_field("x", &FieldValue::I32(val)) + .expect("Filter should be considered comparable") + .expect("Filter should apply to field of the same name"); + assert_eq!( + result, + pass, + "Filter '{}' should {} the value {}", + filter, + if pass { "pass" } else { "not pass" }, + val, + ); + } + + // This names a different field, so should not apply. + assert_eq!( + filter + .filter_field("y", &FieldValue::I32(11)) + .expect("Filter should be considered comparable"), + None, + "Filter should not apply, since it names a different field" + ); + + // These values should not be comparable at all, so we'll return an + // error. 
+ let incomparable = &[ + FieldValue::String("foo".into()), + FieldValue::Uuid(Uuid::new_v4()), + FieldValue::IpAddr("127.0.0.1".parse().unwrap()), + FieldValue::Bool(false), + ]; + for na in incomparable.iter() { + filter + .filter_field("x", na) + .expect_err("These should not be comparable at all"); + } + } + } + + #[test] + fn test_simplify_to_dnf() { + let cases = &[ + // Simple cases that should not be changed + ("a == 0", "a == 0"), + ("!(a == 0)", "!(a == 0)"), + ("a == 0 || b == 1", "a == 0 || b == 1"), + ("a == 0 && b == 1", "a == 0 && b == 1"), + + // Rewrite of XOR + ("a == 0 ^ b == 1", "(a == 0 && !(b == 1)) || (!(a == 0) && (b == 1))"), + + // Simple applications of distribution rules. + // + // Distribute conjunction over disjunction. + ("a == 0 && (b == 1 || c == 2)", "(a == 0 && b == 1) || (a == 0 && c == 2)"), + ("a == 0 && (b == 1 || c == 2 || d == 3)", "(a == 0 && b == 1) || (a == 0 && c == 2) || (a == 0 && d == 3)"), + ("a == 0 && (b == 1 || c == 2 || d == 3 || e == 4)", "(a == 0 && b == 1) || (a == 0 && c == 2) || (a == 0 && d == 3) || (a == 0 && e == 4)"), + ]; + for (input, expected) in cases.iter() { + let parsed_input = query_parser::filter_expr(input).unwrap(); + let simplified = parsed_input.simplify_to_dnf().unwrap(); + let parsed_expected = query_parser::filter_expr(expected).unwrap(); + assert_eq!( + simplified, + parsed_expected, + "\ninput expression: {}\nparsed to: {}\nsimplifed to: {}\nexpected: {}\n", + input, + parsed_input, + simplified, + expected, + ); + } + } + + #[test] + fn test_dnf_conversion_fails_on_extremely_long_expressions() { + let atom = "a == 0"; + let or_chain = std::iter::repeat(atom) + .take(super::EXPR_COMPLEXITY_ITERATIVE_LIMIT + 1) + .collect::>() + .join(" || "); + let expr = format!("{atom} && ({or_chain})"); + let parsed = query_parser::filter_expr(&expr).unwrap(); + assert!( + parsed.simplify_to_dnf().is_err(), + "Should fail for extremely long logical expressions" + ); + } + + #[test] + fn test_dnf_conversion_fails_on_extremely_deep_expressions() { + let atom = "a == 0"; + let mut expr = atom.to_string(); + for _ in 0..super::EXPR_COMPLEXITY_RECURSIVE_LIMIT + 1 { + expr = format!("{atom} && ({expr})"); + } + let parsed = query_parser::filter_expr(&expr).unwrap(); + assert!( + parsed.simplify_to_dnf().is_err(), + "Should fail for extremely deep logical expressions" + ); + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/get.rs b/oximeter/db/src/oxql/ast/table_ops/get.rs new file mode 100644 index 0000000000..f0ef22c2f6 --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/get.rs @@ -0,0 +1,15 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! AST node for the `get` table operation. + +// Copyright 2024 Oxide Computer Company + +use oximeter::TimeseriesName; + +/// An AST node like: `get foo:bar` +#[derive(Clone, Debug, PartialEq)] +pub struct Get { + pub timeseries_name: TimeseriesName, +} diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs new file mode 100644 index 0000000000..da2b1413db --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -0,0 +1,746 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
AST node for the `group_by` operation. + +// Copyright 2024 Oxide Computer Company + +use chrono::DateTime; +use chrono::Utc; + +use crate::oxql::ast::ident::Ident; +use crate::oxql::point::DataType; +use crate::oxql::point::MetricType; +use crate::oxql::point::ValueArray; +use crate::oxql::Error; +use crate::oxql::Table; +use crate::oxql::Timeseries; +use crate::TimeseriesKey; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; + +/// A table operation for grouping data by fields, apply a reducer to the +/// remaining. +#[derive(Clone, Debug, PartialEq)] +pub struct GroupBy { + pub identifiers: Vec, + pub reducer: Reducer, +} + +impl GroupBy { + // Apply the group_by table operation. + pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { + anyhow::ensure!( + tables.len() == 1, + "Group by operations require exactly one table", + ); + let table = &tables[0]; + anyhow::ensure!( + table.is_aligned(), + "Input tables to a `group_by` must be aligned" + ); + + match self.reducer { + Reducer::Mean => self.reduce_mean(table), + Reducer::Sum => self.reduce_sum(table), + } + } + + fn check_input_timeseries(input: &Timeseries) -> Result<(), Error> { + anyhow::ensure!(input.points.len() > 0, "Timeseries cannot be empty"); + + // For now, we can only apply this to 1-D timeseries. + anyhow::ensure!( + input.points.dimensionality() == 1, + "Group-by with multi-dimensional timeseries is not yet supported" + ); + let data_type = input.points.data_types().next().unwrap(); + anyhow::ensure!( + data_type.is_numeric(), + "Only numeric data types can be grouped, not {}", + data_type, + ); + let metric_type = input.points.metric_types().next().unwrap(); + anyhow::ensure!( + !matches!(metric_type, MetricType::Cumulative), + "Cumulative metric types cannot be grouped", + ); + Ok(()) + } + + // Reduce points in each group by summing. + fn reduce_sum(&self, table: &Table) -> Result, Error> { + assert_eq!(self.reducer, Reducer::Sum); + let mut output_table = Table::new(table.name()); + let kept_fields: Vec<_> = + self.identifiers.iter().map(Ident::as_str).collect(); + + for input in table.iter() { + Self::check_input_timeseries(input)?; + + // Throw away the fields in this timeseries that are not in the + // group_by list. + let dropped = input.copy_with_fields(&kept_fields)?; + let key = dropped.key(); + + // Fetch the existing timeseries, if one exists. If one does _not_ exist, + // we'll insert it as is, without converting. That's because we're + // just summing, not averaging. + match output_table.get_mut(key) { + Some(existing) => { + // No casting is done here, we're simply adding T + + // T -> T. + let new_values = dropped.points.values(0).unwrap(); + let existing_values = existing.points.values(0).unwrap(); + match (new_values, existing_values) { + ( + ValueArray::Double(new_values), + ValueArray::Double(existing_values), + ) => { + let new_timestamps = &dropped.points.timestamps; + + // We will be merging the new data with the + // existing, but borrow-checking limits the degree + // to which we can easily do this on the `existing` + // entry in the output table. Instead, aggregate + // everything into a copy of the expected data. + let mut timestamps = + existing.points.timestamps.clone(); + let mut values = existing_values.clone(); + + // Merge in the new values, so long as they actually + // exist. That is, we can just skip missing points + // in this round, since they do not contribute to + // the reduced value. 
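+                            // For example, summing new points [t1: 2.0, t3: 5.0]
+                            // into existing points [t0: 1.0, t1: 3.0] yields
+                            // [t0: 1.0, t1: 5.0, t3: 5.0]: values at matching
+                            // timestamps are added together, and timestamps seen
+                            // for the first time are inserted in sorted order.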
+ for (new_timestamp, new_value) in new_timestamps + .iter() + .zip(new_values) + .filter_map(|(timestamp, value)| { + if let Some(val) = value { + Some((*timestamp, *val)) + } else { + None + } + }) + { + // We're really doing binary search, on both the + // sample count map and the data array. They + // both must exist, or both not, or we've done + // our accounting incorrectly. + let maybe_index = + timestamps.binary_search(&new_timestamp); + match maybe_index { + Err(insert_at) => { + // This is a new timestamp. Insert it + // into the output timeseries. + timestamps + .insert(insert_at, new_timestamp); + values + .insert(insert_at, Some(new_value)); + } + Ok(ix) => { + // This is an existing + // timestamp, so we only need to + // add the new value. If the value + // didn't exist before, replace it. + *values[ix].get_or_insert(0.0) += + new_value; + } + } + } + + // Replace the existing output timeseries's + // timestamps and data arrays. + std::mem::swap( + &mut existing.points.timestamps, + &mut timestamps, + ); + existing + .points + .values_mut(0) + .unwrap() + .swap(ValueArray::Double(values)); + } + ( + ValueArray::Integer(new_values), + ValueArray::Integer(existing_values), + ) => { + let new_timestamps = &dropped.points.timestamps; + + // We will be merging the new data with the + // existing, but borrow-checking limits the degree + // to which we can easily do this on the `existing` + // entry in the output table. Instead, aggregate + // everything into a copy of the expected data. + let mut timestamps = + existing.points.timestamps.clone(); + let mut values = existing_values.clone(); + + // Merge in the new values, so long as they actually + // exist. That is, we can just skip missing points + // in this round, since they do not contribute to + // the reduced value. + for (new_timestamp, new_value) in new_timestamps + .iter() + .zip(new_values) + .filter_map(|(timestamp, value)| { + if let Some(val) = value { + Some((*timestamp, *val)) + } else { + None + } + }) + { + // We're really doing binary search, on both the + // sample count map and the data array. They + // both must exist, or both not, or we've done + // our accounting incorrectly. + let maybe_index = + timestamps.binary_search(&new_timestamp); + match maybe_index { + Err(insert_at) => { + // This is a new timestamp. Insert it + // into the output timeseries. + timestamps + .insert(insert_at, new_timestamp); + values + .insert(insert_at, Some(new_value)); + } + Ok(ix) => { + // This is an existing + // timestamp, so we only need to + // add the new value. If the value + // didn't exist before, replace it. + *values[ix].get_or_insert(0) += + new_value; + } + } + } + + // Replace the existing output timeseries's + // timestamps and data arrays. + std::mem::swap( + &mut existing.points.timestamps, + &mut timestamps, + ); + existing + .points + .values_mut(0) + .unwrap() + .swap(ValueArray::Integer(values)); + } + _ => unreachable!(), + } + } + None => output_table.insert(dropped)?, + } + } + Ok(vec![output_table]) + } + + // Reduce points in each group by averaging. + fn reduce_mean(&self, table: &Table) -> Result, Error> { + assert_eq!(self.reducer, Reducer::Mean); + let mut output_table = Table::new(table.name()); + let kept_fields: Vec<_> = + self.identifiers.iter().map(Ident::as_str).collect(); + + // Keep track of the number of values at each output timestamp, within + // each group. + // + // As we iterate through timeseries, we reduce in-group points, so long + // as they occur at the same timestamp. 
And while timeseries must all be + // aligned the same way, they need not actually have identical + // timestamps. So what we're producing on the output is data at the + // union of all the input timestamps. + // + // These arrays keeps the count of values at each time, and may be either + // expanded or have its values incremented. Note that they're all + // doubles because we will be reducing at the end by dividing the sum at + // each point by the counts. + let mut sample_counts_by_group: BTreeMap< + TimeseriesKey, + BTreeMap, f64>, + > = BTreeMap::new(); + + for input in table.iter() { + Self::check_input_timeseries(input)?; + + // Throw away the fields in this timeseries that are not in the + // group_by list. + let dropped = input.copy_with_fields(&kept_fields)?; + let key = dropped.key(); + + // Fetch the existing timeseries, if one exists. If one does _not_ exist, + // we'll insert the table with the data type converted to a double, + // since we're always averaging. + match output_table.get_mut(key) { + Some(existing) => { + // Cast the new points to doubles, since we'll be + // aggregating. + let new_points = + dropped.points.cast(&[DataType::Double])?; + let ValueArray::Double(new_values) = + new_points.values(0).unwrap() + else { + unreachable!(); + }; + let new_timestamps = &new_points.timestamps; + + // We will be merging the new data with the + // existing, but borrow-checking limits the degree + // to which we can easily do this on the `existing` + // entry in the output table. Instead, aggregate + // everything into a copy of the expected data. + let mut timestamps = existing.points.timestamps.clone(); + let mut values = existing + .points + .values(0) + .unwrap() + .as_double() + .unwrap() + .clone(); + + // Also fetch a reference to the existing counts by + // timestamp for this group. This should exist. + let counts = sample_counts_by_group.get_mut(&key).expect( + "Should already have some sample counts for this group", + ); + + // Merge in the new values, so long as they actually + // exist. That is, we can just skip missing points + // in this round, since they do not contribute to + // the reduced value. + for (new_timestamp, new_value) in new_timestamps + .iter() + .zip(new_values) + .filter_map(|(timestamp, value)| { + if let Some(val) = value { + Some((*timestamp, *val)) + } else { + None + } + }) + { + // We're really doing binary search, on both the + // sample count map and the data array. They + // both must exist, or both not, or we've done + // our accounting incorrectly. + let maybe_index = + timestamps.binary_search(&new_timestamp); + let count = counts.entry(new_timestamp); + match (count, maybe_index) { + (Entry::Vacant(entry), Err(insert_at)) => { + // This is a new timestamp. Insert it + // into the output timeseries, and count + // it. + timestamps.insert(insert_at, new_timestamp); + values.insert(insert_at, Some(new_value)); + entry.insert(1.0); + } + (Entry::Occupied(mut entry), Ok(ix)) => { + // This is an existing timestamp. _Add_ + // it into the output timeseries, and + // count it. Its timestamp already + // exists. If the value was previously None, + // replace it now. + *values[ix].get_or_insert(0.0) += new_value; + *entry.get_mut() += 1.0; + } + (_, _) => { + panic!( + "In-group counts and output \ + values must both exist or \ + both be missing" + ); + } + } + } + + // Replace the existing output timeseries's + // timestamps and data arrays. 
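+                    // At this point `values` holds running sums rather than
+                    // means; the division by the per-timestamp counts happens
+                    // once, below, after every input timeseries has been
+                    // folded into the group.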
+ std::mem::swap( + &mut existing.points.timestamps, + &mut timestamps, + ); + existing + .points + .values_mut(0) + .unwrap() + .swap(ValueArray::Double(values)); + } + None => { + // There were no previous points for this group. + // + // We'll cast to doubles, but _keep_ any missing samples + // (None) that were in there. Those will have a "count" of + // 0, so that we don't incorrectly over-divide in the case + // where there are both missing and non-missing samples. + let new_timeseries = dropped.cast(&[DataType::Double])?; + let values = new_timeseries + .points + .values(0) + .unwrap() + .as_double() + .unwrap(); + // Insert a count of 1.0 for each timestamp remaining, and + // _zero_ for any where the values are none. + let counts = new_timeseries + .points + .timestamps + .iter() + .zip(values) + .map(|(timestamp, maybe_value)| { + let count = f64::from(maybe_value.is_some()); + (*timestamp, count) + }) + .collect(); + let old = sample_counts_by_group.insert(key, counts); + assert!(old.is_none(), "Should not have counts entry for first timeseries in the group"); + output_table.insert(new_timeseries)?; + } + } + } + + // Since we're computing the mean, we need to divide each output value + // by the number of values that went into it. + for each in output_table.iter_mut() { + let counts = sample_counts_by_group + .get(&each.key()) + .expect("key should have been inserted earlier"); + let ValueArray::Double(values) = each.points.values_mut(0).unwrap() + else { + unreachable!(); + }; + for (val, count) in values.iter_mut().zip(counts.values()) { + if let Some(x) = val.as_mut() { + *x /= *count; + } + } + } + Ok(vec![output_table]) + } +} + +/// A reduction operation applied to unnamed columns during a group by. +#[derive(Clone, Copy, Debug, Default, PartialEq)] +pub enum Reducer { + #[default] + Mean, + Sum, +} + +#[cfg(test)] +mod tests { + use super::{GroupBy, Reducer}; + use crate::oxql::{ + ast::{ + ident::Ident, + table_ops::align::{Align, AlignmentMethod}, + }, + point::{DataType, MetricType, ValueArray}, + Table, Timeseries, + }; + use chrono::{DateTime, Utc}; + use oximeter::FieldValue; + use std::{collections::BTreeMap, time::Duration}; + + // Which timeseries the second data point is missing from. + #[derive(Clone, Copy, Debug)] + enum MissingValue { + Neither, + First, + Both, + } + + #[derive(Clone, Copy, Debug)] + struct TestConfig { + missing_value: MissingValue, + overlapping_times: bool, + reducer: Reducer, + } + + #[derive(Clone, Debug)] + #[allow(dead_code)] + struct TestTable { + aligned_table: Table, + grouped_table: Table, + query_end: DateTime, + timestamps: Vec>, + } + + impl TestTable { + fn new(cfg: TestConfig) -> Self { + let query_end = Utc::now(); + let mut timestamps = vec![ + query_end - Duration::from_secs(2), + query_end - Duration::from_secs(1), + query_end, + ]; + + // Create the first timeseries. + // + // This has two fields, one of which we'll group by. There are three + // timepoints of double values. 
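+            // The `int` field is shared by both timeseries and is the field
+            // named in the `group_by` below; the `name` field differs between
+            // them and is dropped by the grouping.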
+ let mut fields = BTreeMap::new(); + fields.insert("int".to_string(), FieldValue::U8(0)); + fields.insert( + "name".to_string(), + FieldValue::String("whodat".into()), + ); + let mut ts0 = Timeseries::new( + fields.into_iter(), + DataType::Double, + MetricType::Gauge, + ) + .unwrap(); + ts0.points.start_times = None; + ts0.points.timestamps = timestamps.clone(); + *ts0.points.values_mut(0).unwrap() = ValueArray::Double(vec![ + Some(1.0), + if matches!( + cfg.missing_value, + MissingValue::First | MissingValue::Both + ) { + None + } else { + Some(2.0) + }, + Some(3.0), + ]); + + // Create the second timeseries. + // + // This is nearly the same, and shares the same field value for the + // "int" field. When we group, we should reduce these two timeseries + // together. + let mut fields = BTreeMap::new(); + fields.insert("int".to_string(), FieldValue::U8(0)); + fields.insert( + "name".to_string(), + FieldValue::String("whodis".into()), + ); + let mut ts1 = Timeseries::new( + fields.into_iter(), + DataType::Double, + MetricType::Gauge, + ) + .unwrap(); + ts1.points.start_times = None; + + // Non-overlapping in this test setup means that we just shift one + // value from this array backward in time by one additional second. + // So we should have timestamps like: + // + // ts0: [ _, t0, t1, t2 ] + // ts1: [ t0, _, t1, t2 ] + // + // When reducing, t0 is never changed, and t1-t2 are always reduced + // together, if the values are present. + ts1.points.timestamps = if cfg.overlapping_times { + timestamps.clone() + } else { + let mut new_timestamps = timestamps.clone(); + new_timestamps[0] = new_timestamps[0] - Duration::from_secs(1); + timestamps.insert(0, new_timestamps[0]); + new_timestamps + }; + *ts1.points.values_mut(0).unwrap() = ValueArray::Double(vec![ + Some(2.0), + if matches!(cfg.missing_value, MissingValue::Both) { + None + } else { + Some(3.0) + }, + Some(4.0), + ]); + + let mut table = Table::new("foo"); + table.insert(ts0).unwrap(); + table.insert(ts1).unwrap(); + + // Align the actual table, based on the input, and apply the right + // group-by + let align = Align { + method: AlignmentMethod::MeanWithin, + period: Duration::from_secs(1), + }; + let aligned_tables = align.apply(&[table], &query_end).unwrap(); + let group_by = GroupBy { + identifiers: vec![Ident("int".into())], + reducer: cfg.reducer, + }; + let grouped_tables = group_by.apply(&aligned_tables).unwrap(); + assert_eq!( + grouped_tables.len(), + 1, + "Group by should produce exaclty 1 table" + ); + let grouped_table = grouped_tables.into_iter().next().unwrap(); + let aligned_table = aligned_tables.into_iter().next().unwrap(); + + let test = + Self { timestamps, aligned_table, grouped_table, query_end }; + + // These checks are all valid for grouping in general, independent + // of the exact missing values or reducer. 
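+            // In particular: exactly one output timeseries remains, it
+            // carries only the grouped-by field, and its 1-D points (with no
+            // start times) sit at the union of the input timestamps.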
+ assert_eq!( + test.grouped_table.len(), + 1, + "Should have grouped both timeseries down to 1" + ); + let grouped_timeseries = test.grouped_table.iter().next().unwrap(); + assert_eq!( + grouped_timeseries.fields.len(), + 1, + "Should have only one grouped-by field" + ); + assert_eq!( + grouped_timeseries.fields.get("int").unwrap(), + &FieldValue::U8(0), + "Grouped-by field was not maintained correctly" + ); + let points = &grouped_timeseries.points; + assert_eq!(points.dimensionality(), 1, "Points should still be 1D"); + assert_eq!( + points.start_times, None, + "Points should not have start times" + ); + assert_eq!( + points.timestamps, test.timestamps, + "Points do not have correct timestamps" + ); + + test + } + } + + #[test] + fn test_group_by() { + const TEST_CASES: &[(TestConfig, &[Option])] = &[ + ( + TestConfig { + missing_value: MissingValue::Neither, + overlapping_times: true, + reducer: Reducer::Mean, + }, + // This is the most basic case, where we simply average all the + // values together. They exactly line up and none are missing. + &[Some(1.5), Some(2.5), Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::Neither, + overlapping_times: true, + reducer: Reducer::Sum, + }, + // This is the next-simplest case, where we simply sum all the + // values together. They exactly line up and none are missing. + &[Some(3.0), Some(5.0), Some(7.0)], + ), + ( + TestConfig { + missing_value: MissingValue::Neither, + overlapping_times: false, + reducer: Reducer::Mean, + }, + // In this case, the timestamps don't all overlap, though some + // of them do. In particular, the arrays are shifted by one + // timestamp relative to each other, so there are 2 extra + // values. The one value that does overlap is averaged, and the + // other two are unchanged. + &[Some(2.0), Some(1.0), Some(2.5), Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::Neither, + overlapping_times: false, + reducer: Reducer::Sum, + }, + // Here, we should have 4 output samples because the timestamps + // don't overlap. The second input timeseries has its first + // point shifted back by one second. That means the first two + // values are just from one array (no reduction), while the next + // two are reduced as usual. + &[Some(2.0), Some(1.0), Some(5.0), Some(7.0)], + ), + ( + TestConfig { + missing_value: MissingValue::First, + overlapping_times: true, + reducer: Reducer::Mean, + }, + // In this case, we have a missing value for the middle + // timestamp of the first input timeseries. That means we should + // still have 3 output samples, but the second point isn't an + // aggregation, it's just the input value, from the second + // timeseries. + &[Some(1.5), Some(3.0), Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::First, + overlapping_times: true, + reducer: Reducer::Sum, + }, + // Same as above, but we're summing, not averaging. + &[Some(3.0), Some(3.0), Some(7.0)], + ), + ( + TestConfig { + missing_value: MissingValue::First, + overlapping_times: false, + reducer: Reducer::Mean, + }, + // We need 4 output points again here, but we also have a + // missing value. So we'll take the first value from the second + // timeseries; the second from the first; the second from the + // second directly, since its corresponding point is missing in + // the first, and then the average of both in the last point. 
+ &[Some(2.0), Some(1.0), Some(3.0), Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::First, + overlapping_times: false, + reducer: Reducer::Sum, + }, + // Same as above, but summing, instead of averaging. + &[Some(2.0), Some(1.0), Some(3.0), Some(7.0)], + ), + ( + TestConfig { + missing_value: MissingValue::Both, + overlapping_times: true, + reducer: Reducer::Mean, + }, + // In this case, the 2nd timepoint is missing from both + // timeseries. We should preserve that as a missing value in the + // output. + &[Some(1.5), None, Some(3.5)], + ), + ( + TestConfig { + missing_value: MissingValue::Both, + overlapping_times: true, + reducer: Reducer::Sum, + }, + // Same as above, but summing instead of averaging. + &[Some(3.0), None, Some(7.0)], + ), + ]; + for (test_config, expected_data) in TEST_CASES.iter() { + let test_table = TestTable::new(*test_config); + let grouped_timeseries = + test_table.grouped_table.iter().next().unwrap(); + let points = &grouped_timeseries.points; + let values = points.values(0).unwrap().as_double().unwrap(); + assert_eq!( + values, expected_data, + "Timeseries values were not grouped correctly, \ + test_config = {test_config:?}" + ); + } + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/join.rs b/oximeter/db/src/oxql/ast/table_ops/join.rs new file mode 100644 index 0000000000..3c150a4acf --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/join.rs @@ -0,0 +1,385 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node describing join table operations. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::point::MetricType; +use crate::oxql::point::Points; +use crate::oxql::point::Values; +use crate::oxql::Error; +use crate::oxql::Table; +use anyhow::Context; + +/// An AST node for a natural inner join. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Join; +impl Join { + // Apply the group_by table operation. + pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { + anyhow::ensure!( + tables.len() > 1, + "Join operations require more than one table", + ); + let mut tables = tables.iter().cloned().enumerate(); + let (_, mut out) = tables.next().unwrap(); + anyhow::ensure!( + out.is_aligned(), + "Input tables for a join operation must be aligned" + ); + let metric_types = out + .iter() + .next() + .context("Input tables for a join operation may not be empty")? + .points + .metric_types() + .collect::>(); + ensure_all_metric_types(metric_types.iter().copied())?; + let alignment = out.alignment(); + assert!(alignment.is_some()); + + for (i, next_table) in tables { + anyhow::ensure!( + next_table.alignment() == alignment, + "All tables to a join operator must have the same \ + alignment. 
Expected alignment: {:?}, found a table \ + aligned with: {:?}", + alignment.unwrap(), + next_table.alignment(), + ); + let name = next_table.name().to_string(); + for next_timeseries in next_table.into_iter() { + let new_types = + next_timeseries.points.metric_types().collect::>(); + ensure_all_metric_types(new_types.iter().copied())?; + anyhow::ensure!( + metric_types == new_types, + "Input tables do not all share the same metric types" + ); + + let key = next_timeseries.key(); + let Some(timeseries) = out.iter_mut().find(|t| t.key() == key) + else { + anyhow::bail!( + "Join failed, input table {} does not \ + contain a timeseries with key {}", + i, + key, + ); + }; + + // Joining the timeseries is done by stacking together the + // values that have the same timestamp. + // + // If two value arrays have different timestamps, which is + // possible if they're derived from two separately-aligned + // tables, then we need to correctly ensure that: + // + // 1. They have the same alignment, and + // 2. We merge the timepoints rather than simply creating a + // ragged array of points. + timeseries.points = inner_join_point_arrays( + ×eries.points, + &next_timeseries.points, + )?; + } + // We'll also update the name, to indicate the joined data. + out.name.push(','); + out.name.push_str(&name); + } + Ok(vec![out]) + } +} + +// Given two arrays of points, stack them together at matching timepoints. +// +// For time points in either which do not have a corresponding point in the +// other, the entire time point is elided. +fn inner_join_point_arrays( + left: &Points, + right: &Points, +) -> Result { + // Create an output array with roughly the right capacity, and double the + // number of dimensions. We're trying to stack output value arrays together + // along the dimension axis. + let data_types = + left.data_types().chain(right.data_types()).collect::>(); + let metric_types = + left.metric_types().chain(right.metric_types()).collect::>(); + let mut out = Points::with_capacity( + left.len().max(right.len()), + data_types.iter().copied(), + metric_types.iter().copied(), + )?; + + // Iterate through each array until one is exhausted. We're only inserting + // values from both arrays where the timestamps actually match, since this + // is an inner join. We may want to insert missing values where timestamps + // do not match on either side, when we support an outer join of some kind. + let n_left_dim = left.values.len(); + let mut left_ix = 0; + let mut right_ix = 0; + while left_ix < left.len() && right_ix < right.len() { + let left_timestamp = left.timestamps[left_ix]; + let right_timestamp = right.timestamps[right_ix]; + if left_timestamp == right_timestamp { + out.timestamps.push(left_timestamp); + push_concrete_values( + &mut out.values[..n_left_dim], + &left.values, + left_ix, + ); + push_concrete_values( + &mut out.values[n_left_dim..], + &right.values, + right_ix, + ); + left_ix += 1; + right_ix += 1; + } else if left_timestamp < right_timestamp { + left_ix += 1; + } else { + right_ix += 1; + } + } + Ok(out) +} + +// Push the `i`th value from each dimension of `from` onto `to`. 
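+//
+// Both slices must have the same length and matching data types at each
+// dimension, and the function panics otherwise. Scalar values are copied,
+// while strings and distributions are cloned.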
+fn push_concrete_values(to: &mut [Values], from: &[Values], i: usize) { + assert_eq!(to.len(), from.len()); + for (output, input) in to.iter_mut().zip(from.iter()) { + let input_array = &input.values; + let output_array = &mut output.values; + assert_eq!(input_array.data_type(), output_array.data_type()); + if let Ok(ints) = input_array.as_integer() { + output_array.as_integer_mut().unwrap().push(ints[i]); + continue; + } + if let Ok(doubles) = input_array.as_double() { + output_array.as_double_mut().unwrap().push(doubles[i]); + continue; + } + if let Ok(bools) = input_array.as_boolean() { + output_array.as_boolean_mut().unwrap().push(bools[i]); + continue; + } + if let Ok(strings) = input_array.as_string() { + output_array.as_string_mut().unwrap().push(strings[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_integer_distribution() { + output_array + .as_integer_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + if let Ok(dists) = input_array.as_double_distribution() { + output_array + .as_double_distribution_mut() + .unwrap() + .push(dists[i].clone()); + continue; + } + unreachable!(); + } +} + +// Return an error if any metric types are not suitable for joining. +fn ensure_all_metric_types( + mut metric_types: impl ExactSizeIterator, +) -> Result<(), Error> { + anyhow::ensure!( + metric_types + .all(|mt| matches!(mt, MetricType::Gauge | MetricType::Delta)), + "Join operation requires timeseries with gauge or \ + delta metric types", + ); + Ok(()) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::oxql::point::DataType; + use crate::oxql::point::Datum; + use crate::oxql::point::ValueArray; + use chrono::Utc; + use std::time::Duration; + + #[test] + fn test_push_concrete_values() { + let mut points = Points::with_capacity( + 2, + [DataType::Integer, DataType::Double].into_iter(), + [MetricType::Gauge, MetricType::Gauge].into_iter(), + ) + .unwrap(); + + // Push a concrete value for the integer dimension + let from_ints = vec![Values { + values: ValueArray::Integer(vec![Some(1)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[..1], &from_ints, 0); + + // And another for the double dimension. + let from_doubles = vec![Values { + values: ValueArray::Double(vec![Some(2.0)]), + metric_type: MetricType::Gauge, + }]; + push_concrete_values(&mut points.values[1..], &from_doubles, 0); + + assert_eq!( + points.dimensionality(), + 2, + "Points should have 2 dimensions", + ); + let ints = points.values[0].values.as_integer().unwrap(); + assert_eq!( + ints.len(), + 1, + "Should have pushed one point in the first dimension" + ); + assert_eq!( + ints[0], + Some(1), + "Should have pushed 1 onto the first dimension" + ); + let doubles = points.values[1].values.as_double().unwrap(); + assert_eq!( + doubles.len(), + 1, + "Should have pushed one point in the second dimension" + ); + assert_eq!( + doubles[0], + Some(2.0), + "Should have pushed 2.0 onto the second dimension" + ); + } + + #[test] + fn test_join_point_arrays() { + let now = Utc::now(); + + // Create a set of integer points to join with. + // + // This will have two timestamps, one of which will match the points + // below that are merged in. 
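+        //
+        // Concretely, the timestamps here are now-3s, now-2s, and now; the
+        // first and last of these also appear in the double-valued points
+        // below, so only those two rows survive the inner join.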
+ let int_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(2), + now, + ], + values: vec![Values { + values: ValueArray::Integer(vec![Some(1), Some(2), Some(3)]), + metric_type: MetricType::Gauge, + }], + }; + + // Create an additional set of double points. + // + // This also has two timepoints, one of which matches with the above, + // and one of which does not. + let double_points = Points { + start_times: None, + timestamps: vec![ + now - Duration::from_secs(3), + now - Duration::from_secs(1), + now, + ], + values: vec![Values { + values: ValueArray::Double(vec![ + Some(4.0), + Some(5.0), + Some(6.0), + ]), + metric_type: MetricType::Gauge, + }], + }; + + // Merge the arrays. + let merged = + inner_join_point_arrays(&int_points, &double_points).unwrap(); + + // Basic checks that we merged in the right values and have the right + // types and dimensions. + assert_eq!( + merged.dimensionality(), + 2, + "Should have appended the dimensions from each input array" + ); + assert_eq!(merged.len(), 2, "Should have merged two common points",); + assert_eq!( + merged.data_types().collect::>(), + &[DataType::Integer, DataType::Double], + "Should have combined the data types of the input arrays" + ); + assert_eq!( + merged.metric_types().collect::>(), + &[MetricType::Gauge, MetricType::Gauge], + "Should have combined the metric types of the input arrays" + ); + + // Check the actual values of the array. + let mut points = merged.iter_points(); + + // The first and last timepoint overlapped between the two arrays, so we + // should have both of them as concrete samples. + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[0], + "Should have taken the first input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&1)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&4.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And the next point + let pt = points.next().unwrap(); + assert_eq!(pt.start_time, None, "Gauges don't have a start time"); + assert_eq!( + *pt.timestamp, int_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + assert_eq!( + *pt.timestamp, double_points.timestamps[2], + "Should have taken the input timestamp from both arrays", + ); + let values = pt.values; + assert_eq!(values.len(), 2, "Should have 2 dimensions"); + assert_eq!( + &values[0], + &(Datum::Integer(Some(&3)), MetricType::Gauge), + "Should have pulled value from first integer array." + ); + assert_eq!( + &values[1], + &(Datum::Double(Some(&6.0)), MetricType::Gauge), + "Should have pulled value from second double array." + ); + + // And there should be no other values. 
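+        // The middle timestamps (now-2s and now-1s) each appear in only one
+        // of the two inputs, so the inner join drops them entirely.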
+ assert!(points.next().is_none(), "There should be no more points"); + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/mod.rs b/oximeter/db/src/oxql/ast/table_ops/mod.rs new file mode 100644 index 0000000000..d9930962f8 --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/mod.rs @@ -0,0 +1,76 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! AST nodes for table operations. + +// Copyright 2024 Oxide Computer Company + +pub mod align; +pub mod filter; +pub mod get; +pub mod group_by; +pub mod join; + +use self::align::Align; +use self::filter::Filter; +use self::group_by::GroupBy; +use self::join::Join; +use crate::oxql::ast::Query; +use crate::oxql::Error; +use crate::oxql::Table; +use chrono::DateTime; +use chrono::Utc; +use oximeter::TimeseriesName; + +/// A basic table operation, the atoms of an OxQL query. +#[derive(Clone, Debug, PartialEq)] +pub enum BasicTableOp { + Get(TimeseriesName), + Filter(Filter), + GroupBy(GroupBy), + Join(Join), + Align(Align), +} + +impl BasicTableOp { + pub(crate) fn apply( + &self, + tables: &[Table], + query_end: &DateTime, + ) -> Result, Error> { + match self { + BasicTableOp::Get(_) => panic!("Should not apply get table ops"), + BasicTableOp::Filter(f) => f.apply(tables), + BasicTableOp::GroupBy(g) => g.apply(tables), + BasicTableOp::Join(j) => j.apply(tables), + BasicTableOp::Align(a) => a.apply(tables, query_end), + } + } +} + +/// A grouped table operation is a subquery in OxQL. +#[derive(Clone, Debug, PartialEq)] +pub struct GroupedTableOp { + pub ops: Vec, +} + +/// Any kind of OxQL table operation. +#[derive(Clone, Debug, PartialEq)] +pub enum TableOp { + Basic(BasicTableOp), + Grouped(GroupedTableOp), +} + +impl TableOp { + pub(crate) fn apply( + &self, + tables: &[Table], + query_end: &DateTime, + ) -> Result, Error> { + let TableOp::Basic(basic) = self else { + panic!("Should not apply grouped table ops"); + }; + basic.apply(tables, query_end) + } +} diff --git a/oximeter/db/src/oxql/mod.rs b/oximeter/db/src/oxql/mod.rs new file mode 100644 index 0000000000..b93d75b859 --- /dev/null +++ b/oximeter/db/src/oxql/mod.rs @@ -0,0 +1,39 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! The Oximeter Query Language, OxQL. + +// Copyright 2024 Oxide Computer Company + +use peg::error::ParseError as PegError; +use peg::str::LineCol; + +pub mod ast; +pub mod point; +pub mod query; +pub mod table; + +pub use self::query::Query; +pub use self::table::Table; +pub use self::table::Timeseries; +pub use anyhow::Error; + +// Format a PEG parsing error into a nice anyhow error. +fn fmt_parse_error(source: &str, err: PegError) -> Error { + use std::fmt::Write; + let mut out = + format!("Error at {}:{}", err.location.line, err.location.column); + const CONTEXT: usize = 24; + let start = err.location.offset.saturating_sub(CONTEXT); + let end = err.location.offset.saturating_add(CONTEXT).min(source.len()); + if let Some(context) = source.get(start..end) { + let prefix_len = out.len() + 2; + writeln!(out, ": .. 
{context} ..").unwrap(); + let left_pad = err.location.offset - start + 3 + prefix_len; + let right_pad = end - err.location.offset + 3 + prefix_len; + writeln!(out, "{:right_pad$}", ' ', ' ').unwrap(); + } + writeln!(out, "Expected: {}", err).unwrap(); + anyhow::anyhow!(out) +} diff --git a/oximeter/db/src/oxql/point.rs b/oximeter/db/src/oxql/point.rs new file mode 100644 index 0000000000..e12214aaf0 --- /dev/null +++ b/oximeter/db/src/oxql/point.rs @@ -0,0 +1,2040 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Definition of data points for OxQL. + +// Copyright 2024 Oxide Computer Company + +use super::Error; +use anyhow::Context; +use chrono::DateTime; +use chrono::Utc; +use num::ToPrimitive; +use oximeter::DatumType; +use oximeter::Measurement; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; + +/// The type of each individual data point's value in a timeseries. +#[derive( + Clone, Copy, Debug, Deserialize, Hash, JsonSchema, PartialEq, Serialize, +)] +#[serde(rename_all = "snake_case")] +pub enum DataType { + /// A 64-bit integer. + Integer, + /// A 64-bit float. + Double, + /// A boolean. + Boolean, + /// A string. + String, + /// A distribution, a sequence of integer bins and counts. + IntegerDistribution, + /// A distribution, a sequence of double bins and integer counts. + DoubleDistribution, +} + +impl DataType { + /// True if this is a numeric scalar type. + pub fn is_numeric(&self) -> bool { + matches!(self, DataType::Integer | DataType::Double) + } +} + +impl TryFrom for DataType { + type Error = Error; + + fn try_from(datum_type: DatumType) -> Result { + let data_type = match datum_type { + DatumType::Bool => DataType::Boolean, + DatumType::I8 + | DatumType::U8 + | DatumType::I16 + | DatumType::U16 + | DatumType::I32 + | DatumType::U32 + | DatumType::I64 + | DatumType::U64 + | DatumType::CumulativeI64 + | DatumType::CumulativeU64 => DataType::Integer, + DatumType::F32 + | DatumType::F64 + | DatumType::CumulativeF32 + | DatumType::CumulativeF64 => DataType::Double, + DatumType::String => DataType::String, + DatumType::HistogramI8 + | DatumType::HistogramU8 + | DatumType::HistogramI16 + | DatumType::HistogramU16 + | DatumType::HistogramI32 + | DatumType::HistogramU32 + | DatumType::HistogramI64 + | DatumType::HistogramU64 => DataType::IntegerDistribution, + DatumType::HistogramF32 | DatumType::HistogramF64 => { + DataType::DoubleDistribution + } + DatumType::Bytes => { + anyhow::bail!("Unsupported datum type: {}", datum_type) + } + }; + Ok(data_type) + } +} + +impl fmt::Display for DataType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +/// The type of the metric itself, indicating what its values represent. +#[derive( + Clone, Copy, Debug, Deserialize, Hash, JsonSchema, PartialEq, Serialize, +)] +#[serde(rename_all = "snake_case")] +pub enum MetricType { + /// The value represents an instantaneous measurement in time. + Gauge, + /// The value represents a difference between two points in time. + Delta, + /// The value represents an accumulation between two points in time. + Cumulative, +} + +impl fmt::Display for MetricType { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{:?}", self) + } +} + +// A converted oximeter datum, used internally. 
+// +// This is used when computing deltas between cumulative measurements, and so +// only represents the possible cumulative types. +#[derive(Clone, Debug, PartialEq)] +enum CumulativeDatum { + Integer(i64), + Double(f64), + IntegerDistribution(Distribution), + DoubleDistribution(Distribution), +} + +impl CumulativeDatum { + // Construct a datum from a cumulative type, failing if the measurement is + // not cumulative. + fn from_cumulative(meas: &Measurement) -> Result { + let datum = match meas.datum() { + oximeter::Datum::CumulativeI64(val) => { + CumulativeDatum::Integer(val.value()) + } + oximeter::Datum::CumulativeU64(val) => { + let int = val + .value() + .try_into() + .context("Overflow converting u64 to i64")?; + CumulativeDatum::Integer(int) + } + oximeter::Datum::CumulativeF32(val) => { + CumulativeDatum::Double(val.value().into()) + } + oximeter::Datum::CumulativeF64(val) => { + CumulativeDatum::Double(val.value()) + } + oximeter::Datum::HistogramI8(hist) => hist.into(), + oximeter::Datum::HistogramU8(hist) => hist.into(), + oximeter::Datum::HistogramI16(hist) => hist.into(), + oximeter::Datum::HistogramU16(hist) => hist.into(), + oximeter::Datum::HistogramI32(hist) => hist.into(), + oximeter::Datum::HistogramU32(hist) => hist.into(), + oximeter::Datum::HistogramI64(hist) => hist.into(), + oximeter::Datum::HistogramU64(hist) => hist.try_into()?, + oximeter::Datum::HistogramF32(hist) => hist.into(), + oximeter::Datum::HistogramF64(hist) => hist.into(), + other => anyhow::bail!( + "Input datum of type {} is not cumulative", + other.datum_type(), + ), + }; + Ok(datum) + } +} + +/// A single list of values, for one dimension of a timeseries. +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct Values { + // The data values. + pub(super) values: ValueArray, + // The type of this metric. + pub(super) metric_type: MetricType, +} + +impl Values { + // Construct an empty array of values to hold the provided types. + fn with_capacity( + size: usize, + data_type: DataType, + metric_type: MetricType, + ) -> Self { + Self { values: ValueArray::with_capacity(size, data_type), metric_type } + } + + fn len(&self) -> usize { + self.values.len() + } +} + +/// Reference type describing a single point in a `Points` array. +/// +/// The `Points` type is column-major, in that the timestamps and each data +/// value (one for each dimension) are stored in separate arrays, of the same +/// length. This type holds references to the relevant items in each array that +/// constitutes a single point. +#[derive(Clone, Debug, PartialEq)] +pub struct Point<'a> { + /// The start time of this point, if any. + pub start_time: Option<&'a DateTime>, + /// The timestamp for this point. + pub timestamp: &'a DateTime, + /// One datum and its metric type, for each dimension in the point. + /// + /// The datum itself is optional, and will be `None` if the point is missing + /// a value at the corresponding point and dimension. 
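+    ///
+    /// For instance, after joining an integer gauge with a double gauge,
+    /// this holds two entries: `(Datum::Integer(..), MetricType::Gauge)` and
+    /// `(Datum::Double(..), MetricType::Gauge)`.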
+ pub values: Vec<(Datum<'a>, MetricType)>, +} + +impl<'a> fmt::Display for Point<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + const TIMESTAMP_FMT: &str = "%Y-%m-%d %H:%M:%S.%f"; + match &self.start_time { + Some(start_time) => write!( + f, + "[{}, {}]: ", + start_time.format(TIMESTAMP_FMT), + self.timestamp.format(TIMESTAMP_FMT) + )?, + None => write!(f, "{}: ", self.timestamp.format(TIMESTAMP_FMT))?, + } + let values = self + .values + .iter() + .map(|(datum, _)| datum.to_string()) + .collect::>() + .join(","); + write!(f, "[{}]", values) + } +} + +impl<'a> Point<'a> { + /// Return the dimensionality of this point. + pub fn dimensionality(&self) -> usize { + self.values.len() + } +} + +/// A reference to a single datum of a multidimensional value. +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum Datum<'a> { + Boolean(Option), + Integer(Option<&'a i64>), + Double(Option<&'a f64>), + String(Option<&'a str>), + IntegerDistribution(Option<&'a Distribution>), + DoubleDistribution(Option<&'a Distribution>), +} + +impl<'a> fmt::Display for Datum<'a> { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Datum::Boolean(Some(inner)) => write!(f, "{}", inner), + Datum::Integer(Some(inner)) => write!(f, "{}", inner), + Datum::Double(Some(inner)) => write!(f, "{}", inner), + Datum::String(Some(inner)) => write!(f, "{}", inner), + Datum::IntegerDistribution(Some(inner)) => write!(f, "{}", inner), + Datum::DoubleDistribution(Some(inner)) => write!(f, "{}", inner), + Datum::Boolean(None) + | Datum::Integer(None) + | Datum::Double(None) + | Datum::String(None) + | Datum::IntegerDistribution(None) + | Datum::DoubleDistribution(None) => { + write!(f, "-") + } + } + } +} + +/// Timepoints and values for one timeseries. +// +// Invariants: +// +// The start_time and timestamp arrays must be the same length, or start_times +// must be None. +// +// The length of timestamps (and possibly start_times) must be the same as the +// length of _each element_ of the `values` array. That is, there are as many +// timestamps as data values. +// +// The length of `values` is the number of dimensions, and is always at least 1. +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +pub struct Points { + // The start time points for cumulative or delta metrics. + pub(super) start_times: Option>>, + // The timestamp of each value. + pub(super) timestamps: Vec>, + // The array of data values, one for each dimension. + pub(super) values: Vec, +} + +impl Points { + /// Construct an empty array of points to hold data of the provided type. + pub fn empty(data_type: DataType, metric_type: MetricType) -> Self { + Self::with_capacity( + 0, + std::iter::once(data_type), + std::iter::once(metric_type), + ) + .unwrap() + } + + // Return a mutable reference to the value array of the specified dimension, if any. + pub(super) fn values_mut(&mut self, dim: usize) -> Option<&mut ValueArray> { + self.values.get_mut(dim).map(|val| &mut val.values) + } + + /// Return a reference to the value array of the specified dimension, if any. + pub fn values(&self, dim: usize) -> Option<&ValueArray> { + self.values.get(dim).map(|val| &val.values) + } + + /// Return the dimensionality of the data points, i.e., the number of values + /// at each timestamp. + pub fn dimensionality(&self) -> usize { + self.values.len() + } + + /// Return the number of points in self. 
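+    ///
+    /// By the invariants above, this equals both the number of timestamps
+    /// and the length of every value dimension.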
+ pub fn len(&self) -> usize { + self.values[0].len() + } + + /// Construct an empty array of points to hold size data points of the + /// provided types. + /// + /// The type information may have length > 1 to reserve space for + /// multi-dimensional values. + pub fn with_capacity( + size: usize, + data_types: D, + metric_types: M, + ) -> Result + where + D: ExactSizeIterator, + M: ExactSizeIterator, + { + anyhow::ensure!( + data_types.len() == metric_types.len(), + "Data and metric type iterators must have the same length", + ); + let timestamps = Vec::with_capacity(size); + let mut start_times = None; + let mut values = Vec::with_capacity(data_types.len()); + for (data_type, metric_type) in data_types.zip(metric_types) { + if matches!(metric_type, MetricType::Delta | MetricType::Cumulative) + && start_times.is_none() + { + start_times.replace(Vec::with_capacity(size)); + } + values.push(Values::with_capacity(size, data_type, metric_type)); + } + Ok(Self { start_times, timestamps, values }) + } + + /// Return the data types of self. + pub fn data_types(&self) -> impl ExactSizeIterator + '_ { + self.values.iter().map(|val| val.values.data_type()) + } + + /// Return the metric types of self. + pub fn metric_types( + &self, + ) -> impl ExactSizeIterator + '_ { + self.values.iter().map(|val| val.metric_type) + } + + /// Return the single metric type of all values in self, it they are all the + /// same. + pub fn metric_type(&self) -> Option { + let mut types = self.metric_types(); + let Some(first_type) = types.next() else { + unreachable!(); + }; + if types.all(|ty| ty == first_type) { + Some(first_type) + } else { + None + } + } + + /// Construct a list of gauge points from a list of gauge measurements. + /// + /// An error is returned if the provided input measurements are not gauges, + /// or do not all have the same datum type. + pub fn gauge_from_gauge( + measurements: &[Measurement], + ) -> Result { + let Some(first) = measurements.first() else { + anyhow::bail!( + "Cannot construct points from empty measurements array" + ); + }; + let datum_type = first.datum_type(); + anyhow::ensure!( + !datum_type.is_cumulative(), + "Measurements are not gauges" + ); + let data_type = DataType::try_from(datum_type)?; + let mut self_ = Self::with_capacity( + measurements.len(), + std::iter::once(data_type), + std::iter::once(MetricType::Gauge), + )?; + + // Since we're directly pushing gauges, each measurement is independent + // of the others. Simply translate types and push the data. + for measurement in measurements.iter() { + anyhow::ensure!( + measurement.datum_type() == datum_type, + "Measurements must all have the same datum type", + ); + self_ + .values_mut(0) + .unwrap() + .push_value_from_datum(measurement.datum())?; + self_.timestamps.push(measurement.timestamp()); + } + Ok(self_) + } + + /// Construct a list of delta points from a list of cumulative measurements. + /// + /// An error is returned if the provided measurements are not of the same + /// type or not cumulative. 
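+    ///
+    /// As an illustration, cumulative readings of 1, 4, and 9 that share one
+    /// start time become deltas of 1, 3, and 5, where each subsequent
+    /// delta's start time is the timestamp of the preceding sample.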
+ pub fn delta_from_cumulative( + measurements: &[Measurement], + ) -> Result { + let mut iter = measurements.iter(); + let Some(first) = iter.next() else { + anyhow::bail!( + "Cannot construct points from empty measurements array" + ); + }; + let datum_type = first.datum_type(); + anyhow::ensure!( + datum_type.is_cumulative(), + "Measurements are not cumulative", + ); + let data_type = DataType::try_from(datum_type)?; + let mut self_ = Self::with_capacity( + measurements.len(), + std::iter::once(data_type), + std::iter::once(MetricType::Delta), + )?; + + // Construct the first point, which directly uses the start / end time + // of the first measurement itself. + self_.values_mut(0).unwrap().push_value_from_datum(first.datum())?; + self_.start_times.as_mut().unwrap().push(first.start_time().unwrap()); + self_.timestamps.push(first.timestamp()); + + // We need to keep track of the last cumulative measurement that's not + // _missing_, to compute successive differences between neighboring + // points. Note that we only need the datum from the measurement, + // because even missing samples have valid timestamp information. So we + // can always generate the timestamp for each delta, even if the datum + // is missing. + let mut last_datum = if first.is_missing() { + None + } else { + // Safety: We're confirming above the measurement is cumulative, and + // in this block if the datum is missing. So we know this conversion + // should succeed. + Some(CumulativeDatum::from_cumulative(first).unwrap()) + }; + + // We also need to keep track of the start time of this "epoch", periods + // where the cumulative data has the same start time. If there are jumps + // forward in this, and thus gaps in the records, we need to update the + // start_time of the epoch and also the last datum. + let mut epoch_start_time = first.start_time().unwrap(); + + // Push the remaining values. + for measurement in iter { + anyhow::ensure!( + measurement.datum_type() == datum_type, + "Measurements must all have the same datum type" + ); + + // For the time ranges we must have either: + // + // 1. Either the start time of the _first_ and new points must be + // equal, with the timestamp of the new strictly later than the + // timestamp of the last, OR + // 2. Both the start time and timestamp of the new point must be + // strictly later than the timestamp (and thus start time) of the + // last point. In this case, we effectively have a _gap_ in the + // timeseries, and so we need to update `first_start_time` to + // reflect this new epoch. + let last_start_time = + *self_.start_times.as_ref().unwrap().last().unwrap(); + let last_timestamp = *self_.timestamps.last().unwrap(); + let new_start_time = measurement.start_time().unwrap(); + let new_timestamp = measurement.timestamp(); + + if epoch_start_time == new_start_time + && last_timestamp < new_timestamp + { + // Push the timestamps to reflect this interval, from the end of + // the last sample to the end of this one. + self_.start_times.as_mut().unwrap().push(last_timestamp); + self_.timestamps.push(new_timestamp); + + // The data value is the difference between the last non-missing + // datum and the new datum. + self_.values_mut(0).unwrap().push_diff_from_last_to_datum( + &last_datum, + measurement.datum(), + data_type, + )?; + } else if new_start_time > last_timestamp + && new_timestamp > last_timestamp + { + // Push the new start time directly, since it begins a new + // epoch. 
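+                // A forward jump in the start time means the producer
+                // restarted (or otherwise reset its cumulative counter), so
+                // this sample begins a new epoch rather than continuing the
+                // previous one.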
+ self_.start_times.as_mut().unwrap().push(new_start_time); + self_.timestamps.push(new_timestamp); + + // Update the epoch start time, and also simply push the datum + // directly. The difference with the previous is not meaningful, + // since we've begun a new epoch. + epoch_start_time = new_start_time; + self_ + .values_mut(0) + .unwrap() + .push_value_from_datum(measurement.datum())?; + } else { + // Print as useful a message as we can here. + anyhow::bail!( + "Cannot compute a delta, the timestamp of the next \ + sample has a new start time, or overlaps with the \ + last processed sample. \n \ + epoch start time = {epoch_start_time}\n \ + last timestamp = [{last_start_time}, {last_timestamp}]\n \ + new timestamp = [{new_start_time}, {new_timestamp}]" + ); + } + + // If the new datum is _not_ missing, we'll update the last one. + if !measurement.is_missing() { + last_datum.replace( + CumulativeDatum::from_cumulative(measurement).unwrap(), + ); + } + } + Ok(self_) + } + + /// Iterate over each point in self. + pub fn iter_points(&self) -> impl Iterator> + '_ { + (0..self.len()).map(|i| Point { + start_time: self.start_times.as_ref().map(|s| &s[i]), + timestamp: &self.timestamps[i], + values: self + .values + .iter() + .map(|val| (val.values.get(i), val.metric_type)) + .collect(), + }) + } + + // Filter points in self to those where `to_keep` is true. + pub(crate) fn filter(&self, to_keep: Vec) -> Result { + anyhow::ensure!( + to_keep.len() == self.len(), + "Filter array must be the same length as self", + ); + + // Compute the indices of values we're keeping. + let indices: Vec<_> = to_keep + .iter() + .enumerate() + .filter(|(_ix, to_keep)| **to_keep) + .map(|(ix, _)| ix) + .collect(); + let n_true = indices.len(); + let mut out = Self::with_capacity( + n_true, + self.data_types(), + self.metric_types(), + )?; + + // Push the compressed start times, if any. + if let Some(start_times) = self.start_times.as_ref() { + let Some(new_start_times) = out.start_times.as_mut() else { + unreachable!(); + }; + for ix in indices.iter().copied() { + new_start_times.push(start_times[ix]); + } + } + + // Push the compressed timestamps. + for ix in indices.iter().copied() { + out.timestamps.push(self.timestamps[ix]); + } + + // Push each dimension of the data values themselves. + for (new_values, existing_values) in + out.values.iter_mut().zip(self.values.iter()) + { + match (&mut new_values.values, &existing_values.values) { + (ValueArray::Integer(new), ValueArray::Integer(existing)) => { + for ix in indices.iter().copied() { + new.push(existing[ix]); + } + } + (ValueArray::Double(new), ValueArray::Double(existing)) => { + for ix in indices.iter().copied() { + new.push(existing[ix]); + } + } + (ValueArray::Boolean(new), ValueArray::Boolean(existing)) => { + for ix in indices.iter().copied() { + new.push(existing[ix]); + } + } + (ValueArray::String(new), ValueArray::String(existing)) => { + for ix in indices.iter().copied() { + new.push(existing[ix].clone()); + } + } + ( + ValueArray::IntegerDistribution(new), + ValueArray::IntegerDistribution(existing), + ) => { + for ix in indices.iter().copied() { + new.push(existing[ix].clone()); + } + } + ( + ValueArray::DoubleDistribution(new), + ValueArray::DoubleDistribution(existing), + ) => { + for ix in indices.iter().copied() { + new.push(existing[ix].clone()); + } + } + (_, _) => unreachable!(), + } + } + Ok(out) + } + + // Return a new set of points, with the values casted to the provided types. 
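+    //
+    // The casting rules are deliberately permissive: integers and doubles
+    // convert to one another via `num::ToPrimitive`; numeric targets may be
+    // parsed from strings; any non-zero number or non-empty string is a
+    // truthy boolean; and distributions can only be "cast" to their own type.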
+ pub(crate) fn cast(&self, types: &[DataType]) -> Result { + anyhow::ensure!( + types.len() == self.dimensionality(), + "Cannot cast to {} types, the data has dimensionality {}", + types.len(), + self.dimensionality(), + ); + let start_times = self.start_times.clone(); + let timestamps = self.timestamps.clone(); + let mut new_values = Vec::with_capacity(self.dimensionality()); + for (new_type, existing_values) in types.iter().zip(self.values.iter()) + { + let values = match (new_type, &existing_values.values) { + // "Cast" from i64 -> i64 + (DataType::Integer, ValueArray::Integer(vals)) => { + ValueArray::Integer(vals.clone()) + } + + // Cast f64 -> i64 + (DataType::Integer, ValueArray::Double(doubles)) => { + let mut new = Vec::with_capacity(doubles.len()); + for maybe_double in doubles.iter().copied() { + if let Some(d) = maybe_double { + let as_int = d + .to_i64() + .context("Cannot cast double {d} to i64")?; + new.push(Some(as_int)); + } else { + new.push(None); + } + } + ValueArray::Integer(new) + } + + // Cast bool -> i64 + (DataType::Integer, ValueArray::Boolean(bools)) => { + ValueArray::Integer( + bools + .iter() + .copied() + .map(|b| b.map(i64::from)) + .collect(), + ) + } + + // Cast string -> i64, by parsing. + (DataType::Integer, ValueArray::String(strings)) => { + let mut new = Vec::with_capacity(strings.len()); + for maybe_str in strings.iter() { + if let Some(s) = maybe_str { + let as_int = s + .parse() + .context("Cannot cast string '{s}' to i64")?; + new.push(Some(as_int)); + } else { + new.push(None); + } + } + ValueArray::Integer(new) + } + + // Cast i64 -> f64 + (DataType::Double, ValueArray::Integer(ints)) => { + let mut new = Vec::with_capacity(ints.len()); + for maybe_int in ints.iter().copied() { + if let Some(int) = maybe_int { + let as_double = int.to_f64().context( + "Cannot cast integer {int} as double", + )?; + new.push(Some(as_double)); + } else { + new.push(None); + } + } + ValueArray::Double(new) + } + + // "Cast" f64 -> f64 + (DataType::Double, ValueArray::Double(vals)) => { + ValueArray::Double(vals.clone()) + } + + // Cast bool -> f64 + (DataType::Double, ValueArray::Boolean(bools)) => { + ValueArray::Double( + bools + .iter() + .copied() + .map(|b| b.map(f64::from)) + .collect(), + ) + } + + // Cast string -> f64, by parsing. + (DataType::Double, ValueArray::String(strings)) => { + let mut new = Vec::with_capacity(strings.len()); + for maybe_str in strings.iter() { + if let Some(s) = maybe_str { + let as_double = s + .parse() + .context("Cannot cast string '{s}' to f64")?; + new.push(Some(as_double)); + } else { + new.push(None); + } + } + ValueArray::Double(new) + } + + // Cast i64 -> bool + // + // Any non-zero value is considered truthy. + (DataType::Boolean, ValueArray::Integer(ints)) => { + let mut new = Vec::with_capacity(ints.len()); + for maybe_int in ints.iter().copied() { + match maybe_int { + Some(0) => new.push(Some(false)), + Some(_) => new.push(Some(true)), + None => new.push(None), + } + } + ValueArray::Boolean(new) + } + + // Cast f64 -> bool + // + // Any non-zero value is considered truthy. 
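+                // (Note that NaN compares unequal to zero, so a NaN sample
+                // also maps to `true` here.)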
+                (DataType::Boolean, ValueArray::Double(doubles)) => {
+                    let mut new = Vec::with_capacity(doubles.len());
+                    for maybe_double in doubles.iter().copied() {
+                        match maybe_double {
+                            Some(d) if d == 0.0 => new.push(Some(false)),
+                            Some(_) => new.push(Some(true)),
+                            None => new.push(None),
+                        }
+                    }
+                    ValueArray::Boolean(new)
+                }
+
+                // "Cast" bool -> bool
+                (DataType::Boolean, ValueArray::Boolean(vals)) => {
+                    ValueArray::Boolean(vals.clone())
+                }
+
+                // Cast string -> bool.
+                //
+                // Any non-empty string is considered truthy
+                (DataType::Boolean, ValueArray::String(strings)) => {
+                    let mut new = Vec::with_capacity(strings.len());
+                    for maybe_str in strings.iter() {
+                        match maybe_str {
+                            Some(s) if s.is_empty() => new.push(Some(false)),
+                            Some(_) => new.push(Some(true)),
+                            None => new.push(None),
+                        }
+                    }
+                    ValueArray::Boolean(new)
+                }
+
+                // Cast i64 -> string
+                (DataType::String, ValueArray::Integer(ints)) => {
+                    ValueArray::String(
+                        ints.iter().map(|x| x.map(|x| x.to_string())).collect(),
+                    )
+                }
+
+                // Cast f64 -> string
+                (DataType::String, ValueArray::Double(doubles)) => {
+                    ValueArray::String(
+                        doubles
+                            .iter()
+                            .map(|x| x.map(|x| x.to_string()))
+                            .collect(),
+                    )
+                }
+
+                // Cast bool -> string
+                (DataType::String, ValueArray::Boolean(bools)) => {
+                    ValueArray::String(
+                        bools
+                            .iter()
+                            .map(|x| x.map(|x| x.to_string()))
+                            .collect(),
+                    )
+                }
+
+                // "Cast" string -> string
+                (DataType::String, ValueArray::String(vals)) => {
+                    ValueArray::String(vals.clone())
+                }
+
+                // "Cast" distributions to the same type of distribution
+                (
+                    DataType::IntegerDistribution,
+                    ValueArray::IntegerDistribution(vals),
+                ) => ValueArray::IntegerDistribution(vals.clone()),
+                (
+                    DataType::DoubleDistribution,
+                    ValueArray::DoubleDistribution(vals),
+                ) => ValueArray::DoubleDistribution(vals.clone()),
+
+                // All other casts are invalid
+                (_, vals) => anyhow::bail!(
+                    "Cannot cast {} -> {}",
+                    new_type,
+                    vals.data_type(),
+                ),
+            };
+            new_values.push(Values {
+                values,
+                metric_type: existing_values.metric_type,
+            });
+        }
+        Ok(Self { start_times, timestamps, values: new_values })
+    }
+
+    /// Return true if self contains no data points.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+/// List of data values for one timeseries.
+///
+/// Each element is an option, where `None` represents a missing sample.
+#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)]
+#[serde(rename_all = "snake_case", tag = "type", content = "values")]
+pub enum ValueArray {
+    Integer(Vec<Option<i64>>),
+    Double(Vec<Option<f64>>),
+    Boolean(Vec<Option<bool>>),
+    String(Vec<Option<String>>),
+    IntegerDistribution(Vec<Option<Distribution<i64>>>),
+    DoubleDistribution(Vec<Option<Distribution<f64>>>),
+}
+
+impl ValueArray {
+    // Create an empty array with capacity `size` of the provided data type.
+    fn with_capacity(size: usize, data_type: DataType) -> Self {
+        match data_type {
+            DataType::Integer => Self::Integer(Vec::with_capacity(size)),
+            DataType::Double => Self::Double(Vec::with_capacity(size)),
+            DataType::Boolean => Self::Boolean(Vec::with_capacity(size)),
+            DataType::String => Self::String(Vec::with_capacity(size)),
+            DataType::IntegerDistribution => {
+                Self::IntegerDistribution(Vec::with_capacity(size))
+            }
+            DataType::DoubleDistribution => {
+                Self::DoubleDistribution(Vec::with_capacity(size))
+            }
+        }
+    }
+
+    // Return the data type in self.
+    pub(super) fn data_type(&self) -> DataType {
+        match self {
+            ValueArray::Integer(_) => DataType::Integer,
+            ValueArray::Double(_) => DataType::Double,
+            ValueArray::Boolean(_) => DataType::Boolean,
+            ValueArray::String(_) => DataType::String,
+            ValueArray::IntegerDistribution(_) => DataType::IntegerDistribution,
+            ValueArray::DoubleDistribution(_) => DataType::DoubleDistribution,
+        }
+    }
+
+    // Access the inner array of booleans, if possible.
+    pub(super) fn as_boolean_mut(
+        &mut self,
+    ) -> Result<&mut Vec<Option<bool>>, Error> {
+        let ValueArray::Boolean(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as boolean type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    /// Access the values as an array of bools, if they have that type.
+    pub fn as_boolean(&self) -> Result<&Vec<Option<bool>>, Error> {
+        let ValueArray::Boolean(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as boolean type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    /// Access the values as an array of integers, if they have that type.
+    pub fn as_integer(&self) -> Result<&Vec<Option<i64>>, Error> {
+        let ValueArray::Integer(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as integer type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    // Access the inner array of integers, if possible.
+    pub(super) fn as_integer_mut(
+        &mut self,
+    ) -> Result<&mut Vec<Option<i64>>, Error> {
+        let ValueArray::Integer(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as integer type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    /// Access the values as an array of doubles, if they have that type.
+    pub fn as_double(&self) -> Result<&Vec<Option<f64>>, Error> {
+        let ValueArray::Double(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as double type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    // Access the inner array of doubles, if possible.
+    pub(super) fn as_double_mut(
+        &mut self,
+    ) -> Result<&mut Vec<Option<f64>>, Error> {
+        let ValueArray::Double(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as double type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    /// Access the values as an array of strings, if they have that type.
+    pub fn as_string(&self) -> Result<&Vec<Option<String>>, Error> {
+        let ValueArray::String(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as string type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    // Access the inner array of strings, if possible.
+    pub(super) fn as_string_mut(
+        &mut self,
+    ) -> Result<&mut Vec<Option<String>>, Error> {
+        let ValueArray::String(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as string type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    /// Access the values as an array of integer distributions, if they have
+    /// that type.
+    pub fn as_integer_distribution(
+        &self,
+    ) -> Result<&Vec<Option<Distribution<i64>>>, Error> {
+        let ValueArray::IntegerDistribution(inner) = self else {
+            anyhow::bail!(
+                "Cannot access value array as integer \
+                distribution type, it has type {}",
+                self.data_type(),
+            );
+        };
+        Ok(inner)
+    }
+
+    // Access the inner array of integer distribution, if possible.
+ pub(super) fn as_integer_distribution_mut( + &mut self, + ) -> Result<&mut Vec>>, Error> { + let ValueArray::IntegerDistribution(inner) = self else { + anyhow::bail!( + "Cannot access value array as integer \ + distribution type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + /// Access the values as an array of double distribution, if they have that + /// type. + pub fn as_double_distribution( + &self, + ) -> Result<&Vec>>, Error> { + let ValueArray::DoubleDistribution(inner) = self else { + anyhow::bail!( + "Cannot access value array as double \ + distribution type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + // Access the inner array of double distributions, if possible. + pub(super) fn as_double_distribution_mut( + &mut self, + ) -> Result<&mut Vec>>, Error> { + let ValueArray::DoubleDistribution(inner) = self else { + anyhow::bail!( + "Cannot access value array as double \ + distribution type, it has type {}", + self.data_type(), + ); + }; + Ok(inner) + } + + fn push_missing(&mut self, datum_type: DatumType) -> Result<(), Error> { + match datum_type { + DatumType::Bool => self.as_boolean_mut()?.push(None), + DatumType::I8 + | DatumType::U8 + | DatumType::I16 + | DatumType::U16 + | DatumType::I32 + | DatumType::U32 + | DatumType::I64 + | DatumType::U64 + | DatumType::CumulativeI64 + | DatumType::CumulativeU64 => self.as_integer_mut()?.push(None), + DatumType::F32 + | DatumType::F64 + | DatumType::CumulativeF32 + | DatumType::CumulativeF64 => self.as_double_mut()?.push(None), + DatumType::String => self.as_string_mut()?.push(None), + DatumType::Bytes => { + anyhow::bail!("Bytes data types are not yet supported") + } + DatumType::HistogramI8 + | DatumType::HistogramU8 + | DatumType::HistogramI16 + | DatumType::HistogramU16 + | DatumType::HistogramI32 + | DatumType::HistogramU32 + | DatumType::HistogramI64 + | DatumType::HistogramU64 => { + self.as_integer_distribution_mut()?.push(None) + } + DatumType::HistogramF32 | DatumType::HistogramF64 => { + self.as_double_distribution_mut()?.push(None) + } + } + Ok(()) + } + + // Push a value directly from a datum, without modification. 
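+    //
+    // Integer-like datum types are widened to `i64` (so a hypothetical
+    // `Datum::U16(7)` lands as `Some(7)` in the integer array), floats are
+    // widened to `f64`, and `u64` values that do not fit in an `i64` produce
+    // an error rather than wrapping.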
+ fn push_value_from_datum( + &mut self, + datum: &oximeter::Datum, + ) -> Result<(), Error> { + match datum { + oximeter::Datum::Bool(b) => self.as_boolean_mut()?.push(Some(*b)), + oximeter::Datum::I8(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::U8(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::I16(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::U16(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::I32(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::U32(i) => { + self.as_integer_mut()?.push(Some(i64::from(*i))) + } + oximeter::Datum::I64(i) => self.as_integer_mut()?.push(Some(*i)), + oximeter::Datum::U64(i) => { + let i = + i.to_i64().context("Failed to convert u64 datum to i64")?; + self.as_integer_mut()?.push(Some(i)); + } + oximeter::Datum::F32(f) => { + self.as_double_mut()?.push(Some(f64::from(*f))) + } + oximeter::Datum::F64(f) => self.as_double_mut()?.push(Some(*f)), + oximeter::Datum::String(s) => { + self.as_string_mut()?.push(Some(s.clone())) + } + oximeter::Datum::Bytes(_) => { + anyhow::bail!("Bytes data types are not yet supported") + } + oximeter::Datum::CumulativeI64(c) => { + self.as_integer_mut()?.push(Some(c.value())) + } + oximeter::Datum::CumulativeU64(c) => { + let c = c + .value() + .to_i64() + .context("Failed to convert u64 datum to i64")?; + self.as_integer_mut()?.push(Some(c)); + } + oximeter::Datum::CumulativeF32(c) => { + self.as_double_mut()?.push(Some(f64::from(c.value()))) + } + oximeter::Datum::CumulativeF64(c) => { + self.as_double_mut()?.push(Some(c.value())) + } + oximeter::Datum::HistogramI8(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramU8(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramI16(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramU16(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramI32(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramU32(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramI64(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramU64(h) => self + .as_integer_distribution_mut()? + .push(Some(Distribution::try_from(h)?)), + oximeter::Datum::HistogramF32(h) => self + .as_double_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::HistogramF64(h) => self + .as_double_distribution_mut()? + .push(Some(Distribution::from(h))), + oximeter::Datum::Missing(missing) => { + self.push_missing(missing.datum_type())? + } + } + Ok(()) + } + + // Push a delta from the last valid datum and a new one. + // + // This takes the last valid datum, if any, and a new one. It computes the + // delta between the the values of the datum, if possible, and pushes it + // onto the correct value array inside `self`. + // + // If both the last datum and new one exist (are not missing), the normal + // diff is pushed. If the last datum is missing, but the new one exists, + // then the new value is pushed directly. If the last datum exists but the + // new one does not, then a missing datum is pushed. 
If both are missing, + // then a missing one is pushed as well. + // + // In other words, the diff is always between the new datum and the last + // non-None value. If such a last value does not exist, the datum is + // inserted directly. + fn push_diff_from_last_to_datum( + &mut self, + last_datum: &Option, + new_datum: &oximeter::Datum, + data_type: DataType, + ) -> Result<(), Error> { + match (last_datum.as_ref(), new_datum.is_missing()) { + (None, true) | (Some(_), true) => { + // In this case, either both values are missing, or just the new + // one is. In either case, we cannot compute a new value, and + // need to insert None to represent the new missing datum. + match data_type { + DataType::Integer => self.as_integer_mut()?.push(None), + DataType::Double => self.as_double_mut()?.push(None), + DataType::Boolean => self.as_boolean_mut()?.push(None), + DataType::String => self.as_string_mut()?.push(None), + DataType::IntegerDistribution => { + self.as_integer_distribution_mut()?.push(None) + } + DataType::DoubleDistribution => { + self.as_double_distribution_mut()?.push(None) + } + } + } + (None, false) => { + // The last datum was missing, but the new one is not. We cannot + // compute the difference, since we have no previous point. + // However, we can still push some value by inserting the datum + // directly. + self.push_value_from_datum(new_datum)?; + } + (Some(last_datum), false) => { + // Both values exist, so we can compute the difference between + // them and insert that. + // + // Note that we're asserting both are the same _datum_ type, + // which is guaranteed by a check in the caller. + match (last_datum, new_datum) { + ( + CumulativeDatum::Integer(last), + oximeter::Datum::I8(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::U8(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::I16(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::U16(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::I32(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::U32(new), + ) => { + let new = i64::from(*new); + self.as_integer_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::I64(new), + ) => { + let diff = new + .checked_sub(*last) + .context("Overflow computing deltas")?; + self.as_integer_mut()?.push(Some(diff)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::U64(new), + ) => { + let new = new + .to_i64() + .context("Failed to convert u64 datum to i64")?; + let diff = new + .checked_sub(*last) + .context("Overflow computing deltas")?; + self.as_integer_mut()?.push(Some(diff)); + } + ( + CumulativeDatum::Double(last), + oximeter::Datum::F32(new), + ) => { + self.as_double_mut()? 
+ .push(Some(f64::from(*new) - last)); + } + ( + CumulativeDatum::Double(last), + oximeter::Datum::F64(new), + ) => { + self.as_double_mut()?.push(Some(new - last)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::CumulativeI64(new), + ) => { + let new = new.value(); + let diff = new + .checked_sub(*last) + .context("Overflow computing deltas")?; + self.as_integer_mut()?.push(Some(diff)); + } + ( + CumulativeDatum::Integer(last), + oximeter::Datum::CumulativeU64(new), + ) => { + let new = new + .value() + .to_i64() + .context("Failed to convert u64 datum to i64")?; + let diff = new + .checked_sub(*last) + .context("Overflow computing deltas")?; + self.as_integer_mut()?.push(Some(diff)); + } + ( + CumulativeDatum::Double(last), + oximeter::Datum::CumulativeF32(new), + ) => { + self.as_double_mut()? + .push(Some(f64::from(new.value()) - last)); + } + ( + CumulativeDatum::Double(last), + oximeter::Datum::CumulativeF64(new), + ) => { + self.as_double_mut()?.push(Some(new.value() - last)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramI8(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramU8(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramI16(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramU16(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramI32(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramU32(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramI64(new), + ) => { + let new = Distribution::from(new); + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::IntegerDistribution(last), + oximeter::Datum::HistogramU64(new), + ) => { + let new = Distribution::try_from(new)?; + self.as_integer_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::DoubleDistribution(last), + oximeter::Datum::HistogramF32(new), + ) => { + let new = Distribution::from(new); + self.as_double_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + ( + CumulativeDatum::DoubleDistribution(last), + oximeter::Datum::HistogramF64(new), + ) => { + let new = Distribution::from(new); + self.as_double_distribution_mut()? + .push(Some(new.checked_sub(&last)?)); + } + (_, _) => unreachable!(), + } + } + } + Ok(()) + } + + // Return the number of samples in self. 
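+    //
+    // Missing samples still count toward the length: a hypothetical
+    // `ValueArray::Integer(vec![Some(1), None])` has a length of 2.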
+ fn len(&self) -> usize { + match self { + ValueArray::Boolean(inner) => inner.len(), + ValueArray::Integer(inner) => inner.len(), + ValueArray::Double(inner) => inner.len(), + ValueArray::String(inner) => inner.len(), + ValueArray::IntegerDistribution(inner) => inner.len(), + ValueArray::DoubleDistribution(inner) => inner.len(), + } + } + + // Return a reference to the i-th value in the array. + // + // This panics if `i >= self.len()`. + fn get(&self, i: usize) -> Datum<'_> { + match self { + ValueArray::Boolean(inner) => Datum::Boolean(inner[i]), + ValueArray::Integer(inner) => { + Datum::Integer(inner.get(i).unwrap().as_ref()) + } + ValueArray::Double(inner) => { + Datum::Double(inner.get(i).unwrap().as_ref()) + } + ValueArray::String(inner) => { + Datum::String(inner.get(i).unwrap().as_deref()) + } + ValueArray::IntegerDistribution(inner) => { + Datum::IntegerDistribution(inner.get(i).unwrap().as_ref()) + } + ValueArray::DoubleDistribution(inner) => { + Datum::DoubleDistribution(inner.get(i).unwrap().as_ref()) + } + } + } + + // Swap the value in self with other, asserting they're the same type. + pub(crate) fn swap(&mut self, mut values: ValueArray) { + use std::mem::swap; + match (self, &mut values) { + (ValueArray::Integer(x), ValueArray::Integer(y)) => swap(x, y), + (ValueArray::Double(x), ValueArray::Double(y)) => swap(x, y), + (ValueArray::Boolean(x), ValueArray::Boolean(y)) => swap(x, y), + (ValueArray::String(x), ValueArray::String(y)) => swap(x, y), + ( + ValueArray::IntegerDistribution(x), + ValueArray::IntegerDistribution(y), + ) => swap(x, y), + ( + ValueArray::DoubleDistribution(x), + ValueArray::DoubleDistribution(y), + ) => swap(x, y), + (_, _) => panic!("Cannot swap values of different types"), + } + } +} + +mod private { + pub trait Sealed {} + impl Sealed for i64 {} + impl Sealed for f64 {} +} + +pub trait DistributionSupport: + fmt::Display + Clone + Copy + fmt::Debug + PartialEq + private::Sealed +{ +} +impl DistributionSupport for i64 {} +impl DistributionSupport for f64 {} + +/// A distribution is a sequence of bins and counts in those bins. +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] +#[schemars(rename = "Distribution{T}")] +pub struct Distribution { + bins: Vec, + counts: Vec, +} + +impl fmt::Display for Distribution { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + let elems = self + .bins + .iter() + .zip(self.counts.iter()) + .map(|(bin, count)| format!("{bin}: {count}")) + .collect::>() + .join(", "); + write!(f, "{}", elems) + } +} + +impl Distribution { + // Subtract two distributions, checking that they have the same bins. + fn checked_sub( + &self, + rhs: &Distribution, + ) -> Result, Error> { + anyhow::ensure!( + self.bins == rhs.bins, + "Cannot subtract distributions with different bins", + ); + let counts = self + .counts + .iter() + .zip(rhs.counts.iter().copied()) + .map(|(x, y)| x.checked_sub(y)) + .collect::>() + .context("Underflow subtracting distributions values")?; + Ok(Self { bins: self.bins.clone(), counts }) + } + + /// Return the slice of bins. + pub fn bins(&self) -> &[T] { + &self.bins + } + + /// Return the slice of counts. + pub fn counts(&self) -> &[u64] { + &self.counts + } + + /// Return an iterator over each bin and count. + pub fn iter(&self) -> impl ExactSizeIterator + '_ { + self.bins.iter().zip(self.counts.iter()) + } +} + +macro_rules! 
i64_dist_from { + ($t:ty) => { + impl From<&oximeter::histogram::Histogram<$t>> for Distribution { + fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + let (bins, counts) = hist.to_arrays(); + Self { bins: bins.into_iter().map(i64::from).collect(), counts } + } + } + + impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { + fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + CumulativeDatum::IntegerDistribution(hist.into()) + } + } + }; +} + +i64_dist_from!(i8); +i64_dist_from!(u8); +i64_dist_from!(i16); +i64_dist_from!(u16); +i64_dist_from!(i32); +i64_dist_from!(u32); +i64_dist_from!(i64); + +impl TryFrom<&oximeter::histogram::Histogram> for Distribution { + type Error = Error; + fn try_from( + hist: &oximeter::histogram::Histogram, + ) -> Result { + let (bins, counts) = hist.to_arrays(); + let bins = bins + .into_iter() + .map(i64::try_from) + .collect::>() + .context("Overflow converting u64 to i64")?; + Ok(Self { bins, counts }) + } +} + +impl TryFrom<&oximeter::histogram::Histogram> for CumulativeDatum { + type Error = Error; + fn try_from( + hist: &oximeter::histogram::Histogram, + ) -> Result { + hist.try_into().map(CumulativeDatum::IntegerDistribution) + } +} + +macro_rules! f64_dist_from { + ($t:ty) => { + impl From<&oximeter::histogram::Histogram<$t>> for Distribution { + fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + let (bins, counts) = hist.to_arrays(); + Self { bins: bins.into_iter().map(f64::from).collect(), counts } + } + } + + impl From<&oximeter::histogram::Histogram<$t>> for CumulativeDatum { + fn from(hist: &oximeter::histogram::Histogram<$t>) -> Self { + CumulativeDatum::DoubleDistribution(hist.into()) + } + } + }; +} + +f64_dist_from!(f32); +f64_dist_from!(f64); + +#[cfg(test)] +mod tests { + use crate::oxql::point::{DataType, ValueArray}; + + use super::{Distribution, MetricType, Points, Values}; + use chrono::{DateTime, Utc}; + use oximeter::types::Cumulative; + use oximeter::Measurement; + use std::time::Duration; + + #[test] + fn test_point_delta_between() { + let mut datum = Cumulative::new(2i64); + let now = Utc::now(); + let meas0 = Measurement::new(now + Duration::from_secs(1), datum); + datum.set(10i64); + let meas1 = Measurement::new(now + Duration::from_secs(2), datum); + let measurements = vec![meas0.clone(), meas1.clone()]; + let points = Points::delta_from_cumulative(&measurements).unwrap(); + + assert_eq!(points.len(), 2); + assert_eq!( + points.values(0).unwrap().as_integer().unwrap(), + &[Some(2i64), Some(8)], + ); + assert_eq!( + Duration::from_secs(1), + (points.timestamps[1] - points.timestamps[0]).to_std().unwrap(), + ); + let expected = vec![now, meas0.timestamp()]; + let actual = points.start_times.as_ref().unwrap(); + assert_eq!(expected.len(), actual.len()); + for (x, y) in expected.into_iter().zip(actual.into_iter()) { + assert!((*y - x).num_nanoseconds().unwrap() <= 1); + } + } + + #[test] + fn test_point_delta_between_with_new_epoch() { + let datum = Cumulative::new(2i64); + let now = Utc::now(); + let meas0 = Measurement::new(now + Duration::from_secs(1), datum); + + // Create a new datum, with a completely new start time, representing a + // new epoch. 
+ let now = Utc::now() + Duration::from_secs(10); + let datum = Cumulative::with_start_time(now, 10i64); + let meas1 = Measurement::new(now + Duration::from_secs(2), datum); + let measurements = vec![meas0.clone(), meas1.clone()]; + let points = Points::delta_from_cumulative(&measurements).unwrap(); + + // The second point should not be referenced to the first, because + // they're in different epochs. + assert_eq!(points.len(), 2); + assert_eq!( + points.values(0).unwrap().as_integer().unwrap(), + &[Some(2i64), Some(10)], + ); + + // The start times should be the start times of the measurements + // themselves as well. Same for timestamps. + assert_eq!( + points.timestamps, + vec![meas0.timestamp(), meas1.timestamp()], + ); + assert_eq!( + points.start_times.as_ref().unwrap(), + &[meas0.start_time().unwrap(), meas1.start_time().unwrap()], + ); + } + + #[test] + fn test_point_delta_between_overlapping_time_ranges() { + // These data points start at `T` and `T + 100ms` respectively, and end + // at those times + 1s. That means their time ranges overlap, and so we + // can't compute a delta from them. + let start_time = Utc::now() - Duration::from_secs(1); + let datum1 = Cumulative::with_start_time(start_time, 1i64); + let datum2 = Cumulative::with_start_time( + start_time + Duration::from_millis(100), + 10i64, + ); + let meas1 = Measurement::new( + datum1.start_time() + Duration::from_secs(1), + datum1, + ); + let meas2 = Measurement::new( + datum2.start_time() + Duration::from_secs(1), + datum2, + ); + + assert!( + Points::delta_from_cumulative(&[meas1.clone(), meas2.clone()]) + .is_err(), + "Should not be able to compute a delta point \ + between two measuremenst with overlapping start \ + times: [{}, {}] and [{}, {}]", + meas1.start_time().unwrap(), + meas1.timestamp(), + meas2.start_time().unwrap(), + meas2.timestamp(), + ); + } + + fn timestamps(n: usize) -> Vec> { + let now = Utc::now(); + let mut out = Vec::with_capacity(n); + for i in 0..n { + out.push(now - Duration::from_secs(i as _)); + } + out.into_iter().rev().collect() + } + + #[test] + fn test_cast_points_from_bool() { + let points = Points { + start_times: None, + timestamps: timestamps(2), + values: vec![Values { + values: ValueArray::Boolean(vec![Some(false), Some(true)]), + metric_type: MetricType::Gauge, + }], + }; + + let as_same = points.cast(&[DataType::Boolean]).unwrap(); + let vals = as_same.values[0].values.as_boolean().unwrap(); + assert_eq!(vals, points.values[0].values.as_boolean().unwrap()); + + let as_int = points.cast(&[DataType::Integer]).unwrap(); + let vals = as_int.values[0].values.as_integer().unwrap(); + assert_eq!(vals, &vec![Some(0), Some(1)]); + + let as_double = points.cast(&[DataType::Double]).unwrap(); + let vals = as_double.values[0].values.as_double().unwrap(); + assert_eq!(vals, &vec![Some(0.0), Some(1.0)]); + + let as_string = points.cast(&[DataType::String]).unwrap(); + let vals = as_string.values[0].values.as_string().unwrap(); + assert_eq!( + vals, + &vec![Some("false".to_string()), Some("true".to_string())] + ); + + for ty in [DataType::IntegerDistribution, DataType::DoubleDistribution] + { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast bool array to distributions" + ); + } + assert!(points.cast(&[]).is_err(), "Should fail to cast with no types"); + assert!( + points.cast(&[DataType::Boolean, DataType::Boolean]).is_err(), + "Should fail to cast to the wrong number of types" + ); + } + + #[test] + fn test_cast_points_from_integer() { + let points = Points { + 
start_times: None, + timestamps: timestamps(2), + values: vec![Values { + values: ValueArray::Integer(vec![Some(0), Some(10)]), + metric_type: MetricType::Gauge, + }], + }; + + let as_same = points.cast(&[DataType::Integer]).unwrap(); + let vals = as_same.values[0].values.as_integer().unwrap(); + assert_eq!(vals, points.values[0].values.as_integer().unwrap()); + + let as_bools = points.cast(&[DataType::Boolean]).unwrap(); + let vals = as_bools.values[0].values.as_boolean().unwrap(); + assert_eq!(vals, &vec![Some(false), Some(true)]); + + let as_double = points.cast(&[DataType::Double]).unwrap(); + let vals = as_double.values[0].values.as_double().unwrap(); + assert_eq!(vals, &vec![Some(0.0), Some(10.0)]); + + let as_string = points.cast(&[DataType::String]).unwrap(); + let vals = as_string.values[0].values.as_string().unwrap(); + assert_eq!(vals, &vec![Some("0".to_string()), Some("10".to_string())]); + + for ty in [DataType::IntegerDistribution, DataType::DoubleDistribution] + { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast int array to distributions" + ); + } + assert!(points.cast(&[]).is_err(), "Should fail to cast with no types"); + assert!( + points.cast(&[DataType::Boolean, DataType::Boolean]).is_err(), + "Should fail to cast to the wrong number of types" + ); + } + + #[test] + fn test_cast_points_from_double() { + let points = Points { + start_times: None, + timestamps: timestamps(2), + values: vec![Values { + values: ValueArray::Double(vec![Some(0.0), Some(10.5)]), + metric_type: MetricType::Gauge, + }], + }; + + let as_same = points.cast(&[DataType::Double]).unwrap(); + let vals = as_same.values[0].values.as_double().unwrap(); + assert_eq!(vals, points.values[0].values.as_double().unwrap()); + + let as_bools = points.cast(&[DataType::Boolean]).unwrap(); + let vals = as_bools.values[0].values.as_boolean().unwrap(); + assert_eq!(vals, &vec![Some(false), Some(true)]); + + let as_ints = points.cast(&[DataType::Integer]).unwrap(); + let vals = as_ints.values[0].values.as_integer().unwrap(); + assert_eq!(vals, &vec![Some(0), Some(10)]); + + let as_string = points.cast(&[DataType::String]).unwrap(); + let vals = as_string.values[0].values.as_string().unwrap(); + assert_eq!( + vals, + &vec![Some("0".to_string()), Some("10.5".to_string())] + ); + + let points = Points { + start_times: None, + timestamps: timestamps(2), + values: vec![Values { + values: ValueArray::Double(vec![Some(0.0), Some(f64::MAX)]), + metric_type: MetricType::Gauge, + }], + }; + assert!( + points.cast(&[DataType::Integer]).is_err(), + "Should fail to cast out-of-range doubles to integer" + ); + + for ty in [DataType::IntegerDistribution, DataType::DoubleDistribution] + { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast double array to distributions" + ); + } + assert!(points.cast(&[]).is_err(), "Should fail to cast with no types"); + assert!( + points.cast(&[DataType::Boolean, DataType::Boolean]).is_err(), + "Should fail to cast to the wrong number of types" + ); + } + + #[test] + fn test_cast_points_from_string() { + fn make_points(strings: &[&str]) -> Points { + Points { + start_times: None, + timestamps: timestamps(strings.len()), + values: vec![Values { + values: ValueArray::String( + strings.iter().map(|&s| Some(s.into())).collect(), + ), + metric_type: MetricType::Gauge, + }], + } + } + + let points = make_points(&["some", "strings"]); + let as_same = points.cast(&[DataType::String]).unwrap(); + assert_eq!(as_same, points); + + // Any non-empty string is truthy, 
even "false". + let points = make_points(&["", "false", "true"]); + let as_bools = points.cast(&[DataType::Boolean]).unwrap(); + let vals = as_bools.values[0].values.as_boolean().unwrap(); + assert_eq!(vals, &vec![Some(false), Some(true), Some(true)]); + + // Conversion to integers happens by parsing. + let points = make_points(&["0", "1"]); + let as_ints = points.cast(&[DataType::Integer]).unwrap(); + let vals = as_ints.values[0].values.as_integer().unwrap(); + assert_eq!(vals, &vec![Some(0), Some(1)]); + for bad in ["1.0", "", "foo", "[]"] { + assert!( + make_points(&[bad]).cast(&[DataType::Integer]).is_err(), + "Should fail to cast non-int string '{}' to integers", + bad, + ); + } + + // Conversion to doubles happens by parsing. + let points = make_points(&["0", "1.1"]); + let as_doubles = points.cast(&[DataType::Double]).unwrap(); + let vals = as_doubles.values[0].values.as_double().unwrap(); + assert_eq!(vals, &vec![Some(0.0), Some(1.1)]); + for bad in ["", "foo", "[]"] { + assert!( + make_points(&[bad]).cast(&[DataType::Double]).is_err(), + "Should fail to cast non-double string '{}' to double", + bad, + ); + } + + // Checks for invalid casts + for ty in [DataType::IntegerDistribution, DataType::DoubleDistribution] + { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast double array to distributions" + ); + } + assert!(points.cast(&[]).is_err(), "Should fail to cast with no types"); + assert!( + points.cast(&[DataType::Boolean, DataType::Boolean]).is_err(), + "Should fail to cast to the wrong number of types" + ); + } + + #[test] + fn test_cast_points_from_int_distribution() { + // We can only "cast" to the same type here. + let points = Points { + start_times: None, + timestamps: timestamps(1), + values: vec![Values { + values: ValueArray::IntegerDistribution(vec![Some( + Distribution { bins: vec![0, 1, 2], counts: vec![0; 3] }, + )]), + metric_type: MetricType::Gauge, + }], + }; + let as_same = points.cast(&[DataType::IntegerDistribution]).unwrap(); + assert_eq!(points, as_same); + + for ty in [ + DataType::Boolean, + DataType::String, + DataType::Integer, + DataType::Double, + DataType::DoubleDistribution, + ] { + assert!( + points.cast(&[ty]).is_err(), + "Should not be able to cast distributions to anything other than itself" + ); + } + assert!(points.cast(&[]).is_err()); + assert!(points + .cast(&[ + DataType::IntegerDistribution, + DataType::IntegerDistribution + ]) + .is_err()); + } + + #[test] + fn test_cast_points_from_double_distribution() { + // We can only "cast" to the same type here. 
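+        // Casting to any other type (e.g. a hypothetical
+        // `points.cast(&[DataType::Integer])`) should fail rather than
+        // flatten the histogram.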
+        let points = Points {
+            start_times: None,
+            timestamps: timestamps(1),
+            values: vec![Values {
+                values: ValueArray::DoubleDistribution(vec![Some(
+                    Distribution {
+                        bins: vec![0.0, 1.0, 2.0],
+                        counts: vec![0; 3],
+                    },
+                )]),
+                metric_type: MetricType::Gauge,
+            }],
+        };
+        let as_same = points.cast(&[DataType::DoubleDistribution]).unwrap();
+        assert_eq!(points, as_same);
+
+        for ty in [
+            DataType::Boolean,
+            DataType::String,
+            DataType::Integer,
+            DataType::Double,
+            DataType::IntegerDistribution,
+        ] {
+            assert!(
+                points.cast(&[ty]).is_err(),
+                "Should not be able to cast distributions to anything other than itself"
+            );
+        }
+        assert!(points.cast(&[]).is_err());
+        assert!(points
+            .cast(&[DataType::DoubleDistribution, DataType::DoubleDistribution])
+            .is_err());
+    }
+}
diff --git a/oximeter/db/src/oxql/query/mod.rs b/oximeter/db/src/oxql/query/mod.rs
new file mode 100644
index 0000000000..bb1c0986fe
--- /dev/null
+++ b/oximeter/db/src/oxql/query/mod.rs
@@ -0,0 +1,837 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! A single OxQL query.
+
+// Copyright 2024 Oxide Computer Company
+
+use super::ast::ident::Ident;
+use super::ast::logical_op::LogicalOp;
+use super::ast::table_ops::filter::CompoundFilter;
+use super::ast::table_ops::filter::FilterExpr;
+use super::ast::table_ops::group_by::GroupBy;
+use super::ast::table_ops::BasicTableOp;
+use super::ast::table_ops::TableOp;
+use super::ast::SplitQuery;
+use crate::oxql::ast::grammar;
+use crate::oxql::ast::table_ops::filter::Filter;
+use crate::oxql::ast::Query as QueryNode;
+use crate::oxql::fmt_parse_error;
+use crate::oxql::Error;
+use crate::TimeseriesName;
+use chrono::DateTime;
+use chrono::Utc;
+use std::time::Duration;
+
+/// Special identifiers for column names or other widely-used values.
+pub mod special_idents {
+    use oximeter::DatumType;
+
+    pub const TIMESTAMP: &str = "timestamp";
+    pub const START_TIME: &str = "start_time";
+    pub const DATUM: &str = "datum";
+    pub const BINS: &str = "bins";
+    pub const COUNTS: &str = "counts";
+    pub const DATETIME64: &str = "DateTime64";
+    pub const ARRAYU64: &str = "Array[u64]";
+
+    pub fn array_type_name_from_histogram_type(
+        type_: DatumType,
+    ) -> Option<String> {
+        if !type_.is_histogram() {
+            return None;
+        }
+        Some(format!(
+            "Array[{}]",
+            type_.to_string().strip_prefix("Histogram").unwrap().to_lowercase(),
+        ))
+    }
+}
+
+/// A parsed OxQL query.
+#[derive(Clone, Debug, PartialEq)]
+pub struct Query {
+    pub(super) parsed: QueryNode,
+    pub(super) end_time: DateTime<Utc>,
+}
+
+impl Query {
+    /// Construct a query written in OxQL.
+    pub fn new(query: impl AsRef<str>) -> Result<Self, Error> {
+        let raw = query.as_ref().trim();
+        const MAX_LEN: usize = 4096;
+        anyhow::ensure!(
+            raw.len() <= MAX_LEN,
+            "Queries must be <= {} characters",
+            MAX_LEN,
+        );
+        let parsed = grammar::query_parser::query(raw)
+            .map_err(|e| fmt_parse_error(raw, e))?;
+
+        // Fetch the latest query end time referred to in the parsed query, or
+        // use now if there isn't one.
+        let query_end_time = parsed.query_end_time().unwrap_or_else(Utc::now);
+        Ok(Self { parsed, end_time: query_end_time })
+    }
+
+    /// Return the end time of the query.
+    pub fn end_time(&self) -> &DateTime<Utc> {
+        &self.end_time
+    }
+
+    /// Return the next referenced timeseries name.
+ /// + /// Queries always start with either a single `get` operation, which refers + /// to one timeseries; or a subquery, each component of which is a query. So + /// it is always true that there is exactly one next timeseries name, since + /// that comes from the current query, or the next subquery. + pub fn timeseries_name(&self) -> &TimeseriesName { + self.parsed.timeseries_name() + } + + /// Return the transformation table ops, i.e., everything after the initial + /// get operation or subquery. + pub fn transformations(&self) -> &[TableOp] { + self.parsed.transformations() + } + + /// Return the set of all predicates in the query, coalesced. + /// + /// Query optimization is a large topic. There are few rules, and many + /// heuristics. However, one of those is extremely useful for our case: + /// predicate pushdown. This is where one moves predicates as close as + /// possible to the data, filtering out unused data as early as possible in + /// query processing. + /// + /// In our case, _currently_, we can implement this pretty easily. Filtering + /// operations can usually be coalesced into a single item. That means: + /// + /// - successive filtering operations are merged: `filter a | filter b -> + /// `filter (a) && (b)`. + /// - filtering operations are "pushed down", to just after the initial + /// `get` operation in the query. + /// + /// # Group by + /// + /// While filters can be combined and pushed down through many operations, + /// special care is taken for `group_by`. Specifically, the filter must only + /// name columns explicitly named in the `group_by`. If we pushed through + /// filters which named one of the columns _within_ the group (one not + /// named), then that would change the set of data in a group, and thus the + /// result. + /// + /// # Datum filters + /// + /// We currently only push down filters on the timestamps, and that is only + /// because we do _not_ support aggregations across time, only values. If + /// and when we do support that, then filters which reference time also + /// cannot be pushed down. + /// + /// # No predicates + /// + /// Note that this may return `None`, in the case where there are zero + /// predicates of any kind. + // + // Pushing filters through a group by. Consider the following data: + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 2 + // 0 1 1 3 + // 1 0 0 4 + // 1 0 1 5 + // 1 1 0 6 + // 1 1 1 7 + // + // So there are two groups for a and b columns each with two samples. + // + // Consider `get a:b | group_by [a] | filter a == 0`. + // + // After the group by, the result is: + // + // a timestamp datum + // 0 0 avg([0, 2]) -> 1 + // 0 1 avg([1, 3]) -> 2 + // 1 0 avg([4, 6]) -> 5 + // 1 1 avg([5, 7]) -> 6 + // + // Then after the filter, it becomes: + // + // a timestamp datum + // 0 0 avg([0, 2]) -> 1 + // 0 1 avg([1, 3]) -> 2 + // + // Now, let's do the filter first, as if we pushed that down. + // i.e., `get a:b | filter a == 0 | group_by [a]`. After the filter, we get: + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 2 + // 0 1 1 3 + // + // Then we apply the group by: + // + // a timestamp datum + // 0 0 avg([0, 2]) -> 1 + // 0 1 avg([1, 3]) -> 2 + // + // So we get the same result. Let's suppose we had a filter on the column + // `b` instead. Doing the group_by first, we get the exact same result as + // the first one above. Or we really get an error, because the resulting + // table does not have a `b` column. 
+ // + // If instead we did the filter first, we'd get a different result. Starting + // from: + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 2 + // 0 1 1 3 + // 1 0 0 4 + // 1 0 1 5 + // 1 1 0 6 + // 1 1 1 7 + // + // Apply `filter b == 0`: + // + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 1 0 0 4 + // 1 0 1 5 + // + // Then apply group_by [a] + // + // a timestamp datum + // 0 0 avg([0, 1]) -> 0.5 + // 0 1 avg([4, 5]) -> 4.5 + // + // So we get something very different. + // + // What about filtering by timestamp? Starting from the raw data again: + // + // a b timestamp datum + // 0 0 0 0 + // 0 0 1 1 + // 0 1 0 2 + // 0 1 1 3 + // 1 0 0 4 + // 1 0 1 5 + // 1 1 0 6 + // 1 1 1 7 + // + // Let's add a `filter timestamp >= 1`. After the `group_by [a]`, we get: + // + // a timestamp datum + // 0 0 avg([0, 2]) -> 1 + // 0 1 avg([1, 3]) -> 2 + // 1 0 avg([4, 6]) -> 5 + // 1 1 avg([5, 7]) -> 6 + // + // Then after `filter timestamp >= 1`: + // + // a timestamp datum + // 0 1 avg([1, 3]) -> 2 + // 1 1 avg([5, 7]) -> 6 + // + // Now, filtering the timestamps first, after that we get: + // + // a b timestamp datum + // 0 0 1 1 + // 0 1 1 3 + // 1 0 1 5 + // 1 1 1 7 + // + // Then grouping: + // + // a timestamp datum + // 0 1 avg([1, 3]) -> 2 + // 1 1 avg([5, 7]) -> 6 + // + // So that also works fine. + pub(crate) fn coalesced_predicates( + &self, + mut outer: Option, + ) -> Option { + let maybe_filter = self.transformations().iter().rev().fold( + None, + |maybe_filter, next_tr| { + // Transformations only return basic ops, since all the + // subqueries must be at the prefix of the query. + let TableOp::Basic(op) = next_tr else { + unreachable!(); + }; + + match op { + BasicTableOp::GroupBy(GroupBy { identifiers, .. }) => { + // We may have been passed predicates from an outer + // query. Those also need to be restricted, if we're + // trying to push them through a group_by operation. + outer = outer.as_ref().and_then(|outer| { + restrict_filter_idents(outer, identifiers) + }); + + // Only push through columns referred to in the group by + // itself, which replaces the current filter. + maybe_filter.as_ref().and_then(|current| { + restrict_filter_idents(current, identifiers) + }) + } + BasicTableOp::Filter(filter) => { + // Merge with any existing filter. + if let Some(left) = maybe_filter { + Some(left.merge(&filter, LogicalOp::And)) + } else { + Some(filter.clone()) + } + } + _ => maybe_filter, + } + }, + ); + + // Merge in any predicates passed from an outer query, which may have + // been restricted as we moved through group_by operations. 
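+        //
+        // For example (hypothetical filters), an outer `filter a == 0`
+        // combined with an inner `filter b == 1` ends up as
+        // `filter (a == 0) && (b == 1)`.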
+        match (outer, maybe_filter) {
+            (None, any) => any,
+            (Some(outer), None) => Some(outer),
+            (Some(outer), Some(inner)) => {
+                Some(outer.merge(&inner, LogicalOp::And))
+            }
+        }
+    }
+
+    pub(crate) fn split(&self) -> SplitQuery {
+        self.parsed.split(self.end_time)
+    }
+}
+
+// Return a new filter containing only parts that refer to either:
+//
+// - a `timestamp` column
+// - a column listed in `identifiers`
+fn restrict_filter_idents(
+    current_filter: &Filter,
+    identifiers: &[Ident],
+) -> Option<Filter> {
+    match &current_filter.expr {
+        FilterExpr::Simple(inner) => {
+            let ident = inner.ident.as_str();
+            if ident == "timestamp"
+                || identifiers.iter().map(Ident::as_str).any(|id| id == ident)
+            {
+                Some(current_filter.clone())
+            } else {
+                None
+            }
+        }
+        FilterExpr::Compound(CompoundFilter { left, op, right }) => {
+            let maybe_left = restrict_filter_idents(left, identifiers);
+            let maybe_right = restrict_filter_idents(right, identifiers);
+            match (maybe_left, maybe_right) {
+                (Some(left), Some(right)) => Some(Filter {
+                    negated: current_filter.negated,
+                    expr: FilterExpr::Compound(CompoundFilter {
+                        left: Box::new(left),
+                        op: *op,
+                        right: Box::new(right),
+                    }),
+                }),
+                (Some(single), None) | (None, Some(single)) => Some(single),
+                (None, None) => None,
+            }
+        }
+    }
+}
+
+/// Describes the time alignment for an OxQL query.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct Alignment {
+    /// The end time of the query, which is the temporal reference point.
+    pub end_time: DateTime<Utc>,
+    /// The alignment period, the interval on which values are produced.
+    pub period: Duration,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::Filter;
+    use super::Ident;
+    use super::Query;
+    use crate::oxql::ast::cmp::Comparison;
+    use crate::oxql::ast::literal::Literal;
+    use crate::oxql::ast::logical_op::LogicalOp;
+    use crate::oxql::ast::table_ops::filter::CompoundFilter;
+    use crate::oxql::ast::table_ops::filter::FilterExpr;
+    use crate::oxql::ast::table_ops::filter::SimpleFilter;
+    use crate::oxql::ast::table_ops::join::Join;
+    use crate::oxql::ast::table_ops::BasicTableOp;
+    use crate::oxql::ast::table_ops::TableOp;
+    use crate::oxql::ast::SplitQuery;
+    use crate::oxql::query::restrict_filter_idents;
+    use chrono::NaiveDateTime;
+    use chrono::Utc;
+    use std::time::Duration;
+
+    #[test]
+    fn test_restrict_filter_idents_single_atom() {
+        let ident = Ident("foo".into());
+        let filter = Filter {
+            negated: false,
+            expr: FilterExpr::Simple(SimpleFilter {
+                ident: ident.clone(),
+                cmp: Comparison::Eq,
+                value: Literal::Boolean(false),
+            }),
+        };
+        assert_eq!(
+            restrict_filter_idents(&filter, &[ident.clone()]).unwrap(),
+            filter
+        );
+        assert_eq!(restrict_filter_idents(&filter, &[]), None);
+    }
+
+    #[test]
+    fn test_restrict_filter_idents_single_atom_with_timestamp() {
+        let filter = Filter {
+            negated: false,
+            expr: FilterExpr::Simple(SimpleFilter {
+                ident: Ident("timestamp".into()),
+                cmp: Comparison::Eq,
+                value: Literal::Boolean(false),
+            }),
+        };
+        assert_eq!(restrict_filter_idents(&filter, &[]).unwrap(), filter);
+    }
+
+    #[test]
+    fn test_restrict_filter_idents_expr() {
+        let idents = [Ident("foo".into()), Ident("bar".into())];
+        let left = Filter {
+            negated: false,
+            expr: FilterExpr::Simple(SimpleFilter {
+                ident: idents[0].clone(),
+                cmp: Comparison::Eq,
+                value: Literal::Boolean(false),
+            }),
+        };
+        let right = Filter {
+            negated: false,
+            expr: FilterExpr::Simple(SimpleFilter {
+                ident: idents[1].clone(),
+                cmp: Comparison::Eq,
+                value: Literal::Boolean(false),
+            }),
+        };
+        let filter = Filter {
+            negated:
false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(left.clone()), + op: LogicalOp::And, + right: Box::new(right.clone()), + }), + }; + assert_eq!(restrict_filter_idents(&filter, &idents).unwrap(), filter); + + // This should remove the right filter. + assert_eq!( + restrict_filter_idents(&filter, &idents[..1]).unwrap(), + left + ); + + // And both + assert_eq!(restrict_filter_idents(&filter, &[]), None); + } + + #[test] + fn test_split_query() { + let q = Query::new("get a:b").unwrap(); + let split = q.split(); + assert_eq!(split, SplitQuery::Flat(q)); + + let q = Query::new("get a:b | filter x == 0").unwrap(); + let split = q.split(); + assert_eq!(split, SplitQuery::Flat(q)); + + let q = Query::new("{ get a:b } | join").unwrap(); + let split = q.split(); + let mut inner = Query::new("get a:b").unwrap(); + inner.end_time = q.end_time; + assert_eq!( + split, + SplitQuery::Nested { + subqueries: vec![inner], + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + } + ); + + let q = Query::new("{ get a:b | filter x == 0 } | join").unwrap(); + let split = q.split(); + let mut inner = Query::new("get a:b | filter x == 0").unwrap(); + inner.end_time = q.end_time; + assert_eq!( + split, + SplitQuery::Nested { + subqueries: vec![inner], + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + } + ); + + let q = Query::new("{ get a:b ; get a:b } | join").unwrap(); + let split = q.split(); + let mut inner = Query::new("get a:b").unwrap(); + inner.end_time = q.end_time; + assert_eq!( + split, + SplitQuery::Nested { + subqueries: vec![inner; 2], + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + } + ); + + let q = Query::new("{ { get a:b ; get a:b } | join } | join").unwrap(); + let split = q.split(); + let mut subqueries = + vec![Query::new("{ get a:b; get a:b } | join").unwrap()]; + subqueries[0].end_time = q.end_time; + let expected = SplitQuery::Nested { + subqueries: subqueries.clone(), + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + }; + assert_eq!(split, expected); + let split = subqueries[0].split(); + let mut inner = Query::new("get a:b").unwrap(); + inner.end_time = q.end_time; + assert_eq!( + split, + SplitQuery::Nested { + subqueries: vec![inner; 2], + transformations: vec![TableOp::Basic(BasicTableOp::Join(Join))], + } + ); + } + + #[test] + fn test_coalesce_predicates() { + // Passed through group-by unchanged. + let q = Query::new("get a:b | group_by [a] | filter a == 0").unwrap(); + let preds = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + assert_eq!(q.coalesced_predicates(None), Some(preds)); + + // Merge the first two, then pass through group by. + let q = Query::new( + "get a:b | group_by [a] | filter a == 0 | filter a == 0", + ) + .unwrap(); + let atom = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + let preds = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(atom.clone()), + op: LogicalOp::And, + right: Box::new(atom.clone()), + }), + }; + assert_eq!(q.coalesced_predicates(None), Some(preds)); + + // These are also merged, even though they're on different sides of the + // group by. 
+ let q = Query::new( + "get a:b | filter a == 0 | group_by [a] | filter a == 0", + ) + .unwrap(); + let atom = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + let preds = Filter { + negated: false, + expr: FilterExpr::Compound(CompoundFilter { + left: Box::new(atom.clone()), + op: LogicalOp::And, + right: Box::new(atom.clone()), + }), + }; + assert_eq!(q.coalesced_predicates(None), Some(preds)); + + // Second filter is _not_ passed through, because it refers to columns + // not in the group by. We have only the first filter. + let q = Query::new( + "get a:b | filter a == 0 | group_by [a] | filter b == 0", + ) + .unwrap(); + let preds = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("a".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + assert_eq!(q.coalesced_predicates(None), Some(preds)); + } + + #[test] + fn test_coalesce_predicates_into_subqueries() { + let q = "{ get a:b; get a:b } | join | filter foo == 'bar'"; + let query = Query::new(q).unwrap(); + let preds = query.coalesced_predicates(None).unwrap(); + let expected_predicate = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("foo".to_string()), + cmp: Comparison::Eq, + value: Literal::String("bar".into()), + }), + }; + assert_eq!(preds, expected_predicate); + + // Split the query, which should give us a list of two subqueries, + // followed by the join and filter. + let SplitQuery::Nested { subqueries, .. } = query.split() else { + panic!(); + }; + for subq in subqueries.iter() { + let inner = subq + .coalesced_predicates(Some(expected_predicate.clone())) + .unwrap(); + assert_eq!( + inner, expected_predicate, + "Predicates passed into an inner subquery should be preserved" + ); + } + } + + #[test] + fn test_coalesce_predicates_into_subqueries_with_group_by() { + let q = "{ get a:b | group_by [baz]; get a:b | group_by [foo] } | \ + join | filter foo == 'bar'"; + let query = Query::new(q).unwrap(); + let preds = query.coalesced_predicates(None).unwrap(); + let expected_predicate = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("foo".to_string()), + cmp: Comparison::Eq, + value: Literal::String("bar".into()), + }), + }; + assert_eq!(preds, expected_predicate); + + // Split the query, which should give us a list of two subqueries, + // followed by the join and filter. + let SplitQuery::Nested { subqueries, .. } = query.split() else { + panic!(); + }; + + // The first subquery groups by a field "baz", which isn't in the outer + // filter. It should have that outer predicate removed, and have no + // predicates at all. + let subq = &subqueries[0]; + assert!( + subq.coalesced_predicates(Some(expected_predicate.clone())) + .is_none(), + "Should not push an outer predicate into a subquery, when that \ + subquery includes a group_by that does not name a field in the \ + outer predicate" + ); + + // The second subquery should include the expected predicate, since the + // group_by includes the field named in the filter itself. 
+ let subq = &subqueries[1]; + let inner = subq + .coalesced_predicates(Some(expected_predicate.clone())) + .unwrap(); + assert_eq!( + inner, expected_predicate, + "Predicates passed into an inner subquery should be preserved, \ + when that inner subquery includes a group_by that names the \ + ident in the outer filter" + ); + } + + #[test] + fn test_coalesce_predicates_merged_into_subqueries() { + let q = "{ get a:b | filter baz == 0; get a:b | filter baz == 0 } \ + | join | filter foo == 'bar'"; + let query = Query::new(q).unwrap(); + let preds = query.coalesced_predicates(None).unwrap(); + let expected_predicate = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("foo".to_string()), + cmp: Comparison::Eq, + value: Literal::String("bar".into()), + }), + }; + assert_eq!(preds, expected_predicate); + let expected_inner_predicate = Filter { + negated: false, + expr: FilterExpr::Simple(SimpleFilter { + ident: Ident("baz".to_string()), + cmp: Comparison::Eq, + value: Literal::Integer(0), + }), + }; + + // Split the query, which should give us a list of two subqueries, + // followed by the join and filter. + let SplitQuery::Nested { subqueries, .. } = query.split() else { + panic!(); + }; + for subq in subqueries.iter() { + let inner = subq + .coalesced_predicates(Some(expected_predicate.clone())) + .unwrap(); + assert_eq!( + inner, + expected_predicate.merge(&expected_inner_predicate, LogicalOp::And), + "Predicates passed into an inner subquery should be preserved, \ + and merged with any subquery predicates", + ); + } + } + + #[test] + fn test_query_end_time() { + const MAX_DIFF: i64 = 1_000; + let q = Query::new("get a:b").unwrap(); + assert!( + (q.end_time - Utc::now()).num_nanoseconds().unwrap() < MAX_DIFF, + "Query which does not explicitly name an end time should \ + use now as the end time", + ); + + let q = Query::new("get a:b | filter timestamp > @now() - 1s").unwrap(); + assert!( + (q.end_time - Utc::now()).num_nanoseconds().unwrap() < MAX_DIFF, + "Query which does not explicitly name an end time should \ + use now as the end time", + ); + + let then = Utc::now() - Duration::from_secs(60); + let as_str = then.format("%Y-%m-%dT%H:%M:%S.%f"); + let q = Query::new(&format!("get a:b | filter timestamp < @{as_str}")) + .unwrap(); + assert_eq!( + q.end_time, then, + "Query with a less-than filter and a timestamp should \ + set the query end time" + ); + + let q = Query::new(&format!("get a:b | filter timestamp <= @{as_str}")) + .unwrap(); + assert_eq!( + q.end_time, then, + "Query with a less-than-or-equal filter and a timestamp should \ + set the query end time" + ); + + let q = Query::new(&format!("get a:b | filter timestamp > @{as_str}")) + .unwrap(); + assert!( + (q.end_time - Utc::now()).num_nanoseconds().unwrap() < MAX_DIFF, + "Query with a greater-than timestamp filter should not set an \ + explicit query end time, and so use now" + ); + + let q = Query::new("get a:b | filter timestamp > @now() - 1d").unwrap(); + assert!( + (q.end_time - Utc::now()).num_nanoseconds().unwrap() < MAX_DIFF, + "Query which does not explicitly name an end time should \ + use now as the end time", + ); + + let q = Query::new(&format!( + "get a:b | filter timestamp > @now() - 1d && timestamp < @{as_str}" + )) + .unwrap(); + assert_eq!( + q.end_time, + then, + "Query with a compound less-than-or-equal filter and a timestamp should \ + set the query end time" + ); + + let then = Utc::now() - Duration::from_secs(60); + let then_as_str = then.format("%Y-%m-%dT%H:%M:%S.%f"); + 
let even_earlier = then - Duration::from_secs(10); + let even_earlier_as_str = even_earlier.format("%Y-%m-%dT%H:%M:%S.%f"); + let q = Query::new(&format!( + "get a:b | filter timestamp < @{then_as_str} || timestamp < @{even_earlier_as_str}" + )) + .unwrap(); + assert_eq!( + q.end_time, + then, + "Query with two less-than timestamp filters should use the later timestamp" + ); + + let expected = NaiveDateTime::parse_from_str( + "2024-03-13T06:24:00", + "%Y-%m-%dT%H:%M:%S%.f", + ) + .unwrap() + .and_utc(); + let q = "{ \ + get physical_data_link:bytes_sent ; \ + get physical_data_link:bytes_received \ + } | filter timestamp > @2024-03-13T06:20:00 && timestamp < @2024-03-13T06:24:00"; + let query = Query::new(q).unwrap(); + assert_eq!(query.end_time, expected); + } + + #[test] + fn test_query_end_time_across_subqueries() { + let now = Utc::now(); + const FMT: &str = "%Y-%m-%dT%H:%M:%S.%f"; + let first = now - Duration::from_secs(1); + let second = now - Duration::from_secs_f64(1e-3); + let q = format!( + "{{ \ + get a:b | filter timestamp > @{}; \ + get a:b | filter timestamp > @{} \ + }}", + first.format(FMT), + second.format(FMT), + ); + let query = Query::new(q).unwrap(); + assert!( + query.end_time > second, + "This nested query should have used Utc::now() as the end time" + ); + let end_time = query.end_time; + let SplitQuery::Nested { subqueries, .. } = query.split() else { + unreachable!(); + }; + for subq in subqueries.iter() { + assert_eq!( + subq.end_time, end_time, + "All subqueries should have the same end time." + ); + } + } +} diff --git a/oximeter/db/src/oxql/table.rs b/oximeter/db/src/oxql/table.rs new file mode 100644 index 0000000000..025935090b --- /dev/null +++ b/oximeter/db/src/oxql/table.rs @@ -0,0 +1,293 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Definitions of timeseries and groups of them, a [`Table`]. + +// Copyright 2024 Oxide Computer Company + +use super::point::DataType; +use super::point::MetricType; +use super::point::Points; +use super::query::Alignment; +use super::Error; +use crate::TimeseriesKey; +use highway::HighwayHasher; +use oximeter::FieldValue; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::hash::Hash; +use std::hash::Hasher; + +/// A timeseries contains a timestamped set of values from one source. +/// +/// This includes the typed key-value pairs that uniquely identify it, and the +/// set of timestamps and data values from it. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct Timeseries { + pub fields: BTreeMap, + pub points: Points, + #[serde(skip)] + pub(crate) alignment: Option, +} + +impl Timeseries { + /// Construct a new timeseries, from its fields. + /// + /// It holds no points or type information. That will be enforced by the + /// points type as they are added. + pub fn new( + fields: impl Iterator, + data_type: DataType, + metric_type: MetricType, + ) -> Result { + let fields: BTreeMap<_, _> = fields.collect(); + anyhow::ensure!(!fields.is_empty(), "Fields cannot be empty"); + Ok(Self { + fields, + points: Points::empty(data_type, metric_type), + alignment: None, + }) + } + + pub fn key(&self) -> TimeseriesKey { + // NOTE: The key here is _not_ stable, like the one used in the database + // itself to identify timeseries. 
That's OK, however, because we do not + // serialize this value anywhere -- it's used entirely for the lifetime + // of one query, and then thrown away, and only needs to be consistent + // for that long. + let mut hasher = HighwayHasher::default(); + for (name, value) in self.fields.iter() { + name.hash(&mut hasher); + value.hash(&mut hasher); + } + hasher.finish() + } + + /// Return a copy of the timeseries, keeping only the provided fields. + /// + /// An error is returned if the timeseries does not contain those fields. + pub(crate) fn copy_with_fields( + &self, + kept_fields: &[&str], + ) -> Result { + let mut fields = BTreeMap::new(); + for field in kept_fields { + let Some(f) = self.fields.get(*field) else { + anyhow::bail!("Timeseries does not contain field '{}'", field); + }; + fields.insert(field.to_string(), f.clone()); + } + Ok(Self { + fields, + points: self.points.clone(), + alignment: self.alignment, + }) + } + + // Return `true` if the schema in `other` matches that of `self`. + fn matches_schema(&self, other: &Timeseries) -> bool { + if self.fields.len() != other.fields.len() { + return false; + } + for (f0, f1) in self.fields.iter().zip(other.fields.iter()) { + // Check the field names. + if f0.0 != f1.0 { + return false; + } + // And types. + if f0.1.field_type() != f1.1.field_type() { + return false; + } + } + + // And the type info is the same as well. + if !self + .points + .data_types() + .zip(other.points.data_types()) + .all(|(x, y)| x == y) + { + return false; + } + self.points + .metric_types() + .zip(other.points.metric_types()) + .all(|(x, y)| x == y) + } + + /// Return a new timeseries, with the points cast to the provided list of + /// data types. + /// + /// This returns an error if the points cannot be so cast, or the + /// dimensionality of the types requested differs from the dimensionality of + /// the points themselves. + pub(crate) fn cast(&self, types: &[DataType]) -> Result { + let fields = self.fields.clone(); + Ok(Self { + fields, + points: self.points.cast(types)?, + alignment: self.alignment, + }) + } +} + +/// A table represents one or more timeseries with the same schema. +/// +/// A table is the result of an OxQL query. It contains a name, usually the name +/// of the timeseries schema from which the data is derived, and any number of +/// timeseries, which contain the actual data. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct Table { + // The name of the table. + // + // This starts as the name of the timeseries schema the data is derived + // from, but can be modified as operations are done. + pub(super) name: String, + // The set of timeseries in the table, ordered by key. + timeseries: BTreeMap, +} + +impl Table { + /// Create a new table, with no timeseries. + pub fn new(name: impl AsRef) -> Self { + Self { name: name.as_ref().to_string(), timeseries: BTreeMap::new() } + } + + /// Create a table from a set of timeseries. + pub fn from_timeseries( + name: impl AsRef, + t: impl Iterator, + ) -> Result { + let mut out = Self::new(name); + for each in t { + out.insert(each)?; + } + Ok(out) + } + + /// Return the name of the table. + pub fn name(&self) -> &str { + self.name.as_str() + } + + /// Return the number of timeseries in this table. + pub fn n_timeseries(&self) -> usize { + self.timeseries.len() + } + + /// Return the list of timeseries in this table, ordered by key. + pub fn timeseries(&self) -> impl ExactSizeIterator { + self.timeseries.values() + } + + // Check that the schema of `other` matches `self`. 
+ // + // That means the fields have the same names and types, and the timeseries + // have the same type info. + fn matches_schema(&self, other: &Timeseries) -> bool { + if let Some((_, first)) = self.timeseries.first_key_value() { + first.matches_schema(other) + } else { + // Table is empty. + true + } + } + + /// Get a timeseries matching the provided key, if any. + pub fn get_mut(&mut self, key: TimeseriesKey) -> Option<&mut Timeseries> { + self.timeseries.get_mut(&key) + } + + /// Insert a new timeseries into the table. + /// + /// If the timeseries already exists, an error is returned. Use + /// [`Table::replace()`] to replace an existing timeseries. + /// + /// It is an error if the timeseries does not have the same schema as the + /// others in the table (if any). + pub fn insert(&mut self, timeseries: Timeseries) -> Result<(), Error> { + anyhow::ensure!( + self.matches_schema(×eries), + "Timeseries in a table must have the same schema", + ); + let key = timeseries.key(); + let Entry::Vacant(e) = self.timeseries.entry(key) else { + return Err(anyhow::anyhow!( + "Timeseries with key {} already exists", + key, + )); + }; + e.insert(timeseries); + Ok(()) + } + + /// Replace a timeseries in the table. + pub fn replace(&mut self, timeseries: Timeseries) { + let key = timeseries.key(); + let _ = self.timeseries.insert(key, timeseries); + } + + /// Add multiple timeseries to the table. + /// + /// An error is returned if any timeseries already exist. + pub fn extend( + &mut self, + timeseries: impl Iterator, + ) -> Result<(), Error> { + for t in timeseries { + self.insert(t)?; + } + Ok(()) + } + + /// Return the number of timeseries in the table. + pub fn len(&self) -> usize { + self.timeseries.len() + } + + /// Return a mutable iterator over timeseries in the table. + pub fn iter_mut(&mut self) -> impl Iterator { + self.timeseries.values_mut() + } + + /// Return an iterator over timeseries in the table. + pub fn iter(&self) -> impl Iterator { + self.timeseries.values() + } + + /// Consume the table and return an iterator over its timeseries. + pub fn into_iter(self) -> impl Iterator { + self.timeseries.into_values() + } + + /// Return `true` if all the timeseries in this table are aligned, with the + /// same alignment information. + /// + /// If there are no timeseries, `false` is returned. + pub fn is_aligned(&self) -> bool { + let mut timeseries = self.timeseries.values(); + let Some(t) = timeseries.next() else { + return false; + }; + let Some(alignment) = t.alignment else { + return false; + }; + timeseries.all(|t| t.alignment == Some(alignment)) + } + + /// Return the alignment of this table, if all timeseries are aligned with + /// the same alignment. + pub fn alignment(&self) -> Option { + if self.is_aligned() { + Some( + self.timeseries.first_key_value().unwrap().1.alignment.unwrap(), + ) + } else { + None + } + } +} diff --git a/oximeter/db/src/query.rs b/oximeter/db/src/query.rs index 9212769573..e14dfbbc55 100644 --- a/oximeter/db/src/query.rs +++ b/oximeter/db/src/query.rs @@ -576,33 +576,32 @@ impl SelectQuery { match self.field_selectors.len() { 0 => None, n => { - // Select timeseries key for first column, plus field name and field value for - // all columns. - const SELECTED_COLUMNS: &[&str] = - &["field_name", "field_value"]; + // Select timeseries key for first column, plus the field value + // for all columns, aliased to the field name. 
const JOIN_COLUMNS: &[&str] = &["timeseries_name", "timeseries_key"]; - let mut top_level_columns = - Vec::with_capacity(1 + SELECTED_COLUMNS.len() * n); + let mut top_level_columns = Vec::with_capacity(2 + n); top_level_columns.push(String::from( "filter0.timeseries_key as timeseries_key", )); let mut from_statements = String::new(); - for (i, subquery) in self + for (i, (field_name, subquery)) in self .field_selectors - .values() - .map(|sel| { - sel.as_query(&self.timeseries_schema.timeseries_name) + .iter() + .map(|(field_schema, selector)| { + ( + &field_schema.name, + selector.as_query( + &self.timeseries_schema.timeseries_name, + ), + ) }) .enumerate() { - for column in SELECTED_COLUMNS { - top_level_columns.push(format!( - "filter{i}.{column}", - i = i, - column = column - )); - } + top_level_columns.push(format!( + "filter{}.field_value AS {}", + i, field_name, + )); if i == 0 { from_statements.push_str(&format!( @@ -1028,8 +1027,8 @@ mod tests { concat!( "SELECT ", "filter0.timeseries_key as timeseries_key, ", - "filter0.field_name, filter0.field_value, ", - "filter1.field_name, filter1.field_value ", + "filter0.field_value AS f0, ", + "filter1.field_value AS f1 ", "FROM (", "SELECT * FROM oximeter.fields_i64 ", "WHERE timeseries_name = 'foo:bar' ", @@ -1095,8 +1094,8 @@ mod tests { concat!( "SELECT ", "filter0.timeseries_key as timeseries_key, ", - "filter0.field_name, filter0.field_value, ", - "filter1.field_name, filter1.field_value ", + "filter0.field_value AS f0, ", + "filter1.field_value AS f1 ", "FROM (", "SELECT * FROM oximeter.fields_i64 ", "WHERE timeseries_name = 'foo:bar' AND field_name = 'f0' AND field_value = 0", @@ -1152,8 +1151,8 @@ mod tests { query.field_query().unwrap(), concat!( "SELECT filter0.timeseries_key as timeseries_key, ", - "filter0.field_name, filter0.field_value, ", - "filter1.field_name, filter1.field_value ", + "filter0.field_value AS f0, ", + "filter1.field_value AS f1 ", "FROM (", "SELECT * FROM oximeter.fields_i64 ", "WHERE timeseries_name = 'foo:bar' AND field_name = 'f0' AND field_value = 0", diff --git a/oximeter/db/src/sql/mod.rs b/oximeter/db/src/sql/mod.rs index 5d9685d19f..8a5bd20bde 100644 --- a/oximeter/db/src/sql/mod.rs +++ b/oximeter/db/src/sql/mod.rs @@ -32,6 +32,7 @@ use crate::query::measurement_table_name; use crate::DatumType; use crate::Error as OxdbError; use crate::FieldType; +use crate::QuerySummary; use crate::TimeseriesName; use crate::TimeseriesSchema; use indexmap::IndexSet; @@ -131,6 +132,31 @@ macro_rules! unsupported { }; } +/// A tabular result from a SQL query against a timeseries. +#[derive(Clone, Debug, Default, serde::Serialize)] +pub struct Table { + /// The name of each column in the result set. + pub column_names: Vec, + /// The rows of the result set, one per column. + pub rows: Vec>, +} + +/// The full result of running a SQL query against a timeseries. +#[derive(Clone, Debug)] +pub struct QueryResult { + /// The query as written by the client. + pub original_query: String, + /// The rewritten query, run against the JOINed representation of the + /// timeseries. + /// + /// This is the query that is actually run in the database itself. + pub rewritten_query: String, + /// Summary of the resource usage of the query. + pub summary: QuerySummary, + /// The result of the query, with column names and rows. + pub table: Table, +} + /// A helper type to preprocess any ClickHouse-specific SQL, and present a /// known-safe version of it to the main `sqlparser` code. 
/// diff --git a/oximeter/oximeter/src/types.rs b/oximeter/oximeter/src/types.rs index eff5c399e3..04289a7297 100644 --- a/oximeter/oximeter/src/types.rs +++ b/oximeter/oximeter/src/types.rs @@ -311,7 +311,7 @@ pub enum DatumType { impl DatumType { /// Return `true` if this datum type is cumulative, and `false` otherwise. - pub fn is_cumulative(&self) -> bool { + pub const fn is_cumulative(&self) -> bool { matches!( self, DatumType::CumulativeI64 @@ -331,9 +331,26 @@ impl DatumType { ) } + /// Return `true` if this datum type is a scalar, and `false` otherwise. + pub const fn is_scalar(&self) -> bool { + !self.is_histogram() + } + /// Return `true` if this datum type is a histogram, and `false` otherwise. pub const fn is_histogram(&self) -> bool { - matches!(self, DatumType::HistogramF64 | DatumType::HistogramI64) + matches!( + self, + DatumType::HistogramI8 + | DatumType::HistogramU8 + | DatumType::HistogramI16 + | DatumType::HistogramU16 + | DatumType::HistogramI32 + | DatumType::HistogramU32 + | DatumType::HistogramI64 + | DatumType::HistogramU64 + | DatumType::HistogramF32 + | DatumType::HistogramF64 + ) } } @@ -450,6 +467,11 @@ impl Datum { Datum::Missing(ref inner) => inner.start_time(), } } + + /// Return true if this datum is missing. + pub fn is_missing(&self) -> bool { + matches!(self, Datum::Missing(_)) + } } // Helper macro to generate `From` and `From<&T>` for the datum types. @@ -580,7 +602,7 @@ impl Measurement { /// Return true if this measurement represents a missing datum. pub fn is_missing(&self) -> bool { - matches!(self.datum, Datum::Missing(_)) + self.datum.is_missing() } /// Return the datum for this measurement diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a2d853ac23..659b10c721 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -74,6 +74,7 @@ num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.44", default-features = false, features = ["i128"] } num-traits = { version = "0.2.18", features = ["i128", "libm"] } openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_serializing_defaults"] } +peg-runtime = { version = "0.8.2", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } @@ -179,6 +180,7 @@ num-integer = { version = "0.1.46", features = ["i128"] } num-iter = { version = "0.1.44", default-features = false, features = ["i128"] } num-traits = { version = "0.2.18", features = ["i128", "libm"] } openapiv3 = { version = "2.0.0", default-features = false, features = ["skip_serializing_defaults"] } +peg-runtime = { version = "0.8.2", default-features = false, features = ["std"] } pem-rfc7468 = { version = "0.7.0", default-features = false, features = ["std"] } petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } From e961b0b9bad337750d08125bae74fccacba9fcd5 Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Sat, 30 Mar 2024 15:37:51 -0700 Subject: [PATCH 019/334] Update dendrite to 8646e58 (#5362) Updated dendrite to commit 8646e58. 
Co-authored-by: reflector[bot] <130185838+reflector[bot]@users.noreply.github.com> --- package-manifest.toml | 12 ++++++------ tools/dendrite_openapi_version | 2 +- tools/dendrite_stub_checksums | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index 2d9d272525..806156b7ed 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -595,8 +595,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "eeb194461a4b863dae25a933541b1a4fb8efe14d" -source.sha256 = "79c32441d7d5328a06e92e7d9c410805e9d8be9d78d59ce1ed6c3e0fba93198d" +source.commit = "8646e588a564ebf74da6fd0e854bcfe74be43690" +source.sha256 = "fda1842bc4c45af98771fa8cfb8c5cc54ca0759c754d7e5f41504390f65e43f3" output.type = "zone" output.intermediate_only = true @@ -620,8 +620,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "eeb194461a4b863dae25a933541b1a4fb8efe14d" -source.sha256 = "4e77c3aea01be77be440bf30a7f6d48fbeb97b3ecbc72c431eeeca217356d487" +source.commit = "8646e588a564ebf74da6fd0e854bcfe74be43690" +source.sha256 = "809c4052400ea385b4c98ff1fbf54c34f184f12b098444663e3d386b4cb0dc6c" output.type = "zone" output.intermediate_only = true @@ -638,8 +638,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "eeb194461a4b863dae25a933541b1a4fb8efe14d" -source.sha256 = "89c9212991656d8aee799c30a6bb63105a6a45e45b396f6dd56d43cf4c294e11" +source.commit = "8646e588a564ebf74da6fd0e854bcfe74be43690" +source.sha256 = "80bd4d2af6ef764ca4abe87d7f26559c4bf27079b0289fa751f7ca80b2edb385" output.type = "zone" output.intermediate_only = true diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 2d459e6c3a..b7a2bfa4d4 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="eeb194461a4b863dae25a933541b1a4fb8efe14d" +COMMIT="8646e588a564ebf74da6fd0e854bcfe74be43690" SHA2="50eff6d9f986b7b1af5970d11d8d01b812de37269731c6c691a244b3fdae82ae" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index 8bae48c3aa..86f6a774bb 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="79c32441d7d5328a06e92e7d9c410805e9d8be9d78d59ce1ed6c3e0fba93198d" -CIDL_SHA256_LINUX_DPD="c56a5754996bdce4cf4142829a80f050563c5cab8c30a05b9e56b8d85723d0f5" +CIDL_SHA256_ILLUMOS="fda1842bc4c45af98771fa8cfb8c5cc54ca0759c754d7e5f41504390f65e43f3" +CIDL_SHA256_LINUX_DPD="cb84fb7b2ba9cedaee5a09d400c88315ef30f7826610c1acb1ad6f07fa672b0a" CIDL_SHA256_LINUX_SWADM="54042fb53e304bfade94ea7ca1b41c62c86bf48c32ca355b2c09dd6067ccb53b" From f2ea80014b390575be74a8b4e2b1d0d714feee0b Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 1 Apr 2024 10:19:46 -0400 Subject: [PATCH 020/334] [Reconfigurator] Introduce `PlanningInput` that includes external networking information from CRDB (#5344) The primary change in this PR is that the blueprint planner now wants a `PlanningInput` (which contains a `Policy`) instead of a `Policy`. The bulk of the diff is adding new `DataStore` methods (and tests for them) that fetch all currently-allocated external IPs and NICs for services. 
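For reference, the new "fetch everything" datastore methods (`external_ip_list_service_all_batched` and `service_network_interfaces_all_list_batched`) share the same batched-pagination shape. Below is a condensed sketch of that pattern, taken from the external-IP variant in the diff that follows, with the authz and `check_complex_operations_allowed` guards and error mapping elided:

```rust
// Page through the table with `Paginator`, accumulating each batch
// until the paginator reports there are no more pages to fetch.
let mut all_ips = Vec::new();
let mut paginator = Paginator::new(SQL_BATCH_SIZE);
while let Some(p) = paginator.next() {
    let batch = self
        .external_ip_list_service_all(opctx, &p.current_pagparams())
        .await?;
    paginator = p.found_batch(&batch, &|ip: &ExternalIp| ip.id);
    all_ips.extend(batch);
}
Ok(all_ips)
```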
Some incidental changes that came along for the ride that I hope are not controversial, but could be backed out if they are: * `nexus_db_model::NetworkInterface::slot` is now a `SqlU8` instead of an `i16`. I didn't have to change the queries here, so I think they're still converting this to an `i16`, which is probably okay? I could make a pass on them if needed though. * I added an `omicron_uuid_kinds::ServiceKind` and started using it in this PR. I did not attempt to make a pass through all service UUIDs to start using this; I think this can be done incrementally? Other notes: * I'm not sure about the name `PlanningInput`. It feels vague; isn't every argument to the planner a kind of "planning input"? But I'm not sure what else to call "`Policy` plus extra CRDB state". * This does not change execution at all. It's possible when I get to that there will need to be some changes here, but I think this is probably close enough that it can be reviewed, and any changes will be small and can be rolled into the execution work. --- Cargo.lock | 4 + dev-tools/omdb/src/bin/omdb/db.rs | 4 +- dev-tools/reconfigurator-cli/Cargo.toml | 1 + dev-tools/reconfigurator-cli/src/main.rs | 67 +++++- nexus/db-model/src/external_ip.rs | 6 + nexus/db-model/src/network_interface.rs | 25 ++- .../db-queries/src/db/datastore/deployment.rs | 55 +++-- .../src/db/datastore/external_ip.rs | 175 ++++++++++++++- .../src/db/datastore/network_interface.rs | 205 ++++++++++++++++++ .../db-queries/src/db/queries/external_ip.rs | 46 ++-- .../src/db/queries/network_interface.rs | 7 +- nexus/reconfigurator/execution/src/dns.rs | 14 +- .../execution/src/resource_allocation.rs | 32 ++- nexus/reconfigurator/planning/Cargo.toml | 1 + .../planning/src/blueprint_builder.rs | 71 +++--- nexus/reconfigurator/planning/src/example.rs | 54 ++++- nexus/reconfigurator/planning/src/planner.rs | 87 ++++---- nexus/src/app/deployment.rs | 58 ++++- nexus/types/Cargo.toml | 2 + nexus/types/src/deployment.rs | 46 ++++ uuid-kinds/src/lib.rs | 1 + 21 files changed, 793 insertions(+), 168 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4548d0a3d7..3959eef5e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4827,6 +4827,7 @@ dependencies = [ "nexus-types", "omicron-common", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "rand 0.8.5", "sled-agent-client", @@ -4928,6 +4929,8 @@ dependencies = [ "futures", "gateway-client", "humantime", + "ipnetwork", + "newtype-uuid", "omicron-common", "omicron-passwords", "omicron-uuid-kinds", @@ -7482,6 +7485,7 @@ dependencies = [ "omicron-nexus", "omicron-rpaths", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "pq-sys", "reedline", diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 30473fccd4..5d9cb594ca 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2120,7 +2120,7 @@ async fn cmd_db_network_list_vnics( struct NicRow { ip: IpNetwork, mac: MacAddr, - slot: i16, + slot: u8, primary: bool, kind: &'static str, subnet: String, @@ -2241,7 +2241,7 @@ async fn cmd_db_network_list_vnics( let row = NicRow { ip: nic.ip, mac: *nic.mac, - slot: nic.slot, + slot: *nic.slot, primary: nic.primary, kind, subnet, diff --git a/dev-tools/reconfigurator-cli/Cargo.toml b/dev-tools/reconfigurator-cli/Cargo.toml index cae07ec9b6..ad3cdf61f1 100644 --- a/dev-tools/reconfigurator-cli/Cargo.toml +++ b/dev-tools/reconfigurator-cli/Cargo.toml @@ -20,6 +20,7 @@ nexus-reconfigurator-planning.workspace = true 
nexus-reconfigurator-execution.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. pq-sys = "*" reedline.workspace = true diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index cef5c3c63f..08755a4537 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -20,6 +20,9 @@ use nexus_reconfigurator_planning::planner::Planner; use nexus_reconfigurator_planning::system::{ SledBuilder, SledHwInventory, SystemDescription, }; +use nexus_types::deployment::ExternalIp; +use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::ServiceNetworkInterface; use nexus_types::deployment::{Blueprint, UnstableReconfiguratorState}; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::inventory::Collection; @@ -27,9 +30,12 @@ use nexus_types::inventory::OmicronZonesConfig; use nexus_types::inventory::SledRole; use omicron_common::api::external::Generation; use omicron_common::api::external::Name; +use omicron_uuid_kinds::{GenericUuid, OmicronZoneKind, TypedUuid}; use reedline::{Reedline, Signal}; +use std::cell::RefCell; use std::collections::BTreeMap; use std::io::BufRead; +use std::net::IpAddr; use swrite::{swriteln, SWrite}; use tabled::Tabled; use uuid::Uuid; @@ -50,6 +56,14 @@ struct ReconfiguratorSim { /// blueprints created by the user blueprints: IndexMap, + /// external IPs allocated to services + /// + /// In the real system, external IPs have IDs, but those IDs only live in + /// CRDB - they're not part of the zone config sent from Reconfigurator to + /// sled-agent. This mimics the minimal bit of the CRDB `external_ip` table + /// we need. 
+ external_ips: RefCell>, + /// internal DNS configurations internal_dns: BTreeMap, /// external DNS configurations @@ -92,6 +106,49 @@ impl ReconfiguratorSim { let _ = entry.or_insert(blueprint); Ok(()) } + + fn planning_input( + &self, + parent_blueprint: &Blueprint, + ) -> anyhow::Result { + let policy = self.system.to_policy().context("generating policy")?; + let service_external_ips = parent_blueprint + .all_omicron_zones() + .filter_map(|(_, zone)| { + let Ok(Some(ip)) = zone.zone_type.external_ip() else { + return None; + }; + let service_id = + TypedUuid::::from_untyped_uuid(zone.id); + let external_ip = ExternalIp { + id: *self + .external_ips + .borrow_mut() + .entry(ip) + .or_insert_with(Uuid::new_v4), + ip: ip.into(), + }; + Some((service_id, external_ip)) + }) + .collect(); + let service_nics = parent_blueprint + .all_omicron_zones() + .filter_map(|(_, zone)| { + let nic = zone.zone_type.service_vnic()?; + let service_id = + TypedUuid::::from_untyped_uuid(zone.id); + let nic = ServiceNetworkInterface { + id: nic.id, + mac: nic.mac, + ip: nic.ip.into(), + slot: nic.slot, + primary: nic.primary, + }; + Some((service_id, nic)) + }) + .collect(); + Ok(PlanningInput { policy, service_external_ips, service_nics }) + } } /// interactive REPL for exploring the planner @@ -115,6 +172,7 @@ fn main() -> anyhow::Result<()> { system: SystemDescription::new(), collections: IndexMap::new(), blueprints: IndexMap::new(), + external_ips: RefCell::new(IndexMap::new()), internal_dns: BTreeMap::new(), external_dns: BTreeMap::new(), log, @@ -655,9 +713,8 @@ fn cmd_blueprint_plan( .collections .get(&collection_id) .ok_or_else(|| anyhow!("no such collection: {}", collection_id))?; - let policy = sim.system.to_policy().context("generating policy")?; let creator = "reconfigurator-sim"; - + let planning_input = sim.planning_input(parent_blueprint)?; let planner = Planner::new_based_on( sim.log.clone(), parent_blueprint, @@ -688,7 +745,7 @@ fn cmd_blueprint_plan( // matter, either. We'll just pick the parent blueprint's. parent_blueprint.internal_dns_version, parent_blueprint.external_dns_version, - &policy, + &planning_input, creator, collection, ) @@ -709,13 +766,13 @@ fn cmd_blueprint_edit( let blueprint_id = args.blueprint_id; let blueprint = sim.blueprint_lookup(blueprint_id)?; let creator = args.creator.as_deref().unwrap_or("reconfigurator-cli"); - let policy = sim.system.to_policy().context("assembling policy")?; + let planning_input = sim.planning_input(blueprint)?; let mut builder = BlueprintBuilder::new_based_on( &sim.log, &blueprint, blueprint.internal_dns_version, blueprint.external_dns_version, - &policy, + &planning_input, creator, ) .context("creating blueprint builder")?; diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index 337e7ef2a7..f290fdcd0f 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -130,6 +130,12 @@ pub struct ExternalIp { pub is_probe: bool, } +impl From for nexus_types::deployment::ExternalIp { + fn from(ext_ip: ExternalIp) -> Self { + Self { id: ext_ip.id, ip: ext_ip.ip } + } +} + /// A view type constructed from `ExternalIp` used to represent Floating IP /// objects in user-facing APIs. 
/// diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index fdcfcbf588..a632772043 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -8,6 +8,7 @@ use crate::schema::instance_network_interface; use crate::schema::network_interface; use crate::schema::service_network_interface; use crate::Name; +use crate::SqlU8; use chrono::DateTime; use chrono::Utc; use db_macros::Resource; @@ -59,7 +60,7 @@ pub struct NetworkInterface { // If neither is specified, auto-assign one of each? pub ip: ipnetwork::IpNetwork, - pub slot: i16, + pub slot: SqlU8, #[diesel(column_name = is_primary)] pub primary: bool, } @@ -91,10 +92,10 @@ impl NetworkInterface { name: self.name().clone(), ip: self.ip.ip(), mac: self.mac.into(), - subnet: subnet, + subnet, vni: external::Vni::try_from(0).unwrap(), primary: self.primary, - slot: self.slot.try_into().unwrap(), + slot: *self.slot, } } } @@ -117,7 +118,7 @@ pub struct InstanceNetworkInterface { pub mac: MacAddr, pub ip: ipnetwork::IpNetwork, - pub slot: i16, + pub slot: SqlU8, #[diesel(column_name = is_primary)] pub primary: bool, } @@ -140,11 +141,25 @@ pub struct ServiceNetworkInterface { pub mac: MacAddr, pub ip: ipnetwork::IpNetwork, - pub slot: i16, + pub slot: SqlU8, #[diesel(column_name = is_primary)] pub primary: bool, } +impl From + for nexus_types::deployment::ServiceNetworkInterface +{ + fn from(nic: ServiceNetworkInterface) -> Self { + Self { + id: nic.id(), + mac: *nic.mac, + ip: nic.ip, + slot: *nic.slot, + primary: nic.primary, + } + } +} + impl NetworkInterface { /// Treat this `NetworkInterface` as an `InstanceNetworkInterface`. /// diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 8f6b9abf58..b04dc9a03d 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1182,6 +1182,7 @@ mod tests { use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::blueprint_builder::Ensure; use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::PlanningInput; use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::external_api::views::SledPolicy; @@ -1196,10 +1197,14 @@ mod tests { use std::mem; use std::net::Ipv6Addr; - static EMPTY_POLICY: Policy = Policy { - sleds: BTreeMap::new(), - service_ip_pool_ranges: Vec::new(), - target_nexus_zone_count: 0, + static EMPTY_PLANNING_INPUT: PlanningInput = PlanningInput { + policy: Policy { + sleds: BTreeMap::new(), + service_ip_pool_ranges: Vec::new(), + target_nexus_zone_count: 0, + }, + service_external_ips: BTreeMap::new(), + service_nics: BTreeMap::new(), }; // This is a not-super-future-maintainer-friendly helper to check that all @@ -1283,7 +1288,7 @@ mod tests { } } - fn representative() -> (Collection, Policy, Blueprint) { + fn representative() -> (Collection, PlanningInput, Blueprint) { // We'll start with a representative collection... 
let mut collection = nexus_inventory::examples::representative().builder.build(); @@ -1307,16 +1312,21 @@ mod tests { } let policy = policy_from_collection(&collection); + let planning_input = PlanningInput { + policy, + service_external_ips: BTreeMap::new(), + service_nics: BTreeMap::new(), + }; let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), Generation::new(), - &policy, + &planning_input.policy, "test", ) .unwrap(); - (collection, policy, blueprint) + (collection, planning_input, blueprint) } async fn blueprint_list_all_ids( @@ -1346,7 +1356,7 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT.policy, "test", ) .unwrap(); @@ -1402,7 +1412,7 @@ mod tests { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create a cohesive representative collection/policy/blueprint - let (collection, mut policy, blueprint1) = representative(); + let (collection, mut planning_input, blueprint1) = representative(); let authz_blueprint1 = authz_blueprint_from_id(blueprint1.id); // Write it to the database and read it back. @@ -1421,7 +1431,10 @@ mod tests { ); // Check the number of blueprint elements against our collection. - assert_eq!(blueprint1.blueprint_zones.len(), policy.sleds.len()); + assert_eq!( + blueprint1.blueprint_zones.len(), + planning_input.policy.sleds.len() + ); assert_eq!( blueprint1.blueprint_zones.len(), collection.omicron_zones.len() @@ -1463,8 +1476,12 @@ mod tests { // Add a new sled to `policy`. let new_sled_id = Uuid::new_v4(); - policy.sleds.insert(new_sled_id, fake_sled_resources(None)); - let new_sled_zpools = &policy.sleds.get(&new_sled_id).unwrap().zpools; + planning_input + .policy + .sleds + .insert(new_sled_id, fake_sled_resources(None)); + let new_sled_zpools = + &planning_input.policy.sleds.get(&new_sled_id).unwrap().zpools; // Create a builder for a child blueprint. While we're at it, use a // different DNS version to test that that works. @@ -1475,7 +1492,7 @@ mod tests { &blueprint1, new_internal_dns_version, new_external_dns_version, - &policy, + &planning_input, "test", ) .expect("failed to create builder"); @@ -1621,7 +1638,7 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT.policy, "test1", ) .unwrap(); @@ -1630,7 +1647,7 @@ mod tests { &blueprint1, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT, "test2", ) .expect("failed to create builder") @@ -1640,7 +1657,7 @@ mod tests { &blueprint1, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT, "test3", ) .expect("failed to create builder") @@ -1738,7 +1755,7 @@ mod tests { &blueprint3, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT, "test3", ) .expect("failed to create builder") @@ -1778,7 +1795,7 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT.policy, "test1", ) .unwrap(); @@ -1787,7 +1804,7 @@ mod tests { &blueprint1, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT, "test2", ) .expect("failed to create builder") diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 017d2f22d2..cc5ddc50d5 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods on [`ExternalIp`]s. 
use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::authz::ApiResource; use crate::context::OpContext; @@ -24,6 +25,7 @@ use crate::db::model::IncompleteExternalIp; use crate::db::model::IpKind; use crate::db::model::Name; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::queries::external_ip::NextExternalIp; use crate::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE; @@ -41,6 +43,7 @@ use nexus_db_model::IpAttachState; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -206,7 +209,7 @@ impl DataStore { } /// Fetch all external IP addresses of any kind for the provided service. - pub async fn service_lookup_external_ips( + pub async fn external_ip_list_service( &self, opctx: &OpContext, service_id: Uuid, @@ -223,7 +226,7 @@ impl DataStore { } /// Allocates an IP address for internal service usage. - pub async fn allocate_service_ip( + pub async fn external_ip_allocate_service( &self, opctx: &OpContext, ip_id: Uuid, @@ -244,7 +247,7 @@ impl DataStore { } /// Allocates an SNAT IP address for internal service usage. - pub async fn allocate_service_snat_ip( + pub async fn external_ip_allocate_service_snat( &self, opctx: &OpContext, ip_id: Uuid, @@ -384,7 +387,7 @@ impl DataStore { /// /// Unlike the other IP allocation requests, this does not search for an /// available IP address, it asks for one explicitly. - pub async fn allocate_explicit_service_ip( + pub async fn external_ip_allocate_service_explicit( &self, opctx: &OpContext, ip_id: Uuid, @@ -410,7 +413,7 @@ impl DataStore { /// /// Unlike the other IP allocation requests, this does not search for an /// available IP address, it asks for one explicitly. - pub async fn allocate_explicit_service_snat_ip( + pub async fn external_ip_allocate_service_explicit_snat( &self, opctx: &OpContext, ip_id: Uuid, @@ -430,6 +433,50 @@ impl DataStore { self.allocate_external_ip(opctx, data).await } + /// List one page of all external IPs allocated to internal services + pub async fn external_ip_list_service_all( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::external_ip::dsl; + + let (authz_pool, _pool) = self.ip_pools_service_lookup(opctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz_pool).await?; + + paginated(dsl::external_ip, dsl::id, pagparams) + .filter(dsl::is_service) + .filter(dsl::time_deleted.is_null()) + .select(ExternalIp::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// List all external IPs allocated to internal services, making as many + /// queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. 
+ pub async fn external_ip_list_service_all_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; + + let mut all_ips = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .external_ip_list_service_all(opctx, &p.current_pagparams()) + .await?; + paginator = p.found_batch(&batch, &|ip: &ExternalIp| ip.id); + all_ips.extend(batch); + } + Ok(all_ips) + } + /// Attempt to move a target external IP from detached to attaching, /// checking that its parent instance does not have too many addresses /// and is in a valid state. @@ -1163,3 +1210,121 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::test_utils::datastore_test; + use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::shared::IpRange; + use omicron_common::address::NUM_SOURCE_NAT_PORTS; + use omicron_test_utils::dev; + use std::collections::BTreeSet; + use std::net::Ipv4Addr; + + async fn read_all_service_ips( + datastore: &DataStore, + opctx: &OpContext, + ) -> Vec { + let all_batched = datastore + .external_ip_list_service_all_batched(opctx) + .await + .expect("failed to fetch all service IPs batched"); + let all_paginated = datastore + .external_ip_list_service_all(opctx, &DataPageParams::max_page()) + .await + .expect("failed to fetch all service IPs paginated"); + assert_eq!(all_batched, all_paginated); + all_batched + } + + #[tokio::test] + async fn test_service_ip_list() { + usdt::register_probes().unwrap(); + let logctx = dev::test_setup_log("test_service_ip_list"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // No IPs, to start + let ips = read_all_service_ips(&datastore, &opctx).await; + assert_eq!(ips, vec![]); + + // Set up service IP pool range + let ip_range = IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 10), + )) + .unwrap(); + let (service_ip_pool, _) = datastore + .ip_pools_service_lookup(&opctx) + .await + .expect("lookup service ip pool"); + datastore + .ip_pool_add_range(&opctx, &service_ip_pool, &ip_range) + .await + .expect("add range to service ip pool"); + + // Allocate a bunch of fake service IPs. + let mut external_ips = Vec::new(); + let mut allocate_snat = false; // flip-flop between regular and snat + for (i, ip) in ip_range.iter().enumerate() { + let name = format!("service-ip-{i}"); + let external_ip = if allocate_snat { + datastore + .external_ip_allocate_service_explicit_snat( + &opctx, + Uuid::new_v4(), + Uuid::new_v4(), + ip, + (0, NUM_SOURCE_NAT_PORTS - 1), + ) + .await + .expect("failed to allocate service IP") + } else { + datastore + .external_ip_allocate_service_explicit( + &opctx, + Uuid::new_v4(), + &Name(name.parse().unwrap()), + &name, + Uuid::new_v4(), + ip, + ) + .await + .expect("failed to allocate service IP") + }; + external_ips.push(external_ip); + allocate_snat = !allocate_snat; + } + external_ips.sort_by_key(|ip| ip.id); + + // Ensure we see them all. + let ips = read_all_service_ips(&datastore, &opctx).await; + assert_eq!(ips, external_ips); + + // Deallocate a few, and ensure we don't see them anymore. 
+ let mut removed_ip_ids = BTreeSet::new(); + for (i, external_ip) in external_ips.iter().enumerate() { + if i % 3 == 0 { + let id = external_ip.id; + datastore + .deallocate_external_ip(&opctx, id) + .await + .expect("failed to deallocate IP"); + removed_ip_ids.insert(id); + } + } + + // Check that we removed at least one, then prune them from our list of + // expected IPs. + assert!(!removed_ip_ids.is_empty()); + external_ips.retain(|ip| !removed_ip_ids.contains(&ip.id)); + + // Ensure we see them all remaining IPs. + let ips = read_all_service_ips(&datastore, &opctx).await; + assert_eq!(ips, external_ips); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 1bccca4e97..795c973407 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods on [`NetworkInterface`]s. use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; @@ -22,6 +23,7 @@ use crate::db::model::NetworkInterfaceKind; use crate::db::model::NetworkInterfaceUpdate; use crate::db::model::VpcSubnet; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::queries::network_interface; use crate::transaction_retry::OptionalError; @@ -30,8 +32,10 @@ use chrono::Utc; use diesel::prelude::*; use diesel::result::Error as DieselError; use nexus_db_model::ServiceNetworkInterface; +use nexus_types::identity::Resource; use omicron_common::api::external; use omicron_common::api::external::http_pagination::PaginatedBy; +use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -169,6 +173,58 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// List one page of all network interfaces associated with internal services + pub async fn service_network_interfaces_all_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::service_network_interface::dsl; + + // See the comment in `service_create_network_interface`. There's no + // obvious parent for a service network interface (as opposed to + // instance network interfaces, which require ListChildren on the + // instance to list). As a logical proxy, we check for listing children + // of the service IP pool. + let (authz_pool, _pool) = self.ip_pools_service_lookup(opctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz_pool).await?; + + paginated(dsl::service_network_interface, dsl::id, pagparams) + .filter(dsl::time_deleted.is_null()) + .select(ServiceNetworkInterface::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// List all network interfaces associated with internal services, making as + /// many queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. 
+ pub async fn service_network_interfaces_all_list_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; + + let mut all_ips = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .service_network_interfaces_all_list( + opctx, + &p.current_pagparams(), + ) + .await?; + paginator = p + .found_batch(&batch, &|nic: &ServiceNetworkInterface| nic.id()); + all_ips.extend(batch); + } + Ok(all_ips) + } + /// Create a network interface attached to the provided service zone. pub async fn service_create_network_interface( &self, @@ -345,6 +401,47 @@ impl DataStore { Ok(()) } + /// Delete a `ServiceNetworkInterface` attached to a provided service. + pub async fn service_delete_network_interface( + &self, + opctx: &OpContext, + service_id: Uuid, + network_interface_id: Uuid, + ) -> Result<(), network_interface::DeleteError> { + // See the comment in `service_create_network_interface`. There's no + // obvious parent for a service network interface (as opposed to + // instance network interfaces, which require permissions on the + // instance). As a logical proxy, we check for listing children of the + // service IP pool. + let (authz_service_ip_pool, _) = self + .ip_pools_service_lookup(opctx) + .await + .map_err(network_interface::DeleteError::External)?; + opctx + .authorize(authz::Action::Delete, &authz_service_ip_pool) + .await + .map_err(network_interface::DeleteError::External)?; + + let query = network_interface::DeleteQuery::new( + NetworkInterfaceKind::Service, + service_id, + network_interface_id, + ); + query + .clone() + .execute_async( + &*self + .pool_connection_authorized(opctx) + .await + .map_err(network_interface::DeleteError::External)?, + ) + .await + .map_err(|e| { + network_interface::DeleteError::from_diesel(e, &query) + })?; + Ok(()) + } + /// Return information about network interfaces required for the sled /// agent to instantiate or modify them via OPTE. 
This function takes /// a partially constructed query over the network interface table so @@ -688,3 +785,111 @@ impl DataStore { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::test_utils::datastore_test; + use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; + use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; + use nexus_test_utils::db::test_setup_database; + use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; + use omicron_test_utils::dev; + use std::collections::BTreeSet; + + async fn read_all_service_nics( + datastore: &DataStore, + opctx: &OpContext, + ) -> Vec { + let all_batched = datastore + .service_network_interfaces_all_list_batched(opctx) + .await + .expect("failed to fetch all service NICs batched"); + let all_paginated = datastore + .service_network_interfaces_all_list( + opctx, + &DataPageParams::max_page(), + ) + .await + .expect("failed to fetch all service NICs paginated"); + assert_eq!(all_batched, all_paginated); + all_batched + } + + #[tokio::test] + async fn test_service_network_interfaces_list() { + usdt::register_probes().unwrap(); + let logctx = + dev::test_setup_log("test_service_network_interfaces_list"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // No IPs, to start + let nics = read_all_service_nics(&datastore, &opctx).await; + assert_eq!(nics, vec![]); + + // Insert 10 Nexus NICs + let ip_range = NEXUS_OPTE_IPV4_SUBNET + .0 + .iter() + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .take(10); + let mut macs = external::MacAddr::iter_system(); + let mut service_nics = Vec::new(); + for (i, ip) in ip_range.enumerate() { + let name = format!("service-nic-{i}"); + let interface = IncompleteNetworkInterface::new_service( + Uuid::new_v4(), + Uuid::new_v4(), + NEXUS_VPC_SUBNET.clone(), + external::IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: name, + }, + ip.into(), + macs.next().unwrap(), + 0, + ) + .unwrap(); + let nic = datastore + .service_create_network_interface(&opctx, interface) + .await + .expect("failed to insert service nic"); + service_nics.push(nic); + } + service_nics.sort_by_key(|nic| nic.id()); + + // Ensure we see them all. + let nics = read_all_service_nics(&datastore, &opctx).await; + assert_eq!(nics, service_nics); + + // Delete a few, and ensure we don't see them anymore. + let mut removed_nic_ids = BTreeSet::new(); + for (i, nic) in service_nics.iter().enumerate() { + if i % 3 == 0 { + let id = nic.id(); + datastore + .service_delete_network_interface( + &opctx, + nic.service_id, + id, + ) + .await + .expect("failed to delete NIC"); + removed_nic_ids.insert(id); + } + } + + // Check that we removed at least one, then prune them from our list of + // expected IPs. + assert!(!removed_nic_ids.is_empty()); + service_nics.retain(|nic| !removed_nic_ids.contains(&nic.id())); + + // Ensure we see them all remaining IPs. 
+ let nics = read_all_service_nics(&datastore, &opctx).await; + assert_eq!(nics, service_nics); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 0502450121..3969c808f9 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -1345,7 +1345,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service1_id) + .external_ip_list_service(&context.opctx, service1_id) .await .expect("Failed to look up service external IPs"), Vec::new(), @@ -1354,7 +1354,7 @@ mod tests { let id1 = Uuid::new_v4(); let ip1 = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id1, &Name("service1-ip".parse().unwrap()), @@ -1372,7 +1372,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service1_id) + .external_ip_list_service(&context.opctx, service1_id) .await .expect("Failed to look up service external IPs"), vec![ip1], @@ -1383,7 +1383,7 @@ mod tests { let id2 = Uuid::new_v4(); let ip2 = context .db_datastore - .allocate_service_snat_ip(&context.opctx, id2, service2_id) + .external_ip_allocate_service_snat(&context.opctx, id2, service2_id) .await .expect("Failed to allocate service IP address"); assert!(ip2.is_service); @@ -1395,7 +1395,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service2_id) + .external_ip_list_service(&context.opctx, service2_id) .await .expect("Failed to look up service external IPs"), vec![ip2], @@ -1406,7 +1406,7 @@ mod tests { let id3 = Uuid::new_v4(); let ip3 = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id3, &Name("service3-ip".parse().unwrap()), @@ -1424,7 +1424,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service3_id) + .external_ip_list_service(&context.opctx, service3_id) .await .expect("Failed to look up service external IPs"), vec![ip3], @@ -1435,7 +1435,7 @@ mod tests { let id3 = Uuid::new_v4(); let err = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id3, &Name("service3-ip".parse().unwrap()), @@ -1457,7 +1457,7 @@ mod tests { let id4 = Uuid::new_v4(); let ip4 = context .db_datastore - .allocate_service_snat_ip(&context.opctx, id4, service4_id) + .external_ip_allocate_service_snat(&context.opctx, id4, service4_id) .await .expect("Failed to allocate service IP address"); assert!(ip4.is_service); @@ -1469,7 +1469,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service4_id) + .external_ip_list_service(&context.opctx, service4_id) .await .expect("Failed to look up service external IPs"), vec![ip4], @@ -1498,7 +1498,7 @@ mod tests { let id = Uuid::new_v4(); let ip = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1517,7 +1517,7 @@ mod tests { // Try allocating the same service IP again. let ip_again = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1535,7 +1535,7 @@ mod tests { // different UUID. 
let err = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, Uuid::new_v4(), &Name("service-ip".parse().unwrap()), @@ -1554,7 +1554,7 @@ mod tests { // different input address. let err = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1573,7 +1573,7 @@ mod tests { // different port range. let err = context .db_datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( &context.opctx, id, service_id, @@ -1592,7 +1592,7 @@ mod tests { let snat_id = Uuid::new_v4(); let snat_ip = context .db_datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( &context.opctx, snat_id, snat_service_id, @@ -1611,7 +1611,7 @@ mod tests { // Try allocating the same service IP again. let snat_ip_again = context .db_datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( &context.opctx, snat_id, snat_service_id, @@ -1630,7 +1630,7 @@ mod tests { // different port range. let err = context .db_datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( &context.opctx, snat_id, snat_service_id, @@ -1665,7 +1665,7 @@ mod tests { let id = Uuid::new_v4(); let err = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1703,7 +1703,7 @@ mod tests { let id = Uuid::new_v4(); let ip = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1720,7 +1720,7 @@ mod tests { let ip_again = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1760,7 +1760,7 @@ mod tests { let id = Uuid::new_v4(); let ip = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1777,7 +1777,7 @@ mod tests { let ip_again = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id, &Name("service-ip".parse().unwrap()), diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index c0fc18aca1..afd6af1140 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -2300,7 +2300,7 @@ mod tests { .service_create_network_interface_raw(&context.opctx, interface) .await .expect("Failed to insert interface"); - assert_eq!(inserted_interface.slot, i16::from(slot)); + assert_eq!(*inserted_interface.slot, slot); } context.success().await; @@ -2413,7 +2413,7 @@ mod tests { .service_create_network_interface_raw(&context.opctx, interface) .await .expect("Failed to insert interface"); - assert_eq!(inserted_interface.slot, 0); + assert_eq!(*inserted_interface.slot, 0); // Inserting an interface with the same slot on the same service should let new_interface = IncompleteNetworkInterface::new_service( @@ -2776,8 +2776,7 @@ mod tests { ) .await .expect("Should be able to insert up to 8 interfaces"); - let actual_slot = usize::try_from(inserted_interface.slot) - .expect("Bad slot index"); + let actual_slot = usize::from(*inserted_interface.slot); assert_eq!( slot, actual_slot, "Failed to allocate next available interface slot" 
diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index fc95414103..420a1ec84f 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -509,6 +509,7 @@ mod test { use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneType; + use nexus_types::deployment::PlanningInput; use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; @@ -864,13 +865,13 @@ mod test { async fn test_blueprint_external_dns_basic() { static TEST_NAME: &str = "test_blueprint_external_dns_basic"; let logctx = test_setup_log(TEST_NAME); - let (collection, policy) = example(&logctx.log, TEST_NAME, 5); + let (collection, input) = example(&logctx.log, TEST_NAME, 5); let initial_external_dns_generation = Generation::new(); let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), initial_external_dns_generation, - &policy, + &input.policy, "test suite", ) .expect("failed to generate initial blueprint"); @@ -1215,6 +1216,13 @@ mod test { policy .service_ip_pool_ranges .push(IpRange::from(IpAddr::V4(Ipv4Addr::LOCALHOST))); + let planning_input = PlanningInput { + policy, + // These are not used because we're not actually going through the + // planner. + service_external_ips: BTreeMap::new(), + service_nics: BTreeMap::new(), + }; let mut builder = BlueprintBuilder::new_based_on( &log, &blueprint, @@ -1224,7 +1232,7 @@ mod test { Generation::from( u32::try_from(dns_latest_external.generation).unwrap(), ), - &policy, + &planning_input, "test suite", ) .unwrap(); diff --git a/nexus/reconfigurator/execution/src/resource_allocation.rs b/nexus/reconfigurator/execution/src/resource_allocation.rs index 92262ce133..2803482058 100644 --- a/nexus/reconfigurator/execution/src/resource_allocation.rs +++ b/nexus/reconfigurator/execution/src/resource_allocation.rs @@ -100,7 +100,7 @@ impl<'a> ResourceAllocator<'a> { let allocated_ips = self .datastore - .service_lookup_external_ips(self.opctx, zone_id) + .external_ip_list_service(self.opctx, zone_id) .await .with_context(|| { format!( @@ -186,7 +186,7 @@ impl<'a> ResourceAllocator<'a> { for allocated_nic in &allocated_nics { if allocated_nic.ip.ip() == nic.ip && *allocated_nic.mac == nic.mac - && allocated_nic.slot == i16::from(nic.slot) + && *allocated_nic.slot == nic.slot && allocated_nic.primary == nic.primary { info!( @@ -258,7 +258,7 @@ impl<'a> ResourceAllocator<'a> { let ip_id = Uuid::new_v4(); let description = zone_type; self.datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( self.opctx, ip_id, ip_name, @@ -313,7 +313,7 @@ impl<'a> ResourceAllocator<'a> { let ip_id = Uuid::new_v4(); self.datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( self.opctx, ip_id, service_id, @@ -403,14 +403,12 @@ impl<'a> ResourceAllocator<'a> { // We do not check `nic.vni`, because it's not stored in the // database. (All services are given the constant vni // `Vni::SERVICES_VNI`.) 
- if created_nic.primary != nic.primary - || created_nic.slot != i16::from(nic.slot) - { + if created_nic.primary != nic.primary || *created_nic.slot != nic.slot { warn!( self.opctx.log, "unexpected property on allocated NIC"; "db_primary" => created_nic.primary, "expected_primary" => nic.primary, - "db_slot" => created_nic.slot, + "db_slot" => *created_nic.slot, "expected_slot" => nic.slot, ); @@ -671,7 +669,7 @@ mod tests { // Check that the external IP records were created. let db_nexus_ips = datastore - .service_lookup_external_ips(&opctx, nexus_id) + .external_ip_list_service(&opctx, nexus_id) .await .expect("failed to get external IPs"); assert_eq!(db_nexus_ips.len(), 1); @@ -682,7 +680,7 @@ mod tests { assert_eq!(db_nexus_ips[0].last_port, SqlU16(65535)); let db_dns_ips = datastore - .service_lookup_external_ips(&opctx, dns_id) + .external_ip_list_service(&opctx, dns_id) .await .expect("failed to get external IPs"); assert_eq!(db_dns_ips.len(), 1); @@ -693,7 +691,7 @@ mod tests { assert_eq!(db_dns_ips[0].last_port, SqlU16(65535)); let db_ntp_ips = datastore - .service_lookup_external_ips(&opctx, ntp_id) + .external_ip_list_service(&opctx, ntp_id) .await .expect("failed to get external IPs"); assert_eq!(db_ntp_ips.len(), 1); @@ -715,7 +713,7 @@ mod tests { assert_eq!(db_nexus_nics[0].subnet_id, NEXUS_VPC_SUBNET.id()); assert_eq!(*db_nexus_nics[0].mac, nexus_nic.mac); assert_eq!(db_nexus_nics[0].ip, nexus_nic.ip.into()); - assert_eq!(db_nexus_nics[0].slot, i16::from(nexus_nic.slot)); + assert_eq!(*db_nexus_nics[0].slot, nexus_nic.slot); assert_eq!(db_nexus_nics[0].primary, nexus_nic.primary); let db_dns_nics = datastore @@ -729,7 +727,7 @@ mod tests { assert_eq!(db_dns_nics[0].subnet_id, DNS_VPC_SUBNET.id()); assert_eq!(*db_dns_nics[0].mac, dns_nic.mac); assert_eq!(db_dns_nics[0].ip, dns_nic.ip.into()); - assert_eq!(db_dns_nics[0].slot, i16::from(dns_nic.slot)); + assert_eq!(*db_dns_nics[0].slot, dns_nic.slot); assert_eq!(db_dns_nics[0].primary, dns_nic.primary); let db_ntp_nics = datastore @@ -743,7 +741,7 @@ mod tests { assert_eq!(db_ntp_nics[0].subnet_id, NTP_VPC_SUBNET.id()); assert_eq!(*db_ntp_nics[0].mac, ntp_nic.mac); assert_eq!(db_ntp_nics[0].ip, ntp_nic.ip.into()); - assert_eq!(db_ntp_nics[0].slot, i16::from(ntp_nic.slot)); + assert_eq!(*db_ntp_nics[0].slot, ntp_nic.slot); assert_eq!(db_ntp_nics[0].primary, ntp_nic.primary); // We should be able to run the function again with the same inputs, and @@ -755,21 +753,21 @@ mod tests { assert_eq!( db_nexus_ips, datastore - .service_lookup_external_ips(&opctx, nexus_id) + .external_ip_list_service(&opctx, nexus_id) .await .expect("failed to get external IPs") ); assert_eq!( db_dns_ips, datastore - .service_lookup_external_ips(&opctx, dns_id) + .external_ip_list_service(&opctx, dns_id) .await .expect("failed to get external IPs") ); assert_eq!( db_ntp_ips, datastore - .service_lookup_external_ips(&opctx, ntp_id) + .external_ip_list_service(&opctx, ntp_id) .await .expect("failed to get external IPs") ); diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index cb55d9aa7c..f990a92157 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -15,6 +15,7 @@ nexus-config.workspace = true nexus-inventory.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true rand.workspace = true sled-agent-client.workspace = true slog.workspace = true diff --git 
a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index dc0f1e501c..827693beb1 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -19,6 +19,7 @@ use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; use nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::PlanningInput; use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; @@ -114,7 +115,7 @@ pub struct BlueprintBuilder<'a> { external_dns_version: Generation, // These fields are used to allocate resources from sleds. - policy: &'a Policy, + input: &'a PlanningInput, sled_ip_allocators: BTreeMap, // These fields will become part of the final blueprint. See the @@ -241,7 +242,7 @@ impl<'a> BlueprintBuilder<'a> { parent_blueprint: &'a Blueprint, internal_dns_version: Generation, external_dns_version: Generation, - policy: &'a Policy, + input: &'a PlanningInput, creator: &str, ) -> anyhow::Result> { let log = log.new(o!( @@ -341,7 +342,8 @@ impl<'a> BlueprintBuilder<'a> { .filter(move |ip| !existing_nexus_v6_ips.contains(ip)), ); let available_external_ips = Box::new( - policy + input + .policy .service_ip_pool_ranges .iter() .flat_map(|r| r.iter()) @@ -356,7 +358,7 @@ impl<'a> BlueprintBuilder<'a> { parent_blueprint, internal_dns_version, external_dns_version, - policy, + input, sled_ip_allocators: BTreeMap::new(), zones: BlueprintZonesBuilder::new(parent_blueprint), creator: creator.to_owned(), @@ -373,7 +375,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn build(mut self) -> Blueprint { // Collect the Omicron zones config for each in-service sled. let blueprint_zones = - self.zones.into_zones_map(self.policy.sleds.keys().copied()); + self.zones.into_zones_map(self.input.policy.sleds.keys().copied()); Blueprint { id: self.rng.blueprint_rng.next(), blueprint_zones, @@ -715,7 +717,7 @@ impl<'a> BlueprintBuilder<'a> { } fn sled_resources(&self, sled_id: Uuid) -> Result<&SledResources, Error> { - self.policy.sleds.get(&sled_id).ok_or_else(|| { + self.input.policy.sleds.get(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( "attempted to use sled that is not in service: {}", sled_id @@ -891,14 +893,14 @@ pub mod test { // describes no changes. static TEST_NAME: &str = "blueprint_builder_test_initial"; let logctx = test_setup_log(TEST_NAME); - let (collection, policy) = + let (collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); let blueprint_initial = BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "the_test", TEST_NAME, ) @@ -929,7 +931,7 @@ pub mod test { &blueprint_initial, Generation::new(), Generation::new(), - &policy, + &input, "test_basic", ) .expect("failed to create builder"); @@ -961,7 +963,7 @@ pub mod test { blueprint1, Generation::new(), Generation::new(), - &example.policy, + &example.input, "test_basic", ) .expect("failed to create builder"); @@ -969,7 +971,7 @@ pub mod test { // The example blueprint should have internal NTP zones on all the // existing sleds, plus Crucible zones on all pools. So if we ensure // all these zones exist, we should see no change. 
- for (sled_id, sled_resources) in &example.policy.sleds { + for (sled_id, sled_resources) in &example.input.policy.sleds { builder.sled_ensure_zone_ntp(*sled_id).unwrap(); for pool_name in &sled_resources.zpools { builder @@ -994,17 +996,22 @@ pub mod test { let _ = example.system.sled(SledBuilder::new().id(new_sled_id)).unwrap(); let policy = example.system.to_policy().unwrap(); + let input = PlanningInput { + policy, + service_external_ips: example.input.service_external_ips, + service_nics: example.input.service_nics, + }; let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &blueprint2, Generation::new(), Generation::new(), - &policy, + &input, "test_basic", ) .expect("failed to create builder"); builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); - let new_sled_resources = policy.sleds.get(&new_sled_id).unwrap(); + let new_sled_resources = input.policy.sleds.get(&new_sled_id).unwrap(); for pool_name in &new_sled_resources.zpools { builder .sled_ensure_zone_crucible(new_sled_id, pool_name.clone()) @@ -1078,7 +1085,7 @@ pub mod test { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_with_no_existing_nexus_zones"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We don't care about the DNS versions here. @@ -1099,7 +1106,7 @@ pub mod test { &collection, internal_dns_version, external_dns_version, - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1110,7 +1117,7 @@ pub mod test { &parent, internal_dns_version, external_dns_version, - &policy, + &input, "test", ) .expect("failed to create builder"); @@ -1139,7 +1146,7 @@ pub mod test { fn test_add_nexus_error_cases() { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_error_cases"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We don't care about the DNS versions here. @@ -1168,7 +1175,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1182,7 +1189,7 @@ pub mod test { &parent, internal_dns_version, external_dns_version, - &policy, + &input, "test", ) .expect("failed to create builder"); @@ -1202,7 +1209,7 @@ pub mod test { &parent, internal_dns_version, external_dns_version, - &policy, + &input, "test", ) .expect("failed to create builder"); @@ -1217,7 +1224,7 @@ pub mod test { // Replace the policy's external service IP pool ranges with ranges // that are already in use by existing zones. Attempting to add a // Nexus with no remaining external IPs should fail. 
- let mut policy = policy.clone(); + let mut input = input.clone(); let mut used_ip_ranges = Vec::new(); for (_, z) in parent.all_omicron_zones() { if let Some(ip) = z @@ -1229,14 +1236,14 @@ pub mod test { } } assert!(!used_ip_ranges.is_empty()); - policy.service_ip_pool_ranges = used_ip_ranges; + input.policy.service_ip_pool_ranges = used_ip_ranges; let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &parent, internal_dns_version, external_dns_version, - &policy, + &input, "test", ) .expect("failed to create builder"); @@ -1267,7 +1274,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_zones_with_same_external_ip"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1299,7 +1306,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1310,7 +1317,7 @@ pub mod test { &parent, Generation::new(), Generation::new(), - &policy, + &input, "test", ) { Ok(_) => panic!("unexpected success"), @@ -1329,7 +1336,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_nexus_zones_with_same_nic_ip"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1359,7 +1366,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1370,7 +1377,7 @@ pub mod test { &parent, Generation::new(), Generation::new(), - &policy, + &input, "test", ) { Ok(_) => panic!("unexpected success"), @@ -1389,7 +1396,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_zones_with_same_vnic_mac"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1419,7 +1426,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1430,7 +1437,7 @@ pub mod test { &parent, Generation::new(), Generation::new(), - &policy, + &input, "test", ) { Ok(_) => panic!("unexpected success"), diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index a18e3b71cf..563b3662bf 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -9,15 +9,22 @@ use crate::system::SledBuilder; use crate::system::SystemDescription; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; -use nexus_types::deployment::Policy; +use nexus_types::deployment::ExternalIp; +use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::ServiceNetworkInterface; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::TypedUuid; use sled_agent_client::types::OmicronZonesConfig; +use std::collections::BTreeMap; use typed_rng::UuidRng; +use uuid::Uuid; pub struct ExampleSystem { pub system: SystemDescription, - pub policy: Policy, + pub input: PlanningInput, pub collection: Collection, pub blueprint: Blueprint, // If we add more types of 
RNGs than just sleds here, we'll need to @@ -46,6 +53,11 @@ impl ExampleSystem { let policy = system.to_policy().expect("failed to make policy"); let mut inventory_builder = system.to_collection_builder().expect("failed to build collection"); + let mut input = PlanningInput { + policy, + service_external_ips: BTreeMap::new(), + service_nics: BTreeMap::new(), + }; // For each sled, have it report 0 zones in the initial inventory. // This will enable us to build a blueprint from the initial @@ -69,7 +81,7 @@ impl ExampleSystem { &empty_zone_inventory, Generation::new(), Generation::new(), - &policy, + &input.policy, "test suite", (test_name, "ExampleSystem initial"), ) @@ -77,16 +89,16 @@ impl ExampleSystem { // Now make a blueprint and collection with some zones on each sled. let mut builder = BlueprintBuilder::new_based_on( - &log, + log, &initial_blueprint, Generation::new(), Generation::new(), - &policy, + &input, "test suite", ) .unwrap(); builder.set_rng_seed((test_name, "ExampleSystem make_zones")); - for (sled_id, sled_resources) in &policy.sleds { + for (sled_id, sled_resources) in &input.policy.sleds { let _ = builder.sled_ensure_zone_ntp(*sled_id).unwrap(); let _ = builder .sled_ensure_zone_multiple_nexus_with_config( @@ -112,6 +124,28 @@ impl ExampleSystem { let Some(zones) = blueprint.blueprint_zones.get(&sled_id) else { continue; }; + for zone in zones.zones.iter().map(|z| &z.config) { + let service_id = + TypedUuid::::from_untyped_uuid(zone.id); + if let Ok(Some(ip)) = zone.zone_type.external_ip() { + input.service_external_ips.insert( + service_id, + ExternalIp { id: Uuid::new_v4(), ip: ip.into() }, + ); + } + if let Some(nic) = zone.zone_type.service_vnic() { + input.service_nics.insert( + service_id, + ServiceNetworkInterface { + id: nic.id, + mac: nic.mac, + ip: nic.ip.into(), + slot: nic.slot, + primary: nic.primary, + }, + ); + } + } builder .found_sled_omicron_zones( "fake sled agent", @@ -125,7 +159,7 @@ impl ExampleSystem { ExampleSystem { system, - policy, + input, collection: builder.build(), blueprint, sled_rng, @@ -133,7 +167,7 @@ impl ExampleSystem { } } -/// Returns a collection and policy describing a pretty simple system. +/// Returns a collection and planning input describing a pretty simple system. /// /// The test name is used as the RNG seed. 
/// @@ -144,7 +178,7 @@ pub fn example( log: &slog::Logger, test_name: &str, nsleds: usize, -) -> (Collection, Policy) { +) -> (Collection, PlanningInput) { let example = ExampleSystem::new(log, test_name, nsleds); - (example.collection, example.policy) + (example.collection, example.input) } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index ce5660e7f6..280ac61ede 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -11,7 +11,7 @@ use crate::blueprint_builder::Ensure; use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; -use nexus_types::deployment::Policy; +use nexus_types::deployment::PlanningInput; use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; @@ -23,7 +23,7 @@ use uuid::Uuid; pub struct Planner<'a> { log: Logger, - policy: &'a Policy, + input: &'a PlanningInput, blueprint: BlueprintBuilder<'a>, // latest inventory collection // @@ -43,7 +43,7 @@ impl<'a> Planner<'a> { parent_blueprint: &'a Blueprint, internal_dns_version: Generation, external_dns_version: Generation, - policy: &'a Policy, + input: &'a PlanningInput, creator: &str, // NOTE: Right now, we just assume that this is the latest inventory // collection. See the comment on the corresponding field in `Planner`. @@ -54,10 +54,10 @@ impl<'a> Planner<'a> { parent_blueprint, internal_dns_version, external_dns_version, - policy, + input, creator, )?; - Ok(Planner { log, policy, blueprint, inventory }) + Ok(Planner { log, input, blueprint, inventory }) } /// Within tests, set a seeded RNG for deterministic results. @@ -98,7 +98,7 @@ impl<'a> Planner<'a> { // is fine. let mut sleds_ineligible_for_services = BTreeSet::new(); - for (sled_id, sled_info) in &self.policy.sleds { + for (sled_id, sled_info) in &self.input.policy.sleds { // Decommissioned sleds don't get any services. (This is an // explicit match so that when more states are added, this fails to // compile.) @@ -200,10 +200,12 @@ impl<'a> Planner<'a> { // sleds so we can avoid any non-provisionable sleds under the // assumption that there is something amiss with them. sleds_ineligible_for_services.extend( - self.policy.sleds.iter().filter_map(|(sled_id, sled_info)| { - (!sled_info.is_eligible_for_discretionary_services()) - .then_some(*sled_id) - }), + self.input.policy.sleds.iter().filter_map( + |(sled_id, sled_info)| { + (!sled_info.is_eligible_for_discretionary_services()) + .then_some(*sled_id) + }, + ), ); self.ensure_correct_number_of_nexus_zones( @@ -222,7 +224,7 @@ impl<'a> Planner<'a> { let mut num_total_nexus = 0; let mut sleds_by_num_nexus: BTreeMap> = BTreeMap::new(); - for &sled_id in self.policy.sleds.keys() { + for &sled_id in self.input.policy.sleds.keys() { let num_nexus = self.blueprint.sled_num_nexus_zones(sled_id); num_total_nexus += num_nexus; @@ -237,12 +239,15 @@ impl<'a> Planner<'a> { // TODO-correctness What should we do if we have _too many_ Nexus // instances? For now, just log it the number of zones any time we have // at least the minimum number. 
- let nexus_to_add = - self.policy.target_nexus_zone_count.saturating_sub(num_total_nexus); + let nexus_to_add = self + .input + .policy + .target_nexus_zone_count + .saturating_sub(num_total_nexus); if nexus_to_add == 0 { info!( self.log, "sufficient Nexus zones exist in plan"; - "desired_count" => self.policy.target_nexus_zone_count, + "desired_count" => self.input.policy.target_nexus_zone_count, "current_count" => num_total_nexus, ); return Ok(()); @@ -345,6 +350,7 @@ mod test { use nexus_inventory::now_db_precision; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; + use nexus_types::deployment::PlanningInput; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; @@ -374,7 +380,7 @@ mod test { &example.collection, internal_dns_version, external_dns_version, - &example.policy, + &example.input.policy, "the_test", (TEST_NAME, "bp1"), ) @@ -389,7 +395,7 @@ mod test { &blueprint1, internal_dns_version, external_dns_version, - &example.policy, + &example.input, "no-op?", &example.collection, ) @@ -410,6 +416,11 @@ mod test { let _ = example.system.sled(SledBuilder::new().id(new_sled_id)).unwrap(); let policy = example.system.to_policy().unwrap(); + let input = PlanningInput { + policy, + service_external_ips: example.input.service_external_ips, + service_nics: example.input.service_nics, + }; // Check that the first step is to add an NTP zone let blueprint3 = Planner::new_based_on( @@ -417,7 +428,7 @@ mod test { &blueprint2, internal_dns_version, external_dns_version, - &policy, + &input, "test: add NTP?", &example.collection, ) @@ -459,7 +470,7 @@ mod test { &blueprint3, internal_dns_version, external_dns_version, - &policy, + &input, "test: add nothing more", &example.collection, ) @@ -501,7 +512,7 @@ mod test { &blueprint3, internal_dns_version, external_dns_version, - &policy, + &input, "test: add Crucible zones?", &collection, ) @@ -543,7 +554,7 @@ mod test { &blueprint5, internal_dns_version, external_dns_version, - &policy, + &input, "test: no-op?", &collection, ) @@ -575,21 +586,21 @@ mod test { // Use our example inventory collection as a starting point, but strip // it down to just one sled. - let (sled_id, collection, mut policy) = { - let (mut collection, mut policy) = + let (sled_id, collection, mut input) = { + let (mut collection, mut input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // Pick one sled ID to keep and remove the rest. let keep_sled_id = - policy.sleds.keys().next().copied().expect("no sleds"); - policy.sleds.retain(|&k, _v| keep_sled_id == k); + input.policy.sleds.keys().next().copied().expect("no sleds"); + input.policy.sleds.retain(|&k, _v| keep_sled_id == k); collection.sled_agents.retain(|&k, _v| keep_sled_id == k); collection.omicron_zones.retain(|&k, _v| keep_sled_id == k); assert_eq!(collection.sled_agents.len(), 1); assert_eq!(collection.omicron_zones.len(), 1); - (keep_sled_id, collection, policy) + (keep_sled_id, collection, input) }; // Build the initial blueprint. @@ -598,7 +609,7 @@ mod test { &collection, internal_dns_version, external_dns_version, - &policy, + &input.policy, "the_test", (TEST_NAME, "bp1"), ) @@ -621,13 +632,13 @@ mod test { // Now run the planner. It should add additional Nexus instances to the // one sled we have. 
- policy.target_nexus_zone_count = 5; + input.policy.target_nexus_zone_count = 5; let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, internal_dns_version, external_dns_version, - &policy, + &input, "test_blueprint2", &collection, ) @@ -647,7 +658,7 @@ mod test { assert_eq!(sled_changes.zones_removed().len(), 0); assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); - assert_eq!(zones.len(), policy.target_nexus_zone_count - 1); + assert_eq!(zones.len(), input.policy.target_nexus_zone_count - 1); for zone in &zones { if !zone.config.zone_type.is_nexus() { panic!("unexpectedly added a non-Nexus zone: {zone:?}"); @@ -666,7 +677,7 @@ mod test { let logctx = test_setup_log(TEST_NAME); // Use our example inventory collection as a starting point. - let (collection, mut policy) = + let (collection, mut input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // Build the initial blueprint. @@ -675,7 +686,7 @@ mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "the_test", (TEST_NAME, "bp1"), ) @@ -695,13 +706,13 @@ mod test { } // Now run the planner with a high number of target Nexus zones. - policy.target_nexus_zone_count = 14; + input.policy.target_nexus_zone_count = 14; let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, Generation::new(), Generation::new(), - &policy, + &input, "test_blueprint2", &collection, ) @@ -758,7 +769,7 @@ mod test { // and decommissioned sleds. (When we add more kinds of // non-provisionable states in the future, we'll have to add more // sleds.) - let (collection, mut policy) = example(&logctx.log, TEST_NAME, 5); + let (collection, mut input) = example(&logctx.log, TEST_NAME, 5); // Build the initial blueprint. let blueprint1 = @@ -766,7 +777,7 @@ mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "the_test", (TEST_NAME, "bp1"), ) @@ -787,7 +798,7 @@ mod test { // Arbitrarily choose some of the sleds and mark them non-provisionable // in various ways. - let mut sleds_iter = policy.sleds.iter_mut(); + let mut sleds_iter = input.policy.sleds.iter_mut(); let nonprovisionable_sled_id = { let (sled_id, resources) = sleds_iter.next().expect("no sleds"); @@ -817,13 +828,13 @@ mod test { // // When the planner gets smarter about removing zones from expunged // and/or removed sleds, we'll have to adjust this number. 
- policy.target_nexus_zone_count = 16; + input.policy.target_nexus_zone_count = 16; let mut blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, Generation::new(), Generation::new(), - &policy, + &input, "test_blueprint2", &collection, ) diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index b38508d74c..9e926b202a 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -14,7 +14,9 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; -use nexus_types::deployment::Policy; +use nexus_types::deployment::ExternalIp; +use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::ServiceNetworkInterface; use nexus_types::inventory::Collection; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; @@ -26,12 +28,15 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::TypedUuid; use slog_error_chain::InlineErrorChain; use uuid::Uuid; /// Common structure for collecting information that the planner needs struct PlanningContext { - policy: Policy, + planning_input: PlanningInput, creator: String, inventory: Option, internal_dns_version: Generation, @@ -151,6 +156,49 @@ impl super::Nexus { NEXUS_REDUNDANCY, )?; + let service_external_ips = datastore + .external_ip_list_service_all_batched(opctx) + .await? + .into_iter() + .filter_map(|external_ip| { + if !external_ip.is_service { + error!( + opctx.log, + "non-service external IP returned by service IP query"; + "external-ip" => ?external_ip, + ); + return None; + } + let Some(service_id) = external_ip.parent_id else { + error!( + opctx.log, + "service external IP with no parent ID set"; + "external-ip" => ?external_ip, + ); + return None; + }; + Some(( + TypedUuid::::from_untyped_uuid(service_id), + ExternalIp::from(external_ip), + )) + }) + .collect(); + let service_nics = datastore + .service_network_interfaces_all_list_batched(opctx) + .await? + .into_iter() + .map(|nic| { + ( + TypedUuid::::from_untyped_uuid( + nic.service_id, + ), + ServiceNetworkInterface::from(nic), + ) + }) + .collect(); + let planning_input = + PlanningInput { policy, service_external_ips, service_nics }; + // The choice of which inventory collection to use here is not // necessarily trivial. Inventory collections may be incomplete due to // transient (or even persistent) errors. 
It's not yet clear what @@ -186,8 +234,8 @@ impl super::Nexus { )?; Ok(PlanningContext { + planning_input, creator, - policy, inventory, internal_dns_version: *internal_dns_version.version, external_dns_version: *external_dns_version.version, @@ -216,7 +264,7 @@ impl super::Nexus { &collection, planning_context.internal_dns_version, planning_context.external_dns_version, - &planning_context.policy, + &planning_context.planning_input.policy, &planning_context.creator, ) .map_err(|error| { @@ -252,7 +300,7 @@ impl super::Nexus { &parent_blueprint, planning_context.internal_dns_version, planning_context.external_dns_version, - &planning_context.policy, + &planning_context.planning_input, &planning_context.creator, &inventory, ) diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index aff45d07de..68b1444cc1 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -10,6 +10,7 @@ chrono.workspace = true base64.workspace = true futures.workspace = true humantime.workspace = true +ipnetwork.workspace = true omicron-uuid-kinds.workspace = true openssl.workspace = true parse-display.workspace = true @@ -21,6 +22,7 @@ steno.workspace = true strum.workspace = true tabled.workspace = true thiserror.workspace = true +newtype-uuid.workspace = true uuid.workspace = true api_identity.workspace = true diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 4c4f3823c6..bed66adaca 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -22,10 +22,14 @@ pub use crate::inventory::OmicronZoneType; pub use crate::inventory::OmicronZonesConfig; pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; +use ipnetwork::IpNetwork; +use newtype_uuid::TypedUuid; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; +use omicron_common::api::external::MacAddr; +use omicron_uuid_kinds::OmicronZoneKind; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -101,6 +105,48 @@ impl SledResources { } } +/// Policy and database inputs to the Reconfigurator planner +/// +/// The primary inputs to the planner are the parent (either a parent blueprint +/// or an inventory collection) and this structure. This type holds the +/// fleet-wide policy as well as any additional information fetched from CRDB +/// that the planner needs to make decisions. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PlanningInput { + /// fleet-wide policy + pub policy: Policy, + + /// external IPs allocated to services + pub service_external_ips: BTreeMap, ExternalIp>, + + /// vNICs allocated to services + pub service_nics: + BTreeMap, ServiceNetworkInterface>, +} + +/// External IP allocated to a service +/// +/// This is a slimmer `nexus_db_model::ExternalIp` that only stores the fields +/// necessary for blueprint planning. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExternalIp { + pub id: Uuid, + pub ip: IpNetwork, +} + +/// Network interface allocated to a service +/// +/// This is a slimmer `nexus_db_model::ServiceNetworkInterface` that only stores +/// the fields necessary for blueprint planning. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ServiceNetworkInterface { + pub id: Uuid, + pub mac: MacAddr, + pub ip: IpNetwork, + pub slot: u8, + pub primary: bool, +} + /// Describes a complete set of software and configuration for the system // Blueprints are a fundamental part of how the system modifies itself. Each // blueprint completely describes all of the software and configuration diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 7018485b59..17aa803d13 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -48,6 +48,7 @@ impl_typed_uuid_kind! { DownstairsKind => "downstairs", DownstairsRegionKind => "downstairs_region", LoopbackAddressKind => "loopback_address", + OmicronZoneKind => "service", TufRepoKind => "tuf_repo", UpstairsKind => "upstairs", UpstairsRepairKind => "upstairs_repair", From aa671fe76d5dab29938ed30a9c85a6b599f3c593 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Mon, 1 Apr 2024 12:09:13 -0700 Subject: [PATCH 021/334] Deleting a nonexistent producer should succeed (#5366) --- oximeter/collector/src/agent.rs | 29 +++++++++++++++++++++++++++-- oximeter/collector/src/lib.rs | 11 +---------- 2 files changed, 28 insertions(+), 12 deletions(-) diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 33146b3579..d3ed56a7a7 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -679,8 +679,10 @@ impl OximeterAgent { >, id: Uuid, ) -> Result<(), Error> { - let (_info, task) = - tasks.remove(&id).ok_or_else(|| Error::NoSuchProducer(id))?; + let Some((_info, task)) = tasks.remove(&id) else { + // We have no such producer, so good news, we've removed it! + return Ok(()); + }; debug!( self.log, "removed collection task from set"; @@ -1121,4 +1123,27 @@ mod tests { assert_eq!(stats.failed_collections.len(), 1); logctx.cleanup_successful(); } + + #[tokio::test] + async fn test_delete_nonexistent_producer_succeeds() { + let logctx = + test_setup_log("test_delete_nonexistent_producer_succeeds"); + let log = &logctx.log; + + // Spawn an oximeter collector ... + let collector = OximeterAgent::new_standalone( + Uuid::new_v4(), + SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + crate::default_refresh_interval(), + None, + log, + ) + .await + .unwrap(); + assert!( + collector.delete_producer(Uuid::new_v4()).await.is_ok(), + "Deleting a non-existent producer should be OK" + ); + logctx.cleanup_successful(); + } } diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index 596c0dc785..fa699d67d8 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -60,22 +60,13 @@ pub enum Error { #[error(transparent)] ResolveError(#[from] ResolveError), - #[error("No producer is registered with ID")] - NoSuchProducer(Uuid), - #[error("Error running standalone")] Standalone(#[from] anyhow::Error), } impl From for HttpError { fn from(e: Error) -> Self { - match e { - Error::NoSuchProducer(id) => HttpError::for_not_found( - None, - format!("No such producer: {id}"), - ), - _ => HttpError::for_internal_error(e.to_string()), - } + HttpError::for_internal_error(e.to_string()) } } From afb2e9aee43ce9463404db0d7f58378224960c69 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Mon, 1 Apr 2024 12:37:33 -0700 Subject: [PATCH 022/334] Oximeter producer registration should be more lax about success (#5340) As part of https://github.com/oxidecomputer/omicron/issues/5284, we'd like to change the Nexus API used to register as a metric producer. 
It currently returns a 204 and no data, and we'd like it to return a 201 and a lease duration. However, if we change the server API first, Progenitor-based clients will see an unexpected response, which the `oximeter-producer` registration method currently returns as an error. This commit relaxes the registration method, so that it still succeeds if it finds an unexpected-but-successful response from Nexus. This will let clients handle the desired changes on the server even before they truly recognize the new response as valid. --- oximeter/producer/src/lib.rs | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/oximeter/producer/src/lib.rs b/oximeter/producer/src/lib.rs index 3fecaadf4f..f576c96069 100644 --- a/oximeter/producer/src/lib.rs +++ b/oximeter/producer/src/lib.rs @@ -251,8 +251,16 @@ pub async fn register( ) -> Result<(), Error> { let client = nexus_client::Client::new(&format!("http://{}", address), log.clone()); - client.cpapi_producers_post(&server_info.into()).await.map(|_| ()).map_err( - |err| { + match client.cpapi_producers_post(&server_info.into()).await { + Ok(_) => Ok(()), + // Convert any unexpected-but-successful response to an Ok. + // See https://github.com/oxidecomputer/omicron/issues/5284 for details. + Err(nexus_client::Error::UnexpectedResponse(resp)) + if resp.status().is_success() => + { + Ok(()) + } + Err(err) => { let retryable = match &err { nexus_client::Error::CommunicationError(..) => true, nexus_client::Error::ErrorResponse(resp) => { @@ -261,9 +269,9 @@ pub async fn register( _ => false, }; let msg = err.to_string(); - Error::RegistrationError { retryable, msg } - }, - ) + Err(Error::RegistrationError { retryable, msg }) + } + } } /// Handle a request to pull available metric data from a [`ProducerRegistry`]. From ac11458f59cdaf0c79dc93e17c47c41416eb21da Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 1 Apr 2024 15:43:46 -0700 Subject: [PATCH 023/334] [nexus] Sled Expungement causes PhysicalDisk expungement (#5371) Fixes https://github.com/oxidecomputer/omicron/issues/5369 This updates the "expunge sled" endpoint which is part of the internal API, but the same implementation should still apply when the endpoint is exposed to operators. --- nexus/db-queries/src/db/datastore/sled.rs | 280 +++++++++++++++++----- 1 file changed, 220 insertions(+), 60 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 93d3d0e6a2..ad9edc063c 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -329,6 +329,9 @@ impl DataStore { /// sufficient warning to the operator. /// /// This is idempotent, and it returns the old policy of the sled. + /// + /// Calling this function also implicitly marks the disks attached to a sled + /// as "expunged". 
pub async fn sled_set_policy_to_expunged( &self, opctx: &OpContext, @@ -348,73 +351,127 @@ impl DataStore { &self, opctx: &OpContext, authz_sled: &authz::Sled, - new_policy: SledPolicy, + new_sled_policy: SledPolicy, check: ValidateTransition, ) -> Result { - use db::schema::sled::dsl; - opctx.authorize(authz::Action::Modify, authz_sled).await?; let sled_id = authz_sled.id(); - let query = diesel::update(dsl::sled) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::id.eq(sled_id)); - - let t = SledTransition::Policy(new_policy); - let valid_old_policies = t.valid_old_policies(); - let valid_old_states = t.valid_old_states(); - - let query = match check { - ValidateTransition::Yes => query - .filter(dsl::sled_policy.eq_any( - valid_old_policies.into_iter().map(to_db_sled_policy), - )) - .filter( - dsl::sled_state.eq_any(valid_old_states.iter().copied()), - ) - .into_boxed(), - #[cfg(test)] - ValidateTransition::No => query.into_boxed(), - }; + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + let policy = self + .transaction_retry_wrapper("sled_set_policy") + .transaction(&conn, |conn| { + let err = err.clone(); - let query = query - .set(( - dsl::sled_policy.eq(to_db_sled_policy(new_policy)), - dsl::time_modified.eq(Utc::now()), - )) - .check_if_exists::(sled_id); + async move { + let t = SledTransition::Policy(new_sled_policy); + let valid_old_policies = t.valid_old_policies(); + let valid_old_states = t.valid_old_states(); + + use db::schema::sled::dsl; + let query = diesel::update(dsl::sled) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::id.eq(sled_id)); + + let query = match check { + ValidateTransition::Yes => query + .filter( + dsl::sled_policy.eq_any( + valid_old_policies + .into_iter() + .map(to_db_sled_policy), + ), + ) + .filter( + dsl::sled_state + .eq_any(valid_old_states.iter().copied()), + ) + .into_boxed(), + #[cfg(test)] + ValidateTransition::No => query.into_boxed(), + }; + + let query = query + .set(( + dsl::sled_policy + .eq(to_db_sled_policy(new_sled_policy)), + dsl::time_modified.eq(Utc::now()), + )) + .check_if_exists::(sled_id); + + let result = query.execute_and_check(&conn).await?; + + let old_policy = match (check, result.status) { + (ValidateTransition::Yes, UpdateStatus::Updated) => { + result.found.policy() + } + ( + ValidateTransition::Yes, + UpdateStatus::NotUpdatedButExists, + ) => { + // Two reasons this can happen: + // 1. An idempotent update: this is treated as a success. + // 2. Invalid state transition: a failure. + // + // To differentiate between the two, check that the new policy + // is the same as the old policy, and that the old state is + // valid. + if result.found.policy() == new_sled_policy + && valid_old_states + .contains(&result.found.state()) + { + result.found.policy() + } else { + return Err(err.bail( + TransitionError::InvalidTransition { + current: result.found, + transition: SledTransition::Policy( + new_sled_policy, + ), + }, + )); + } + } + #[cfg(test)] + (ValidateTransition::No, _) => result.found.policy(), + }; + + // When a sled is expunged, the associated disks with that + // sled should also be implicitly set to expunged. + let new_disk_policy = match new_sled_policy { + SledPolicy::InService { .. 
} => None, + SledPolicy::Expunged => { + Some(nexus_db_model::PhysicalDiskPolicy::Expunged) + } + }; + if let Some(new_disk_policy) = new_disk_policy { + use db::schema::physical_disk::dsl as physical_disk_dsl; + diesel::update(physical_disk_dsl::physical_disk) + .filter(physical_disk_dsl::time_deleted.is_null()) + .filter(physical_disk_dsl::sled_id.eq(sled_id)) + .set( + physical_disk_dsl::disk_policy + .eq(new_disk_policy), + ) + .execute_async(&conn) + .await?; + } - let result = query - .execute_and_check(&*self.pool_connection_authorized(opctx).await?) + Ok(old_policy) + } + }) .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - match (check, result.status) { - (ValidateTransition::Yes, UpdateStatus::Updated) => { - Ok(result.found.policy()) - } - (ValidateTransition::Yes, UpdateStatus::NotUpdatedButExists) => { - // Two reasons this can happen: - // 1. An idempotent update: this is treated as a success. - // 2. Invalid state transition: a failure. - // - // To differentiate between the two, check that the new policy - // is the same as the old policy, and that the old state is - // valid. - if result.found.policy() == new_policy - && valid_old_states.contains(&result.found.state()) - { - Ok(result.found.policy()) - } else { - Err(TransitionError::InvalidTransition { - current: result.found, - transition: SledTransition::Policy(new_policy), - }) + .map_err(|e| { + if let Some(err) = err.take() { + return err; } - } - #[cfg(test)] - (ValidateTransition::No, _) => Ok(result.found.policy()), - } + TransitionError::from(public_error_from_diesel( + e, + ErrorHandler::Server, + )) + })?; + Ok(policy) } /// Marks the state of the sled as decommissioned, as believed by Nexus. @@ -675,6 +732,9 @@ mod test { use anyhow::{Context, Result}; use itertools::Itertools; use nexus_db_model::Generation; + use nexus_db_model::PhysicalDisk; + use nexus_db_model::PhysicalDiskKind; + use nexus_db_model::PhysicalDiskPolicy; use nexus_test_utils::db::test_setup_database; use nexus_types::identity::Asset; use omicron_common::api::external; @@ -967,6 +1027,107 @@ mod test { logctx.cleanup_successful(); } + async fn lookup_physical_disk( + datastore: &DataStore, + id: Uuid, + ) -> PhysicalDisk { + use db::schema::physical_disk::dsl; + dsl::physical_disk + .filter(dsl::id.eq(id)) + .filter(dsl::time_deleted.is_null()) + .select(PhysicalDisk::as_select()) + .get_result_async( + &*datastore + .pool_connection_for_tests() + .await + .expect("No connection"), + ) + .await + .expect("Failed to lookup physical disk") + } + + #[tokio::test] + async fn test_sled_expungement_also_expunges_disks() { + let logctx = + dev::test_setup_log("test_sled_expungement_also_expunges_disks"); + let mut db = test_setup_database(&logctx.log).await; + + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Set up a sled to test against. + let sled = datastore.sled_upsert(test_new_sled_update()).await.unwrap(); + let sled_id = sled.id(); + + // Add a couple disks to this sled. + // + // (Note: This isn't really enough DB fakery to actually provision e.g. 
+ // Crucible regions, but it creates enough of a control plane object to + // be associated with the Sled by UUID) + let disk1 = PhysicalDisk::new( + Uuid::new_v4(), + "vendor1".to_string(), + "serial1".to_string(), + "model1".to_string(), + PhysicalDiskKind::U2, + sled_id, + ); + let disk2 = PhysicalDisk::new( + Uuid::new_v4(), + "vendor2".to_string(), + "serial2".to_string(), + "model2".to_string(), + PhysicalDiskKind::U2, + sled_id, + ); + + datastore + .physical_disk_upsert(&opctx, disk1.clone()) + .await + .expect("Failed to upsert physical disk"); + datastore + .physical_disk_upsert(&opctx, disk2.clone()) + .await + .expect("Failed to upsert physical disk"); + + // Confirm the disks are "in-service". + // + // We verify this state because it should be changing below. + assert_eq!( + PhysicalDiskPolicy::InService, + lookup_physical_disk(&datastore, disk1.id()).await.disk_policy + ); + assert_eq!( + PhysicalDiskPolicy::InService, + lookup_physical_disk(&datastore, disk2.id()).await.disk_policy + ); + + // Expunge the sled. As a part of this process, the query should UPDATE + // the physical_disk table. + sled_set_policy( + &opctx, + &datastore, + sled_id, + SledPolicy::Expunged, + ValidateTransition::Yes, + Expected::Ok(SledPolicy::provisionable()), + ) + .await + .expect("Could not expunge sled"); + + // Observe that the disk state is now expunged + assert_eq!( + PhysicalDiskPolicy::Expunged, + lookup_physical_disk(&datastore, disk1.id()).await.disk_policy + ); + assert_eq!( + PhysicalDiskPolicy::Expunged, + lookup_physical_disk(&datastore, disk2.id()).await.disk_policy + ); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + #[tokio::test] async fn test_sled_transitions() { // Test valid and invalid state and policy transitions. @@ -1199,8 +1360,7 @@ mod test { /// Tests listing large numbers of sleds via the batched interface #[tokio::test] async fn sled_list_batch() { - let logctx = - dev::test_setup_log("sled_reservation_create_non_provisionable"); + let logctx = dev::test_setup_log("sled_list_batch"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; From bb26b0a0e54c0d1c67a5b5dffb361586578b81df Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 1 Apr 2024 21:50:41 -0700 Subject: [PATCH 024/334] New crucible and propolis (#5374) Crucible Increase max backpressure again (Crucible issue #1243) Bump backpressure delay for byte-based backpressure (Crucible issue #1240) `crutest` cleaning; adding rand-read/write tests (Crucible issue #1233) Propolis changes, just the above crucible bits. 
Co-authored-by: Alan Hanson --- Cargo.lock | 26 +++++++++++++------------- Cargo.toml | 12 ++++++------ package-manifest.toml | 12 ++++++------ 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3959eef5e3..4d4bd26265 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -491,9 +491,9 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" +source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" dependencies = [ - "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", + "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", "libc", "strum 0.26.1", ] @@ -510,7 +510,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" +source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" dependencies = [ "libc", "strum 0.26.1", @@ -1418,7 +1418,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=09bcfa6b9201f75891a5413928bb088cc150d319#09bcfa6b9201f75891a5413928bb088cc150d319" +source = "git+https://github.com/oxidecomputer/crucible?rev=4661c23b248da18862012cf55af21b17b79a468e#4661c23b248da18862012cf55af21b17b79a468e" dependencies = [ "anyhow", "chrono", @@ -1434,7 +1434,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=09bcfa6b9201f75891a5413928bb088cc150d319#09bcfa6b9201f75891a5413928bb088cc150d319" +source = "git+https://github.com/oxidecomputer/crucible?rev=4661c23b248da18862012cf55af21b17b79a468e#4661c23b248da18862012cf55af21b17b79a468e" dependencies = [ "anyhow", "chrono", @@ -1451,7 +1451,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=09bcfa6b9201f75891a5413928bb088cc150d319#09bcfa6b9201f75891a5413928bb088cc150d319" +source = "git+https://github.com/oxidecomputer/crucible?rev=4661c23b248da18862012cf55af21b17b79a468e#4661c23b248da18862012cf55af21b17b79a468e" dependencies = [ "crucible-workspace-hack", "libc", @@ -3539,7 +3539,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", + "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", "byteorder", "camino", "camino-tempfile", @@ -5462,7 +5462,7 @@ dependencies = [ "pq-sys", "pretty_assertions", "progenitor-client", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", "rand 0.8.5", "rcgen", "ref-cast", @@ -5674,7 +5674,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "pretty_assertions", - "propolis-client 0.1.0 
(git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", "propolis-mock-server", "rand 0.8.5", "rcgen", @@ -7115,7 +7115,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" +source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" dependencies = [ "async-trait", "base64 0.21.7", @@ -7136,7 +7136,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" +source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" dependencies = [ "anyhow", "atty", @@ -7146,7 +7146,7 @@ dependencies = [ "futures", "hyper 0.14.28", "progenitor", - "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2)", + "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", "rand 0.8.5", "reqwest", "schemars", @@ -7187,7 +7187,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=9b2deee64874b315427962b1c7fccceef99436b2#9b2deee64874b315427962b1c7fccceef99436b2" +source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index 0d66583a82..07ffaf172b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -197,9 +197,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "09bcfa6b9201f75891a5413928bb088cc150d319" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "09bcfa6b9201f75891a5413928bb088cc150d319" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "09bcfa6b9201f75891a5413928bb088cc150d319" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "4661c23b248da18862012cf55af21b17b79a468e" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "4661c23b248da18862012cf55af21b17b79a468e" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "4661c23b248da18862012cf55af21b17b79a468e" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.3" @@ -339,9 +339,9 @@ prettyplease = { version = "0.2.16", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "9b2deee64874b315427962b1c7fccceef99436b2" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "9b2deee64874b315427962b1c7fccceef99436b2" } -propolis-mock-server = { git = 
"https://github.com/oxidecomputer/propolis", rev = "9b2deee64874b315427962b1c7fccceef99436b2" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "84e423bfd3bf84ebb04acb95cf7600731e9f361f" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "84e423bfd3bf84ebb04acb95cf7600731e9f361f" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "84e423bfd3bf84ebb04acb95cf7600731e9f361f" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index 806156b7ed..b8d1727432 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -492,10 +492,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "09bcfa6b9201f75891a5413928bb088cc150d319" +source.commit = "4661c23b248da18862012cf55af21b17b79a468e" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "32a0cc78b436679ed9966564e5a7c0214d67f56c4a5fbac0a5b9507d99752b15" +source.sha256 = "14e607d04234a6749e981c8049437523dbc75494938541822e31ea61090800bf" output.type = "zone" output.intermediate_only = true @@ -504,10 +504,10 @@ service_name = "crucible_pantry_prebuilt" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "09bcfa6b9201f75891a5413928bb088cc150d319" +source.commit = "4661c23b248da18862012cf55af21b17b79a468e" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "99028aaac8c879e4855296ce0bde826ceb8f73504fadf0ded7674dcf45fb0446" +source.sha256 = "9a2181b43d7581468d075e37b5286e478ff008de65dd73b7f49a6e72bc9a43f5" output.type = "zone" output.intermediate_only = true @@ -519,10 +519,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "9b2deee64874b315427962b1c7fccceef99436b2" +source.commit = "84e423bfd3bf84ebb04acb95cf7600731e9f361f" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "b32be7167e0c10ebad874de011a752edcbf936cf55abdaddef7f40025beb9b6a" +source.sha256 = "db72c83b4c0a09e0759ec52e48a5589e9d732c3f390fb4c084f820d173b4f058" output.type = "zone" [package.mg-ddm-gz] From 64b288d94d6f3128be6d03764429e23d242a8b02 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Mon, 1 Apr 2024 22:22:47 -0700 Subject: [PATCH 025/334] Fetch all entries when listing assigned producers (#5373) This works around a confusing interface in Progenitor, which lists the second argument to the `*_stream()` methods as a `limit`, but which describes it as a page size. It currently is the former, a total limit on all entries, so this change ensures we refresh our producer entire producer list. 
--- oximeter/collector/src/agent.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index d3ed56a7a7..7b51b62921 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -36,7 +36,6 @@ use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::net::SocketAddr; use std::net::SocketAddrV6; -use std::num::NonZeroU32; use std::ops::Bound; use std::sync::Arc; use std::sync::Mutex as StdMutex; @@ -748,7 +747,6 @@ impl OximeterAgent { // A task which periodically updates our list of producers from Nexus. async fn refresh_producer_list(agent: OximeterAgent, resolver: Resolver) { let mut interval = tokio::time::interval(agent.refresh_interval); - let page_size = Some(NonZeroU32::new(100).unwrap()); loop { interval.tick().await; info!(agent.log, "refreshing list of producers from Nexus"); @@ -758,7 +756,9 @@ async fn refresh_producer_list(agent: OximeterAgent, resolver: Resolver) { let client = nexus_client::Client::new(&url, agent.log.clone()); let mut stream = client.cpapi_assigned_producers_list_stream( &agent.id, - page_size, + // This is a _total_ limit, not a page size, so `None` means "get + // all entries". + None, Some(IdSortMode::IdAscending), ); let mut expected_producers = BTreeMap::new(); From 8979f810f4c69e206389e158a3c230000f38ccc7 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 11:28:09 -0700 Subject: [PATCH 026/334] Update Rust crate heck to 0.5 (#5263) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 10 ++++++++-- Cargo.toml | 2 +- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d4bd26265..3c970bee53 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -359,7 +359,7 @@ name = "authz-macros" version = "0.1.0" dependencies = [ "expectorate", - "heck 0.4.1", + "heck 0.5.0", "nexus-macros-common", "omicron-workspace-hack", "prettyplease", @@ -1657,7 +1657,7 @@ name = "db-macros" version = "0.1.0" dependencies = [ "expectorate", - "heck 0.4.1", + "heck 0.5.0", "nexus-macros-common", "omicron-workspace-hack", "prettyplease", @@ -3109,6 +3109,12 @@ version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8" +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + [[package]] name = "hermit-abi" version = "0.1.19" diff --git a/Cargo.toml b/Cargo.toml index 07ffaf172b..25dc79baf2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -233,7 +233,7 @@ gethostname = "0.4.3" glob = "0.3.1" guppy = "0.17.5" headers = "0.3.9" -heck = "0.4" +heck = "0.5" hex = "0.4.3" hex-literal = "0.4.1" highway = "1.1.0" From bf5c4b5d0e4e21533accbb509a1f8160d3bfa3ca Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 11:32:33 -0700 Subject: [PATCH 027/334] Update Rust crate tui-tree-widget to 0.19.0 (#5140) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3c970bee53..8f37c0de72 100644 --- a/Cargo.lock +++ b/Cargo.lock 
@@ -10305,9 +10305,9 @@ dependencies = [ [[package]] name = "tui-tree-widget" -version = "0.17.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c317bb061f42d943a2eb118b5de0ee98fc2443f0631e54b24a19de014a28810" +checksum = "fb0c6f924587e719c50b8f83485afbe4d4c16edca6b641d5d9a3204edeba5cf0" dependencies = [ "ratatui", "unicode-width", diff --git a/Cargo.toml b/Cargo.toml index 25dc79baf2..56ffd7eb38 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -435,7 +435,7 @@ trust-dns-server = "0.22" trybuild = "1.0.89" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } -tui-tree-widget = "0.17.0" +tui-tree-widget = "0.19.0" typed-rng = { path = "typed-rng" } unicode-width = "0.1.11" update-common = { path = "update-common" } From cf22afcc2fe9adeb2788d7562b4d67b50082ba35 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 2 Apr 2024 12:17:52 -0700 Subject: [PATCH 028/334] [oxlog] sort log files before printing them out (#5356) Found this while attempting to debug core files during today's mupdate -- I was confused about logs getting skipped for a bit. We should sort log files so they're printed in ascending order, disregarding the directories they're from. Sort by filename only, because log files can go in. Sorting by file name sorts by (service name, timestamp), which is the order we want 99% of the time. There is one wrinkle here, which is that for log files that are written before NTP sync, the timestamps are of the form: ``` /pool/ext/f522118c-5dcd-4116-8044-07f0cceec52e/crypt/debug/oxz_switch/oxide-wicketd:default.log.536112897 ``` But for log files written after NTP sync, they're of the form: ``` /pool/ext/f522118c-5dcd-4116-8044-07f0cceec52e/crypt/debug/oxz_switch/oxide-wicketd:default.log.1699421382 ``` The latter gets sorted before the former, which isn't ideal. But also it works fine and achieves the goal of keeping related logs together. (Note that sorting by file name is possible with `sort` but a bit hard. Something like `sort -t/ -k7` would work: the `7` here indicates the depth at which log files are found.) --- dev-tools/oxlog/src/lib.rs | 85 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/dev-tools/oxlog/src/lib.rs b/dev-tools/oxlog/src/lib.rs index 625d360368..0e72b4b13b 100644 --- a/dev-tools/oxlog/src/lib.rs +++ b/dev-tools/oxlog/src/lib.rs @@ -98,6 +98,10 @@ impl LogFile { } } } + + pub fn file_name_cmp(&self, other: &Self) -> std::cmp::Ordering { + self.path.file_name().cmp(&other.path.file_name()) + } } impl PartialEq for LogFile { @@ -142,6 +146,22 @@ pub struct SvcLogs { pub extra: Vec, } +impl SvcLogs { + /// Sort the archived and extra log files by filename. + /// + /// readdir traverses over directories in indeterminate order, so sort by + /// filename (which is enough to sort by service name and timestamp in most + /// cases). + /// + /// Generally we don't want to sort by full path, because log files may be + /// scattered across several different directories -- and we care more + /// about filename than which directory they are in. + pub fn sort_by_file_name(&mut self) { + self.archived.sort_unstable_by(LogFile::file_name_cmp); + self.extra.sort_unstable_by(LogFile::file_name_cmp); + } +} + // These probably don't warrant newtypes. They are just to make the // keys in maps a bit easier to read. 
type ZoneName = String; @@ -284,10 +304,19 @@ impl Zones { load_extra_logs(dir, svc_name, &mut output, filter.show_empty); } } + + sort_logs(&mut output); + output } } +fn sort_logs(output: &mut BTreeMap) { + for svc_logs in output.values_mut() { + svc_logs.sort_by_file_name(); + } +} + const OX_SMF_PREFIXES: [&str; 2] = ["oxide-", "system-illumos-"]; /// Return true if the provided file name appears to be a valid log file for an @@ -464,4 +493,60 @@ mod tests { ) .is_none()); } + + #[test] + fn test_sort_logs() { + use super::{LogFile, SvcLogs}; + use std::collections::BTreeMap; + + let mut logs = BTreeMap::new(); + logs.insert( + "blah".to_string(), + SvcLogs { + current: None, + archived: vec![ + // "foo" comes after "bar", but the sorted order should + // have 1600000000 before 1700000000. + LogFile { + path: "/bar/blah:default.log.1700000000".into(), + size: None, + modified: None, + }, + LogFile { + path: "/foo/blah:default.log.1600000000".into(), + size: None, + modified: None, + }, + ], + extra: vec![ + // "foo" comes after "bar", but the sorted order should + // have log1 before log2. + LogFile { + path: "/foo/blah/sub.default.log1".into(), + size: None, + modified: None, + }, + LogFile { + path: "/bar/blah/sub.default.log2".into(), + size: None, + modified: None, + }, + ], + }, + ); + + super::sort_logs(&mut logs); + + let svc_logs = logs.get("blah").unwrap(); + assert_eq!( + svc_logs.archived[0].path, + "/foo/blah:default.log.1600000000" + ); + assert_eq!( + svc_logs.archived[1].path, + "/bar/blah:default.log.1700000000" + ); + assert_eq!(svc_logs.extra[0].path, "/foo/blah/sub.default.log1"); + assert_eq!(svc_logs.extra[1].path, "/bar/blah/sub.default.log2"); + } } From 02873bd3ca51fa82017e3eb5686e49b70893b000 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 2 Apr 2024 12:38:28 -0700 Subject: [PATCH 029/334] [oxlog] add sigpipe::reset (#5358) We have a number of CLI tools that panic when piped to things like `head` instead of quietly exiting. There's a long history about this within the Rust community (see https://github.com/rust-lang/rust/issues/62569), but the long and short of it is that SIGPIPE really should be set to its default handler (`SIG_DFL`, terminate the process) for CLI tools. Because oxlog doesn't make any network requests, reset the SIGPIPE handler to `SIG_DFL`. I looked at also potentially doing this for some of our other CLI tools that wait on network services. This should be fine to do if and only if whenever we send data over a socket, the `MSG_NOSIGNAL` flag is set. (This causes an `EPIPE` error to be returned, but no `SIGPIPE` signal to be generated.) Rust does set this flag [here]. **However, as of Rust 1.77 this flag is not set on illumos.** That's a bug and I'll fix it in Rust upstream. 
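For reference, resetting the disposition is a one-liner; the sketch below shows roughly what the `sigpipe` crate's `reset()` boils down to (written with a direct `libc` call purely for illustration, not how this change invokes it):

```
// Restore the default SIGPIPE disposition (SIG_DFL, terminate the process)
// so that writing to a closed pipe ends the program quietly instead of
// turning the resulting EPIPE error into a panic.
unsafe {
    libc::signal(libc::SIGPIPE, libc::SIG_DFL);
}
```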
[here]: https://github.com/rust-lang/rust/blob/877d36b1928b5a4f7d193517b48290ecbe404d71/library/std/src/sys_common/net.rs#L32 --- Cargo.lock | 10 ++++++++++ Cargo.toml | 1 + dev-tools/oxlog/Cargo.toml | 1 + dev-tools/oxlog/src/bin/oxlog.rs | 2 ++ 4 files changed, 14 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index 8f37c0de72..d2f1bf4df2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6302,6 +6302,7 @@ dependencies = [ "chrono", "clap 4.5.1", "omicron-workspace-hack", + "sigpipe", "uuid 1.7.0", ] @@ -8676,6 +8677,15 @@ dependencies = [ "rand_core 0.6.4", ] +[[package]] +name = "sigpipe" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5584bfb3e0d348139d8210285e39f6d2f8a1902ac06de343e06357d1d763d8e6" +dependencies = [ + "libc", +] + [[package]] name = "similar" version = "2.4.0" diff --git a/Cargo.toml b/Cargo.toml index 56ffd7eb38..7a0bcd93aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -378,6 +378,7 @@ sha3 = "0.10.8" shell-words = "1.1.0" signal-hook = "0.3" signal-hook-tokio = { version = "0.3", features = [ "futures-v0_3" ] } +sigpipe = "0.1.3" similar-asserts = "1.5.0" sled = "0.34" sled-agent-client = { path = "clients/sled-agent-client" } diff --git a/dev-tools/oxlog/Cargo.toml b/dev-tools/oxlog/Cargo.toml index 5d7cfaf5c1..3c3d983d09 100644 --- a/dev-tools/oxlog/Cargo.toml +++ b/dev-tools/oxlog/Cargo.toml @@ -9,6 +9,7 @@ anyhow.workspace = true camino.workspace = true chrono.workspace = true clap.workspace = true +sigpipe.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/dev-tools/oxlog/src/bin/oxlog.rs b/dev-tools/oxlog/src/bin/oxlog.rs index 88e067c382..ceeb98b3bd 100644 --- a/dev-tools/oxlog/src/bin/oxlog.rs +++ b/dev-tools/oxlog/src/bin/oxlog.rs @@ -57,6 +57,8 @@ struct FilterArgs { } fn main() -> Result<(), anyhow::Error> { + sigpipe::reset(); + let cli = Cli::parse(); match cli.command { From 1fc108deea70840504a229725072d6fc23570e1d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 19:41:05 +0000 Subject: [PATCH 030/334] Update Rust crate toml_edit to 0.22.9 (#5253) Co-authored-by: Rain --- Cargo.lock | 10 +++++----- Cargo.toml | 2 +- wicket/src/cli/rack_setup/config_toml.rs | 6 +++--- workspace-hack/Cargo.toml | 4 ++-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2f1bf4df2..c2fff59a69 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5867,7 +5867,7 @@ dependencies = [ "toml 0.7.8", "toml_datetime", "toml_edit 0.19.15", - "toml_edit 0.22.6", + "toml_edit 0.22.9", "tracing", "trust-dns-proto", "unicode-bidi", @@ -9991,7 +9991,7 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", - "toml_edit 0.22.6", + "toml_edit 0.22.9", ] [[package]] @@ -10018,9 +10018,9 @@ dependencies = [ [[package]] name = "toml_edit" -version = "0.22.6" +version = "0.22.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c1b5fd4128cc8d3e0cb74d4ed9a9cc7c7284becd4df68f5f940e1ad123606f6" +checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4" dependencies = [ "indexmap 2.2.5", "serde", @@ -11038,7 +11038,7 @@ dependencies = [ "tokio", "tokio-util", "toml 0.8.10", - "toml_edit 0.22.6", + "toml_edit 0.22.9", "tui-tree-widget", "unicode-width", "update-engine", diff --git a/Cargo.toml b/Cargo.toml index 7a0bcd93aa..1d2457e25c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -427,7 +427,7 @@ tokio-stream = "0.1.14" tokio-tungstenite = 
"0.20" tokio-util = { version = "0.7.10", features = ["io", "io-util"] } toml = "0.8.10" -toml_edit = "0.22.6" +toml_edit = "0.22.9" tough = { version = "0.16.0", features = [ "http" ] } trust-dns-client = "0.22" trust-dns-proto = "0.22" diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index d050610c30..f898a8ece4 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -9,7 +9,7 @@ use serde::Serialize; use std::borrow::Cow; use std::fmt; use toml_edit::Array; -use toml_edit::Document; +use toml_edit::DocumentMut; use toml_edit::Formatted; use toml_edit::InlineTable; use toml_edit::Item; @@ -28,12 +28,12 @@ static TEMPLATE: &str = include_str!("config_template.toml"); const ARRAY_SEP: &str = "\n "; pub(super) struct TomlTemplate { - doc: Document, + doc: DocumentMut, } impl TomlTemplate { pub(crate) fn populate(config: &CurrentRssUserConfigInsensitive) -> Self { - let mut doc = TEMPLATE.parse::().unwrap(); + let mut doc = TEMPLATE.parse::().unwrap(); *doc.get_mut("external_dns_zone_name") .unwrap() diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 659b10c721..9e3c7487d0 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -106,7 +106,7 @@ tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } toml = { version = "0.7.8" } -toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.6", features = ["serde"] } +toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.9", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } @@ -213,7 +213,7 @@ tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serd tokio-stream = { version = "0.1.14", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } toml = { version = "0.7.8" } -toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.6", features = ["serde"] } +toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.9", features = ["serde"] } tracing = { version = "0.1.40", features = ["log"] } trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.15" } From fcd0f1305e72423763356d5c6887341a5e23c391 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 20:13:28 +0000 Subject: [PATCH 031/334] Update Rust crate argon2alt to v2 (#4550) --- Cargo.lock | 5 ++--- passwords/Cargo.toml | 2 +- passwords/src/lib.rs | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c2fff59a69..1176119063 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7947,14 +7947,13 @@ dependencies = [ [[package]] name = "rust-argon2" -version = "1.0.1" +version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5885493fdf0be6cdff808d1533ce878d21cfa49c7086fa00c66355cd9141bfc" +checksum = "9d9848531d60c9cbbcf9d166c885316c24bc0e2a9d3eba0956bb6cbbd79bc6e8" dependencies = [ "base64 0.21.7", "blake2b_simd", "constant_time_eq 0.3.0", - "crossbeam-utils", ] [[package]] diff --git a/passwords/Cargo.toml b/passwords/Cargo.toml index 4f3922a7a5..bb411a9449 100644 --- a/passwords/Cargo.toml +++ b/passwords/Cargo.toml @@ -14,7 +14,7 @@ 
serde_with.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] -argon2alt = { package = "rust-argon2", version = "1.0" } +argon2alt = { package = "rust-argon2", version = "2.1.0" } criterion.workspace = true [[bench]] diff --git a/passwords/src/lib.rs b/passwords/src/lib.rs index ca2292420c..c7e9f1a118 100644 --- a/passwords/src/lib.rs +++ b/passwords/src/lib.rs @@ -563,7 +563,6 @@ mod test { mem_cost: ARGON2_COST_M_KIB, time_cost: ARGON2_COST_T, lanes: ARGON2_COST_P, - thread_mode: argon2alt::ThreadMode::Sequential, secret: &[], ad: &[], hash_length: 32, From 7598c9d07eac0c27dfd62877d8ad3ebdd9a8bb4b Mon Sep 17 00:00:00 2001 From: bnaecker Date: Tue, 2 Apr 2024 13:43:01 -0700 Subject: [PATCH 032/334] Add address information to oximeter collector logs (#5378) - Resolves #5377 - Adds collector IP, producer socket address and full route to all logs --- oximeter/collector/src/agent.rs | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 7b51b62921..693c157df3 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -149,12 +149,16 @@ async fn perform_collection( // also send a `CollectionMessage`, for example to update the collection interval. This is not // currently used, but will likely be exposed via control plane interfaces in the future. async fn collection_task( - log: Logger, + orig_log: Logger, collector: self_stats::OximeterCollector, mut producer: ProducerEndpoint, mut inbox: mpsc::Receiver, outbox: mpsc::Sender<(Option, ProducerResults)>, ) { + let mut log = orig_log.new(o!( + "route" => producer.collection_route(), + "address" => producer.address, + )); let client = reqwest::Client::new(); let mut collection_timer = interval(producer.interval); collection_timer.tick().await; // completes immediately @@ -193,6 +197,12 @@ async fn collection_task( "interval" => ?producer.interval, "address" => producer.address, ); + + // Update the logger with the new information as well. + log = orig_log.new(o!( + "route" => producer.collection_route(), + "address" => producer.address, + )); collection_timer = interval(producer.interval); collection_timer.tick().await; // completes immediately } @@ -386,6 +396,7 @@ impl OximeterAgent { let log = log.new(o!( "component" => "oximeter-agent", "collector_id" => id.to_string(), + "collector_ip" => address.ip().to_string(), )); let insertion_log = log.new(o!("component" => "results-sink")); @@ -496,6 +507,7 @@ impl OximeterAgent { let log = log.new(o!( "component" => "oximeter-standalone", "collector_id" => id.to_string(), + "collector_ip" => address.ip().to_string(), )); // If we have configuration for ClickHouse, we'll spawn the results @@ -588,7 +600,10 @@ impl OximeterAgent { // Build channel to control the task and receive results. 
let (tx, rx) = mpsc::channel(4); let q = self.result_sender.clone(); - let log = self.log.new(o!("component" => "collection-task", "producer_id" => id.to_string())); + let log = self.log.new(o!( + "component" => "collection-task", + "producer_id" => id.to_string(), + )); let info_clone = info.clone(); let target = self.collection_target; let task = tokio::spawn(async move { From b4d1a7b50a93783d6ceca2561eec93d4b286bf9d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 21:50:32 +0000 Subject: [PATCH 033/334] Update Rust crate toml to 0.8.12 (#5252) Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- Cargo.lock | 40 ++++++++++++++++++++-------------------- Cargo.toml | 2 +- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1176119063..30cd4755ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -873,7 +873,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "922d6ea3081d68b9e3e09557204bff47f9b5406a4a304dc917e187f8cafd582b" dependencies = [ "serde", - "toml 0.8.10", + "toml 0.8.12", ] [[package]] @@ -2017,7 +2017,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "toml 0.8.10", + "toml 0.8.12", "trust-dns-client", "trust-dns-proto", "trust-dns-resolver", @@ -2099,7 +2099,7 @@ dependencies = [ "serde", "serde_json", "slog", - "toml 0.8.10", + "toml 0.8.12", "uuid 1.7.0", ] @@ -2142,7 +2142,7 @@ dependencies = [ "slog-term", "tokio", "tokio-rustls 0.25.0", - "toml 0.8.10", + "toml 0.8.12", "usdt 0.3.5", "uuid 1.7.0", "version_check", @@ -2323,7 +2323,7 @@ dependencies = [ "serde_json", "socket2 0.5.5", "tokio", - "toml 0.8.10", + "toml 0.8.12", "trust-dns-resolver", "uuid 1.7.0", ] @@ -3569,7 +3569,7 @@ dependencies = [ "smf", "thiserror", "tokio", - "toml 0.8.10", + "toml 0.8.12", "uuid 1.7.0", "whoami", "zone 0.3.0", @@ -4558,7 +4558,7 @@ dependencies = [ "serde_json", "serde_with", "tokio-postgres", - "toml 0.8.10", + "toml 0.8.12", "uuid 1.7.0", ] @@ -5304,7 +5304,7 @@ dependencies = [ "slog", "thiserror", "tokio", - "toml 0.8.10", + "toml 0.8.12", "uuid 1.7.0", ] @@ -5339,7 +5339,7 @@ dependencies = [ "subprocess", "tokio", "tokio-postgres", - "toml 0.8.10", + "toml 0.8.12", ] [[package]] @@ -5381,7 +5381,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-tungstenite 0.20.1", - "toml 0.8.10", + "toml 0.8.12", "uuid 1.7.0", ] @@ -5595,7 +5595,7 @@ dependencies = [ "tar", "thiserror", "tokio", - "toml 0.8.10", + "toml 0.8.12", "walkdir", ] @@ -5711,7 +5711,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "toml 0.8.10", + "toml 0.8.12", "usdt 0.5.0", "uuid 1.7.0", "zeroize", @@ -6190,7 +6190,7 @@ dependencies = [ "subprocess", "thiserror", "tokio", - "toml 0.8.10", + "toml 0.8.12", "uuid 1.7.0", ] @@ -9073,7 +9073,7 @@ dependencies = [ "sprockets-rot", "thiserror", "tokio", - "toml 0.8.10", + "toml 0.8.12", ] [[package]] @@ -9983,9 +9983,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.10" +version = "0.8.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a9aad4a3066010876e8dcf5a8a06e70a558751117a145c6ce2b82c2e2054290" +checksum = "e9dd1545e8208b4a5af1aa9bbd0b4cf7e9ea08fabc5d0a5c67fcaafa17433aa3" dependencies = [ "serde", "serde_spanned", @@ -10306,7 +10306,7 @@ dependencies = [ "slog", "tar", "tokio", - "toml 0.8.10", + "toml 0.8.12", "tough", "url", "zip", @@ -11036,7 +11036,7 @@ dependencies = [ "textwrap 0.16.1", "tokio", 
"tokio-util", - "toml 0.8.10", + "toml 0.8.12", "toml_edit 0.22.9", "tui-tree-widget", "unicode-width", @@ -11144,7 +11144,7 @@ dependencies = [ "tokio", "tokio-stream", "tokio-util", - "toml 0.8.10", + "toml 0.8.12", "tough", "trust-dns-resolver", "tufaceous", @@ -11424,7 +11424,7 @@ dependencies = [ "fs-err", "serde", "swrite", - "toml 0.8.10", + "toml 0.8.12", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 1d2457e25c..ed9130d46e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -426,7 +426,7 @@ tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1 tokio-stream = "0.1.14" tokio-tungstenite = "0.20" tokio-util = { version = "0.7.10", features = ["io", "io-util"] } -toml = "0.8.10" +toml = "0.8.12" toml_edit = "0.22.9" tough = { version = "0.16.0", features = [ "http" ] } trust-dns-client = "0.22" From 210689f46388355ed333e2f83de695dee0435dad Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 04:12:02 +0000 Subject: [PATCH 034/334] Update taiki-e/install-action digest to 29beae9 (#5382) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`3d5321a` -> `29beae9`](https://togithub.com/taiki-e/install-action/compare/3d5321a...29beae9) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 5194f2d28a..8cda830b58 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@3d5321a5e3ceb69232a18ca12966908a643cbce3 # v2 + uses: taiki-e/install-action@29beae9445d6ef8516259305b219de7ff43a0118 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From d3af469d30a9c7a4fde6174888fe1e161e058819 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:36:22 -0700 Subject: [PATCH 035/334] Update Rust crate async-trait to 0.1.79 (#5383) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 30cd4755ee..928103f000 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -308,9 +308,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.77" +version = "0.1.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" +checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index ed9130d46e..c41f169184 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,7 +170,7 @@ approx = "0.5.1" assert_matches = "1.5.0" assert_cmd = "2.0.14" async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" } -async-trait = "0.1.77" +async-trait = "0.1.79" atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } From 0dfafab16d662bccc702a3e34aa2ea0e24019850 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:36:32 -0700 Subject: [PATCH 036/334] Update Rust crate diesel to 2.1.5 (#5384) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 928103f000..aa658f9142 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1843,9 +1843,9 @@ checksum = "a7993efb860416547839c115490d4951c6d0f8ec04a3594d9dd99d50ed7ec170" [[package]] name = "diesel" -version = "2.1.4" +version = "2.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62c6fcf842f17f8c78ecf7c81d75c5ce84436b41ee07e03f490fbb5f5a8731d8" +checksum = "03fc05c17098f21b89bc7d98fe1dd3cce2c11c2ad8e145f2a44fe08ed28eb559" dependencies = [ "bitflags 2.4.2", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index c41f169184..14b0b5617b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -209,7 +209,7 @@ db-macros = { path = "nexus/db-macros" } debug-ignore = "1.0.5" derive_more = "0.99.17" derive-where = "1.2.7" -diesel = { version = "2.1.4", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } +diesel = { version = "2.1.5", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" } dns-server = { path = "dns-server" } dns-service-client = { 
path = "clients/dns-service-client" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 9e3c7487d0..8ef63f12cf 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -37,7 +37,7 @@ crossbeam-utils = { version = "0.8.19" } crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } -diesel = { version = "2.1.4", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } +diesel = { version = "2.1.5", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.10.0" } elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } @@ -143,7 +143,7 @@ crossbeam-utils = { version = "0.8.19" } crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } -diesel = { version = "2.1.4", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } +diesel = { version = "2.1.5", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.10.0" } elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } From 7e22cdb139c99a64944dc774662604fb5e8f0869 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:36:41 -0700 Subject: [PATCH 037/334] Update Rust crate indexmap to 2.2.6 (#5385) --- Cargo.lock | 38 +++++++++++++++++++------------------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 22 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index aa658f9142..b49c6f2cb2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2121,7 +2121,7 @@ dependencies = [ "hostname", "http 0.2.12", "hyper 0.14.28", - "indexmap 2.2.5", + "indexmap 2.2.6", "multer", "openapiv3", "paste", @@ -2955,7 +2955,7 @@ dependencies = [ "debug-ignore", "fixedbitset", "guppy-workspace-hack", - "indexmap 2.2.5", + "indexmap 2.2.6", "itertools 0.12.1", "nested", "once_cell", @@ -2987,7 +2987,7 @@ dependencies = [ "futures-sink", "futures-util", "http 0.2.12", - "indexmap 2.2.5", + "indexmap 2.2.6", "slab", "tokio", "tokio-util", @@ -3599,9 +3599,9 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.2.5" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b0b929d511467233429c45a44ac1dcaa21ba0f5ba11e4879e6ed28ddb4f9df4" +checksum = "168fb715dda47215e360912c096649d23d58bf392ac62f73919e831745e40f26" dependencies = [ "equivalent", "hashbrown 0.14.3", @@ -4824,7 +4824,7 @@ dependencies = [ "chrono", "expectorate", "gateway-client", - "indexmap 2.2.5", + "indexmap 2.2.6", "internal-dns", "ipnet", "ipnetwork", @@ -5813,7 
+5813,7 @@ dependencies = [ "hex", "hmac", "hyper 0.14.28", - "indexmap 2.2.5", + "indexmap 2.2.6", "inout", "ipnetwork", "itertools 0.10.5", @@ -5937,7 +5937,7 @@ version = "0.4.0" source = "git+https://github.com/oxidecomputer/openapi-lint?branch=main#ef442ee4343e97b6d9c217d3e7533962fe7d7236" dependencies = [ "heck 0.4.1", - "indexmap 2.2.5", + "indexmap 2.2.6", "lazy_static", "openapiv3", "regex", @@ -5949,7 +5949,7 @@ version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc02deea53ffe807708244e5914f6b099ad7015a207ee24317c22112e17d9c5c" dependencies = [ - "indexmap 2.2.5", + "indexmap 2.2.6", "serde", "serde_json", ] @@ -6211,7 +6211,7 @@ dependencies = [ "expectorate", "futures", "highway", - "indexmap 2.2.5", + "indexmap 2.2.6", "itertools 0.12.1", "num", "omicron-common", @@ -6630,7 +6630,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9" dependencies = [ "fixedbitset", - "indexmap 2.2.5", + "indexmap 2.2.6", "serde", "serde_derive", ] @@ -7037,7 +7037,7 @@ dependencies = [ "getopts", "heck 0.4.1", "http 0.2.12", - "indexmap 2.2.5", + "indexmap 2.2.6", "openapiv3", "proc-macro2", "quote", @@ -7479,7 +7479,7 @@ dependencies = [ "dropshot", "expectorate", "humantime", - "indexmap 2.2.5", + "indexmap 2.2.6", "nexus-client", "nexus-db-queries", "nexus-reconfigurator-execution", @@ -8541,7 +8541,7 @@ dependencies = [ "chrono", "hex", "indexmap 1.9.3", - "indexmap 2.2.5", + "indexmap 2.2.6", "serde", "serde_derive", "serde_json", @@ -8567,7 +8567,7 @@ version = "0.9.25" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a49e178e4452f45cb61d0cd8cebc1b0fafd3e41929e996cef79aa3aca91f574" dependencies = [ - "indexmap 2.2.5", + "indexmap 2.2.6", "itoa", "ryu", "serde", @@ -10008,7 +10008,7 @@ version = "0.19.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421" dependencies = [ - "indexmap 2.2.5", + "indexmap 2.2.6", "serde", "serde_spanned", "toml_datetime", @@ -10021,7 +10021,7 @@ version = "0.22.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e40bb779c5187258fd7aad0eb68cb8706a0a81fa712fbea808ab43c4b8374c4" dependencies = [ - "indexmap 2.2.5", + "indexmap 2.2.6", "serde", "serde_spanned", "toml_datetime", @@ -10569,7 +10569,7 @@ dependencies = [ "derive-where", "either", "futures", - "indexmap 2.2.5", + "indexmap 2.2.6", "indicatif", "libsw", "linear-map", @@ -11012,7 +11012,7 @@ dependencies = [ "crossterm", "futures", "humantime", - "indexmap 2.2.5", + "indexmap 2.2.6", "indicatif", "itertools 0.12.1", "omicron-common", diff --git a/Cargo.toml b/Cargo.toml index 14b0b5617b..7b4e423e91 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -246,7 +246,7 @@ hyper = "0.14" hyper-rustls = "0.26.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } -indexmap = "2.2.5" +indexmap = "2.2.6" indicatif = { version = "0.17.8", features = ["rayon"] } installinator = { path = "installinator" } installinator-artifactd = { path = "installinator-artifactd" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 8ef63f12cf..1942ac1590 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -58,7 +58,7 @@ hashbrown = { version = "0.14.3", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = 
["reset"] } hyper = { version = "0.14.28", features = ["full"] } -indexmap = { version = "2.2.5", features = ["serde"] } +indexmap = { version = "2.2.6", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } @@ -164,7 +164,7 @@ hashbrown = { version = "0.14.3", features = ["raw"] } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.28", features = ["full"] } -indexmap = { version = "2.2.5", features = ["serde"] } +indexmap = { version = "2.2.6", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } ipnetwork = { version = "0.20.0", features = ["schemars"] } itertools = { version = "0.10.5" } From 8022bdaa468aa81fd2bf21064429c83e16610132 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 2 Apr 2024 23:47:02 -0700 Subject: [PATCH 038/334] Update Rust crate regex to 1.10.4 (#5387) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b49c6f2cb2..44272eafab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7587,9 +7587,9 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.3" +version = "1.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" dependencies = [ "aho-corasick", "memchr", diff --git a/Cargo.toml b/Cargo.toml index 7b4e423e91..9a8be3e104 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -352,7 +352,7 @@ rayon = "1.9" rcgen = "0.12.1" reedline = "0.30.0" ref-cast = "1.0" -regex = "1.10.3" +regex = "1.10.4" regress = "0.9.0" reqwest = { version = "0.11", default-features = false } ring = "0.17.8" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1942ac1590..96be72a074 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -83,7 +83,7 @@ predicates = { version = "3.1.0" } proc-macro2 = { version = "1.0.78" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } -regex = { version = "1.10.3" } +regex = { version = "1.10.4" } regex-automata = { version = "0.4.5", default-features = false, features = ["dfa-onepass", "dfa-search", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.24", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } @@ -189,7 +189,7 @@ predicates = { version = "3.1.0" } proc-macro2 = { version = "1.0.78" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } -regex = { version = "1.10.3" } +regex = { version = "1.10.4" } regex-automata = { version = "0.4.5", default-features = false, features = ["dfa-onepass", "dfa-search", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.24", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } From fdf3a44cc763900e37b7f7f86c377cc3073f913a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" 
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 09:19:22 +0000 Subject: [PATCH 039/334] Update Rust crate regress to 0.9.1 (#5388) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 44272eafab..015f15cdd3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7628,9 +7628,9 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "regress" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d06f9a1f7cd8473611ba1a480cf35f9c5cffc2954336ba90a982fdb7e7d7f51e" +checksum = "0eae2a1ebfecc58aff952ef8ccd364329abe627762f5bf09ff42eb9d98522479" dependencies = [ "hashbrown 0.14.3", "memchr", diff --git a/Cargo.toml b/Cargo.toml index 9a8be3e104..3b456cf959 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -353,7 +353,7 @@ rcgen = "0.12.1" reedline = "0.30.0" ref-cast = "1.0" regex = "1.10.4" -regress = "0.9.0" +regress = "0.9.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.8" rpassword = "7.3.1" From 2fd02fa5e1db5c0e586f54b5d73f54720c7a5164 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 09:45:55 +0000 Subject: [PATCH 040/334] Update Rust crate serde_json to 1.0.115 (#5389) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 015f15cdd3..27ee460553 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8448,9 +8448,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.114" +version = "1.0.115" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" +checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index 3b456cf959..b0c793326b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -368,7 +368,7 @@ secrecy = "0.8.0" semver = { version = "1.0.22", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive", "rc" ] } serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.114" +serde_json = "1.0.115" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 96be72a074..9cc65bfa79 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -91,7 +91,7 @@ ring = { version = "0.17.8", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid", "uuid1"] } semver = { version = "1.0.22", features = ["serde"] } serde = { version = "1.0.197", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.114", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.115", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -197,7 +197,7 @@ ring = { version = "0.17.8", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid", "uuid1"] } semver = { version = "1.0.22", 
features = ["serde"] } serde = { version = "1.0.197", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.114", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.115", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 7d86b3a2aff1dcc0aa749203430060c43d2f5ffd Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Wed, 3 Apr 2024 10:39:16 -0700 Subject: [PATCH 041/334] stop splitting the TUF repo on the 1 GiB boundary (#5325) --- .github/buildomat/jobs/tuf-repo.sh | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index 56cb41b51b..c055a3f2ea 100755 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -5,7 +5,7 @@ #: target = "helios-2.0" #: output_rules = [ #: "=/work/manifest*.toml", -#: "=/work/repo-*.zip.part*", +#: "=/work/repo-*.zip", #: "=/work/repo-*.zip.sha256.txt", #: ] #: access_repos = [ @@ -23,13 +23,8 @@ #: #: [[publish]] #: series = "rot-all" -#: name = "repo.zip.parta" -#: from_output = "/work/repo-rot-all.zip.parta" -#: -#: [[publish]] -#: series = "rot-all" -#: name = "repo.zip.partb" -#: from_output = "/work/repo-rot-all.zip.partb" +#: name = "repo.zip" +#: from_output = "/work/repo-rot-all.zip" #: #: [[publish]] #: series = "rot-all" @@ -201,12 +196,3 @@ add_hubris_artifacts prod/rel cert-prod-rel-v1.0.7 # Build the TUF ZIP. /work/tufaceous assemble --no-generate-key /work/manifest.toml /work/repo-rot-all.zip digest -a sha256 /work/repo-rot-all.zip > /work/repo-rot-all.zip.sha256.txt - -# -# XXX: There are some issues downloading Buildomat artifacts > 1 GiB, see -# oxidecomputer/buildomat#36. -# -split -a 1 -b 1024m /work/repo-rot-all.zip /work/repo-rot-all.zip.part -rm /work/repo-rot-all.zip -# Ensure the build doesn't fail if the repo gets smaller than 1 GiB. 
-touch /work/repo-rot-all.zip.partb From 407143e8cfb06a1bef5c7577b8a67bc72cffff96 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Wed, 3 Apr 2024 12:44:12 -0500 Subject: [PATCH 042/334] Bump web console (create instance with existing disk) (#5400) https://github.com/oxidecomputer/console/compare/156c082c...b22ca1dc * [b22ca1dc](https://github.com/oxidecomputer/console/commit/b22ca1dc) add loop comment to scp-assets * [99173b92](https://github.com/oxidecomputer/console/commit/99173b92) bump omicron script: automatically run gh run watch when assets aren't ready * [2cfc8ee7](https://github.com/oxidecomputer/console/commit/2cfc8ee7) oxidecomputer/console#2076 * [11411bb8](https://github.com/oxidecomputer/console/commit/11411bb8) oxidecomputer/console#2121 * [1f8b25d7](https://github.com/oxidecomputer/console/commit/1f8b25d7) oxidecomputer/console#2119 * [95f2e49e](https://github.com/oxidecomputer/console/commit/95f2e49e) oxidecomputer/console#2108 * [8e3a2005](https://github.com/oxidecomputer/console/commit/8e3a2005) oxidecomputer/console#2116 * [bf592a31](https://github.com/oxidecomputer/console/commit/bf592a31) oxidecomputer/console#2105 * [b63c81ea](https://github.com/oxidecomputer/console/commit/b63c81ea) oxidecomputer/console#2115 * [d5d70bd7](https://github.com/oxidecomputer/console/commit/d5d70bd7) oxidecomputer/console#2113 * [1954709e](https://github.com/oxidecomputer/console/commit/1954709e) oxidecomputer/console#2112 * [4db8d830](https://github.com/oxidecomputer/console/commit/4db8d830) oxidecomputer/console#2111 * [9485ca23](https://github.com/oxidecomputer/console/commit/9485ca23) Revert "Revert "Change all uses of RHF `` to `useController` (oxidecomputer/console#2102)"" --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index d09192f353..87e3af12e5 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="156c082cdb21231ab95ef7475de199ecb7a96dc3" -SHA2="bf768008b6958e19b8d8cef25710b4ff64eef1e59bb3bedb27bb6bf33459a78b" +COMMIT="b22ca1dce0a9380e7f9adbc7ebc60c85091ef362" +SHA2="6efc13fdc55c70f8a8dae5fad2fb2fb8633f291750e1899f48bf92d52e04e24a" From e8aba10219970c911d3846bb979332ec63937668 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 11:21:40 -0700 Subject: [PATCH 043/334] Update Rust crate prettyplease to 0.2.17 (#5386) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 27ee460553..1a3c7d1b4e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6944,9 +6944,9 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" +checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", "syn 2.0.52", diff --git a/Cargo.toml b/Cargo.toml index b0c793326b..7b3e878178 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -335,7 +335,7 @@ postgres-protocol = "0.6.6" predicates = "3.1.0" pretty_assertions = "1.4.0" pretty-hex = "0.4.1" -prettyplease = { version = "0.2.16", features = ["verbatim"] } +prettyplease = { version = "0.2.17", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = 
"https://github.com/oxidecomputer/progenitor", branch = "main" } From fd53df2d6518c5ab28a3fe17d6b7746e6278aa49 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 11:22:10 -0700 Subject: [PATCH 044/334] Update Rust crate reedline to 0.31.0 (#5399) --- Cargo.lock | 67 ++++++++++++++++++++---------------------------------- Cargo.toml | 2 +- 2 files changed, 26 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1a3c7d1b4e..3d511e0cc6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -485,7 +485,7 @@ source = "git+https://github.com/oxidecomputer/propolis?rev=6dceb9ef69c217cb78a2 dependencies = [ "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=6dceb9ef69c217cb78a2018bbedafbc19f6ec1af)", "libc", - "strum 0.26.1", + "strum", ] [[package]] @@ -495,7 +495,7 @@ source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04a dependencies = [ "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", "libc", - "strum 0.26.1", + "strum", ] [[package]] @@ -504,7 +504,7 @@ version = "0.0.0" source = "git+https://github.com/oxidecomputer/propolis?rev=6dceb9ef69c217cb78a2018bbedafbc19f6ec1af#6dceb9ef69c217cb78a2018bbedafbc19f6ec1af" dependencies = [ "libc", - "strum 0.26.1", + "strum", ] [[package]] @@ -513,7 +513,7 @@ version = "0.0.0" source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" dependencies = [ "libc", - "strum 0.26.1", + "strum", ] [[package]] @@ -1971,7 +1971,7 @@ version = "0.0.0" source = "git+https://github.com/oxidecomputer/propolis?rev=6dceb9ef69c217cb78a2018bbedafbc19f6ec1af#6dceb9ef69c217cb78a2018bbedafbc19f6ec1af" dependencies = [ "libc", - "strum 0.26.1", + "strum", ] [[package]] @@ -2436,17 +2436,6 @@ dependencies = [ "log", ] -[[package]] -name = "fd-lock" -version = "3.0.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef033ed5e9bad94e55838ca0ca906db0e043f517adda0c8b79c7a8c66c93c1b5" -dependencies = [ - "cfg-if", - "rustix", - "windows-sys 0.48.0", -] - [[package]] name = "fd-lock" version = "4.0.2" @@ -4598,7 +4587,7 @@ dependencies = [ "serde_json", "sled-agent-client", "steno", - "strum 0.26.1", + "strum", "thiserror", "tokio", "uuid 1.7.0", @@ -4678,7 +4667,7 @@ dependencies = [ "slog-error-chain", "static_assertions", "steno", - "strum 0.26.1", + "strum", "subprocess", "swrite", "term", @@ -4721,7 +4710,7 @@ dependencies = [ "serde_json", "sled-agent-client", "slog", - "strum 0.26.1", + "strum", "thiserror", "tokio", "typed-rng", @@ -4949,7 +4938,7 @@ dependencies = [ "serde_with", "sled-agent-client", "steno", - "strum 0.26.1", + "strum", "tabled", "thiserror", "uuid 1.7.0", @@ -5276,7 +5265,7 @@ dependencies = [ "serde_with", "slog", "slog-error-chain", - "strum 0.26.1", + "strum", "test-strategy", "thiserror", "tokio", @@ -5493,7 +5482,7 @@ dependencies = [ "slog-term", "sp-sim", "steno", - "strum 0.26.1", + "strum", "subprocess", "tempfile", "term", @@ -5555,7 +5544,7 @@ dependencies = [ "sled-agent-client", "slog", "slog-error-chain", - "strum 0.26.1", + "strum", "subprocess", "tabled", "textwrap 0.16.1", @@ -5590,7 +5579,7 @@ dependencies = [ "slog-bunyan", "slog-term", "smf", - "strum 0.26.1", + "strum", "swrite", "tar", "thiserror", @@ -5702,7 +5691,7 @@ dependencies = [ "slog-term", "smf", "static_assertions", - "strum 0.26.1", + "strum", "subprocess", 
"tar", "tempfile", @@ -6133,7 +6122,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "strum 0.26.1", + "strum", "thiserror", "trybuild", "uuid 1.7.0", @@ -6186,7 +6175,7 @@ dependencies = [ "slog-async", "slog-dtrace", "slog-term", - "strum 0.26.1", + "strum", "subprocess", "thiserror", "tokio", @@ -6232,7 +6221,7 @@ dependencies = [ "slog-term", "sqlformat", "sqlparser", - "strum 0.26.1", + "strum", "tabled", "tempfile", "thiserror", @@ -7090,7 +7079,7 @@ dependencies = [ "serde_arrays", "serde_json", "slog", - "strum 0.26.1", + "strum", "thiserror", "tokio", "usdt 0.5.0", @@ -7420,7 +7409,7 @@ dependencies = [ "lru", "paste", "stability", - "strum 0.26.1", + "strum", "unicode-segmentation", "unicode-width", ] @@ -7547,19 +7536,19 @@ dependencies = [ [[package]] name = "reedline" -version = "0.30.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413a9fa6a5d8c937d3ae1e975bfb6a918bb0b6cdfae6a10416218c837a31b8fc" +checksum = "65ebc241ed0ccea0bbbd775a55a76f0dd9971ef084589dea938751a03ffedc14" dependencies = [ "chrono", "crossterm", - "fd-lock 3.0.13", + "fd-lock", "itertools 0.12.1", "nu-ansi-term", "serde", "strip-ansi-escapes", - "strum 0.25.0", - "strum_macros 0.25.2", + "strum", + "strum_macros 0.26.1", "thiserror", "unicode-segmentation", "unicode-width", @@ -8142,7 +8131,7 @@ dependencies = [ "bitflags 2.4.2", "cfg-if", "clipboard-win", - "fd-lock 4.0.2", + "fd-lock", "home", "libc", "log", @@ -9325,12 +9314,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "strum" -version = "0.25.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "290d54ea6f91c969195bdbcd7442c8c2a2ba87da8bf60a7ee86a235d4bc1e125" - [[package]] name = "strum" version = "0.26.1" diff --git a/Cargo.toml b/Cargo.toml index 7b3e878178..667855649b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -350,7 +350,7 @@ rand_seeder = "0.2.3" ratatui = "0.26.1" rayon = "1.9" rcgen = "0.12.1" -reedline = "0.30.0" +reedline = "0.31.0" ref-cast = "1.0" regex = "1.10.4" regress = "0.9.1" From 4a41459f29d86cfca266664a9dd7a13dd50efcab Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 3 Apr 2024 11:22:33 -0700 Subject: [PATCH 045/334] [test-utils] perform extra redactions at the beginning, not the end (#5393) --- test-utils/src/dev/test_cmds.rs | 30 ++++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) diff --git a/test-utils/src/dev/test_cmds.rs b/test-utils/src/dev/test_cmds.rs index 89bf022369..6500eaddfd 100644 --- a/test-utils/src/dev/test_cmds.rs +++ b/test-utils/src/dev/test_cmds.rs @@ -126,11 +126,20 @@ pub fn error_for_enoent() -> String { /// /// This allows use to use expectorate to verify the shape of the CLI output. pub fn redact_variable(input: &str, extra_redactions: &[&str]) -> String { + // Perform extra redactions at the beginning, not the end. This is because + // some of the built-in redactions below might match a substring of + // something that should be handled by extra_redactions (e.g. a temporary + // path). + let mut s = input.to_owned(); + for r in extra_redactions { + s = s.replace(r, ""); + } + // Replace TCP port numbers. We include the localhost characters to avoid // catching any random sequence of numbers. 
let s = regex::Regex::new(r"\[::1\]:\d{4,5}") .unwrap() - .replace_all(input, "[::1]:REDACTED_PORT") + .replace_all(&s, "[::1]:REDACTED_PORT") .to_string(); let s = regex::Regex::new(r"\[::ffff:127.0.0.1\]:\d{4,5}") .unwrap() @@ -173,7 +182,7 @@ pub fn redact_variable(input: &str, extra_redactions: &[&str]) -> String { .replace_all(&s, "ms") .to_string(); - let mut s = regex::Regex::new( + let s = regex::Regex::new( r"note: database schema version matches expected \(\d+\.\d+\.\d+\)", ) .unwrap() @@ -184,9 +193,18 @@ pub fn redact_variable(input: &str, extra_redactions: &[&str]) -> String { ) .to_string(); - for r in extra_redactions { - s = s.replace(r, ""); - } - s } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_redact_variable() { + // Ens + let input = "time: 123ms, path: /var/tmp/tmp.456ms123s"; + let actual = redact_variable(input, &["/var/tmp/tmp.456ms123s"]); + assert_eq!(actual, "time: ms, path: "); + } +} From 2efb6148774bd56668e9322e180d9cd793c8d620 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 11:22:55 -0700 Subject: [PATCH 046/334] Update Rust crate tokio-stream to 0.1.15 (#5391) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3d511e0cc6..f53aa54b0b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9896,9 +9896,9 @@ dependencies = [ [[package]] name = "tokio-stream" -version = "0.1.14" +version = "0.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "397c988d37662c7dda6d2208364a706264bf3d6138b11d436cbac0ad38832842" +checksum = "267ac89e0bec6e691e5813911606935d77c476ff49024f98abcea3e7b15e37af" dependencies = [ "futures-core", "pin-project-lite", diff --git a/Cargo.toml b/Cargo.toml index 667855649b..48594e056d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -423,7 +423,7 @@ thiserror = "1.0" tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } tokio = "1.36.0" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } -tokio-stream = "0.1.14" +tokio-stream = "0.1.15" tokio-tungstenite = "0.20" tokio-util = { version = "0.7.10", features = ["io", "io-util"] } toml = "0.8.12" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 9cc65bfa79..1e7bea0d2d 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -103,7 +103,7 @@ syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.52", features = ["extra time = { version = "0.3.34", features = ["formatting", "local-offset", "macros", "parsing"] } tokio = { version = "1.36.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } -tokio-stream = { version = "0.1.14", features = ["net"] } +tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } toml = { version = "0.7.8" } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.9", features = ["serde"] } @@ -210,7 +210,7 @@ time = { version = "0.3.34", features = ["formatting", "local-offset", "macros", time-macros = { version = "0.2.17", default-features = false, features = ["formatting", "parsing"] } tokio = { version = "1.36.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } 
-tokio-stream = { version = "0.1.14", features = ["net"] } +tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } toml = { version = "0.7.8" } toml_edit-3c51e837cfc5589a = { package = "toml_edit", version = "0.22.9", features = ["serde"] } From d4b55b7e411d7035ba78c81b54496bf4ec1f2074 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Wed, 3 Apr 2024 12:02:44 -0700 Subject: [PATCH 047/334] [sled-agent] Don't block load_services behind requesting firewall rules when no services are requested (#5379) --- sled-agent/src/services.rs | 14 +++++++++++--- sled-agent/src/sled_agent.rs | 16 ++++++++++++++-- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index bfc0b91a71..3584e8f139 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -333,6 +333,14 @@ impl From for omicron_common::api::external::Error { } } +/// Result of [ServiceManager::load_services] +pub enum LoadServicesResult { + /// We didn't load anything, there wasn't anything to load + NoServicesToLoad, + /// We successfully loaded the zones from our ledger. + ServicesLoaded, +} + fn display_zone_init_errors(errors: &[(String, Box)]) -> String { if errors.len() == 1 { return format!( @@ -936,7 +944,7 @@ impl ServiceManager { // - If we know that disks are missing, we could wait for them // - We could permanently fail if we are able to distinguish other errors // more clearly. - pub async fn load_services(&self) -> Result<(), Error> { + pub async fn load_services(&self) -> Result { let log = &self.inner.log; let mut existing_zones = self.inner.zones.lock().await; let Some(mut ledger) = @@ -948,7 +956,7 @@ impl ServiceManager { "Loading Omicron zones - \ no zones nor old-format services found" ); - return Ok(()); + return Ok(LoadServicesResult::NoServicesToLoad); }; let zones_config = ledger.data_mut(); @@ -966,7 +974,7 @@ impl ServiceManager { None, ) .await?; - Ok(()) + Ok(LoadServicesResult::ServicesLoaded) } /// Sets up "Sled Agent" information, including underlay info. diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index e42f708006..fe266e6539 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -614,13 +614,25 @@ impl SledAgent { retry_policy_internal_service_aggressive(), || async { // Load as many services as we can, and don't exit immediately - // upon failure... + // upon failure. let load_services_result = self.inner.services.load_services().await.map_err(|err| { BackoffError::transient(Error::from(err)) }); - // ... and request firewall rule updates for as many services as + // If there wasn't any work to do, we're done immediately. + if matches!( + load_services_result, + Ok(services::LoadServicesResult::NoServicesToLoad) + ) { + info!( + self.log, + "load_services exiting early; no services to be loaded" + ); + return Ok(()); + } + + // Otherwise, request firewall rule updates for as many services as // we can. Note that we still make this request even if we only // partially load some services. 
let firewall_result = self From 85ecbc3050cd56648296b30ece54b0d350c593f3 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Wed, 3 Apr 2024 16:12:24 -0500 Subject: [PATCH 048/334] Bump web console (image upload chunk timeouts, bump all deps) (#5402) https://github.com/oxidecomputer/console/compare/b22ca1dc...2a0693f3 * [2a0693f3](https://github.com/oxidecomputer/console/commit/2a0693f3) bump all runtime deps + tailwind, which I forgot * [b5b26b37](https://github.com/oxidecomputer/console/commit/b5b26b37) oxidecomputer/console#2117 * [ec0af140](https://github.com/oxidecomputer/console/commit/ec0af140) bump API and update code accordingly * [f77acd4c](https://github.com/oxidecomputer/console/commit/f77acd4c) bump all dev deps * [6ba38a92](https://github.com/oxidecomputer/console/commit/6ba38a92) (minor) use project directly instead of projectSelector in a couple spots --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 87e3af12e5..b625290165 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="b22ca1dce0a9380e7f9adbc7ebc60c85091ef362" -SHA2="6efc13fdc55c70f8a8dae5fad2fb2fb8633f291750e1899f48bf92d52e04e24a" +COMMIT="2a0693f3a5555b6e26130ca5a0e13ec93aa96035" +SHA2="e14f63eec8e4027e72815274deb30082a45888ba6ecaa1d521a1bc053d6239ff" From 36f435b299f3f82940c38d606dc4f94d90a067c6 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 3 Apr 2024 17:26:33 -0400 Subject: [PATCH 049/334] Support arbitrary redundancy for region allocation (#5346) Add a `num_regions_required` argument for region allocation, and a new `arbitrary_region_allocate` function. Fixes #5119. --- nexus/db-queries/src/db/datastore/mod.rs | 22 +- nexus/db-queries/src/db/datastore/region.rs | 51 +- .../src/db/queries/region_allocation.rs | 94 +++- .../output/region_allocate_distinct_sleds.sql | 64 ++- .../output/region_allocate_random_sleds.sql | 64 ++- nexus/src/app/sagas/disk_create.rs | 2 +- nexus/src/app/sagas/snapshot_create.rs | 2 +- nexus/tests/integration_tests/disks.rs | 452 ++++++++++++++++++ 8 files changed, 703 insertions(+), 48 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index af6cc73350..b40b641202 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -117,7 +117,7 @@ pub use volume::CrucibleTargets; // Number of unique datasets required to back a region. // TODO: This should likely turn into a configuration option. -pub(crate) const REGION_REDUNDANCY_THRESHOLD: usize = 3; +pub const REGION_REDUNDANCY_THRESHOLD: usize = 3; /// The name of the built-in IP pool for Oxide services. 
pub const SERVICE_IP_POOL_NAME: &str = "oxide-service-pool"; @@ -948,7 +948,7 @@ mod test { let expected_region_count = REGION_REDUNDANCY_THRESHOLD; let dataset_and_regions = datastore - .region_allocate( + .disk_region_allocate( &opctx, volume_id, ¶ms.disk_source, @@ -1041,7 +1041,7 @@ mod test { let expected_region_count = REGION_REDUNDANCY_THRESHOLD; let dataset_and_regions = datastore - .region_allocate( + .disk_region_allocate( &opctx, volume_id, ¶ms.disk_source, @@ -1128,7 +1128,7 @@ mod test { let volume_id = Uuid::new_v4(); let err = datastore - .region_allocate( + .disk_region_allocate( &opctx, volume_id, ¶ms.disk_source, @@ -1173,7 +1173,7 @@ mod test { ); let volume_id = Uuid::new_v4(); let mut dataset_and_regions1 = datastore - .region_allocate( + .disk_region_allocate( &opctx, volume_id, ¶ms.disk_source, @@ -1186,7 +1186,7 @@ mod test { // Use a different allocation ordering to ensure we're idempotent even // if the shuffle changes. let mut dataset_and_regions2 = datastore - .region_allocate( + .disk_region_allocate( &opctx, volume_id, ¶ms.disk_source, @@ -1281,7 +1281,7 @@ mod test { ); let volume1_id = Uuid::new_v4(); let err = datastore - .region_allocate( + .disk_region_allocate( &opctx, volume1_id, ¶ms.disk_source, @@ -1304,7 +1304,7 @@ mod test { add_test_zpool_to_inventory(&datastore, zpool_id, sled_id).await; } datastore - .region_allocate( + .disk_region_allocate( &opctx, volume1_id, ¶ms.disk_source, @@ -1380,7 +1380,7 @@ mod test { ); let volume1_id = Uuid::new_v4(); let err = datastore - .region_allocate( + .disk_region_allocate( &opctx, volume1_id, ¶ms.disk_source, @@ -1490,7 +1490,7 @@ mod test { .unwrap(); let result = datastore - .region_allocate( + .disk_region_allocate( &opctx, volume_id, ¶ms.disk_source, @@ -1539,7 +1539,7 @@ mod test { let volume1_id = Uuid::new_v4(); assert!(datastore - .region_allocate( + .disk_region_allocate( &opctx, volume1_id, ¶ms.disk_source, diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index ad89a9ca93..113fc51ee5 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -8,6 +8,7 @@ use super::DataStore; use super::RunnableQuery; use crate::context::OpContext; use crate::db; +use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::lookup::LookupPath; @@ -56,6 +57,18 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + pub async fn get_region(&self, region_id: Uuid) -> Result { + use db::schema::region::dsl; + dsl::region + .filter(dsl::id.eq(region_id)) + .select(Region::as_select()) + .get_result_async::( + &*self.pool_connection_unauthorized().await?, + ) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + async fn get_block_size_from_disk_source( &self, opctx: &OpContext, @@ -115,13 +128,45 @@ impl DataStore { /// /// Returns the allocated regions, as well as the datasets to which they /// belong. - pub async fn region_allocate( + pub async fn disk_region_allocate( &self, opctx: &OpContext, volume_id: Uuid, disk_source: ¶ms::DiskSource, size: external::ByteCount, allocation_strategy: &RegionAllocationStrategy, + ) -> Result, Error> { + self.arbitrary_region_allocate( + opctx, + volume_id, + disk_source, + size, + allocation_strategy, + REGION_REDUNDANCY_THRESHOLD, + ) + .await + } + + /// Idempotently allocates an arbitrary number of regions for a volume. 
+ /// + /// For regular disk creation, this will be REGION_REDUNDANCY_THRESHOLD. + /// + /// For region replacement, it's important to allocate the *new* region for + /// a volume while respecting the current region allocation strategy. This + /// requires setting `num_regions_required` to one more than the current + /// level for a volume. If a single region is allocated in isolation this + /// could land on the same dataset as one of the existing volume's regions. + /// + /// Returns the allocated regions, as well as the datasets to which they + /// belong. + pub async fn arbitrary_region_allocate( + &self, + opctx: &OpContext, + volume_id: Uuid, + disk_source: ¶ms::DiskSource, + size: external::ByteCount, + allocation_strategy: &RegionAllocationStrategy, + num_regions_required: usize, ) -> Result, Error> { let block_size = self.get_block_size_from_disk_source(opctx, &disk_source).await?; @@ -134,8 +179,11 @@ impl DataStore { blocks_per_extent, extent_count, allocation_strategy, + num_regions_required, ); + let conn = self.pool_connection_authorized(&opctx).await?; + let dataset_and_regions: Vec<(Dataset, Region)> = query.get_results_async(&*conn).await.map_err(|e| { crate::db::queries::region_allocation::from_diesel(e) @@ -147,6 +195,7 @@ impl DataStore { "volume_id" => %volume_id, "datasets_and_regions" => ?dataset_and_regions, ); + Ok(dataset_and_regions) } diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index 971090ccaa..a6f9dbb2ca 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -5,7 +5,6 @@ //! Implementation of queries for provisioning regions. use crate::db::column_walker::AllColumnsOf; -use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; use crate::db::model::{Dataset, Region}; use crate::db::raw_query_builder::{QueryBuilder, TypedSqlQuery}; use crate::db::schema; @@ -74,6 +73,7 @@ pub fn allocation_query( blocks_per_extent: u64, extent_count: u64, allocation_strategy: &RegionAllocationStrategy, + redundancy: usize, ) -> TypedSqlQuery<(SelectableSql, SelectableSql)> { let (seed, distinct_sleds) = { let (input_seed, distinct_sleds) = match allocation_strategy { @@ -99,7 +99,7 @@ pub fn allocation_query( let seed = seed.to_le_bytes().to_vec(); let size_delta = block_size * blocks_per_extent * extent_count; - let redundancy: i64 = i64::try_from(REGION_REDUNDANCY_THRESHOLD).unwrap(); + let redundancy: i64 = i64::try_from(redundancy).unwrap(); let builder = QueryBuilder::new().sql( // Find all old regions associated with a particular volume @@ -117,13 +117,23 @@ pub fn allocation_query( dataset.pool_id, sum(dataset.size_used) AS size_used FROM dataset WHERE ((dataset.size_used IS NOT NULL) AND (dataset.time_deleted IS NULL)) GROUP BY dataset.pool_id),") + + // Any zpool already have this volume's existing regions? .sql(" - candidate_zpools AS ("); + existing_zpools AS ( + SELECT + dataset.pool_id + FROM + dataset INNER JOIN old_regions ON (old_regions.dataset_id = dataset.id) + ),") - // Identifies zpools with enough space for region allocation. + // Identifies zpools with enough space for region allocation, that are not + // currently used by this Volume's existing regions. // // NOTE: 'distinct_sleds' changes the format of the underlying SQL query, as it uses // distinct bind parameters depending on the conditional branch. 
+ .sql(" + candidate_zpools AS ("); let builder = if distinct_sleds { builder.sql("SELECT DISTINCT ON (zpool.sled_id) ") } else { @@ -146,6 +156,7 @@ pub fn allocation_query( AND sled.sled_state = 'active' AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' + AND NOT(zpool.id = ANY(SELECT existing_zpools.pool_id FROM existing_zpools)) )" ).bind::(size_delta as i64); @@ -182,6 +193,7 @@ pub fn allocation_query( ORDER BY dataset.pool_id, md5((CAST(dataset.id as BYTEA) || ").param().sql(")) ),") .bind::(seed.clone()) + // We order by md5 to shuffle the ordering of the datasets. // md5 has a uniform output distribution so it does the job. .sql(" @@ -194,6 +206,7 @@ pub fn allocation_query( ),") .bind::(seed) .bind::(redundancy) + // Create the regions-to-be-inserted for the volume. .sql(" candidate_regions AS ( @@ -206,12 +219,20 @@ pub fn allocation_query( ").param().sql(" AS block_size, ").param().sql(" AS blocks_per_extent, ").param().sql(" AS extent_count - FROM shuffled_candidate_datasets + FROM shuffled_candidate_datasets") + // Only select the *additional* number of candidate regions for the required + // redundancy level + .sql(" + LIMIT (").param().sql(" - ( + SELECT COUNT(*) FROM old_regions + )) ),") .bind::(volume_id) .bind::(block_size as i64) .bind::(blocks_per_extent as i64) .bind::(extent_count as i64) + .bind::(redundancy) + // A subquery which summarizes the changes we intend to make, showing: // // 1. Which datasets will have size adjustments @@ -225,6 +246,7 @@ pub fn allocation_query( ((candidate_regions.block_size * candidate_regions.blocks_per_extent) * candidate_regions.extent_count) AS size_used_delta FROM (candidate_regions INNER JOIN dataset ON (dataset.id = candidate_regions.dataset_id)) ),") + // Confirms whether or not the insertion and updates should // occur. // @@ -251,17 +273,60 @@ pub fn allocation_query( // an error instead. 
.sql(" do_insert AS ( - SELECT ((( - ((SELECT COUNT(*) FROM old_regions LIMIT 1) < ").param().sql(") AND - CAST(IF(((SELECT COUNT(*) FROM candidate_zpools LIMIT 1) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, "') AS BOOL)) AND - CAST(IF(((SELECT COUNT(*) FROM candidate_regions LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_DATASETS_SENTINEL, "') AS BOOL)) AND - CAST(IF(((SELECT COUNT(DISTINCT dataset.pool_id) FROM (candidate_regions INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id)) LIMIT 1) >= ")).param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, "') AS BOOL) - ) AS insert - ),")) + SELECT (((") + // There's regions not allocated yet + .sql(" + ((SELECT COUNT(*) FROM old_regions LIMIT 1) < ").param().sql(") AND") + // Enough filtered candidate zpools + existing zpools to meet redundancy + .sql(" + CAST(IF((( + ( + (SELECT COUNT(*) FROM candidate_zpools LIMIT 1) + + (SELECT COUNT(*) FROM existing_zpools LIMIT 1) + ) + ) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_ZPOOL_SPACE_SENTINEL, "') AS BOOL)) AND")) + // Enough candidate regions + existing regions to meet redundancy + .sql(" + CAST(IF((( + ( + (SELECT COUNT(*) FROM candidate_regions LIMIT 1) + + (SELECT COUNT(*) FROM old_regions LIMIT 1) + ) + ) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_DATASETS_SENTINEL, "') AS BOOL)) AND")) + // Enough unique zpools (looking at both existing and new) to meet redundancy + .sql(" + CAST(IF((( + ( + SELECT + COUNT(DISTINCT pool_id) + FROM + ( + ( + SELECT + dataset.pool_id + FROM + candidate_regions + INNER JOIN dataset ON (candidate_regions.dataset_id = dataset.id) + ) + UNION + ( + SELECT + dataset.pool_id + FROM + old_regions + INNER JOIN dataset ON (old_regions.dataset_id = dataset.id) + ) + ) + LIMIT 1 + ) + ) >= ").param().sql(concatcp!("), 'TRUE', '", NOT_ENOUGH_UNIQUE_ZPOOLS_SENTINEL, "') AS BOOL) + ) AS insert + ),")) .bind::(redundancy) .bind::(redundancy) .bind::(redundancy) .bind::(redundancy) + .sql(" inserted_regions AS ( INSERT INTO region @@ -302,6 +367,7 @@ UNION #[cfg(test)] mod test { use super::*; + use crate::db::datastore::REGION_REDUNDANCY_THRESHOLD; use crate::db::explain::ExplainableAsync; use nexus_test_utils::db::test_setup_database; use omicron_test_utils::dev; @@ -327,6 +393,7 @@ mod test { &RegionAllocationStrategy::RandomWithDistinctSleds { seed: Some(1), }, + REGION_REDUNDANCY_THRESHOLD, ); let s = dev::db::format_sql( &diesel::debug_query::(®ion_allocate).to_string(), @@ -346,6 +413,7 @@ mod test { blocks_per_extent, extent_count, &RegionAllocationStrategy::Random { seed: Some(1) }, + REGION_REDUNDANCY_THRESHOLD, ); let s = dev::db::format_sql( &diesel::debug_query::(®ion_allocate).to_string(), @@ -382,6 +450,7 @@ mod test { blocks_per_extent, extent_count, &RegionAllocationStrategy::RandomWithDistinctSleds { seed: None }, + REGION_REDUNDANCY_THRESHOLD, ); let _ = region_allocate .explain_async(&conn) @@ -396,6 +465,7 @@ mod test { blocks_per_extent, extent_count, &RegionAllocationStrategy::Random { seed: None }, + REGION_REDUNDANCY_THRESHOLD, ); let _ = region_allocate .explain_async(&conn) diff --git a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql index e84d47d2bb..b797e0bef7 100644 --- a/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_distinct_sleds.sql @@ -26,6 +26,13 @@ WITH GROUP BY dataset.pool_id 
), + existing_zpools + AS ( + SELECT + dataset.pool_id + FROM + dataset INNER JOIN old_regions ON old_regions.dataset_id = dataset.id + ), candidate_zpools AS ( SELECT @@ -53,6 +60,7 @@ WITH AND sled.sled_state = 'active' AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' + AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) ORDER BY zpool.sled_id, md5(CAST(zpool.id AS BYTES) || $3) ), @@ -92,6 +100,8 @@ WITH $10 AS extent_count FROM shuffled_candidate_datasets + LIMIT + $11 - (SELECT count(*) FROM old_regions) ), proposed_dataset_changes AS ( @@ -110,10 +120,18 @@ WITH SELECT ( ( - (SELECT count(*) FROM old_regions LIMIT 1) < $11 + (SELECT count(*) FROM old_regions LIMIT 1) < $12 AND CAST( IF( - ((SELECT count(*) FROM candidate_zpools LIMIT 1) >= $12), + ( + ( + ( + (SELECT count(*) FROM candidate_zpools LIMIT 1) + + (SELECT count(*) FROM existing_zpools LIMIT 1) + ) + ) + >= $13 + ), 'TRUE', 'Not enough space' ) @@ -122,7 +140,15 @@ WITH ) AND CAST( IF( - ((SELECT count(*) FROM candidate_regions LIMIT 1) >= $13), + ( + ( + ( + (SELECT count(*) FROM candidate_regions LIMIT 1) + + (SELECT count(*) FROM old_regions LIMIT 1) + ) + ) + >= $14 + ), 'TRUE', 'Not enough datasets' ) @@ -133,15 +159,31 @@ WITH IF( ( ( - SELECT - count(DISTINCT dataset.pool_id) - FROM - candidate_regions - INNER JOIN dataset ON candidate_regions.dataset_id = dataset.id - LIMIT - 1 + ( + SELECT + count(DISTINCT pool_id) + FROM + ( + ( + SELECT + dataset.pool_id + FROM + candidate_regions + INNER JOIN dataset ON candidate_regions.dataset_id = dataset.id + ) + UNION + ( + SELECT + dataset.pool_id + FROM + old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id + ) + ) + LIMIT + 1 + ) ) - >= $14 + >= $15 ), 'TRUE', 'Not enough unique zpools selected' diff --git a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql index 85e5dc85ef..4f60ddf5fe 100644 --- a/nexus/db-queries/tests/output/region_allocate_random_sleds.sql +++ b/nexus/db-queries/tests/output/region_allocate_random_sleds.sql @@ -26,6 +26,13 @@ WITH GROUP BY dataset.pool_id ), + existing_zpools + AS ( + SELECT + dataset.pool_id + FROM + dataset INNER JOIN old_regions ON old_regions.dataset_id = dataset.id + ), candidate_zpools AS ( SELECT @@ -53,6 +60,7 @@ WITH AND sled.sled_state = 'active' AND physical_disk.disk_policy = 'in_service' AND physical_disk.disk_state = 'active' + AND NOT (zpool.id = ANY (SELECT existing_zpools.pool_id FROM existing_zpools)) ), candidate_datasets AS ( @@ -90,6 +98,8 @@ WITH $9 AS extent_count FROM shuffled_candidate_datasets + LIMIT + $10 - (SELECT count(*) FROM old_regions) ), proposed_dataset_changes AS ( @@ -108,10 +118,18 @@ WITH SELECT ( ( - (SELECT count(*) FROM old_regions LIMIT 1) < $10 + (SELECT count(*) FROM old_regions LIMIT 1) < $11 AND CAST( IF( - ((SELECT count(*) FROM candidate_zpools LIMIT 1) >= $11), + ( + ( + ( + (SELECT count(*) FROM candidate_zpools LIMIT 1) + + (SELECT count(*) FROM existing_zpools LIMIT 1) + ) + ) + >= $12 + ), 'TRUE', 'Not enough space' ) @@ -120,7 +138,15 @@ WITH ) AND CAST( IF( - ((SELECT count(*) FROM candidate_regions LIMIT 1) >= $12), + ( + ( + ( + (SELECT count(*) FROM candidate_regions LIMIT 1) + + (SELECT count(*) FROM old_regions LIMIT 1) + ) + ) + >= $13 + ), 'TRUE', 'Not enough datasets' ) @@ -131,15 +157,31 @@ WITH IF( ( ( - SELECT - count(DISTINCT dataset.pool_id) - FROM - candidate_regions - INNER JOIN dataset ON 
candidate_regions.dataset_id = dataset.id - LIMIT - 1 + ( + SELECT + count(DISTINCT pool_id) + FROM + ( + ( + SELECT + dataset.pool_id + FROM + candidate_regions + INNER JOIN dataset ON candidate_regions.dataset_id = dataset.id + ) + UNION + ( + SELECT + dataset.pool_id + FROM + old_regions INNER JOIN dataset ON old_regions.dataset_id = dataset.id + ) + ) + LIMIT + 1 + ) ) - >= $13 + >= $14 ), 'TRUE', 'Not enough unique zpools selected' diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 9d52ec1501..830a4dd96c 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -259,7 +259,7 @@ async fn sdc_alloc_regions( let datasets_and_regions = osagactx .datastore() - .region_allocate( + .disk_region_allocate( &opctx, volume_id, ¶ms.create_params.disk_source, diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 290868aae2..8b6febf71a 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -336,7 +336,7 @@ async fn ssc_alloc_regions( let datasets_and_regions = osagactx .datastore() - .region_allocate( + .disk_region_allocate( &opctx, destination_volume_id, ¶ms::DiskSource::Blank { diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 7337d1b009..6acd542061 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -10,7 +10,9 @@ use dropshot::test_util::ClientTestContext; use dropshot::HttpErrorResponseBody; use http::method::Method; use http::StatusCode; +use nexus_config::RegionAllocationStrategy; use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::datastore::REGION_REDUNDANCY_THRESHOLD; use nexus_db_queries::db::fixed_data::{silo::DEFAULT_SILO_ID, FLEET_ID}; use nexus_db_queries::db::lookup::LookupPath; use nexus_test_utils::http_testing::AuthnMode; @@ -27,6 +29,7 @@ use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; +use nexus_types::identity::Asset; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Disk; use omicron_common::api::external::DiskState; @@ -40,6 +43,7 @@ use omicron_nexus::TestInterfaces as _; use oximeter::types::Datum; use oximeter::types::Measurement; use sled_agent_client::TestInterfaces as _; +use std::collections::HashSet; use std::sync::Arc; use uuid::Uuid; @@ -2014,6 +2018,454 @@ async fn test_project_delete_disk_no_auth_idempotent( .unwrap(); } +// Test allocating a single region +#[nexus_test] +async fn test_single_region_allocate(cptestctx: &ControlPlaneTestContext) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create three 10 GiB zpools, each with one dataset. 
+ let disk_test = DiskTest::new(&cptestctx).await; + + // Assert default is still 10 GiB + assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); + + // Allocate a single 1 GB region + let volume_id = Uuid::new_v4(); + + let datasets_and_regions = datastore + .arbitrary_region_allocate( + &opctx, + volume_id, + ¶ms::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + ByteCount::from_gibibytes_u32(1), + &RegionAllocationStrategy::Random { seed: None }, + 1, + ) + .await + .unwrap(); + + assert_eq!(datasets_and_regions.len(), 1); + + // Double check! + let allocated_regions = + datastore.get_allocated_regions(volume_id).await.unwrap(); + + assert_eq!(allocated_regions.len(), 1); + + // Triple check! + let allocated_region = + datastore.get_region(datasets_and_regions[0].1.id()).await.unwrap(); + assert_eq!(allocated_region.block_size().to_bytes(), 512); + assert_eq!(allocated_region.blocks_per_extent(), 131072); // based on EXTENT_SIZE const + assert_eq!(allocated_region.extent_count(), 16); + + // Quadruple check! Only one Crucible agent should have received a region + // request + let mut number_of_matching_regions = 0; + + for zpool in &disk_test.zpools { + for dataset in &zpool.datasets { + let total_size = datastore + .regions_total_occupied_size(dataset.id) + .await + .unwrap(); + + if total_size == 1073741824 { + number_of_matching_regions += 1; + } else if total_size == 0 { + // ok, unallocated + } else { + panic!("unexpected regions total size of {total_size}"); + } + } + } + + assert_eq!(number_of_matching_regions, 1); +} + +// Ensure that `disk_region_allocate` is idempotent. +#[nexus_test] +async fn test_region_allocation_strategy_random_is_idempotent( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create four 10 GiB zpools, each with one dataset. 
+ let mut disk_test = DiskTest::new(&cptestctx).await; + disk_test.add_zpool_with_dataset(&cptestctx).await; + + // Assert default is still 10 GiB + assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); + + // Create a disk + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, DISK_NAME).await; + + // Assert disk has three allocated regions + let disk_id = disk.identity.id; + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(allocated_regions.len(), REGION_REDUNDANCY_THRESHOLD); + + // Call `disk_region_allocate` again + let region: &nexus_db_model::Region = &allocated_regions[0].1; + + let region_total_size: ByteCount = ByteCount::try_from( + region.block_size().to_bytes() + * region.blocks_per_extent() + * region.extent_count(), + ) + .unwrap(); + + assert_eq!(region_total_size, ByteCount::from_gibibytes_u32(1)); + + let datasets_and_regions = datastore + .disk_region_allocate( + &opctx, + db_disk.volume_id, + ¶ms::DiskSource::Blank { + block_size: params::BlockSize::try_from( + region.block_size().to_bytes() as u32, + ) + .unwrap(), + }, + region_total_size, + &RegionAllocationStrategy::Random { seed: None }, + ) + .await + .unwrap(); + + // There should be the same amount + assert_eq!(allocated_regions.len(), datasets_and_regions.len()); +} + +// Ensure that adjusting redundancy level with `arbitrary_region_allocate` works +#[nexus_test] +async fn test_region_allocation_strategy_random_is_idempotent_arbitrary( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create four 10 GiB zpools, each with one dataset. 
+ let mut disk_test = DiskTest::new(&cptestctx).await; + disk_test.add_zpool_with_dataset(&cptestctx).await; + + // Assert default is still 10 GiB + assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); + + // Call region allocation in isolation + let volume_id = Uuid::new_v4(); + + let datasets_and_regions = datastore + .arbitrary_region_allocate( + &opctx, + volume_id, + ¶ms::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + ByteCount::from_gibibytes_u32(1), + &RegionAllocationStrategy::Random { seed: None }, + REGION_REDUNDANCY_THRESHOLD, + ) + .await + .unwrap(); + + // There should be the same amount as we requested + assert_eq!(REGION_REDUNDANCY_THRESHOLD, datasets_and_regions.len()); + + // Bump up the number of required regions + let datasets_and_regions = datastore + .arbitrary_region_allocate( + &opctx, + volume_id, + ¶ms::DiskSource::Blank { + block_size: params::BlockSize::try_from(512).unwrap(), + }, + ByteCount::from_gibibytes_u32(1), + &RegionAllocationStrategy::Random { seed: None }, + REGION_REDUNDANCY_THRESHOLD + 1, + ) + .await + .unwrap(); + + // There should be the same amount as we requested + assert_eq!(REGION_REDUNDANCY_THRESHOLD + 1, datasets_and_regions.len()); +} + +// Test allocating a single region to replace a disk's region +#[nexus_test] +async fn test_single_region_allocate_for_replace( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create three 10 GiB zpools, each with one dataset. + let mut disk_test = DiskTest::new(&cptestctx).await; + + // One more zpool and dataset is required to meet `region_allocate`'s + // redundancy requirement. 
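+    // (The default DiskTest creates three zpools; allocating
+    // REGION_REDUNDANCY_THRESHOLD + 1 regions on distinct pools therefore
+    // needs a fourth.)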
+ disk_test.add_zpool_with_dataset(&cptestctx).await; + + // Assert default is still 10 GiB + assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); + + // Create a disk + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, DISK_NAME).await; + + // Assert disk has three allocated regions + let disk_id = disk.identity.id; + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(allocated_regions.len(), REGION_REDUNDANCY_THRESHOLD); + + // Allocate one more single 1 GB region to replace one of the disk's regions + let region_to_replace: &nexus_db_model::Region = &allocated_regions[0].1; + + let one_more = allocated_regions.len() + 1; + assert_eq!(one_more, REGION_REDUNDANCY_THRESHOLD + 1); + + let region_total_size: ByteCount = ByteCount::try_from( + region_to_replace.block_size().to_bytes() + * region_to_replace.blocks_per_extent() + * region_to_replace.extent_count(), + ) + .unwrap(); + + assert_eq!(region_total_size, ByteCount::from_gibibytes_u32(1)); + + let datasets_and_regions = datastore + .arbitrary_region_allocate( + &opctx, + db_disk.volume_id, + ¶ms::DiskSource::Blank { + block_size: params::BlockSize::try_from( + region_to_replace.block_size().to_bytes() as u32, + ) + .unwrap(), + }, + region_total_size, + &RegionAllocationStrategy::Random { seed: None }, + one_more, + ) + .await + .unwrap(); + + eprintln!("{:?}", datasets_and_regions); + + assert_eq!(datasets_and_regions.len(), one_more); + + // There should be `one_more` regions for this disk's volume id. + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(allocated_regions.len(), one_more); + + // Each region should be on a different pool + let pools_used: HashSet = datasets_and_regions + .iter() + .map(|(dataset, _)| dataset.pool_id) + .collect(); + + assert_eq!(pools_used.len(), REGION_REDUNDANCY_THRESHOLD + 1); +} + +// Confirm allocating a single region to replace a disk's region fails if +// there's not enough unique zpools +#[nexus_test] +async fn test_single_region_allocate_for_replace_not_enough_zpools( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create three 10 GiB zpools, each with one dataset. 
+ let _disk_test = DiskTest::new(&cptestctx).await; + + // Assert default is still 10 GiB + assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); + + // Create a disk + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, DISK_NAME).await; + + // Assert disk has three allocated regions + let disk_id = disk.identity.id; + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(allocated_regions.len(), REGION_REDUNDANCY_THRESHOLD); + + // Allocate one more single 1 GB region to replace one of the disk's regions + let region_to_replace: &nexus_db_model::Region = &allocated_regions[0].1; + + let one_more = allocated_regions.len() + 1; + assert_eq!(one_more, REGION_REDUNDANCY_THRESHOLD + 1); + + let region_total_size: ByteCount = ByteCount::try_from( + region_to_replace.block_size().to_bytes() + * region_to_replace.blocks_per_extent() + * region_to_replace.extent_count(), + ) + .unwrap(); + + assert_eq!(region_total_size, ByteCount::from_gibibytes_u32(1)); + + // Trying to allocate one more should fail + let result = datastore + .arbitrary_region_allocate( + &opctx, + db_disk.volume_id, + ¶ms::DiskSource::Blank { + block_size: params::BlockSize::try_from( + region_to_replace.block_size().to_bytes() as u32, + ) + .unwrap(), + }, + region_total_size, + &RegionAllocationStrategy::Random { seed: None }, + one_more, + ) + .await; + + assert!(result.is_err()); + + // Confirm calling `arbitrary_region_allocate` still idempotently works + let datasets_and_regions = datastore + .arbitrary_region_allocate( + &opctx, + db_disk.volume_id, + ¶ms::DiskSource::Blank { + block_size: params::BlockSize::try_from( + region_to_replace.block_size().to_bytes() as u32, + ) + .unwrap(), + }, + region_total_size, + &RegionAllocationStrategy::Random { seed: None }, + allocated_regions.len(), + ) + .await + .unwrap(); + + assert_eq!(datasets_and_regions.len(), REGION_REDUNDANCY_THRESHOLD); +} + +// Confirm that a region set can start at N, a region can be deleted, and the +// allocation CTE can bring the redundancy back to N. +#[nexus_test] +async fn test_region_allocation_after_delete( + cptestctx: &ControlPlaneTestContext, +) { + let nexus = &cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = + OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); + + // Create three 10 GiB zpools, each with one dataset. 
+ let _disk_test = DiskTest::new(&cptestctx).await; + + // Assert default is still 10 GiB + assert_eq!(10, DiskTest::DEFAULT_ZPOOL_SIZE_GIB); + + // Create a disk + let client = &cptestctx.external_client; + let _project_id = create_project_and_pool(client).await; + + let disk = create_disk(&client, PROJECT_NAME, DISK_NAME).await; + + // Assert disk has three allocated regions + let disk_id = disk.identity.id; + let (.., db_disk) = LookupPath::new(&opctx, &datastore) + .disk_id(disk_id) + .fetch() + .await + .unwrap_or_else(|_| panic!("test disk {:?} should exist", disk_id)); + + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(allocated_regions.len(), REGION_REDUNDANCY_THRESHOLD); + + // Delete one of the regions + let region_to_delete: &nexus_db_model::Region = &allocated_regions[0].1; + datastore + .regions_hard_delete(&opctx.log, vec![region_to_delete.id()]) + .await + .unwrap(); + + // Assert disk's volume has one less allocated region + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(allocated_regions.len(), REGION_REDUNDANCY_THRESHOLD - 1); + + let region_total_size: ByteCount = ByteCount::try_from( + region_to_delete.block_size().to_bytes() + * region_to_delete.blocks_per_extent() + * region_to_delete.extent_count(), + ) + .unwrap(); + + // Rerun disk region allocation + datastore + .disk_region_allocate( + &opctx, + db_disk.volume_id, + ¶ms::DiskSource::Blank { + block_size: params::BlockSize::try_from( + region_to_delete.block_size().to_bytes() as u32, + ) + .unwrap(), + }, + region_total_size, + &RegionAllocationStrategy::Random { seed: None }, + ) + .await + .unwrap(); + + // Assert redundancy was restored + let allocated_regions = + datastore.get_allocated_regions(db_disk.volume_id).await.unwrap(); + assert_eq!(allocated_regions.len(), REGION_REDUNDANCY_THRESHOLD); +} + async fn disk_get(client: &ClientTestContext, disk_url: &str) -> Disk { NexusRequest::object_get(client, disk_url) .authn_as(AuthnMode::PrivilegedUser) From 93927c3592b7976af8fb6b646f4a400c3d4b9a9c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:37:40 -0700 Subject: [PATCH 050/334] Update Rust crate rayon to 1.10 (#5397) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f53aa54b0b..8332fa0413 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7416,9 +7416,9 @@ dependencies = [ [[package]] name = "rayon" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4963ed1bc86e4f3ee217022bd855b297cef07fb9eac5dfa1f788b220b49b3bd" +checksum = "b418a60154510ca1a002a752ca9714984e21e4241e804d32555251faf8b78ffa" dependencies = [ "either", "rayon-core", diff --git a/Cargo.toml b/Cargo.toml index 48594e056d..fab318d1bd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -348,7 +348,7 @@ rand = "0.8.5" rand_core = "0.6.4" rand_seeder = "0.2.3" ratatui = "0.26.1" -rayon = "1.9" +rayon = "1.10" rcgen = "0.12.1" reedline = "0.31.0" ref-cast = "1.0" From 0cc9233e221ed2f03921d5379c6f14483341bfaf Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:38:15 -0700 Subject: [PATCH 051/334] Update Rust crate async-recursion to 1.1.0 (#5394) --- Cargo.lock | 4 ++-- oximeter/db/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 
deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8332fa0413..dbec94dd31 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -275,9 +275,9 @@ dependencies = [ [[package]] name = "async-recursion" -version = "1.0.5" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" +checksum = "30c5ef0ede93efbf733c1a727f3b6b5a1060bbedd5600183e66f6e4be4af0ec5" dependencies = [ "proc-macro2", "quote", diff --git a/oximeter/db/Cargo.toml b/oximeter/db/Cargo.toml index 88a2ab8a89..c86060b909 100644 --- a/oximeter/db/Cargo.toml +++ b/oximeter/db/Cargo.toml @@ -7,7 +7,7 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true -async-recursion = "1.0.5" +async-recursion = "1.1.0" async-trait.workspace = true bcs.workspace = true camino.workspace = true From 04b0e34c972eb948b578e87de412b92f082a7907 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 15:38:27 -0700 Subject: [PATCH 052/334] Update Rust crate trybuild to 1.0.91 (#5392) --- Cargo.lock | 15 +++------------ Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dbec94dd31..59c16e10e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -435,15 +435,6 @@ version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" -[[package]] -name = "basic-toml" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2db21524cad41c5591204d22d75e1970a2d1f71060214ca931dc7d5afe2c14e5" -dependencies = [ - "serde", -] - [[package]] name = "bb8" version = "0.8.3" @@ -10219,17 +10210,17 @@ checksum = "3528ecfd12c466c6f163363caf2d02a71161dd5e1cc6ae7b34207ea2d42d81ed" [[package]] name = "trybuild" -version = "1.0.89" +version = "1.0.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a9d3ba662913483d6722303f619e75ea10b7855b0f8e0d72799cf8621bb488f" +checksum = "8ad7eb6319ebadebca3dacf1f85a93bc54b73dd81b9036795f73de7ddfe27d5a" dependencies = [ - "basic-toml", "glob", "once_cell", "serde", "serde_derive", "serde_json", "termcolor", + "toml 0.8.12", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index fab318d1bd..43bf7da5d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -433,7 +433,7 @@ trust-dns-client = "0.22" trust-dns-proto = "0.22" trust-dns-resolver = "0.22" trust-dns-server = "0.22" -trybuild = "1.0.89" +trybuild = "1.0.91" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } tui-tree-widget = "0.19.0" From 35dfd9abc6788a19e78e4723faf086a6e8bc79ed Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 3 Apr 2024 18:56:10 -0400 Subject: [PATCH 053/334] Prevent unintentional Upstairs takeovers (#5221) Volumes are "checked out" from Nexus for many reasons, some of which include sending to another service for use in `Volume::construct`. When that service activates the resulting Volume, this will forcibly take over any existing downstairs connections based on the Upstairs' generation number. This is intentional, and was designed so Nexus, in handing out Volumes with increasing generation numbers, can be sure that the resulting Volume works no matter what (for example, even if a previous Upstairs is wedged somehow, even if the service that is running the previous Upstairs is no longer accepting network connections). 
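As a rough sketch of that takeover rule (illustrative only; the type and
method names below are assumptions for this sketch, not Crucible's actual
API), the decision reduces to a generation-number comparison on the
downstairs side:

    /// Illustrative only: a downstairs remembers the generation number of the
    /// Upstairs that most recently activated it.
    struct DownstairsSide {
        active_generation: u64,
    }

    impl DownstairsSide {
        /// An Upstairs built from a freshly checked-out Volume carries a bumped
        /// generation number, so it can take over from whatever is currently
        /// connected, even if that older Upstairs is wedged or unreachable.
        fn allows_takeover_by(&self, candidate_generation: u64) -> bool {
            candidate_generation > self.active_generation
        }
    }
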
Up until now, Nexus wouldn't allow checking out a Volume if there is any chance a Propolis could be running that may use that Volume. This meant restricting certain operations, like creating a snapshot when a disk is attached to an instance that is stopped: any action Nexus would take to attempt a snapshot using a Pantry would race with a user's request to start that instance, and if the Volume checkouts occur in the wrong order the Pantry would take over connections from Propolis, resulting in guest OS errors. Nexus _can_ do this safely though: it has all the information required to know when a checkout is safe to do, and when it may not be safe. This commit adds checks to the Volume checkout transaction that are based on the reason that checkout is occurring, and requires call sites that are performing a checkout to say why they are. Because these checks are performed inside a transaction, Nexus can say for sure when it is safe to allow a Volume to be checked out for a certain reason. For example, in the scenario of taking a snapshot of a disk attached to an instance that is stopped, there are two checkout operations that have the possiblity of racing: 1) the one that Nexus will send to a Pantry during a snapshot create saga. 2) the one that Nexus will send to a Propolis during an instance start saga. If 1 occurs before 2, then Propolis will take over the downstairs connections that the Pantry has established, and the snapshot create saga will fail, but the guest OS for that Propolis will not see any errors. If 2 occurs before 1, then the 1 checkout will fail due to one of the conditions added in this commit: the checkout is being performed for use with a Pantry, and a Propolis _may_ exist, so reject the checkout attempt. Fixes #3289. --- nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/db-queries/src/db/datastore/volume.rs | 394 +++++++++++++++++- nexus/src/app/image.rs | 5 +- nexus/src/app/instance.rs | 23 +- nexus/src/app/sagas/common_storage.rs | 5 +- nexus/src/app/sagas/disk_create.rs | 10 +- nexus/src/app/sagas/finalize_disk.rs | 3 +- nexus/src/app/sagas/instance_migrate.rs | 7 +- nexus/src/app/sagas/instance_start.rs | 2 + nexus/src/app/sagas/snapshot_create.rs | 140 ++++--- nexus/src/app/snapshot.rs | 31 +- nexus/tests/integration_tests/snapshots.rs | 103 +++++ .../integration_tests/volume_management.rs | 144 ++++++- .../add-lookup-disk-by-volume-id-index/up.sql | 4 + schema/crdb/dbinit.sql | 7 +- 16 files changed, 764 insertions(+), 118 deletions(-) create mode 100644 schema/crdb/add-lookup-disk-by-volume-id-index/up.sql diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 64ddc7c451..853db4195a 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(49, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(50, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(50, "add-lookup-disk-by-volume-id-index"), KnownVersion::new(49, "physical-disk-state-and-policy"), KnownVersion::new(48, "add-metrics-producers-time-modified-index"), KnownVersion::new(47, "add-view-for-bgp-peer-configs"), diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index b40b641202..c753ac5436 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -114,6 +114,7 @@ pub use virtual_provisioning_collection::StorageType; pub use volume::read_only_resources_associated_with_volume; pub use volume::CrucibleResources; pub use volume::CrucibleTargets; +pub use volume::VolumeCheckoutReason; // Number of unique datasets required to back a region. // TODO: This should likely turn into a configuration option. diff --git a/nexus/db-queries/src/db/datastore/volume.rs b/nexus/db-queries/src/db/datastore/volume.rs index a9646b9ef6..0e80ee3e3c 100644 --- a/nexus/db-queries/src/db/datastore/volume.rs +++ b/nexus/db-queries/src/db/datastore/volume.rs @@ -11,8 +11,10 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::identity::Asset; use crate::db::model::Dataset; +use crate::db::model::Disk; use crate::db::model::DownstairsClientStopRequestNotification; use crate::db::model::DownstairsClientStoppedNotification; +use crate::db::model::Instance; use crate::db::model::Region; use crate::db::model::RegionSnapshot; use crate::db::model::UpstairsRepairNotification; @@ -25,6 +27,7 @@ use anyhow::bail; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use diesel::OptionalExtension; +use nexus_types::identity::Resource; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; @@ -44,6 +47,40 @@ use serde::Serialize; use sled_agent_client::types::VolumeConstructionRequest; use uuid::Uuid; +#[derive(Debug, Clone, Copy)] +pub enum VolumeCheckoutReason { + /// Check out a read-only Volume. + ReadOnlyCopy, + + /// Check out a Volume to modify and store back to the database. + CopyAndModify, + + /// Check out a Volume to send to Propolis to start an instance. + InstanceStart { vmm_id: Uuid }, + + /// Check out a Volume to send to a migration destination Propolis. + InstanceMigrate { vmm_id: Uuid, target_vmm_id: Uuid }, + + /// Check out a Volume to send to a Pantry (for background maintenance + /// operations). 
+ Pantry, +} + +#[derive(Debug, thiserror::Error)] +enum VolumeGetError { + #[error("Serde error during volume_checkout: {0}")] + SerdeError(#[from] serde_json::Error), + + #[error("Updated {0} database rows, expected {1}")] + UnexpectedDatabaseUpdate(usize, usize), + + #[error("Checkout condition failed: {0}")] + CheckoutConditionFailed(String), + + #[error("Invalid Volume: {0}")] + InvalidVolume(String), +} + impl DataStore { pub async fn volume_create(&self, volume: Volume) -> CreateResult { use db::schema::volume::dsl; @@ -194,6 +231,244 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + async fn volume_checkout_allowed( + reason: &VolumeCheckoutReason, + vcr: &VolumeConstructionRequest, + maybe_disk: Option, + maybe_instance: Option, + ) -> Result<(), VolumeGetError> { + match reason { + VolumeCheckoutReason::ReadOnlyCopy => { + // When checking out to make a copy (usually for use as a + // read-only parent), the volume must be read only. Even if a + // call-site that uses Copy sends this copied Volume to a + // Propolis or Pantry, the Upstairs that will be created will be + // read-only, and will not take over from other read-only + // Upstairs. + + match volume_is_read_only(&vcr) { + Ok(read_only) => { + if !read_only { + return Err(VolumeGetError::CheckoutConditionFailed( + String::from("Non-read-only Volume Checkout for use Copy!") + )); + } + + Ok(()) + } + + Err(e) => Err(VolumeGetError::InvalidVolume(e.to_string())), + } + } + + VolumeCheckoutReason::CopyAndModify => { + // `CopyAndModify` is used when taking a read/write Volume, + // modifying it (for example, when taking a snapshot, to point + // to read-only resources), and committing it back to the DB. + // This is a checkout of a read/write Volume, so creating an + // Upstairs from it *may* take over from something else. The + // call-site must ensure this doesn't happen, but we can't do + // that here. + + Ok(()) + } + + VolumeCheckoutReason::InstanceStart { vmm_id } => { + // Check out this volume to send to Propolis to start an + // Instance. The VMM id in the enum must match the instance's + // propolis_id. + + let Some(instance) = &maybe_instance else { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance does not exist", + vmm_id + ), + )); + }; + + let runtime = instance.runtime(); + match (runtime.propolis_id, runtime.dst_propolis_id) { + (Some(_), Some(_)) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance {} is undergoing migration", + vmm_id, + instance.id(), + ) + )) + } + + (None, None) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance {} has no propolis ids", + vmm_id, + instance.id(), + ) + )) + } + + (Some(propolis_id), None) => { + if propolis_id != *vmm_id { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance {} propolis id {} mismatch", + vmm_id, + instance.id(), + propolis_id, + ) + )); + } + + Ok(()) + } + + (None, Some(dst_propolis_id)) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceStart {}: instance {} has no propolis id but dst propolis id {}", + vmm_id, + instance.id(), + dst_propolis_id, + ) + )) + } + } + } + + VolumeCheckoutReason::InstanceMigrate { vmm_id, target_vmm_id } => { + // Check out this volume to send to destination Propolis to + // migrate an Instance. Only take over from the specified source + // VMM. 
+ + let Some(instance) = &maybe_instance else { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance does not exist", + vmm_id, target_vmm_id + ), + )); + }; + + let runtime = instance.runtime(); + match (runtime.propolis_id, runtime.dst_propolis_id) { + (Some(propolis_id), Some(dst_propolis_id)) => { + if propolis_id != *vmm_id || dst_propolis_id != *target_vmm_id { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance {} propolis id mismatches {} {}", + vmm_id, + target_vmm_id, + instance.id(), + propolis_id, + dst_propolis_id, + ) + )); + } + + Ok(()) + } + + (None, None) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance {} has no propolis ids", + vmm_id, + target_vmm_id, + instance.id(), + ) + )) + } + + (Some(propolis_id), None) => { + // XXX is this right? + if propolis_id != *vmm_id { + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance {} propolis id {} mismatch", + vmm_id, + target_vmm_id, + instance.id(), + propolis_id, + ) + )); + } + + Ok(()) + } + + (None, Some(dst_propolis_id)) => { + Err(VolumeGetError::CheckoutConditionFailed( + format!( + "InstanceMigrate {} {}: instance {} has no propolis id but dst propolis id {}", + vmm_id, + target_vmm_id, + instance.id(), + dst_propolis_id, + ) + )) + } + } + } + + VolumeCheckoutReason::Pantry => { + // Check out this Volume to send to a Pantry, which will create + // a read/write Upstairs, for background maintenance operations. + // There must not be any Propolis, otherwise this will take over + // from that and cause errors for guest OSes. + + let Some(disk) = maybe_disk else { + // This volume isn't backing a disk, it won't take over from + // a Propolis' Upstairs. + return Ok(()); + }; + + let Some(attach_instance_id) = + disk.runtime().attach_instance_id + else { + // The volume is backing a disk that is not attached to an + // instance. At this moment it won't take over from a + // Propolis' Upstairs, so send it to a Pantry to create an + // Upstairs there. A future checkout that happens after + // this transaction that is sent to a Propolis _will_ take + // over from this checkout (sent to a Pantry), which is ok. + return Ok(()); + }; + + let Some(instance) = maybe_instance else { + // The instance, which the disk that this volume backs is + // attached to, doesn't exist? + // + // XXX this is a Nexus bug! + return Err(VolumeGetError::CheckoutConditionFailed( + format!( + "Pantry: instance {} backing disk {} does not exist?", + attach_instance_id, + disk.id(), + ) + )); + }; + + if let Some(propolis_id) = instance.runtime().propolis_id { + // The instance, which the disk that this volume backs is + // attached to, exists and has an active propolis ID. A + // propolis _may_ exist, so bail here - an activation from + // the Pantry is not allowed to take over from a Propolis. + Err(VolumeGetError::CheckoutConditionFailed(format!( + "Pantry: possible Propolis {}", + propolis_id + ))) + } else { + // The instance, which the disk that this volume backs is + // attached to, exists, but there is no active propolis ID. + // This is ok. + Ok(()) + } + } + } + } + /// Checkout a copy of the Volume from the database. 
/// This action (getting a copy) will increase the generation number /// of Volumes of the VolumeConstructionRequest::Volume type that have @@ -203,18 +478,10 @@ impl DataStore { pub async fn volume_checkout( &self, volume_id: Uuid, + reason: VolumeCheckoutReason, ) -> LookupResult { use db::schema::volume::dsl; - #[derive(Debug, thiserror::Error)] - enum VolumeGetError { - #[error("Serde error during volume_checkout: {0}")] - SerdeError(#[from] serde_json::Error), - - #[error("Updated {0} database rows, expected {1}")] - UnexpectedDatabaseUpdate(usize, usize), - } - // We perform a transaction here, to be sure that on completion // of this, the database contains an updated version of the // volume with the generation number incremented (for the volume @@ -241,6 +508,56 @@ impl DataStore { err.bail(VolumeGetError::SerdeError(e)) })?; + // The VolumeConstructionRequest resulting from this checkout will have its + // generation numbers bumped, and as result will (if it has non-read-only + // sub-volumes) take over from previous read/write activations when sent to a + // place that will `construct` a new Volume. Depending on the checkout reason, + // prevent creating multiple read/write Upstairs acting on the same Volume, + // except where the take over is intended. + + let (maybe_disk, maybe_instance) = { + use db::schema::instance::dsl as instance_dsl; + use db::schema::disk::dsl as disk_dsl; + + let maybe_disk: Option = disk_dsl::disk + .filter(disk_dsl::time_deleted.is_null()) + .filter(disk_dsl::volume_id.eq(volume_id)) + .select(Disk::as_select()) + .get_result_async(&conn) + .await + .optional()?; + + let maybe_instance: Option = if let Some(disk) = &maybe_disk { + if let Some(attach_instance_id) = disk.runtime().attach_instance_id { + instance_dsl::instance + .filter(instance_dsl::time_deleted.is_null()) + .filter(instance_dsl::id.eq(attach_instance_id)) + .select(Instance::as_select()) + .get_result_async(&conn) + .await + .optional()? + } else { + // Disk not attached to an instance + None + } + } else { + // Volume not associated with disk + None + }; + + (maybe_disk, maybe_instance) + }; + + if let Err(e) = Self::volume_checkout_allowed( + &reason, + &vcr, + maybe_disk, + maybe_instance, + ) + .await { + return Err(err.bail(e)); + } + // Look to see if the VCR is a Volume type, and if so, look at // its sub_volumes. 
If they are of type Region, then we need // to update their generation numbers and record that update @@ -353,8 +670,17 @@ impl DataStore { .await .map_err(|e| { if let Some(err) = err.take() { - return Error::internal_error(&format!("Transaction error: {}", err)); + match err { + VolumeGetError::CheckoutConditionFailed(message) => { + return Error::conflict(message); + } + + _ => { + return Error::internal_error(&format!("Transaction error: {}", err)); + } + } } + public_error_from_diesel(e, ErrorHandler::Server) }) } @@ -447,8 +773,9 @@ impl DataStore { pub async fn volume_checkout_randomize_ids( &self, volume_id: Uuid, + reason: VolumeCheckoutReason, ) -> CreateResult { - let volume = self.volume_checkout(volume_id).await?; + let volume = self.volume_checkout(volume_id, reason).await?; let vcr: sled_agent_client::types::VolumeConstructionRequest = serde_json::from_str(volume.data())?; @@ -1309,6 +1636,51 @@ pub fn read_only_resources_associated_with_volume( } } +/// Returns true if the sub-volumes of a Volume are all read-only +pub fn volume_is_read_only( + vcr: &VolumeConstructionRequest, +) -> anyhow::Result { + match vcr { + VolumeConstructionRequest::Volume { sub_volumes, .. } => { + for sv in sub_volumes { + match sv { + VolumeConstructionRequest::Region { opts, .. } => { + if !opts.read_only { + return Ok(false); + } + } + + _ => { + bail!("Saw non-Region in sub-volume {:?}", sv); + } + } + } + + Ok(true) + } + + VolumeConstructionRequest::Region { .. } => { + // We don't support a pure Region VCR at the volume + // level in the database, so this choice should + // never be encountered, but I want to know if it is. + panic!("Region not supported as a top level volume"); + } + + VolumeConstructionRequest::File { .. } => { + // Effectively, this is read-only, as this BlockIO implementation + // does not have a `write` implementation. This will be hit if + // trying to make a snapshot or image out of a + // `YouCanBootAnythingAsLongAsItsAlpine` image source. + Ok(true) + } + + VolumeConstructionRequest::Url { .. } => { + // ImageSource::Url was deprecated + bail!("Saw VolumeConstructionRequest::Url"); + } + } +} + #[cfg(test)] mod tests { use super::*; diff --git a/nexus/src/app/image.rs b/nexus/src/app/image.rs index a7fe75a464..96a3e6b06f 100644 --- a/nexus/src/app/image.rs +++ b/nexus/src/app/image.rs @@ -121,7 +121,10 @@ impl super::Nexus { let image_volume = self .db_datastore - .volume_checkout_randomize_ids(db_snapshot.volume_id) + .volume_checkout_randomize_ids( + db_snapshot.volume_id, + db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + ) .await?; db::model::Image { diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index e29ed21192..a82a53331e 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -173,6 +173,13 @@ enum InstanceStateChangeRequestAction { SendToSled(Uuid), } +/// What is the higher level operation that is calling +/// `instance_ensure_registered`? 
+pub(crate) enum InstanceRegisterReason { + Start { vmm_id: Uuid }, + Migrate { vmm_id: Uuid, target_vmm_id: Uuid }, +} + impl super::Nexus { pub fn instance_lookup<'a>( &'a self, @@ -1010,6 +1017,7 @@ impl super::Nexus { db_instance: &db::model::Instance, propolis_id: &Uuid, initial_vmm: &db::model::Vmm, + operation: InstanceRegisterReason, ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, authz_instance).await?; @@ -1065,8 +1073,19 @@ impl super::Nexus { } }; - let volume = - self.db_datastore.volume_checkout(disk.volume_id).await?; + let volume = self + .db_datastore + .volume_checkout( + disk.volume_id, + match operation { + InstanceRegisterReason::Start { vmm_id } => + db::datastore::VolumeCheckoutReason::InstanceStart { vmm_id }, + InstanceRegisterReason::Migrate { vmm_id, target_vmm_id } => + db::datastore::VolumeCheckoutReason::InstanceMigrate { vmm_id, target_vmm_id }, + } + ) + .await?; + disk_reqs.push(sled_agent_client::types::DiskRequest { name: disk.name().to_string(), slot: sled_agent_client::types::Slot(slot.0), diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index 3b590f6205..bf530ef858 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -769,7 +769,10 @@ pub(crate) async fn call_pantry_attach_for_disk( let disk_volume = nexus .datastore() - .volume_checkout(disk.volume_id) + .volume_checkout( + disk.volume_id, + db::datastore::VolumeCheckoutReason::Pantry, + ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/disk_create.rs b/nexus/src/app/sagas/disk_create.rs index 830a4dd96c..165bf7573c 100644 --- a/nexus/src/app/sagas/disk_create.rs +++ b/nexus/src/app/sagas/disk_create.rs @@ -390,7 +390,10 @@ async fn sdc_regions_ensure( let volume = osagactx .datastore() - .volume_checkout(db_snapshot.volume_id) + .volume_checkout( + db_snapshot.volume_id, + db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + ) .await .map_err(ActionError::action_failed)?; @@ -433,7 +436,10 @@ async fn sdc_regions_ensure( let volume = osagactx .datastore() - .volume_checkout(image.volume_id) + .volume_checkout( + image.volume_id, + db::datastore::VolumeCheckoutReason::ReadOnlyCopy, + ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/finalize_disk.rs b/nexus/src/app/sagas/finalize_disk.rs index d4f6fc39aa..89893fb703 100644 --- a/nexus/src/app/sagas/finalize_disk.rs +++ b/nexus/src/app/sagas/finalize_disk.rs @@ -79,7 +79,8 @@ impl NexusSaga for SagaFinalizeDisk { silo_id: params.silo_id, project_id: params.project_id, disk_id: params.disk_id, - attached_instance_and_sled: None, + attach_instance_id: None, + use_the_pantry: true, create_params: params::SnapshotCreate { identity: external::IdentityMetadataCreateParams { name: snapshot_name.clone(), diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index da3b3e93ea..e4bdd989cc 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -4,7 +4,8 @@ use super::{NexusActionContext, NexusSaga, ACTION_GENERATE_ID}; use crate::app::instance::{ - InstanceStateChangeError, InstanceStateChangeRequest, + InstanceRegisterReason, InstanceStateChangeError, + InstanceStateChangeRequest, }; use crate::app::sagas::{ declare_saga_actions, instance_common::allocate_vmm_ipv6, @@ -356,6 +357,10 @@ async fn sim_ensure_destination_propolis( &db_instance, &vmm.id, &vmm, + InstanceRegisterReason::Migrate { + vmm_id: 
params.src_vmm.id, + target_vmm_id: vmm.id, + }, ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index b1d9506c31..98fcec13a7 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -10,6 +10,7 @@ use super::{ instance_common::allocate_vmm_ipv6, NexusActionContext, NexusSaga, SagaInitError, ACTION_GENERATE_ID, }; +use crate::app::instance::InstanceRegisterReason; use crate::app::instance::InstanceStateChangeError; use crate::app::sagas::declare_saga_actions; use chrono::Utc; @@ -527,6 +528,7 @@ async fn sis_ensure_registered( &db_instance, &propolis_id, &vmm_record, + InstanceRegisterReason::Start { vmm_id: propolis_id }, ) .await .map_err(ActionError::action_failed)?; diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 8b6febf71a..ff57470a5f 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -130,7 +130,8 @@ pub(crate) struct Params { pub silo_id: Uuid, pub project_id: Uuid, pub disk_id: Uuid, - pub attached_instance_and_sled: Option<(Uuid, Uuid)>, + pub attach_instance_id: Option, + pub use_the_pantry: bool, pub create_params: params::SnapshotCreate, } @@ -251,8 +252,7 @@ impl NexusSaga for SagaSnapshotCreate { // (DB) Tracks virtual resource provisioning. builder.append(space_account_action()); - let use_the_pantry = params.attached_instance_and_sled.is_none(); - if !use_the_pantry { + if !params.use_the_pantry { // (Sleds) If the disk is attached to an instance, send a // snapshot request to sled-agent to create a ZFS snapshot. builder.append(send_snapshot_request_to_sled_agent_action()); @@ -284,7 +284,7 @@ impl NexusSaga for SagaSnapshotCreate { // (DB) Mark snapshot as "ready" builder.append(finalize_snapshot_record_action()); - if use_the_pantry { + if params.use_the_pantry { // (Pantry) Set the state back to Detached // // This has to be the last saga node! Otherwise, concurrent @@ -675,22 +675,47 @@ async fn ssc_send_snapshot_request_to_sled_agent( let snapshot_id = sagactx.lookup::("snapshot_id")?; // If this node was reached, the saga initiator thought the disk was - // attached to an instance that was running on a specific sled. Contact that - // sled and ask it to initiate a snapshot. Note that this is best-effort: - // the instance may have stopped (or may be have stopped, had the disk - // detached, and resumed running on the same sled) while the saga was - // executing. - let (instance_id, sled_id) = - params.attached_instance_and_sled.ok_or_else(|| { - ActionError::action_failed(Error::internal_error( - "snapshot saga in send_snapshot_request_to_sled_agent but no \ - instance/sled pair was provided", - )) - })?; + // attached to an instance that _may_ have a running Propolis. Contact that + // Propolis and ask it to initiate a snapshot. Note that this is + // best-effort: the instance may have stopped (or may be have stopped, had + // the disk detached, and resumed running on the same sled) while the saga + // was executing. 
+ let Some(attach_instance_id) = params.attach_instance_id else { + return Err(ActionError::action_failed(Error::internal_error( + "attach instance id is None!", + ))); + }; + + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let (.., authz_instance) = LookupPath::new(&opctx, &osagactx.datastore()) + .instance_id(attach_instance_id) + .lookup_for(authz::Action::Read) + .await + .map_err(ActionError::action_failed)?; + + let sled_id = osagactx + .datastore() + .instance_fetch_with_vmm(&opctx, &authz_instance) + .await + .map_err(ActionError::action_failed)? + .sled_id(); + + // If this instance does not currently have a sled, we can't continue this + // saga - the user will have to reissue the snapshot request and it will get + // run on a Pantry. + let Some(sled_id) = sled_id else { + return Err(ActionError::action_failed(Error::unavail( + "sled id is None!", + ))); + }; info!(log, "asking for disk snapshot from Propolis via sled agent"; "disk_id" => %params.disk_id, - "instance_id" => %instance_id, + "instance_id" => %attach_instance_id, "sled_id" => %sled_id); let sled_agent_client = osagactx @@ -702,7 +727,7 @@ async fn ssc_send_snapshot_request_to_sled_agent( retry_until_known_result(log, || async { sled_agent_client .instance_issue_disk_snapshot_request( - &instance_id, + &attach_instance_id, ¶ms.disk_id, &InstanceIssueDiskSnapshotRequestBody { snapshot_id }, ) @@ -838,6 +863,16 @@ async fn ssc_attach_disk_to_pantry( info!(log, "disk {} in state finalizing", params.disk_id); } + external::DiskState::Attached(attach_instance_id) => { + // No state change required + info!( + log, + "disk {} in state attached to instance id {}", + params.disk_id, + attach_instance_id + ); + } + _ => { // Return a 503 indicating that the user should retry return Err(ActionError::action_failed( @@ -1358,7 +1393,10 @@ async fn ssc_create_volume_record( let disk_volume = osagactx .datastore() - .volume_checkout(disk.volume_id) + .volume_checkout( + disk.volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) .await .map_err(ActionError::action_failed)?; @@ -1815,14 +1853,16 @@ mod test { project_id: Uuid, disk_id: Uuid, disk: NameOrId, - instance_and_sled: Option<(Uuid, Uuid)>, + attach_instance_id: Option, + use_the_pantry: bool, ) -> Params { Params { serialized_authn: authn::saga::Serialized::for_opctx(opctx), silo_id, project_id, disk_id, - attached_instance_and_sled: instance_and_sled, + attach_instance_id, + use_the_pantry, create_params: params::SnapshotCreate { identity: IdentityMetadataCreateParams { name: "my-snapshot".parse().expect("Invalid disk name"), @@ -1871,7 +1911,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - None, + None, // not attached to an instance + true, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); @@ -2079,7 +2120,7 @@ mod test { // since this is just a test, bypass the normal // attachment machinery and just update the disk's // database record directly. 
- let instance_and_sled = if !use_the_pantry { + let attach_instance_id = if !use_the_pantry { let state = setup_test_instance( cptestctx, client, @@ -2092,11 +2133,7 @@ mod test { ) .await; - let sled_id = state - .sled_id() - .expect("running instance should have a vmm"); - - Some((state.instance().id(), sled_id)) + Some(state.instance().id()) } else { None }; @@ -2107,7 +2144,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - instance_and_sled, + attach_instance_id, + use_the_pantry, ) } }) @@ -2205,36 +2243,31 @@ mod test { Name::from_str(DISK_NAME).unwrap().into(), // The disk isn't attached at this time, so don't supply a sled. None, + true, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); let runnable_saga = nexus.create_runnable_saga(dag).await.unwrap(); // Before running the saga, attach the disk to an instance! - let (.., authz_disk, db_disk) = - LookupPath::new(&opctx, nexus.datastore()) - .disk_id(disk_id) - .fetch_for(authz::Action::Read) - .await - .expect("Failed to look up created disk"); - - assert!(nexus - .datastore() - .disk_update_runtime( - &opctx, - &authz_disk, - &db_disk.runtime().attach(Uuid::new_v4()), - ) - .await - .expect("failed to attach disk")); + let _instance_and_vmm = setup_test_instance( + &cptestctx, + &client, + vec![params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { + name: Name::from_str(DISK_NAME).unwrap(), + }, + )], + ) + .await; // Actually run the saga let output = nexus.run_saga(runnable_saga).await; - // Expect to see 503 + // Expect to see 409 match output { Err(e) => { - assert!(matches!(e, Error::ServiceUnavailable { .. })); + assert!(matches!(e, Error::Conflict { .. })); } Ok(_) => { @@ -2269,6 +2302,7 @@ mod test { Name::from_str(DISK_NAME).unwrap().into(), // The disk isn't attached at this time, so don't supply a sled. None, + true, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); @@ -2313,8 +2347,6 @@ mod test { // the saga, stopping the instance, detaching the disk, and then letting // the saga run. 
let fake_instance_id = Uuid::new_v4(); - let fake_sled_id = - Uuid::parse_str(nexus_test_utils::SLED_AGENT_UUID).unwrap(); let params = new_test_params( &opctx, @@ -2322,7 +2354,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - Some((fake_instance_id, fake_sled_id)), + Some(fake_instance_id), + false, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); @@ -2363,10 +2396,6 @@ mod test { ) .await; - let sled_id = instance_state - .sled_id() - .expect("running instance should have a vmm"); - // Rerun the saga let params = new_test_params( &opctx, @@ -2374,7 +2403,8 @@ mod test { project_id, disk_id, Name::from_str(DISK_NAME).unwrap().into(), - Some((instance_state.instance().id(), sled_id)), + Some(instance_state.instance().id()), + false, // use the pantry ); let dag = create_saga_dag::(params).unwrap(); diff --git a/nexus/src/app/snapshot.rs b/nexus/src/app/snapshot.rs index 0c90ac31fb..c28d180d3c 100644 --- a/nexus/src/app/snapshot.rs +++ b/nexus/src/app/snapshot.rs @@ -12,7 +12,6 @@ use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; -use omicron_common::api::external::InstanceState; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; @@ -93,7 +92,7 @@ impl super::Nexus { // If there isn't a running propolis, Nexus needs to use the Crucible // Pantry to make this snapshot - let instance_and_sled = if let Some(attach_instance_id) = + let use_the_pantry = if let Some(attach_instance_id) = &db_disk.runtime_state.attach_instance_id { let (.., authz_instance) = @@ -107,29 +106,12 @@ impl super::Nexus { .instance_fetch_with_vmm(&opctx, &authz_instance) .await?; - match instance_state.vmm().as_ref() { - None => None, - Some(vmm) => match vmm.runtime.state.0 { - // If the VM might be running, or it's rebooting (which - // doesn't deactivate the volume), send the snapshot request - // to the relevant VMM. Otherwise, there's no way to know if - // the instance has attached the volume or is in the process - // of detaching it, so bail. - InstanceState::Running | InstanceState::Rebooting => { - Some((*attach_instance_id, vmm.sled_id)) - } - _ => { - return Err(Error::invalid_request(&format!( - "cannot snapshot attached disk for instance in \ - state {}", - vmm.runtime.state.0 - ))); - } - }, - } + // If a Propolis _may_ exist, send the snapshot request there, + // otherwise use the pantry. + !instance_state.vmm().is_some() } else { // This disk is not attached to an instance, use the pantry. 
- None + true }; let saga_params = sagas::snapshot_create::Params { @@ -137,7 +119,8 @@ impl super::Nexus { silo_id: authz_silo.id(), project_id: authz_project.id(), disk_id: authz_disk.id(), - attached_instance_and_sled: instance_and_sled, + attach_instance_id: db_disk.runtime_state.attach_instance_id, + use_the_pantry, create_params: params.clone(), }; diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 63ea81f13f..251b729f98 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -256,6 +256,109 @@ async fn test_snapshot_without_instance(cptestctx: &ControlPlaneTestContext) { assert_eq!(disk.state, DiskState::Detached); } +#[nexus_test] +async fn test_snapshot_stopped_instance(cptestctx: &ControlPlaneTestContext) { + let client = &cptestctx.external_client; + DiskTest::new(&cptestctx).await; + create_project_and_pool(client).await; + let disks_url = get_disks_url(); + + // Define a global image + let image_create_params = params::ImageCreate { + identity: IdentityMetadataCreateParams { + name: "alpine-edge".parse().unwrap(), + description: String::from( + "you can boot any image, as long as it's alpine", + ), + }, + source: params::ImageSource::YouCanBootAnythingAsLongAsItsAlpine, + os: "alpine".to_string(), + version: "edge".to_string(), + }; + + let images_url = format!("/v1/images?project={}", PROJECT_NAME); + let image = + NexusRequest::objects_post(client, &images_url, &image_create_params) + .authn_as(AuthnMode::PrivilegedUser) + .execute_and_parse_unwrap::() + .await; + + // Create a disk from this image + let disk_size = ByteCount::from_gibibytes_u32(2); + let base_disk_name: Name = "base-disk".parse().unwrap(); + let base_disk = params::DiskCreate { + identity: IdentityMetadataCreateParams { + name: base_disk_name.clone(), + description: String::from("sells rainsticks"), + }, + disk_source: params::DiskSource::Image { image_id: image.identity.id }, + size: disk_size, + }; + + let base_disk: Disk = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &disks_url) + .body(Some(&base_disk)) + .expect_status(Some(StatusCode::CREATED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + + // Create a stopped instance with attached disk + let instances_url = format!("/v1/instances?project={}", PROJECT_NAME,); + let instance_name = "base-instance"; + + let instance: Instance = object_create( + client, + &instances_url, + ¶ms::InstanceCreate { + identity: IdentityMetadataCreateParams { + name: instance_name.parse().unwrap(), + description: format!("instance {:?}", instance_name), + }, + ncpus: InstanceCpuCount(2), + memory: ByteCount::from_gibibytes_u32(1), + hostname: "base-instance".parse().unwrap(), + user_data: + b"#cloud-config\nsystem_info:\n default_user:\n name: oxide" + .to_vec(), + ssh_public_keys: Some(Vec::new()), + network_interfaces: + params::InstanceNetworkInterfaceAttachment::None, + disks: vec![params::InstanceDiskAttachment::Attach( + params::InstanceDiskAttach { name: base_disk_name.clone() }, + )], + external_ips: vec![], + start: false, + }, + ) + .await; + + assert_eq!(instance.runtime.run_state, external::InstanceState::Stopped); + + // Issue snapshot request + let snapshots_url = format!("/v1/snapshots?project={}", PROJECT_NAME); + + let snapshot: views::Snapshot = object_create( + client, + &snapshots_url, + ¶ms::SnapshotCreate { + identity: IdentityMetadataCreateParams { + name: 
instance_name.parse().unwrap(), + description: format!("instance {:?}", instance_name), + }, + disk: base_disk_name.into(), + }, + ) + .await; + + assert_eq!(snapshot.disk_id, base_disk.identity.id); + assert_eq!(snapshot.size, base_disk.size); +} + #[nexus_test] async fn test_delete_snapshot(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; diff --git a/nexus/tests/integration_tests/volume_management.rs b/nexus/tests/integration_tests/volume_management.rs index daf78823ed..ecfa7cf0f1 100644 --- a/nexus/tests/integration_tests/volume_management.rs +++ b/nexus/tests/integration_tests/volume_management.rs @@ -9,6 +9,7 @@ use chrono::Utc; use dropshot::test_util::ClientTestContext; use http::method::Method; use http::StatusCode; +use nexus_db_queries::db; use nexus_db_queries::db::DataStore; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; @@ -1375,7 +1376,13 @@ async fn test_volume_remove_read_only_parent_base( // Go and get the volume from the database, verify it no longer // has a read only parent. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1394,7 +1401,13 @@ async fn test_volume_remove_read_only_parent_base( } // Verify the t_vid now has a ROP. - let new_vol = datastore.volume_checkout(t_vid).await.unwrap(); + let new_vol = datastore + .volume_checkout( + t_vid, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1421,7 +1434,13 @@ async fn test_volume_remove_read_only_parent_base( // We want to verify we can call volume_remove_rop twice and the second // time through it won't change what it did the first time. This is // critical to supporting replay of the saga, should it be needed. 
- let new_vol = datastore.volume_checkout(t_vid).await.unwrap(); + let new_vol = datastore + .volume_checkout( + t_vid, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1570,7 +1589,13 @@ async fn test_volume_remove_rop_saga(cptestctx: &ControlPlaneTestContext) { .await .unwrap(); - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1628,7 +1653,13 @@ async fn test_volume_remove_rop_saga_twice( .unwrap(); println!("first returns {:?}", res); - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1762,7 +1793,13 @@ async fn test_volume_remove_rop_saga_deleted_volume( .await .unwrap(); - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); let vcr: VolumeConstructionRequest = serde_json::from_str(new_vol.data()).unwrap(); @@ -1811,11 +1848,23 @@ async fn test_volume_checkout(cptestctx: &ControlPlaneTestContext) { // The first time back, we get 1 but internally the generation number goes // to 2. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(1)]); // Request again, we should get 2 now. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(2)]); } @@ -1853,9 +1902,21 @@ async fn test_volume_checkout_updates_nothing( .unwrap(); // Verify nothing happens to our non generation number volume. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![None]); - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![None]); } @@ -1894,15 +1955,33 @@ async fn test_volume_checkout_updates_multiple_gen( // The first time back, we get our original values, but internally the // generation number goes up. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(3), Some(8)]); // Request again, we should see the incremented values now.. 
- let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(4), Some(9)]); // Request one more, because why not. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(5), Some(10)]); } @@ -1947,11 +2026,23 @@ async fn test_volume_checkout_updates_sparse_multiple_gen( // The first time back, we get our original values, but internally the // generation number goes up. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![None, Some(7), Some(9)]); // Request again, we should see the incremented values now.. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![None, Some(8), Some(10)]); } @@ -1996,11 +2087,23 @@ async fn test_volume_checkout_updates_sparse_mid_multiple_gen( // The first time back, we get our original values, but internally the // generation number goes up. - let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(7), None, Some(9)]); // Request again, we should see the incremented values now.. 
- let new_vol = datastore.volume_checkout(volume_id).await.unwrap(); + let new_vol = datastore + .volume_checkout( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await + .unwrap(); volume_match_gen(new_vol, vec![Some(8), None, Some(10)]); } @@ -2038,7 +2141,12 @@ async fn test_volume_checkout_randomize_ids_only_read_only( .unwrap(); // volume_checkout_randomize_ids should fail - let r = datastore.volume_checkout_randomize_ids(volume_id).await; + let r = datastore + .volume_checkout_randomize_ids( + volume_id, + db::datastore::VolumeCheckoutReason::CopyAndModify, + ) + .await; assert!(r.is_err()); } diff --git a/schema/crdb/add-lookup-disk-by-volume-id-index/up.sql b/schema/crdb/add-lookup-disk-by-volume-id-index/up.sql new file mode 100644 index 0000000000..2f129f334c --- /dev/null +++ b/schema/crdb/add-lookup-disk-by-volume-id-index/up.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_disk_by_volume_id ON omicron.public.disk ( + volume_id +) WHERE + time_deleted IS NULL; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index da3dbb3f4c..9f28efbd16 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1152,6 +1152,11 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_deleted_disk ON omicron.public.disk ( ) WHERE time_deleted IS NOT NULL; +CREATE UNIQUE INDEX IF NOT EXISTS lookup_disk_by_volume_id ON omicron.public.disk ( + volume_id +) WHERE + time_deleted IS NULL; + CREATE TABLE IF NOT EXISTS omicron.public.image ( /* Identity metadata (resource) */ id UUID PRIMARY KEY, @@ -3770,7 +3775,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '49.0.0', NULL) + ( TRUE, NOW(), NOW(), '50.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From fd67302ffa2827305594aba9e5702a936e7f3d36 Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Wed, 3 Apr 2024 16:33:03 -0700 Subject: [PATCH 054/334] ci: publish the tufaceous manifest (#5403) While working on updating the script we use for downloading TUF repos over in meta, I was hitting unexpected 403 errors. This is because the script wants to also download the manifest.toml, and it can only get the URL for that artefact by hitting GitHub's check runs endpoint to get the URL from [the job status posted there by Buildomat](https://github.com/oxidecomputer/omicron/runs/23406222391), and GitHub [rate limits anonymous API requests significantly](https://docs.github.com/en/rest/using-the-rest-api/rate-limits-for-the-rest-api?apiVersion=2022-11-28#primary-rate-limit-for-unauthenticated-users). The fewer things we ask of GitHub the better, I think. 
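As a concrete illustration of what publishing the manifest buys a consumer: once `manifest.toml` is an artefact in the `rot-all` series, a downloader can fetch it directly by commit, with no GitHub API call (and no anonymous rate limit) in the path. A rough sketch in Rust, with the caveat that the Buildomat public-file URL layout used below is an assumption on my part rather than something this change defines:

```rust
// Sketch only. The host/path layout of Buildomat's public-file endpoint is
// assumed here; the series name ("rot-all") and the artefact name come from
// the publish stanza added in the diff below.
async fn fetch_manifest(commit: &str) -> reqwest::Result<String> {
    let url = format!(
        "https://buildomat.eng.oxide.computer/public/file/oxidecomputer/omicron/rot-all/{commit}/manifest.toml"
    );
    reqwest::get(url).await?.error_for_status()?.text().await
}
```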
--- .github/buildomat/jobs/tuf-repo.sh | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index c055a3f2ea..31b9d157ed 100755 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -23,6 +23,11 @@ #: #: [[publish]] #: series = "rot-all" +#: name = "manifest.toml" +#: from_output = "/work/manifest.toml" +#: +#: [[publish]] +#: series = "rot-all" #: name = "repo.zip" #: from_output = "/work/repo-rot-all.zip" #: From 4b5ddcca124fa14f0046747f0865679175752dca Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 4 Apr 2024 01:23:22 +0000 Subject: [PATCH 055/334] Update Rust crate bytes to 1.6.0 (#5395) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 59c16e10e9..50003b01b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -765,9 +765,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.5.0" +version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2bd12c1caf447e69cd4528f47f94d203fd2582878ecb9e9465484c4148a8223" +checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" dependencies = [ "serde", ] diff --git a/Cargo.toml b/Cargo.toml index 43bf7da5d0..c58fcf7509 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -182,7 +182,7 @@ bootstore = { path = "bootstore" } bootstrap-agent-client = { path = "clients/bootstrap-agent-client" } buf-list = { version = "1.0.3", features = ["tokio1"] } byteorder = "1.5.0" -bytes = "1.5.0" +bytes = "1.6.0" camino = { version = "1.1", features = ["serde1"] } camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1e7bea0d2d..29e1db8e4f 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -25,7 +25,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.2", default-f bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.0" } byteorder = { version = "1.5.0" } -bytes = { version = "1.5.0", features = ["serde"] } +bytes = { version = "1.6.0", features = ["serde"] } chrono = { version = "0.4.34", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.1", features = ["cargo", "derive", "env", "wrap_help"] } @@ -131,7 +131,7 @@ bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.2", default-f bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.0" } byteorder = { version = "1.5.0" } -bytes = { version = "1.5.0", features = ["serde"] } +bytes = { version = "1.6.0", features = ["serde"] } chrono = { version = "0.4.34", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.1", features = ["cargo", "derive", "env", "wrap_help"] } From 1f1a63f5b537ba778e5f3a8ba652ca12ec21cf61 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 3 Apr 2024 19:14:45 -0700 Subject: [PATCH 056/334] [nexus-db-queries] add a retry loop to saga record_event (#5390) We observed during a mupdate on Friday that Nexus panicked because of network flakiness. 
This is an attempt to address that by adding a retry loop. Fixes #2416. --- nexus/db-queries/src/db/sec_store.rs | 76 +++++++++++++++++++++++++--- nexus/src/app/saga.rs | 12 +++++ 2 files changed, 81 insertions(+), 7 deletions(-) diff --git a/nexus/db-queries/src/db/sec_store.rs b/nexus/db-queries/src/db/sec_store.rs index 1c63a48463..f8fd4ab86d 100644 --- a/nexus/db-queries/src/db/sec_store.rs +++ b/nexus/db-queries/src/db/sec_store.rs @@ -7,9 +7,13 @@ use crate::db::{self, model::Generation}; use anyhow::Context; use async_trait::async_trait; +use dropshot::HttpError; +use futures::TryFutureExt; +use omicron_common::backoff; use slog::Logger; use std::fmt; use std::sync::Arc; +use std::time::Duration; use steno::SagaId; /// Implementation of [`steno::SecStore`] backed by the Omicron CockroachDB @@ -53,16 +57,74 @@ impl steno::SecStore for CockroachDbSecStore { } async fn record_event(&self, event: steno::SagaNodeEvent) { - debug!(&self.log, "recording saga event"; + let log = self.log.new(o!( "saga_id" => event.saga_id.to_string(), - "node_id" => ?event.node_id, - "event_type" => ?event.event_type, - ); + "node_id" => event.node_id.to_string(), + "event_type" => format!("{:?}", event.event_type), + )); + + debug!(&log, "recording saga event"); let our_event = db::saga_types::SagaNodeEvent::new(event, self.sec_id); - // TODO-robustness This should be wrapped with a retry loop rather than - // unwrapping the result. See omicron#2416. - self.datastore.saga_create_event(&our_event).await.unwrap(); + backoff::retry_notify_ext( + // This is an internal service query to CockroachDB. + backoff::retry_policy_internal_service(), + || { + // An interesting question is how to handle errors. + // + // In general, there are some kinds of database errors that are + // temporary/server errors (e.g. network failures), and some + // that are permanent/client errors (e.g. conflict during + // insertion). The permanent ones would require operator + // intervention to fix. + // + // However, there is no way to bubble up errors here, and for + // good reason: it is inherent to the nature of sagas that + // progress is durably recorded. So within *this* code there is + // no option but to retry forever. (Below, however, we do mark + // errors that likely require operator intervention.) + // + // At a higher level, callers should plan for the fact that + // record_event could potentially loop forever. See + // https://github.com/oxidecomputer/omicron/issues/5406 and the + // note in `nexus/src/app/saga.rs`'s `execute_saga` for more + // details. 
+ self.datastore + .saga_create_event(&our_event) + .map_err(backoff::BackoffError::transient) + }, + move |error, call_count, total_duration| { + let http_error = HttpError::from(error.clone()); + if http_error.status_code.is_client_error() { + error!( + &log, + "client error while recording saga event (likely \ + requires operator intervention), retrying anyway"; + "error" => &error, + "call_count" => call_count, + "total_duration" => ?total_duration, + ); + } else if total_duration > Duration::from_secs(20) { + warn!( + &log, + "server error while recording saga event, retrying"; + "error" => &error, + "call_count" => call_count, + "total_duration" => ?total_duration, + ); + } else { + info!( + &log, + "server error while recording saga event, retrying"; + "error" => &error, + "call_count" => call_count, + "total_duration" => ?total_duration, + ); + } + }, + ) + .await + .expect("the above backoff retries forever") } async fn saga_update(&self, id: SagaId, update: steno::SagaCachedState) { diff --git a/nexus/src/app/saga.rs b/nexus/src/app/saga.rs index 93d22df7e1..8a717839f0 100644 --- a/nexus/src/app/saga.rs +++ b/nexus/src/app/saga.rs @@ -213,6 +213,18 @@ impl super::Nexus { let runnable_saga = self.create_runnable_saga(dag).await?; // Actually run the saga to completion. + // + // XXX: This may loop forever in case `SecStore::record_event` fails. + // Ideally, `run_saga` wouldn't both start the saga and wait for it to + // be finished -- instead, it would start off the saga, and then return + // a notification channel that the caller could use to decide: + // + // - either to .await until completion + // - or to stop waiting after a certain period, while still letting the + // saga run in the background. + // + // For more, see https://github.com/oxidecomputer/omicron/issues/5406 + // and the note in `sec_store.rs`'s `record_event`. self.run_saga(runnable_saga).await } } From e36654221275623550505ea3867d32c638c1e744 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 4 Apr 2024 04:20:43 +0000 Subject: [PATCH 057/334] chore(deps): update taiki-e/install-action digest to 834a7b9 (#5407) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`29beae9` -> `834a7b9`](https://togithub.com/taiki-e/install-action/compare/29beae9...834a7b9) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 8cda830b58..f1114ee128 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@29beae9445d6ef8516259305b219de7ff43a0118 # v2 + uses: taiki-e/install-action@834a7b93e0c678fb40309ee0e36546336d5c6ea7 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 1b5b7be345d0577e8f2a14c7d4f947734b20270f Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 3 Apr 2024 23:10:21 -0700 Subject: [PATCH 058/334] chore(deps): update rust crate samael to 0.0.15 (#5408) --- Cargo.lock | 91 +++++++++++++++++++----------------------------------- Cargo.toml | 2 +- 2 files changed, 32 insertions(+), 61 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50003b01b0..afb25f79ad 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1557,43 +1557,19 @@ dependencies = [ [[package]] name = "darling" -version = "0.14.4" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b750cb3417fd1b327431a470f388520309479ab0bf5e323505daf0290cd3850" +checksum = "54e36fcd13ed84ffdfda6f5be89b31287cbb80c439841fe69e04841435464391" dependencies = [ - "darling_core 0.14.4", - "darling_macro 0.14.4", -] - -[[package]] -name = "darling" -version = "0.20.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" -dependencies = [ - "darling_core 0.20.3", - "darling_macro 0.20.3", + "darling_core", + "darling_macro", ] [[package]] name = "darling_core" -version = "0.14.4" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "109c1ca6e6b7f82cc233a97004ea8ed7ca123a9af07a8230878fcfda9b158bf0" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim 0.10.0", - "syn 1.0.109", -] - -[[package]] -name = "darling_core" -version = "0.20.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" +checksum = "9c2cf1c23a687a1feeb728783b993c4e1ad83d99f351801977dd809b48d0a70f" dependencies = [ "fnv", "ident_case", @@ -1605,22 +1581,11 @@ dependencies = [ [[package]] name = "darling_macro" -version = "0.14.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4aab4dbc9f7611d8b55048a3a16d2d010c2c8334e46304b40ac1cc14bf3b48e" -dependencies = [ - "darling_core 0.14.4", - "quote", - "syn 1.0.109", -] - -[[package]] -name = "darling_macro" -version = "0.20.3" +version = "0.20.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" +checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ - "darling_core 0.20.3", + "darling_core", "quote", "syn 2.0.52", ] @@ -1758,33 +1723,33 @@ dependencies = [ [[package]] name = "derive_builder" -version = "0.12.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d67778784b508018359cbc8696edb3db78160bab2c2a28ba7f56ef6932997f8" +checksum = 
"0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7" dependencies = [ "derive_builder_macro", ] [[package]] name = "derive_builder_core" -version = "0.12.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c11bdc11a0c47bc7d37d582b5285da6849c96681023680b906673c5707af7b0f" +checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d" dependencies = [ - "darling 0.14.4", + "darling", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] name = "derive_builder_macro" -version = "0.12.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebcda35c7a396850a55ffeac740804b40ffec779b98fffbb1738f4033f0ee79e" +checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] @@ -1805,7 +1770,7 @@ name = "derror-macro" version = "0.1.0" source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" dependencies = [ - "darling 0.20.3", + "darling", "proc-macro2", "quote", "syn 2.0.52", @@ -7224,9 +7189,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quick-xml" -version = "0.30.0" +version = "0.31.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" +checksum = "1004a344b30a54e2ee58d66a71b32d2db2feb0a31f9a2d302bf0536f15de2a33" dependencies = [ "memchr", "serde", @@ -8154,10 +8119,11 @@ dependencies = [ [[package]] name = "samael" -version = "0.0.14" -source = "git+https://github.com/oxidecomputer/samael?branch=oxide/omicron#9e609a8f6fa0dd84e3bb8f579f46bd780c8be62b" +version = "0.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5da862a2115c0767681e28309a367dbd0a2366026948aae0272787e582d71eaf" dependencies = [ - "base64 0.21.7", + "base64 0.22.0", "bindgen", "chrono", "data-encoding", @@ -8535,7 +8501,7 @@ version = "3.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "865f9743393e638991566a8b7a479043c2c8da94a33e0a31f18214c9cae0a64d" dependencies = [ - "darling 0.20.3", + "darling", "proc-macro2", "quote", "syn 2.0.52", @@ -11575,3 +11541,8 @@ dependencies = [ "quote", "syn 1.0.109", ] + +[[patch.unused]] +name = "samael" +version = "0.0.14" +source = "git+https://github.com/oxidecomputer/samael?branch=oxide/omicron#9e609a8f6fa0dd84e3bb8f579f46bd780c8be62b" diff --git a/Cargo.toml b/Cargo.toml index c58fcf7509..ef4d15ea44 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -362,7 +362,7 @@ rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.1.1" rustyline = "13.0.0" -samael = { version = "0.0.14", features = ["xmlsec"] } +samael = { version = "0.0.15", features = ["xmlsec"] } schemars = "0.8.16" secrecy = "0.8.0" semver = { version = "1.0.22", features = ["std", "serde"] } From d016a2f729458539bbbb184d8d76e260d05e5b2c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 4 Apr 2024 06:22:28 +0000 Subject: [PATCH 059/334] chore(deps): update rust crate serde_with to 3.7.0 (#5411) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index afb25f79ad..6437e9dcff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8479,9 +8479,9 @@ dependencies = 
[ [[package]] name = "serde_with" -version = "3.6.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15d167997bd841ec232f5b2b8e0e26606df2e7caa4c31b95ea9ca52b200bd270" +checksum = "ee80b0e361bbf88fd2f6e242ccd19cfda072cb0faa6ae694ecee08199938569a" dependencies = [ "base64 0.21.7", "chrono", @@ -8497,9 +8497,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.6.1" +version = "3.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "865f9743393e638991566a8b7a479043c2c8da94a33e0a31f18214c9cae0a64d" +checksum = "6561dc161a9224638a31d876ccdfefbc1df91d3f3a8342eddb35f055d48c7655" dependencies = [ "darling", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index ef4d15ea44..75e6120f9f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -372,7 +372,7 @@ serde_json = "1.0.115" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" -serde_with = "3.6.1" +serde_with = "3.7.0" sha2 = "0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" From 4c54580e797b042f131c5727388fbf8bbfb1e993 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 4 Apr 2024 10:18:34 -0700 Subject: [PATCH 060/334] [tools] Don't try to delete the vdev when destroying virtual hardware (#5381) --- tools/virtual_hardware.sh | 38 +++++++++++++++++++++++++++++++++----- 1 file changed, 33 insertions(+), 5 deletions(-) diff --git a/tools/virtual_hardware.sh b/tools/virtual_hardware.sh index 883b98a04e..33b52577b1 100755 --- a/tools/virtual_hardware.sh +++ b/tools/virtual_hardware.sh @@ -45,18 +45,46 @@ function ensure_vdevs { } function try_destroy_zpools { + # Grab the list of all files used for swap + SWAP_LIST=$( swap -l | tail -n +2 | cut -d' ' -f1 ) + ZVOL_ROOT="/dev/zvol/dsk" + ZPOOL_TYPES=('oxp_' 'oxi_') for ZPOOL_TYPE in "${ZPOOL_TYPES[@]}"; do readarray -t ZPOOLS < <(zfs list -d 0 -o name | grep "^$ZPOOL_TYPE") for ZPOOL in "${ZPOOLS[@]}"; do - VDEV_FILE="${VDEV_DIR:-/var/tmp}/$VDEV" + # If this zpool contains a volume for swap, remove it. + readarray -t SWAP_PATHS < <(echo "$SWAP_LIST" | grep "$ZVOL_ROOT/$ZPOOL") + for SWAP_PATH in "${SWAP_PATHS[@]}"; do + swap -d "$SWAP_PATH" || \ + fail "Failed to remove swap for $SWAP_PATH" + + success "Removed swap at $SWAP_PATH" + done + + # After dealing with swap, try to unmount the zpool and destroy it zfs destroy -r "$ZPOOL" && \ (zfs unmount "$ZPOOL" || true) && \ - zpool destroy "$ZPOOL" && \ - rm -f "$VDEV_FILE" || \ - warn "Failed to remove ZFS pool and vdev: $ZPOOL" + zpool destroy "$ZPOOL" || \ + fail "Failed to remove ZFS pool: $ZPOOL" + + success "Verified ZFS pool $ZPOOL does not exist" + done + done - success "Verified ZFS pool and vdev $ZPOOL does not exist" + VDEV_TYPES=('m2_' 'u2_') + for VDEV_TYPE in "${VDEV_TYPES[@]}"; do + readarray -t VDEVS < <( \ + grep "\"$VDEV_TYPE" "$OMICRON_TOP/smf/sled-agent/non-gimlet/config.toml" | \ + sed 's/[ ",]//g' \ + ) + for VDEV in "${VDEVS[@]}"; do + echo "Device: [$VDEV]" + VDEV_PATH="${VDEV_DIR:-/var/tmp}/$VDEV" + if [[ -f "$VDEV_PATH" ]]; then + rm -f "$VDEV_PATH" + success "vdev $VDEV_PATH removed" + fi done done } From 9fde48956ce23d499520d530746f7ecb3a188959 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 4 Apr 2024 14:29:36 -0500 Subject: [PATCH 061/334] [nexus] Improve metrics endpoint summaries for docs (#5419) This is an immediate improvement so I'm not going to hold it on figuring out what to link to as an OxQL reference. 
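For context on where these strings end up: Dropshot turns the first line of an endpoint's doc comment into the OpenAPI operation `summary` and the remaining lines into its `description`, which is what the generated docs render. A minimal, self-contained sketch of that convention (a demo handler, not one of the real Nexus endpoints touched below):

```rust
use dropshot::{endpoint, HttpError, HttpResponseOk, RequestContext};

/// View metrics
///
/// View CPU, memory, or storage utilization metrics at the fleet or silo level.
#[endpoint {
    method = GET,
    path = "/v1/system/metrics/demo",
}]
async fn system_metric_demo(
    _rqctx: RequestContext<()>,
) -> Result<HttpResponseOk<String>, HttpError> {
    // The first doc-comment line above becomes the summary; the longer
    // sentence becomes the description in the generated OpenAPI document.
    Ok(HttpResponseOk("demo".to_string()))
}
```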
### Before ### After --- nexus/src/external_api/http_entrypoints.rs | 16 ++++++++++++---- openapi/nexus.json | 11 +++++++---- 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a570cd60c4..74244e112b 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5539,7 +5539,9 @@ struct SystemMetricsPathParam { metric_name: SystemMetricName, } -/// Access metrics data +/// View metrics +/// +/// View CPU, memory, or storage utilization metrics at the fleet or silo level. #[endpoint { method = GET, path = "/v1/system/metrics/{metric_name}", @@ -5581,7 +5583,9 @@ async fn system_metric( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Access metrics data +/// View metrics +/// +/// View CPU, memory, or storage utilization metrics at the silo or project level. #[endpoint { method = GET, path = "/v1/metrics/{metric_name}", @@ -5628,7 +5632,7 @@ async fn silo_metric( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// List available timeseries schema. +/// List timeseries schemas #[endpoint { method = GET, path = "/v1/timeseries/schema", @@ -5654,7 +5658,11 @@ async fn timeseries_schema_list( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Run a timeseries query, written OxQL. +// TODO: can we link to an OxQL reference? Do we have one? Can we even do links? + +/// Run timeseries query +/// +/// Queries are written in OxQL. #[endpoint { method = POST, path = "/v1/timeseries/query", diff --git a/openapi/nexus.json b/openapi/nexus.json index e7e4c1d31c..76e75d1ada 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -3089,7 +3089,8 @@ "tags": [ "metrics" ], - "summary": "Access metrics data", + "summary": "View metrics", + "description": "View CPU, memory, or storage utilization metrics at the silo or project level.", "operationId": "silo_metric", "parameters": [ { @@ -5954,7 +5955,8 @@ "tags": [ "system/metrics" ], - "summary": "Access metrics data", + "summary": "View metrics", + "description": "View CPU, memory, or storage utilization metrics at the fleet or silo level.", "operationId": "system_metric", "parameters": [ { @@ -7934,7 +7936,8 @@ "tags": [ "metrics" ], - "summary": "Run a timeseries query, written OxQL.", + "summary": "Run timeseries query", + "description": "Queries are written in OxQL.", "operationId": "timeseries_query", "requestBody": { "content": { @@ -7975,7 +7978,7 @@ "tags": [ "metrics" ], - "summary": "List available timeseries schema.", + "summary": "List timeseries schemas", "operationId": "timeseries_schema_list", "parameters": [ { From c4bd152926387b17815f20b475a4da459c8cdb12 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 4 Apr 2024 16:49:59 -0400 Subject: [PATCH 062/334] Blueprint `PlanningInput` API rework (#5404) The impetus for this change was [@davepacheco's comment on #5344](https://github.com/oxidecomputer/omicron/pull/5344#discussion_r1544638094). Summarizing the changes, several of which are open to bikeshedding: * The real meat of the PR: None of `PlanningInput`'s fields are public anymore; all access have to go through methods, and none of the methods allow mutation. 
The methods that iterate over sleds take a required `SledFilter`; making use of this revealed at least one legit bug (prior to this PR, the planner would attempt to put an NTP zone on a sled if it was in the `(SledPolicy::Expunged, SledState::Active)` state and had no NTP zone, because it was checking `SledState` but not `SledPolicy`). * There is a separate `PlanningInputBuilder` that allows mutation; all the unit tests go through this. For tests that want to take a `PlanningInput` and modify it, they use the pattern `PlanningInput::into_builder()` -> mutate via builder as desired -> `PlanningInputBuilder::build()`. * I used a `TypedUuid` in `PlanningInput{,Builder}`. I did not change all the other sled IDs, so this diff is littered with `TODO-cleanup`s where I'm having to convert between typed and untyped. This is already big enough to review, so my preference is to do this in a followup PR. * `Policy` no longer includes the `sleds`; it's conceptually more like "system-level policy". Currently it only includes the service IP pool ranges and the target Nexus count. * I broke `SledResources` up a bit, removing the sled state and policy. The resources+policy+state are combined in `SledDetails`. (This was just for clarity, because seeing "policy" under "resources" seems confusing, but if I'm in the minority it'd be easy to change this back.) * Internal and external DNS versions are now embedded in `PlanningInput` instead of being separate arguments to `Planner::new_based_on()`. They _are_ still separate arguments to `BlueprintBuilder::build_initial_from_collection()`, because that doesn't take a `PlanningInput` (but it still wants the DNS versions). I think this is fine, especially since we plan to drop `build_initial_from_collection()` in the next few weeks anyway. I think this is worth doing for the `SledFilter`-makes-it-harder-to-write-bugs alone, but hopefully it also hits some of Dave's goals with reducing churn as we modify `PlanningInput` in the future. 
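To make the intended call pattern concrete, here is a minimal sketch: a read-only `PlanningInput`, mutation only through the builder, and an explicit `SledFilter` on every sled iterator. The type and method names (`into_builder`, `set_internal_dns_version`, `build`, `all_sled_ids`, `SledFilter::All`) are the ones this PR introduces, but the surrounding scaffolding and the particular mutation shown are illustrative assumptions, not code from the PR:

```rust
use nexus_types::deployment::{PlanningInput, SledFilter};
use omicron_common::api::external::Generation;

fn tweak_and_inspect(input: PlanningInput) -> PlanningInput {
    // `PlanningInput` itself exposes no mutation; changes go through the
    // builder and produce a new input.
    let mut builder = input.into_builder();
    builder.set_internal_dns_version(Generation::new());
    let input = builder.build();

    // Sled iterators take an explicit `SledFilter`, so callers must say
    // which sleds they mean instead of implicitly getting all of them.
    for sled_id in input.all_sled_ids(SledFilter::All) {
        eprintln!("sled in planning input: {sled_id}");
    }

    input
}
```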
--- Cargo.lock | 3 + dev-tools/omdb/tests/test_all_output.rs | 11 +- dev-tools/reconfigurator-cli/src/main.rs | 197 +++++---- .../reconfigurator-cli/tests/test_basic.rs | 9 +- .../db-queries/src/db/datastore/deployment.rs | 112 +++-- nexus/reconfigurator/execution/Cargo.toml | 1 + nexus/reconfigurator/execution/src/dns.rs | 93 ++-- .../planning/src/blueprint_builder.rs | 110 +++-- nexus/reconfigurator/planning/src/example.rs | 64 +-- nexus/reconfigurator/planning/src/planner.rs | 244 +++++------ nexus/reconfigurator/planning/src/system.rs | 67 ++- .../output/planner_nonprovisionable_1_2.txt | 11 +- .../output/planner_nonprovisionable_2_2a.txt | 11 +- .../output/planner_nonprovisionable_bp2.txt | 11 +- nexus/reconfigurator/preparation/Cargo.toml | 2 + nexus/reconfigurator/preparation/src/lib.rs | 182 +++++--- nexus/src/app/deployment.rs | 122 ++---- nexus/types/src/deployment.rs | 124 +----- nexus/types/src/deployment/planning_input.rs | 397 ++++++++++++++++++ nexus/types/src/external_api/views.rs | 11 + uuid-kinds/src/lib.rs | 1 + 21 files changed, 1072 insertions(+), 711 deletions(-) create mode 100644 nexus/types/src/deployment/planning_input.rs diff --git a/Cargo.lock b/Cargo.lock index 6437e9dcff..f8f52010f1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4751,6 +4751,7 @@ dependencies = [ "omicron-nexus", "omicron-rpaths", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "pq-sys", "reqwest", @@ -4799,7 +4800,9 @@ dependencies = [ "nexus-db-queries", "nexus-types", "omicron-common", + "omicron-uuid-kinds", "omicron-workspace-hack", + "slog", ] [[package]] diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 4a9802eee6..2c16cc1482 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -9,6 +9,7 @@ use expectorate::assert_contents; use nexus_test_utils_macros::nexus_test; +use nexus_types::deployment::SledFilter; use nexus_types::deployment::UnstableReconfiguratorState; use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::redact_variable; @@ -136,8 +137,14 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { InlineErrorChain::new(&error), ) }); - assert!(parsed.policy.sleds.len() > 0); - assert!(parsed.collections.len() > 0); + // Did we find at least one sled in the planning input, and at least one + // collection? 
+ assert!(parsed + .planning_input + .all_sled_ids(SledFilter::All) + .next() + .is_some()); + assert!(!parsed.collections.is_empty()); gwtestctx.teardown().await; } diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 08755a4537..24174b9e4f 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -23,6 +23,7 @@ use nexus_reconfigurator_planning::system::{ use nexus_types::deployment::ExternalIp; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::ServiceNetworkInterface; +use nexus_types::deployment::SledFilter; use nexus_types::deployment::{Blueprint, UnstableReconfiguratorState}; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::inventory::Collection; @@ -30,6 +31,7 @@ use nexus_types::inventory::OmicronZonesConfig; use nexus_types::inventory::SledRole; use omicron_common::api::external::Generation; use omicron_common::api::external::Name; +use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::{GenericUuid, OmicronZoneKind, TypedUuid}; use reedline::{Reedline, Signal}; use std::cell::RefCell; @@ -111,15 +113,43 @@ impl ReconfiguratorSim { &self, parent_blueprint: &Blueprint, ) -> anyhow::Result { - let policy = self.system.to_policy().context("generating policy")?; - let service_external_ips = parent_blueprint - .all_omicron_zones() - .filter_map(|(_, zone)| { - let Ok(Some(ip)) = zone.zone_type.external_ip() else { - return None; - }; - let service_id = - TypedUuid::::from_untyped_uuid(zone.id); + let mut builder = self + .system + .to_planning_input_builder() + .context("generating planning input builder")?; + + // The internal and external DNS numbers that go here are supposed to be + // the _current_ internal and external DNS generations at the point + // when planning happened. This is racy (these generations can change + // immediately after they're fetched from the database) but correctness + // only requires that the values here be *no newer* than the real + // values so it's okay if the real values get changed. + // + // The problem is we have no real system here to fetch these values + // from. What should the value be? + // + // - If we assume that the parent blueprint here was successfully + // executed immediately before generating this plan, then the values + // here should come from the generation number produced by executing + // the parent blueprint. + // + // - If the parent blueprint was never executed, or execution is still + // in progress, or if other blueprints have been executed in the + // meantime that changed DNS, then the values here could be different + // (older if the blueprint was never executed or is currently + // executing and newer if other blueprints have changed DNS in the + // meantime). + // + // But in this CLI, there's no execution at all. As a result, there's + // no way to really choose between these -- and it doesn't really + // matter, either. We'll just pick the parent blueprint's. 
+ builder.set_internal_dns_version(parent_blueprint.internal_dns_version); + builder.set_external_dns_version(parent_blueprint.external_dns_version); + + for (_, zone) in parent_blueprint.all_omicron_zones() { + let zone_id = + TypedUuid::::from_untyped_uuid(zone.id); + if let Ok(Some(ip)) = zone.zone_type.external_ip() { let external_ip = ExternalIp { id: *self .external_ips @@ -128,15 +158,11 @@ impl ReconfiguratorSim { .or_insert_with(Uuid::new_v4), ip: ip.into(), }; - Some((service_id, external_ip)) - }) - .collect(); - let service_nics = parent_blueprint - .all_omicron_zones() - .filter_map(|(_, zone)| { - let nic = zone.zone_type.service_vnic()?; - let service_id = - TypedUuid::::from_untyped_uuid(zone.id); + builder + .add_omicron_zone_external_ip(zone_id, external_ip) + .context("adding omicron zone external IP")?; + } + if let Some(nic) = zone.zone_type.service_vnic() { let nic = ServiceNetworkInterface { id: nic.id, mac: nic.mac, @@ -144,10 +170,12 @@ impl ReconfiguratorSim { slot: nic.slot, primary: nic.primary, }; - Some((service_id, nic)) - }) - .collect(); - Ok(PlanningInput { policy, service_external_ips, service_nics }) + builder + .add_omicron_zone_nic(zone_id, nic) + .context("adding omicron zone NIC")?; + } + } + Ok(builder.build()) } } @@ -381,7 +409,7 @@ struct SledAddArgs { #[derive(Debug, Args)] struct SledArgs { /// id of the sled - sled_id: Uuid, + sled_id: TypedUuid, } #[derive(Debug, Args)] @@ -545,17 +573,23 @@ fn cmd_sled_list( #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct Sled { - id: Uuid, + id: TypedUuid, nzpools: usize, subnet: String, } - let policy = sim.system.to_policy().context("failed to generate policy")?; - let rows = policy.sleds.iter().map(|(sled_id, sled_resources)| Sled { - id: *sled_id, - subnet: sled_resources.subnet.net().to_string(), - nzpools: sled_resources.zpools.len(), - }); + let planning_input = sim + .system + .to_planning_input_builder() + .context("failed to generate planning input")? + .build(); + let rows = planning_input.all_sled_resources(SledFilter::All).map( + |(sled_id, sled_resources)| Sled { + id: sled_id, + subnet: sled_resources.subnet.net().to_string(), + nzpools: sled_resources.zpools.len(), + }, + ); let table = tabled::Table::new(rows) .with(tabled::settings::Style::empty()) .with(tabled::settings::Padding::new(0, 1, 0, 0)) @@ -580,12 +614,15 @@ fn cmd_sled_show( sim: &mut ReconfiguratorSim, args: SledArgs, ) -> anyhow::Result> { - let policy = sim.system.to_policy().context("failed to generate policy")?; + let planning_input = sim + .system + .to_planning_input_builder() + .context("failed to generate planning_input builder")? + .build(); let sled_id = args.sled_id; - let sled_resources = policy - .sleds - .get(&sled_id) - .ok_or_else(|| anyhow!("no sled with id {:?}", sled_id))?; + let sled_resources = planning_input + .sled_resources(&sled_id) + .ok_or_else(|| anyhow!("no sled with id {sled_id}"))?; let mut s = String::new(); swriteln!(s, "sled {}", sled_id); swriteln!(s, "subnet {}", sled_resources.subnet.net()); @@ -632,12 +669,13 @@ fn cmd_inventory_generate( sim.system.to_collection_builder().context("generating inventory")?; // For an inventory we just generated from thin air, pretend like each sled // has no zones on it. 
- let sled_ids = sim.system.to_policy().unwrap().sleds.into_keys(); - for sled_id in sled_ids { + let planning_input = + sim.system.to_planning_input_builder().unwrap().build(); + for sled_id in planning_input.all_sled_ids(SledFilter::All) { builder .found_sled_omicron_zones( "fake sled agent", - sled_id, + *sled_id.as_untyped_uuid(), OmicronZonesConfig { generation: Generation::new(), zones: vec![], @@ -684,13 +722,17 @@ fn cmd_blueprint_from_inventory( .get(&collection_id) .ok_or_else(|| anyhow!("no such collection: {}", collection_id))?; let dns_version = Generation::new(); - let policy = sim.system.to_policy().context("generating policy")?; + let planning_input = sim + .system + .to_planning_input_builder() + .context("generating planning_input builder")? + .build(); let creator = "reconfigurator-sim"; let blueprint = BlueprintBuilder::build_initial_from_collection( collection, dns_version, dns_version, - &policy, + planning_input.all_sled_ids(SledFilter::All), creator, ) .context("building collection")?; @@ -718,33 +760,6 @@ fn cmd_blueprint_plan( let planner = Planner::new_based_on( sim.log.clone(), parent_blueprint, - // The internal and external DNS numbers that go here are supposed to be - // the _current_ internal and external DNS generations at the point - // when planning happened. This is racy (these generations can change - // immediately after they're fetched from the database) but correctness - // only requires that the values here be *no newer* than the real - // values so it's okay if the real values get changed. - // - // The problem is we have no real system here to fetch these values - // from. What should the value be? - // - // - If we assume that the parent blueprint here was successfully - // executed immediately before generating this plan, then the values - // here should come from the generation number produced by executing - // the parent blueprint. - // - // - If the parent blueprint was never executed, or execution is still - // in progress, or if other blueprints have been executed in the - // meantime that changed DNS, then the values here could be different - // (older if the blueprint was never executed or is currently - // executing and newer if other blueprints have changed DNS in the - // meantime). - // - // But in this CLI, there's no execution at all. As a result, there's - // no way to really choose between these -- and it doesn't really - // matter, either. We'll just pick the parent blueprint's. - parent_blueprint.internal_dns_version, - parent_blueprint.external_dns_version, &planning_input, creator, collection, @@ -769,9 +784,7 @@ fn cmd_blueprint_edit( let planning_input = sim.planning_input(blueprint)?; let mut builder = BlueprintBuilder::new_based_on( &sim.log, - &blueprint, - blueprint.internal_dns_version, - blueprint.external_dns_version, + blueprint, &planning_input, creator, ) @@ -965,9 +978,13 @@ fn cmd_save( sim: &mut ReconfiguratorSim, args: SaveArgs, ) -> anyhow::Result> { - let policy = sim.system.to_policy().context("creating policy")?; + let planning_input = sim + .system + .to_planning_input_builder() + .context("creating planning input builder")? 
+ .build(); let saved = UnstableReconfiguratorState { - policy, + planning_input, collections: sim.collections.values().cloned().collect(), blueprints: sim.blueprints.values().cloned().collect(), internal_dns: sim.internal_dns.clone(), @@ -982,7 +999,7 @@ fn cmd_save( std::fs::write(&output_path, &output_str) .with_context(|| format!("write {:?}", output_path))?; Ok(Some(format!( - "saved policy, collections, and blueprints to {:?}", + "saved planning input, collections, and blueprints to {:?}", output_path ))) } @@ -1113,9 +1130,15 @@ fn cmd_load( }, )?; - let current_policy = sim.system.to_policy().context("generating policy")?; - for (sled_id, sled_resources) in loaded.policy.sleds { - if current_policy.sleds.contains_key(&sled_id) { + let current_planning_input = sim + .system + .to_planning_input_builder() + .context("generating planning input")? + .build(); + for (sled_id, sled_details) in + loaded.planning_input.all_sleds(SledFilter::All) + { + if current_planning_input.sled_resources(&sled_id).is_some() { swriteln!( s, "sled {}: skipped (one with \ @@ -1126,7 +1149,7 @@ fn cmd_load( } let Some(inventory_sled_agent) = - primary_collection.sled_agents.get(&sled_id) + primary_collection.sled_agents.get(sled_id.as_untyped_uuid()) else { swriteln!( s, @@ -1155,8 +1178,9 @@ fn cmd_load( ); let result = sim.system.sled_full( - sled_id, - sled_resources, + *sled_id.as_untyped_uuid(), + sled_details.policy, + sled_details.resources.clone(), inventory_sp, inventory_sled_agent, ); @@ -1200,9 +1224,14 @@ fn cmd_load( } } - let ranges = format!("{:?}", loaded.policy.service_ip_pool_ranges); - sim.system.service_ip_pool_ranges(loaded.policy.service_ip_pool_ranges); - swriteln!(s, "loaded service IP pool ranges: {:?}", ranges); + sim.system.service_ip_pool_ranges( + loaded.planning_input.service_ip_pool_ranges().to_vec(), + ); + swriteln!( + s, + "loaded service IP pool ranges: {:?}", + loaded.planning_input.service_ip_pool_ranges() + ); sim.internal_dns = loaded.internal_dns; sim.external_dns = loaded.external_dns; @@ -1231,7 +1260,9 @@ fn cmd_file_contents(args: FileContentsArgs) -> anyhow::Result> { let mut s = String::new(); - for (sled_id, sled_resources) in loaded.policy.sleds { + for (sled_id, sled_resources) in + loaded.planning_input.all_sled_resources(SledFilter::All) + { swriteln!( s, "sled: {} (subnet: {}, zpools: {})", diff --git a/dev-tools/reconfigurator-cli/tests/test_basic.rs b/dev-tools/reconfigurator-cli/tests/test_basic.rs index 19522bace6..5502d954b4 100644 --- a/dev-tools/reconfigurator-cli/tests/test_basic.rs +++ b/dev-tools/reconfigurator-cli/tests/test_basic.rs @@ -20,6 +20,8 @@ use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::redact_variable; use omicron_test_utils::dev::test_cmds::run_command; use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; +use omicron_uuid_kinds::SledKind; +use omicron_uuid_kinds::TypedUuid; use slog::debug; use std::io::BufReader; use std::io::BufWriter; @@ -29,7 +31,6 @@ use std::time::Duration; use subprocess::Exec; use swrite::swriteln; use swrite::SWrite; -use uuid::Uuid; fn path_to_cli() -> PathBuf { path_to_executable(env!("CARGO_BIN_EXE_reconfigurator-cli")) @@ -118,9 +119,9 @@ async fn test_blueprint_edit(cptestctx: &ControlPlaneTestContext) { .expect("failed to assemble reconfigurator state"); // Smoke check the initial state. 
- let sled_id: Uuid = SLED_AGENT_UUID.parse().unwrap(); - assert!(state1.policy.sleds.contains_key(&sled_id)); - assert!(!state1.policy.service_ip_pool_ranges.is_empty()); + let sled_id: TypedUuid = SLED_AGENT_UUID.parse().unwrap(); + assert!(state1.planning_input.sled_resources(&sled_id).is_some()); + assert!(!state1.planning_input.service_ip_pool_ranges().is_empty()); assert!(!state1.silo_names.is_empty()); assert!(!state1.external_dns_zone_names.is_empty()); // We waited for the first inventory collection already. diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index b04dc9a03d..4b9b473bbd 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1183,7 +1183,10 @@ mod tests { use nexus_reconfigurator_planning::blueprint_builder::Ensure; use nexus_test_utils::db::test_setup_database; use nexus_types::deployment::PlanningInput; + use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; + use nexus_types::deployment::SledDetails; + use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; @@ -1191,21 +1194,16 @@ mod tests { use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::Generation; use omicron_test_utils::dev; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::TypedUuid; use pretty_assertions::assert_eq; use rand::thread_rng; use rand::Rng; use std::mem; use std::net::Ipv6Addr; - static EMPTY_PLANNING_INPUT: PlanningInput = PlanningInput { - policy: Policy { - sleds: BTreeMap::new(), - service_ip_pool_ranges: Vec::new(), - target_nexus_zone_count: 0, - }, - service_external_ips: BTreeMap::new(), - service_nics: BTreeMap::new(), - }; + static EMPTY_PLANNING_INPUT: PlanningInput = + PlanningInputBuilder::empty_input(); // This is a not-super-future-maintainer-friendly helper to check that all // the subtables related to blueprints have been pruned of a specific @@ -1244,9 +1242,9 @@ mod tests { } } - // Create a fake set of `SledResources`, either with a subnet matching + // Create a fake set of `SledDetails`, either with a subnet matching // `ip` or with an arbitrary one. - fn fake_sled_resources(ip: Option) -> SledResources { + fn fake_sled_details(ip: Option) -> SledDetails { use illumos_utils::zpool::ZpoolName; let zpools = (0..4) .map(|_| { @@ -1255,31 +1253,17 @@ mod tests { }) .collect(); let ip = ip.unwrap_or_else(|| thread_rng().gen::().into()); - SledResources { + let resources = SledResources { zpools, subnet: Ipv6Subnet::new(ip) }; + SledDetails { policy: SledPolicy::provisionable(), state: SledState::Active, - zpools, - subnet: Ipv6Subnet::new(ip), + resources, } } // Create a `Policy` that contains all the sleds found in `collection` fn policy_from_collection(collection: &Collection) -> Policy { Policy { - sleds: collection - .sled_agents - .iter() - .map(|(sled_id, agent)| { - // `Collection` doesn't currently hold zpool names, so - // we'll construct fake resources for each sled. 
- ( - *sled_id, - fake_sled_resources(Some( - *agent.sled_agent_address.ip(), - )), - ) - }) - .collect(), service_ip_pool_ranges: Vec::new(), target_nexus_zone_count: collection .all_omicron_zones() @@ -1312,16 +1296,29 @@ mod tests { } let policy = policy_from_collection(&collection); - let planning_input = PlanningInput { - policy, - service_external_ips: BTreeMap::new(), - service_nics: BTreeMap::new(), + let planning_input = { + let mut builder = PlanningInputBuilder::new( + policy, + Generation::new(), + Generation::new(), + ); + for (sled_id, agent) in &collection.sled_agents { + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = TypedUuid::from_untyped_uuid(*sled_id); + builder + .add_sled( + sled_id, + fake_sled_details(Some(*agent.sled_agent_address.ip())), + ) + .expect("failed to add sled to representative"); + } + builder.build() }; let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), Generation::new(), - &planning_input.policy, + planning_input.all_sled_ids(SledFilter::All), "test", ) .unwrap(); @@ -1356,7 +1353,7 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_PLANNING_INPUT.policy, + std::iter::empty(), "test", ) .unwrap(); @@ -1412,7 +1409,7 @@ mod tests { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create a cohesive representative collection/policy/blueprint - let (collection, mut planning_input, blueprint1) = representative(); + let (collection, planning_input, blueprint1) = representative(); let authz_blueprint1 = authz_blueprint_from_id(blueprint1.id); // Write it to the database and read it back. @@ -1433,7 +1430,7 @@ mod tests { // Check the number of blueprint elements against our collection. assert_eq!( blueprint1.blueprint_zones.len(), - planning_input.policy.sleds.len() + planning_input.all_sled_ids(SledFilter::All).count(), ); assert_eq!( blueprint1.blueprint_zones.len(), @@ -1474,24 +1471,31 @@ mod tests { "unexpected error: {err}" ); - // Add a new sled to `policy`. - let new_sled_id = Uuid::new_v4(); - planning_input - .policy - .sleds - .insert(new_sled_id, fake_sled_resources(None)); - let new_sled_zpools = - &planning_input.policy.sleds.get(&new_sled_id).unwrap().zpools; + // Add a new sled. + let new_sled_id = TypedUuid::new_v4(); - // Create a builder for a child blueprint. While we're at it, use a - // different DNS version to test that that works. + // While we're at it, use a different DNS version to test that that + // works. let new_internal_dns_version = blueprint1.internal_dns_version.next(); let new_external_dns_version = new_internal_dns_version.next(); + let planning_input = { + let mut builder = planning_input.into_builder(); + builder + .add_sled(new_sled_id, fake_sled_details(None)) + .expect("failed to add sled"); + builder.set_internal_dns_version(new_internal_dns_version); + builder.set_external_dns_version(new_external_dns_version); + builder.build() + }; + let new_sled_zpools = + &planning_input.sled_resources(&new_sled_id).unwrap().zpools; + // TODO-cleanup use `TypedUuid` everywhere + let new_sled_id = *new_sled_id.as_untyped_uuid(); + + // Create a builder for a child blueprint. 
let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &blueprint1, - new_internal_dns_version, - new_external_dns_version, &planning_input, "test", ) @@ -1638,15 +1642,13 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_PLANNING_INPUT.policy, + std::iter::empty(), "test1", ) .unwrap(); let blueprint2 = BlueprintBuilder::new_based_on( &logctx.log, &blueprint1, - Generation::new(), - Generation::new(), &EMPTY_PLANNING_INPUT, "test2", ) @@ -1655,8 +1657,6 @@ mod tests { let blueprint3 = BlueprintBuilder::new_based_on( &logctx.log, &blueprint1, - Generation::new(), - Generation::new(), &EMPTY_PLANNING_INPUT, "test3", ) @@ -1753,8 +1753,6 @@ mod tests { let blueprint4 = BlueprintBuilder::new_based_on( &logctx.log, &blueprint3, - Generation::new(), - Generation::new(), &EMPTY_PLANNING_INPUT, "test3", ) @@ -1795,15 +1793,13 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_PLANNING_INPUT.policy, + std::iter::empty(), "test1", ) .unwrap(); let blueprint2 = BlueprintBuilder::new_based_on( &logctx.log, &blueprint1, - Generation::new(), - Generation::new(), &EMPTY_PLANNING_INPUT, "test2", ) diff --git a/nexus/reconfigurator/execution/Cargo.toml b/nexus/reconfigurator/execution/Cargo.toml index b479ae67ee..7e57107c43 100644 --- a/nexus/reconfigurator/execution/Cargo.toml +++ b/nexus/reconfigurator/execution/Cargo.toml @@ -19,6 +19,7 @@ nexus-db-queries.workspace = true nexus-networking.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true reqwest.workspace = true sled-agent-client.workspace = true slog.workspace = true diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 420a1ec84f..3cb963ae62 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -500,7 +500,7 @@ mod test { use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; use nexus_reconfigurator_planning::example::example; - use nexus_reconfigurator_preparation::policy_from_db; + use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_test_utils::resource_helpers::create_silo; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::Blueprint; @@ -509,14 +509,11 @@ mod test { use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneType; - use nexus_types::deployment::PlanningInput; - use nexus_types::deployment::Policy; + use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; use nexus_types::external_api::params; use nexus_types::external_api::shared; - use nexus_types::external_api::views::SledPolicy; - use nexus_types::external_api::views::SledState; use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::internal_api::params::DnsConfigZone; @@ -532,6 +529,8 @@ mod test { use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::TypedUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; @@ -549,16 +548,11 @@ mod test { fn blueprint_empty() -> Blueprint { let builder = 
CollectionBuilder::new("test-suite"); let collection = builder.build(); - let policy = Policy { - sleds: BTreeMap::new(), - service_ip_pool_ranges: vec![], - target_nexus_zone_count: 3, - }; BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), Generation::new(), - &policy, + std::iter::empty(), "test-suite", ) .expect("failed to generate empty blueprint") @@ -613,8 +607,6 @@ mod test { .zip(possible_sled_subnets) .map(|(sled_id, subnet)| { let sled_resources = SledResources { - policy: SledPolicy::provisionable(), - state: SledState::Active, zpools: BTreeSet::from([ZpoolName::from_str(&format!( "oxp_{}", Uuid::new_v4() @@ -624,13 +616,8 @@ mod test { }; (*sled_id, sled_resources) }) - .collect(); + .collect::>(); - let policy = Policy { - sleds: policy_sleds, - service_ip_pool_ranges: vec![], - target_nexus_zone_count: 3, - }; let dns_empty = dns_config_empty(); let initial_dns_generation = Generation::from(u32::try_from(dns_empty.generation).unwrap()); @@ -638,7 +625,10 @@ mod test { &collection, initial_dns_generation, Generation::new(), - &policy, + policy_sleds.keys().map(|sled_id| { + // TODO-cleanup use `TypedUuid` everywhere + TypedUuid::from_untyped_uuid(*sled_id) + }), "test-suite", ) .expect("failed to build initial blueprint"); @@ -668,8 +658,7 @@ mod test { // To generate the blueprint's DNS config, we need to make up a // different set of information about the Quiesced fake system. - let sleds_by_id = policy - .sleds + let sleds_by_id = policy_sleds .iter() .enumerate() .map(|(i, (sled_id, sled_resources))| { @@ -729,7 +718,7 @@ mod test { .iter() .filter_map(|(sled_id, sled)| { if sled.is_scrimlet { - let sled_subnet = policy.sleds.get(sled_id).unwrap().subnet; + let sled_subnet = policy_sleds.get(sled_id).unwrap().subnet; let switch_zone_ip = get_switch_zone_address(sled_subnet); Some((switch_zone_ip, *sled_id)) } else { @@ -871,7 +860,7 @@ mod test { &collection, Generation::new(), initial_external_dns_generation, - &input.policy, + input.all_sled_ids(SledFilter::All), "test suite", ) .expect("failed to generate initial blueprint"); @@ -1203,35 +1192,41 @@ mod test { .await .unwrap() }; - let mut policy = policy_from_db( - &sled_rows, - &zpool_rows, - &ip_pool_range_rows, - // This is not used because we're not actually going through the - // planner. - NEXUS_REDUNDANCY, - ) - .unwrap(); - // We'll need another (fake) external IP for this new Nexus. - policy - .service_ip_pool_ranges - .push(IpRange::from(IpAddr::V4(Ipv4Addr::LOCALHOST))); - let planning_input = PlanningInput { - policy, - // These are not used because we're not actually going through the - // planner. - service_external_ips: BTreeMap::new(), - service_nics: BTreeMap::new(), + let planning_input = { + let mut builder = PlanningInputFromDb { + sled_rows: &sled_rows, + zpool_rows: &zpool_rows, + ip_pool_range_rows: &ip_pool_range_rows, + internal_dns_version: Generation::from( + u32::try_from(dns_initial_internal.generation).unwrap(), + ) + .into(), + external_dns_version: Generation::from( + u32::try_from(dns_latest_external.generation).unwrap(), + ) + .into(), + // These are not used because we're not actually going through + // the planner. + external_ip_rows: &[], + service_nic_rows: &[], + target_nexus_zone_count: NEXUS_REDUNDANCY, + log, + } + .build() + .unwrap() + .into_builder(); + + // We'll need another (fake) external IP for this new Nexus. 
+ builder + .policy_mut() + .service_ip_pool_ranges + .push(IpRange::from(IpAddr::V4(Ipv4Addr::LOCALHOST))); + + builder.build() }; let mut builder = BlueprintBuilder::new_based_on( &log, &blueprint, - Generation::from( - u32::try_from(dns_initial_internal.generation).unwrap(), - ), - Generation::from( - u32::try_from(dns_latest_external.generation).unwrap(), - ), &planning_input, "test suite", ) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index 827693beb1..efc5c9ff39 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -20,7 +20,7 @@ use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; use nexus_types::deployment::OmicronZoneType; use nexus_types::deployment::PlanningInput; -use nexus_types::deployment::Policy; +use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; use nexus_types::inventory::Collection; @@ -38,6 +38,9 @@ use omicron_common::api::external::MacAddr; use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledKind; +use omicron_uuid_kinds::TypedUuid; use rand::rngs::StdRng; use rand::SeedableRng; use slog::o; @@ -111,8 +114,6 @@ pub struct BlueprintBuilder<'a> { /// previous blueprint, on which this one will be based parent_blueprint: &'a Blueprint, - internal_dns_version: Generation, - external_dns_version: Generation, // These fields are used to allocate resources from sleds. input: &'a PlanningInput, @@ -145,14 +146,14 @@ impl<'a> BlueprintBuilder<'a> { collection: &Collection, internal_dns_version: Generation, external_dns_version: Generation, - policy: &Policy, + all_sleds: impl Iterator>, creator: &str, ) -> Result { Self::build_initial_impl( collection, internal_dns_version, external_dns_version, - policy, + all_sleds, creator, BlueprintBuilderRng::new(), ) @@ -164,7 +165,7 @@ impl<'a> BlueprintBuilder<'a> { collection: &Collection, internal_dns_version: Generation, external_dns_version: Generation, - policy: &Policy, + all_sleds: impl Iterator>, creator: &str, seed: H, ) -> Result { @@ -174,7 +175,7 @@ impl<'a> BlueprintBuilder<'a> { collection, internal_dns_version, external_dns_version, - policy, + all_sleds, creator, rng, ) @@ -184,17 +185,15 @@ impl<'a> BlueprintBuilder<'a> { collection: &Collection, internal_dns_version: Generation, external_dns_version: Generation, - policy: &Policy, + all_sleds: impl Iterator>, creator: &str, mut rng: BlueprintBuilderRng, ) -> Result { - let blueprint_zones = policy - .sleds - .keys() + let blueprint_zones = all_sleds .map(|sled_id| { let zones = collection .omicron_zones - .get(sled_id) + .get(sled_id.as_untyped_uuid()) .map(|z| &z.zones) .ok_or_else(|| { // We should not find a sled that's supposed to be @@ -218,7 +217,7 @@ impl<'a> BlueprintBuilder<'a> { })?; Ok(( - *sled_id, + *sled_id.as_untyped_uuid(), BlueprintZonesConfig::initial_from_collection(&zones), )) }) @@ -240,8 +239,6 @@ impl<'a> BlueprintBuilder<'a> { pub fn new_based_on( log: &Logger, parent_blueprint: &'a Blueprint, - internal_dns_version: Generation, - external_dns_version: Generation, input: &'a PlanningInput, creator: &str, ) -> anyhow::Result> { @@ -343,8 +340,7 @@ impl<'a> BlueprintBuilder<'a> { ); let 
available_external_ips = Box::new( input - .policy - .service_ip_pool_ranges + .service_ip_pool_ranges() .iter() .flat_map(|r| r.iter()) .filter(move |ip| !used_external_ips.contains(ip)), @@ -356,8 +352,6 @@ impl<'a> BlueprintBuilder<'a> { Ok(BlueprintBuilder { log, parent_blueprint, - internal_dns_version, - external_dns_version, input, sled_ip_allocators: BTreeMap::new(), zones: BlueprintZonesBuilder::new(parent_blueprint), @@ -373,15 +367,16 @@ impl<'a> BlueprintBuilder<'a> { /// Assemble a final [`Blueprint`] based on the contents of the builder pub fn build(mut self) -> Blueprint { - // Collect the Omicron zones config for each in-service sled. + // Collect the Omicron zones config for all sleds, including sleds that + // are no longer in service and need expungement work. let blueprint_zones = - self.zones.into_zones_map(self.input.policy.sleds.keys().copied()); + self.zones.into_zones_map(self.input.all_sled_ids(SledFilter::All)); Blueprint { id: self.rng.blueprint_rng.next(), blueprint_zones, parent_blueprint_id: Some(self.parent_blueprint.id), - internal_dns_version: self.internal_dns_version, - external_dns_version: self.external_dns_version, + internal_dns_version: self.input.internal_dns_version(), + external_dns_version: self.input.external_dns_version(), time_created: now_db_precision(), creator: self.creator, comment: self.comments.join(", "), @@ -717,7 +712,9 @@ impl<'a> BlueprintBuilder<'a> { } fn sled_resources(&self, sled_id: Uuid) -> Result<&SledResources, Error> { - self.input.policy.sleds.get(&sled_id).ok_or_else(|| { + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = TypedUuid::from_untyped_uuid(sled_id); + self.input.sled_resources(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( "attempted to use sled that is not in service: {}", sled_id @@ -828,10 +825,12 @@ impl<'a> BlueprintZonesBuilder<'a> { /// Produces an owned map of zones for the requested sleds pub fn into_zones_map( mut self, - sled_ids: impl Iterator, + sled_ids: impl Iterator>, ) -> BTreeMap { sled_ids .map(|sled_id| { + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = *sled_id.as_untyped_uuid(); // Start with self.changed_zones, which contains entries for any // sled whose zones config is changing in this blueprint. let mut zones = self @@ -900,7 +899,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &input.policy, + input.all_sled_ids(SledFilter::All), "the_test", TEST_NAME, ) @@ -929,8 +928,6 @@ pub mod test { let builder = BlueprintBuilder::new_based_on( &logctx.log, &blueprint_initial, - Generation::new(), - Generation::new(), &input, "test_basic", ) @@ -961,8 +958,6 @@ pub mod test { let mut builder = BlueprintBuilder::new_based_on( &logctx.log, blueprint1, - Generation::new(), - Generation::new(), &example.input, "test_basic", ) @@ -971,7 +966,11 @@ pub mod test { // The example blueprint should have internal NTP zones on all the // existing sleds, plus Crucible zones on all pools. So if we ensure // all these zones exist, we should see no change. 
- for (sled_id, sled_resources) in &example.input.policy.sleds { + for (sled_id, sled_resources) in + example.input.all_sled_resources(SledFilter::All) + { + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = sled_id.as_untyped_uuid(); builder.sled_ensure_zone_ntp(*sled_id).unwrap(); for pool_name in &sled_resources.zpools { builder @@ -995,23 +994,19 @@ pub mod test { let new_sled_id = example.sled_rng.next(); let _ = example.system.sled(SledBuilder::new().id(new_sled_id)).unwrap(); - let policy = example.system.to_policy().unwrap(); - let input = PlanningInput { - policy, - service_external_ips: example.input.service_external_ips, - service_nics: example.input.service_nics, - }; + let input = example.system.to_planning_input_builder().unwrap().build(); let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &blueprint2, - Generation::new(), - Generation::new(), &input, "test_basic", ) .expect("failed to create builder"); builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); - let new_sled_resources = input.policy.sleds.get(&new_sled_id).unwrap(); + // TODO-cleanup use `TypedUuid` everywhere + let new_sled_resources = input + .sled_resources(&TypedUuid::from_untyped_uuid(new_sled_id)) + .unwrap(); for pool_name in &new_sled_resources.zpools { builder .sled_ensure_zone_crucible(new_sled_id, pool_name.clone()) @@ -1106,7 +1101,7 @@ pub mod test { &collection, internal_dns_version, external_dns_version, - &input.policy, + input.all_sled_ids(SledFilter::All), "test", TEST_NAME, ) @@ -1115,8 +1110,6 @@ pub mod test { let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &parent, - internal_dns_version, - external_dns_version, &input, "test", ) @@ -1173,9 +1166,9 @@ pub mod test { let parent = BlueprintBuilder::build_initial_from_collection_seeded( &collection, - Generation::new(), - Generation::new(), - &input.policy, + internal_dns_version, + external_dns_version, + input.all_sled_ids(SledFilter::All), "test", TEST_NAME, ) @@ -1187,8 +1180,6 @@ pub mod test { let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &parent, - internal_dns_version, - external_dns_version, &input, "test", ) @@ -1207,8 +1198,6 @@ pub mod test { let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &parent, - internal_dns_version, - external_dns_version, &input, "test", ) @@ -1224,7 +1213,6 @@ pub mod test { // Replace the policy's external service IP pool ranges with ranges // that are already in use by existing zones. Attempting to add a // Nexus with no remaining external IPs should fail. 
- let mut input = input.clone(); let mut used_ip_ranges = Vec::new(); for (_, z) in parent.all_omicron_zones() { if let Some(ip) = z @@ -1236,13 +1224,15 @@ pub mod test { } } assert!(!used_ip_ranges.is_empty()); - input.policy.service_ip_pool_ranges = used_ip_ranges; + let input = { + let mut builder = input.into_builder(); + builder.policy_mut().service_ip_pool_ranges = used_ip_ranges; + builder.build() + }; let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &parent, - internal_dns_version, - external_dns_version, &input, "test", ) @@ -1306,7 +1296,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &input.policy, + input.all_sled_ids(SledFilter::All), "test", TEST_NAME, ) @@ -1315,8 +1305,6 @@ pub mod test { match BlueprintBuilder::new_based_on( &logctx.log, &parent, - Generation::new(), - Generation::new(), &input, "test", ) { @@ -1366,7 +1354,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &input.policy, + input.all_sled_ids(SledFilter::All), "test", TEST_NAME, ) @@ -1375,8 +1363,6 @@ pub mod test { match BlueprintBuilder::new_based_on( &logctx.log, &parent, - Generation::new(), - Generation::new(), &input, "test", ) { @@ -1426,7 +1412,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &input.policy, + input.all_sled_ids(SledFilter::All), "test", TEST_NAME, ) @@ -1435,8 +1421,6 @@ pub mod test { match BlueprintBuilder::new_based_on( &logctx.log, &parent, - Generation::new(), - Generation::new(), &input, "test", ) { diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 563b3662bf..85e9d52ee8 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -12,13 +12,13 @@ use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::ExternalIp; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::ServiceNetworkInterface; +use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneKind; use omicron_uuid_kinds::TypedUuid; use sled_agent_client::types::OmicronZonesConfig; -use std::collections::BTreeMap; use typed_rng::UuidRng; use uuid::Uuid; @@ -50,14 +50,12 @@ impl ExampleSystem { let _ = system.sled(SledBuilder::new().id(*sled_id)).unwrap(); } - let policy = system.to_policy().expect("failed to make policy"); + let mut input_builder = system + .to_planning_input_builder() + .expect("failed to make planning input builder"); let mut inventory_builder = system.to_collection_builder().expect("failed to build collection"); - let mut input = PlanningInput { - policy, - service_external_ips: BTreeMap::new(), - service_nics: BTreeMap::new(), - }; + let base_input = input_builder.clone().build(); // For each sled, have it report 0 zones in the initial inventory. 
// This will enable us to build a blueprint from the initial @@ -81,7 +79,7 @@ impl ExampleSystem { &empty_zone_inventory, Generation::new(), Generation::new(), - &input.policy, + base_input.all_sled_ids(SledFilter::All), "test suite", (test_name, "ExampleSystem initial"), ) @@ -91,18 +89,20 @@ impl ExampleSystem { let mut builder = BlueprintBuilder::new_based_on( log, &initial_blueprint, - Generation::new(), - Generation::new(), - &input, + &base_input, "test suite", ) .unwrap(); builder.set_rng_seed((test_name, "ExampleSystem make_zones")); - for (sled_id, sled_resources) in &input.policy.sleds { - let _ = builder.sled_ensure_zone_ntp(*sled_id).unwrap(); + for (sled_id, sled_resources) in + base_input.all_sled_resources(SledFilter::All) + { + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = *sled_id.as_untyped_uuid(); + let _ = builder.sled_ensure_zone_ntp(sled_id).unwrap(); let _ = builder .sled_ensure_zone_multiple_nexus_with_config( - *sled_id, + sled_id, 1, false, vec![], @@ -110,7 +110,7 @@ impl ExampleSystem { .unwrap(); for pool_name in &sled_resources.zpools { let _ = builder - .sled_ensure_zone_crucible(*sled_id, pool_name.clone()) + .sled_ensure_zone_crucible(sled_id, pool_name.clone()) .unwrap(); } } @@ -128,22 +128,26 @@ impl ExampleSystem { let service_id = TypedUuid::::from_untyped_uuid(zone.id); if let Ok(Some(ip)) = zone.zone_type.external_ip() { - input.service_external_ips.insert( - service_id, - ExternalIp { id: Uuid::new_v4(), ip: ip.into() }, - ); + input_builder + .add_omicron_zone_external_ip( + service_id, + ExternalIp { id: Uuid::new_v4(), ip: ip.into() }, + ) + .expect("failed to add Omicron zone external IP"); } if let Some(nic) = zone.zone_type.service_vnic() { - input.service_nics.insert( - service_id, - ServiceNetworkInterface { - id: nic.id, - mac: nic.mac, - ip: nic.ip.into(), - slot: nic.slot, - primary: nic.primary, - }, - ); + input_builder + .add_omicron_zone_nic( + service_id, + ServiceNetworkInterface { + id: nic.id, + mac: nic.mac, + ip: nic.ip.into(), + slot: nic.slot, + primary: nic.primary, + }, + ) + .expect("failed to add Omicron zone NIC"); } } builder @@ -159,7 +163,7 @@ impl ExampleSystem { ExampleSystem { system, - input, + input: input_builder.build(), collection: builder.build(), blueprint, sled_rng, diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 280ac61ede..afd32be7d4 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -12,9 +12,11 @@ use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; use nexus_types::deployment::PlanningInput; -use nexus_types::external_api::views::SledState; +use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; -use omicron_common::api::external::Generation; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledKind; +use omicron_uuid_kinds::TypedUuid; use slog::{info, warn, Logger}; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -41,8 +43,6 @@ impl<'a> Planner<'a> { pub fn new_based_on( log: Logger, parent_blueprint: &'a Blueprint, - internal_dns_version: Generation, - external_dns_version: Generation, input: &'a PlanningInput, creator: &str, // NOTE: Right now, we just assume that this is the latest inventory @@ -52,8 +52,6 @@ impl<'a> Planner<'a> { let blueprint = BlueprintBuilder::new_based_on( &log, parent_blueprint, - internal_dns_version, - 
external_dns_version, input, creator, )?; @@ -96,19 +94,13 @@ impl<'a> Planner<'a> { // We will not mark sleds getting Crucible zones as ineligible; other // control plane service zones starting concurrently with Crucible zones // is fine. - let mut sleds_ineligible_for_services = BTreeSet::new(); - - for (sled_id, sled_info) in &self.input.policy.sleds { - // Decommissioned sleds don't get any services. (This is an - // explicit match so that when more states are added, this fails to - // compile.) - match sled_info.state { - SledState::Decommissioned => { - sleds_ineligible_for_services.insert(*sled_id); - continue; - } - SledState::Active => {} - } + let mut sleds_waiting_for_ntp_zones = BTreeSet::new(); + + for (sled_id, sled_info) in + self.input.all_sled_resources(SledFilter::InService) + { + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = sled_id.as_untyped_uuid(); // Check for an NTP zone. Every sled should have one. If it's not // there, all we can do is provision that one zone. We have to wait @@ -125,7 +117,7 @@ impl<'a> Planner<'a> { // Don't make any other changes to this sled. However, this // change is compatible with any other changes to other sleds, // so we can "continue" here rather than "break". - sleds_ineligible_for_services.insert(*sled_id); + sleds_waiting_for_ntp_zones.insert(*sled_id); continue; } @@ -194,22 +186,8 @@ impl<'a> Planner<'a> { } } - // We've now placed all the services that should always exist on all - // sleds. Before moving on to make decisions about placing services that - // are _not_ present on all sleds, check the provision state of all our - // sleds so we can avoid any non-provisionable sleds under the - // assumption that there is something amiss with them. - sleds_ineligible_for_services.extend( - self.input.policy.sleds.iter().filter_map( - |(sled_id, sled_info)| { - (!sled_info.is_eligible_for_discretionary_services()) - .then_some(*sled_id) - }, - ), - ); - self.ensure_correct_number_of_nexus_zones( - &sleds_ineligible_for_services, + &sleds_waiting_for_ntp_zones, )?; Ok(()) @@ -217,23 +195,17 @@ impl<'a> Planner<'a> { fn ensure_correct_number_of_nexus_zones( &mut self, - sleds_ineligible_for_services: &BTreeSet, + sleds_waiting_for_ntp_zone: &BTreeSet, ) -> Result<(), Error> { - // Bin every sled by the number of Nexus zones it currently has while - // counting the total number of Nexus zones. + // Count the number of Nexus zones on all in-service sleds. This will + // include sleds that are in service but not eligible for new services, + // but will not include sleds that have been expunged or decommissioned. let mut num_total_nexus = 0; - let mut sleds_by_num_nexus: BTreeMap> = - BTreeMap::new(); - for &sled_id in self.input.policy.sleds.keys() { + for sled_id in self.input.all_sled_ids(SledFilter::InService) { + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = *sled_id.as_untyped_uuid(); let num_nexus = self.blueprint.sled_num_nexus_zones(sled_id); num_total_nexus += num_nexus; - - // Only bin this sled if we're allowed to use it. If we have a sled - // we're not allowed to use that's already running a Nexus (seems - // fishy!), we counted its Nexus above but will ignore it here. - if !sleds_ineligible_for_services.contains(&sled_id) { - sleds_by_num_nexus.entry(num_nexus).or_default().push(sled_id); - } } // TODO-correctness What should we do if we have _too many_ Nexus @@ -241,18 +213,36 @@ impl<'a> Planner<'a> { // at least the minimum number. 
let nexus_to_add = self .input - .policy - .target_nexus_zone_count + .target_nexus_zone_count() .saturating_sub(num_total_nexus); if nexus_to_add == 0 { info!( self.log, "sufficient Nexus zones exist in plan"; - "desired_count" => self.input.policy.target_nexus_zone_count, + "desired_count" => self.input.target_nexus_zone_count(), "current_count" => num_total_nexus, ); return Ok(()); } + // Now bin all the sleds which are eligible choices for a new Nexus zone + // by their current Nexus zone count. Skip sleds with a policy/state + // that should be eligible for Nexus but that don't yet have an NTP + // zone. + let mut sleds_by_num_nexus: BTreeMap>> = + BTreeMap::new(); + for sled_id in self + .input + .all_sled_ids(SledFilter::EligibleForDiscretionaryServices) + .filter(|sled_id| { + // TODO-cleanup use `TypedUuid` everywhere + !sleds_waiting_for_ntp_zone.contains(sled_id.as_untyped_uuid()) + }) + { + let num_nexus = + self.blueprint.sled_num_nexus_zones(*sled_id.as_untyped_uuid()); + sleds_by_num_nexus.entry(num_nexus).or_default().push(sled_id); + } + // Ensure we have at least one sled on which we can add Nexus zones. If // we don't, we have nothing else to do. This isn't a hard error, // because we might be waiting for NTP on all eligible sleds (although @@ -264,7 +254,8 @@ impl<'a> Planner<'a> { } // Build a map of sled -> new nexus zone count. - let mut sleds_to_change: BTreeMap = BTreeMap::new(); + let mut sleds_to_change: BTreeMap, usize> = + BTreeMap::new(); 'outer: for _ in 0..nexus_to_add { // `sleds_by_num_nexus` is sorted by key already, and we want to @@ -302,10 +293,10 @@ impl<'a> Planner<'a> { // For each sled we need to change, actually do so. let mut total_added = 0; for (sled_id, new_nexus_count) in sleds_to_change { - match self - .blueprint - .sled_ensure_zone_multiple_nexus(sled_id, new_nexus_count)? - { + match self.blueprint.sled_ensure_zone_multiple_nexus( + *sled_id.as_untyped_uuid(), + new_nexus_count, + )? 
{ EnsureMultiple::Added(n) => { info!( self.log, "will add {n} Nexus zone(s) to sled"; @@ -350,7 +341,7 @@ mod test { use nexus_inventory::now_db_precision; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; - use nexus_types::deployment::PlanningInput; + use nexus_types::deployment::SledFilter; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; @@ -358,6 +349,7 @@ mod test { use nexus_types::inventory::OmicronZonesFound; use omicron_common::api::external::Generation; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::GenericUuid; /// Runs through a basic sequence of blueprints for adding a sled #[test] @@ -380,7 +372,7 @@ mod test { &example.collection, internal_dns_version, external_dns_version, - &example.input.policy, + example.input.all_sled_ids(SledFilter::All), "the_test", (TEST_NAME, "bp1"), ) @@ -393,8 +385,6 @@ mod test { let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, - internal_dns_version, - external_dns_version, &example.input, "no-op?", &example.collection, @@ -415,19 +405,12 @@ mod test { let new_sled_id = example.sled_rng.next(); let _ = example.system.sled(SledBuilder::new().id(new_sled_id)).unwrap(); - let policy = example.system.to_policy().unwrap(); - let input = PlanningInput { - policy, - service_external_ips: example.input.service_external_ips, - service_nics: example.input.service_nics, - }; + let input = example.system.to_planning_input_builder().unwrap().build(); // Check that the first step is to add an NTP zone let blueprint3 = Planner::new_based_on( logctx.log.clone(), &blueprint2, - internal_dns_version, - external_dns_version, &input, "test: add NTP?", &example.collection, @@ -468,8 +451,6 @@ mod test { let blueprint4 = Planner::new_based_on( logctx.log.clone(), &blueprint3, - internal_dns_version, - external_dns_version, &input, "test: add nothing more", &example.collection, @@ -510,8 +491,6 @@ mod test { let blueprint5 = Planner::new_based_on( logctx.log.clone(), &blueprint3, - internal_dns_version, - external_dns_version, &input, "test: add Crucible zones?", &collection, @@ -552,8 +531,6 @@ mod test { let blueprint6 = Planner::new_based_on( logctx.log.clone(), &blueprint5, - internal_dns_version, - external_dns_version, &input, "test: no-op?", &collection, @@ -586,21 +563,27 @@ mod test { // Use our example inventory collection as a starting point, but strip // it down to just one sled. - let (sled_id, collection, mut input) = { - let (mut collection, mut input) = + let (sled_id, collection, input) = { + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // Pick one sled ID to keep and remove the rest. 
+ let mut builder = input.into_builder(); let keep_sled_id = - input.policy.sleds.keys().next().copied().expect("no sleds"); - input.policy.sleds.retain(|&k, _v| keep_sled_id == k); - collection.sled_agents.retain(|&k, _v| keep_sled_id == k); - collection.omicron_zones.retain(|&k, _v| keep_sled_id == k); + builder.sleds().keys().next().copied().expect("no sleds"); + builder.sleds_mut().retain(|&k, _v| keep_sled_id == k); + // TODO-cleanup use `TypedUuid` everywhere + collection + .sled_agents + .retain(|&k, _v| *keep_sled_id.as_untyped_uuid() == k); + collection + .omicron_zones + .retain(|&k, _v| *keep_sled_id.as_untyped_uuid() == k); assert_eq!(collection.sled_agents.len(), 1); assert_eq!(collection.omicron_zones.len(), 1); - (keep_sled_id, collection, input) + (keep_sled_id, collection, builder.build()) }; // Build the initial blueprint. @@ -609,7 +592,7 @@ mod test { &collection, internal_dns_version, external_dns_version, - &input.policy, + input.all_sled_ids(SledFilter::All), "the_test", (TEST_NAME, "bp1"), ) @@ -621,7 +604,7 @@ mod test { assert_eq!( blueprint1 .blueprint_zones - .get(&sled_id) + .get(sled_id.as_untyped_uuid()) .expect("missing kept sled") .zones .iter() @@ -632,12 +615,12 @@ mod test { // Now run the planner. It should add additional Nexus instances to the // one sled we have. - input.policy.target_nexus_zone_count = 5; + let mut builder = input.into_builder(); + builder.policy_mut().target_nexus_zone_count = 5; + let input = builder.build(); let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, - internal_dns_version, - external_dns_version, &input, "test_blueprint2", &collection, @@ -654,11 +637,12 @@ mod test { let mut sleds = diff.sleds_modified().collect::>(); assert_eq!(sleds.len(), 1); let (changed_sled_id, sled_changes) = sleds.pop().unwrap(); - assert_eq!(changed_sled_id, sled_id); + // TODO-cleanup use `TypedUuid` everywhere + assert_eq!(changed_sled_id, *sled_id.as_untyped_uuid()); assert_eq!(sled_changes.zones_removed().len(), 0); assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); - assert_eq!(zones.len(), input.policy.target_nexus_zone_count - 1); + assert_eq!(zones.len(), input.target_nexus_zone_count() - 1); for zone in &zones { if !zone.config.zone_type.is_nexus() { panic!("unexpectedly added a non-Nexus zone: {zone:?}"); @@ -677,7 +661,7 @@ mod test { let logctx = test_setup_log(TEST_NAME); // Use our example inventory collection as a starting point. - let (collection, mut input) = + let (collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // Build the initial blueprint. @@ -686,7 +670,7 @@ mod test { &collection, Generation::new(), Generation::new(), - &input.policy, + input.all_sled_ids(SledFilter::All), "the_test", (TEST_NAME, "bp1"), ) @@ -706,12 +690,12 @@ mod test { } // Now run the planner with a high number of target Nexus zones. - input.policy.target_nexus_zone_count = 14; + let mut builder = input.into_builder(); + builder.policy_mut().target_nexus_zone_count = 14; + let input = builder.build(); let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, - Generation::new(), - Generation::new(), &input, "test_blueprint2", &collection, @@ -769,7 +753,7 @@ mod test { // and decommissioned sleds. (When we add more kinds of // non-provisionable states in the future, we'll have to add more // sleds.) 
- let (collection, mut input) = example(&logctx.log, TEST_NAME, 5); + let (collection, input) = example(&logctx.log, TEST_NAME, 5); // Build the initial blueprint. let blueprint1 = @@ -777,7 +761,7 @@ mod test { &collection, Generation::new(), Generation::new(), - &input.policy, + input.all_sled_ids(SledFilter::All), "the_test", (TEST_NAME, "bp1"), ) @@ -798,42 +782,47 @@ mod test { // Arbitrarily choose some of the sleds and mark them non-provisionable // in various ways. - let mut sleds_iter = input.policy.sleds.iter_mut(); + let mut builder = input.into_builder(); + let mut sleds_iter = builder.sleds_mut().iter_mut(); let nonprovisionable_sled_id = { - let (sled_id, resources) = sleds_iter.next().expect("no sleds"); - resources.policy = SledPolicy::InService { + let (sled_id, details) = sleds_iter.next().expect("no sleds"); + details.policy = SledPolicy::InService { provision_policy: SledProvisionPolicy::NonProvisionable, }; - *sled_id + // TODO-cleanup use `TypedUuid` everywhere + *sled_id.as_untyped_uuid() }; + println!("1 -> 2: marked non-provisionable {nonprovisionable_sled_id}"); let expunged_sled_id = { - let (sled_id, resources) = sleds_iter.next().expect("no sleds"); - resources.policy = SledPolicy::Expunged; - *sled_id + let (sled_id, details) = sleds_iter.next().expect("no sleds"); + details.policy = SledPolicy::Expunged; + // TODO-cleanup use `TypedUuid` everywhere + *sled_id.as_untyped_uuid() }; + println!("1 -> 2: expunged {expunged_sled_id}"); let decommissioned_sled_id = { - let (sled_id, resources) = sleds_iter.next().expect("no sleds"); - resources.state = SledState::Decommissioned; - *sled_id + let (sled_id, details) = sleds_iter.next().expect("no sleds"); + details.state = SledState::Decommissioned; + // TODO-cleanup use `TypedUuid` everywhere + *sled_id.as_untyped_uuid() }; + println!("1 -> 2: decommissioned {decommissioned_sled_id}"); // Now run the planner with a high number of target Nexus zones. The - // number (16) is chosen such that: + // number (9) is chosen such that: // - // * we start with 5 sleds - // * we need to add 11 Nexus zones - // * there are two sleds eligible for provisioning - // * => 5 or 6 new Nexus zones per sled - // - // When the planner gets smarter about removing zones from expunged - // and/or removed sleds, we'll have to adjust this number. - input.policy.target_nexus_zone_count = 16; + // * we start with 5 sleds with 1 Nexus each + // * we take two sleds out of service (one expunged, one + // decommissioned), so we're down to 3 in-service Nexuses: we need to + // add 6 to get to the new policy target of 9 + // * of the remaining 3 sleds, only 2 are eligible for provisioning + // * each of those 2 sleds should get exactly 3 new Nexuses + builder.policy_mut().target_nexus_zone_count = 9; + let input = builder.build(); let mut blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, - Generation::new(), - Generation::new(), &input, "test_blueprint2", &collection, @@ -863,9 +852,8 @@ mod test { let sleds = diff.sleds_modified().collect::>(); // Only 2 of the 3 sleds should get additional Nexus zones. We expect a - // total of 12 new Nexus zones, which should be spread evenly across the - // two sleds (one gets 6 and the other gets 5), while the - // non-provisionable sled should be unchanged. + // total of 6 new Nexus zones, which should be split evenly between the + // two sleds, while the non-provisionable sled should be unchanged. 
assert_eq!(sleds.len(), 2); let mut total_new_nexus_zones = 0; for (sled_id, sled_changes) in sleds { @@ -875,22 +863,22 @@ mod test { assert_eq!(sled_changes.zones_removed().len(), 0); assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); - match zones.len() { - n @ (5 | 6) => { - total_new_nexus_zones += n; - } - n => { - panic!("unexpected number of zones added to {sled_id}: {n}") - } - } for zone in &zones { let OmicronZoneType::Nexus { .. } = zone.config.zone_type else { panic!("unexpectedly added a non-Crucible zone: {zone:?}"); }; } + if zones.len() == 3 { + total_new_nexus_zones += 3; + } else { + panic!( + "unexpected number of zones added to {sled_id}: {}", + zones.len() + ); + } } - assert_eq!(total_new_nexus_zones, 11); + assert_eq!(total_new_nexus_zones, 6); // --- diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index e224e3c6df..bbe5353b48 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -10,7 +10,9 @@ use gateway_client::types::RotState; use gateway_client::types::SpState; use indexmap::IndexMap; use nexus_inventory::CollectionBuilder; +use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; +use nexus_types::deployment::SledDetails; use nexus_types::deployment::SledResources; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; @@ -28,6 +30,9 @@ use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::ByteCount; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::TypedUuid; use std::collections::BTreeSet; use std::fmt::Debug; use std::net::Ipv4Addr; @@ -42,8 +47,8 @@ impl SubnetIterator for T where /// Describes an actual or synthetic Oxide rack for planning and testing /// -/// From this description, you can extract a `Policy` or inventory `Collection`. -/// There are a few intended purposes here: +/// From this description, you can extract a `PlanningInput` or inventory +/// `Collection`. There are a few intended purposes here: /// /// 1. 
to easily construct fake racks in automated tests for the Planner and /// other parts of Reconfigurator @@ -65,6 +70,8 @@ pub struct SystemDescription { available_scrimlet_slots: BTreeSet, target_nexus_zone_count: usize, service_ip_pool_ranges: Vec, + internal_dns_version: Generation, + external_dns_version: Generation, } impl SystemDescription { @@ -124,6 +131,8 @@ impl SystemDescription { available_scrimlet_slots, target_nexus_zone_count, service_ip_pool_ranges, + internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), } } @@ -223,6 +232,7 @@ impl SystemDescription { pub fn sled_full( &mut self, sled_id: Uuid, + sled_policy: SledPolicy, sled_resources: SledResources, inventory_sp: Option>, inventory_sled_agent: &nexus_types::inventory::SledAgent, @@ -236,6 +246,7 @@ impl SystemDescription { sled_id, Sled::new_full( sled_id, + sled_policy, sled_resources, inventory_sp, inventory_sled_agent, @@ -275,26 +286,38 @@ impl SystemDescription { Ok(builder) } - pub fn to_policy(&self) -> anyhow::Result { - let sleds = self - .sleds - .values() - .map(|sled| { - let sled_resources = SledResources { - policy: sled.policy, - state: SledState::Active, + /// Construct a [`PlanningInputBuilder`] primed with all this system's sleds + /// + /// Does not populate extra information like Omicron zone external IPs or + /// NICs. + pub fn to_planning_input_builder( + &self, + ) -> anyhow::Result { + let policy = Policy { + service_ip_pool_ranges: self.service_ip_pool_ranges.clone(), + target_nexus_zone_count: self.target_nexus_zone_count, + }; + let mut builder = PlanningInputBuilder::new( + policy, + self.internal_dns_version, + self.external_dns_version, + ); + + for sled in self.sleds.values() { + let sled_details = SledDetails { + policy: sled.policy, + state: SledState::Active, + resources: SledResources { zpools: sled.zpools.iter().cloned().collect(), subnet: sled.sled_subnet, - }; - (sled.sled_id, sled_resources) - }) - .collect(); + }, + }; + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = TypedUuid::from_untyped_uuid(sled.sled_id); + builder.add_sled(sled_id, sled_details)?; + } - Ok(Policy { - sleds, - service_ip_pool_ranges: self.service_ip_pool_ranges.clone(), - target_nexus_zone_count: self.target_nexus_zone_count, - }) + Ok(builder) } } @@ -391,7 +414,8 @@ pub struct SledHwInventory<'a> { /// Our abstract description of a `Sled` /// -/// This needs to be rich enough to generate a Policy and inventory Collection. +/// This needs to be rich enough to generate a PlanningInput and inventory +/// Collection. 
#[derive(Clone, Debug)] struct Sled { sled_id: Uuid, @@ -496,6 +520,7 @@ impl Sled { /// inventory `Collection` fn new_full( sled_id: Uuid, + sled_policy: SledPolicy, sled_resources: SledResources, inventory_sp: Option>, inv_sled_agent: &nexus_types::inventory::SledAgent, @@ -568,7 +593,7 @@ impl Sled { zpools: sled_resources.zpools.into_iter().collect(), inventory_sp, inventory_sled_agent, - policy: sled_resources.policy, + policy: sled_policy, } } diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index 380beaecf5..c19403906e 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -65,11 +65,8 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 + nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d added -+ nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:104::31 added + nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e added + nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f added -+ nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:104::30 added -+ nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:104::32 added * sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation: 2 -> 3 crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 @@ -84,11 +81,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 -+ nexus 6f86d5cb-17d7-424b-9d4c-39f670532cbe in service fd00:1122:3344:101::2e added -+ nexus 87c299eb-470e-4b6d-b8c7-6759694e66b6 in service fd00:1122:3344:101::30 added -+ nexus c72b7930-0580-4f00-93b9-8cba2c8d344e in service fd00:1122:3344:101::2d added -+ nexus d0095508-bdb8-4faf-b091-964276a20b15 in service fd00:1122:3344:101::31 added -+ nexus ff422442-4b31-4ade-a11a-9e5a25f0404c in service fd00:1122:3344:101::2f added ++ nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:101::2e added ++ nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:101::2d added ++ nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f added METADATA: internal DNS version: 1 (unchanged) diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 58fbbd26be..74dd0fbbaf 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -20,12 +20,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d - nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:104::31 nexus 
508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f - nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:104::30 nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 - nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:104::32 sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation 3 crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 @@ -40,11 +37,9 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 - nexus 6f86d5cb-17d7-424b-9d4c-39f670532cbe in service fd00:1122:3344:101::2e - nexus 87c299eb-470e-4b6d-b8c7-6759694e66b6 in service fd00:1122:3344:101::30 - nexus c72b7930-0580-4f00-93b9-8cba2c8d344e in service fd00:1122:3344:101::2d - nexus d0095508-bdb8-4faf-b091-964276a20b15 in service fd00:1122:3344:101::31 - nexus ff422442-4b31-4ade-a11a-9e5a25f0404c in service fd00:1122:3344:101::2f + nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:101::2e + nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:101::2d + nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f REMOVED SLEDS: diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt index 46920c47f3..3417089d99 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt @@ -60,12 +60,9 @@ parent: 55502b1b-e255-438b-a16a-2680a4b5f962 crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d - nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:104::31 nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f - nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:104::30 nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 - nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:104::32 sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation 3 crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 @@ -80,11 +77,9 @@ parent: 55502b1b-e255-438b-a16a-2680a4b5f962 crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 - nexus 6f86d5cb-17d7-424b-9d4c-39f670532cbe in service fd00:1122:3344:101::2e - nexus 87c299eb-470e-4b6d-b8c7-6759694e66b6 in service fd00:1122:3344:101::30 - nexus c72b7930-0580-4f00-93b9-8cba2c8d344e in service fd00:1122:3344:101::2d - nexus d0095508-bdb8-4faf-b091-964276a20b15 in service fd00:1122:3344:101::31 - nexus ff422442-4b31-4ade-a11a-9e5a25f0404c in service fd00:1122:3344:101::2f + nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:101::2e + 
nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:101::2d + nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f METADATA: created by: test_blueprint2 diff --git a/nexus/reconfigurator/preparation/Cargo.toml b/nexus/reconfigurator/preparation/Cargo.toml index 44538ecb03..ab4dbb396e 100644 --- a/nexus/reconfigurator/preparation/Cargo.toml +++ b/nexus/reconfigurator/preparation/Cargo.toml @@ -11,5 +11,7 @@ nexus-db-model.workspace = true nexus-db-queries.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true +slog.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 46f71e5834..d7aeec51d8 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -16,7 +16,10 @@ use nexus_db_queries::db::pagination::Paginator; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; +use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; +use nexus_types::deployment::SledDetails; use nexus_types::deployment::SledResources; use nexus_types::deployment::UnstableReconfiguratorState; use nexus_types::deployment::ZpoolName; @@ -29,65 +32,118 @@ use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::TypedUuid; +use slog::error; +use slog::Logger; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::str::FromStr; /// Given various pieces of database state that go into the blueprint planning -/// process, produce a `Policy` object encapsulating what the planner needs to -/// generate a blueprint -pub fn policy_from_db( - sled_rows: &[nexus_db_model::Sled], - zpool_rows: &[nexus_db_model::Zpool], - ip_pool_range_rows: &[nexus_db_model::IpPoolRange], - target_nexus_zone_count: usize, -) -> Result { - let mut zpools_by_sled_id = { - let mut zpools = BTreeMap::new(); - for z in zpool_rows { - let sled_zpool_names = - zpools.entry(z.sled_id).or_insert_with(BTreeSet::new); - // It's unfortunate that Nexus knows how Sled Agent - // constructs zpool names, but there's not currently an - // alternative. 
- let zpool_name_generated = - illumos_utils::zpool::ZpoolName::new_external(z.id()) - .to_string(); - let zpool_name = ZpoolName::from_str(&zpool_name_generated) - .map_err(|e| { - Error::internal_error(&format!( - "unexpectedly failed to parse generated \ - zpool name: {}: {}", - zpool_name_generated, e - )) - })?; - sled_zpool_names.insert(zpool_name); - } - zpools - }; +/// process, produce a `PlanningInput` object encapsulating what the planner +/// needs to generate a blueprint +pub struct PlanningInputFromDb<'a> { + pub sled_rows: &'a [nexus_db_model::Sled], + pub zpool_rows: &'a [nexus_db_model::Zpool], + pub ip_pool_range_rows: &'a [nexus_db_model::IpPoolRange], + pub external_ip_rows: &'a [nexus_db_model::ExternalIp], + pub service_nic_rows: &'a [nexus_db_model::ServiceNetworkInterface], + pub target_nexus_zone_count: usize, + pub internal_dns_version: nexus_db_model::Generation, + pub external_dns_version: nexus_db_model::Generation, + pub log: &'a Logger, +} - let sleds = sled_rows - .into_iter() - .map(|sled_row| { +impl PlanningInputFromDb<'_> { + pub fn build(&self) -> Result { + let service_ip_pool_ranges = + self.ip_pool_range_rows.iter().map(IpRange::from).collect(); + let policy = Policy { + service_ip_pool_ranges, + target_nexus_zone_count: self.target_nexus_zone_count, + }; + let mut builder = PlanningInputBuilder::new( + policy, + self.internal_dns_version.into(), + self.external_dns_version.into(), + ); + + let mut zpools_by_sled_id = { + let mut zpools = BTreeMap::new(); + for z in self.zpool_rows { + let sled_zpool_names = + zpools.entry(z.sled_id).or_insert_with(BTreeSet::new); + // It's unfortunate that Nexus knows how Sled Agent + // constructs zpool names, but there's not currently an + // alternative. + let zpool_name_generated = + illumos_utils::zpool::ZpoolName::new_external(z.id()) + .to_string(); + let zpool_name = ZpoolName::from_str(&zpool_name_generated) + .map_err(|e| { + Error::internal_error(&format!( + "unexpectedly failed to parse generated \ + zpool name: {}: {}", + zpool_name_generated, e + )) + })?; + sled_zpool_names.insert(zpool_name); + } + zpools + }; + + for sled_row in self.sled_rows { let sled_id = sled_row.id(); let subnet = Ipv6Subnet::::new(sled_row.ip()); let zpools = zpools_by_sled_id .remove(&sled_id) .unwrap_or_else(BTreeSet::new); - let sled_info = SledResources { + let sled_details = SledDetails { policy: sled_row.policy(), state: sled_row.state().into(), - subnet, - zpools, + resources: SledResources { subnet, zpools }, }; - (sled_id, sled_info) - }) - .collect(); + // TODO-cleanup use `TypedUuid` everywhere + let sled_id = TypedUuid::from_untyped_uuid(sled_id); + builder.add_sled(sled_id, sled_details).map_err(|e| { + Error::internal_error(&format!( + "unexpectedly failed to add sled to planning input: {e}" + )) + })?; + } - let service_ip_pool_ranges = - ip_pool_range_rows.iter().map(IpRange::from).collect(); + for external_ip_row in + self.external_ip_rows.iter().filter(|r| r.is_service) + { + let Some(zone_id) = external_ip_row.parent_id else { + error!( + self.log, + "internal database consistency error: service external IP \ + is missing parent_id (should be the Omicron zone ID)"; + "ip_row" => ?external_ip_row, + ); + continue; + }; + let zone_id = TypedUuid::from_untyped_uuid(zone_id); + builder + .add_omicron_zone_external_ip( + zone_id, + nexus_types::deployment::ExternalIp { + id: external_ip_row.id, + ip: external_ip_row.ip, + }, + ) + .map_err(|e| { + Error::internal_error(&format!( + "unexpectedly failed to add 
external IP \ + to planning input: {e}" + )) + })?; + } - Ok(Policy { sleds, service_ip_pool_ranges, target_nexus_zone_count }) + Ok(builder.build()) + } } /// Loads state for import into `reconfigurator-cli` @@ -116,14 +172,38 @@ pub async fn reconfigurator_state_load( .await .context("listing services IP pool ranges")? }; + let external_ip_rows = datastore + .external_ip_list_service_all_batched(opctx) + .await + .context("fetching service external IPs")?; + let service_nic_rows = datastore + .service_network_interfaces_all_list_batched(opctx) + .await + .context("fetching service NICs")?; + let internal_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::Internal) + .await + .context("fetching internal DNS version")? + .version; + let external_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::External) + .await + .context("fetching external DNS version")? + .version; - let policy = policy_from_db( - &sled_rows, - &zpool_rows, - &ip_pool_range_rows, - NEXUS_REDUNDANCY, - ) - .context("assembling policy")?; + let planning_input = PlanningInputFromDb { + sled_rows: &sled_rows, + zpool_rows: &zpool_rows, + ip_pool_range_rows: &ip_pool_range_rows, + target_nexus_zone_count: NEXUS_REDUNDANCY, + external_ip_rows: &external_ip_rows, + service_nic_rows: &service_nic_rows, + log: &opctx.log, + internal_dns_version, + external_dns_version, + } + .build() + .context("assembling planning_input")?; let collection_ids = datastore .inventory_collections() @@ -224,7 +304,7 @@ pub async fn reconfigurator_state_load( .map(|dns_zone| dns_zone.zone_name) .collect(); Ok(UnstableReconfiguratorState { - policy, + planning_input, collections, blueprints, internal_dns, diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 9e926b202a..1a384a1fd9 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -9,28 +9,23 @@ use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::planner::Planner; -use nexus_reconfigurator_preparation::policy_from_db; +use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; -use nexus_types::deployment::ExternalIp; use nexus_types::deployment::PlanningInput; -use nexus_types::deployment::ServiceNetworkInterface; +use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; -use omicron_common::api::external::Generation; use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; -use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::OmicronZoneKind; -use omicron_uuid_kinds::TypedUuid; use slog_error_chain::InlineErrorChain; use uuid::Uuid; @@ -39,8 +34,6 @@ struct PlanningContext { planning_input: PlanningInput, creator: String, inventory: Option, - internal_dns_version: Generation, - external_dns_version: Generation, } impl super::Nexus { @@ -148,56 +141,39 @@ impl super::Nexus { 
.ip_pool_list_ranges_batched(opctx, &authz_service_ip_pool) .await? }; + let external_ip_rows = + datastore.external_ip_list_service_all_batched(opctx).await?; + let service_nic_rows = datastore + .service_network_interfaces_all_list_batched(opctx) + .await?; - let policy = policy_from_db( - &sled_rows, - &zpool_rows, - &ip_pool_range_rows, - NEXUS_REDUNDANCY, - )?; + let internal_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::Internal) + .await + .internal_context( + "fetching internal DNS version for blueprint planning", + )? + .version; + let external_dns_version = datastore + .dns_group_latest_version(opctx, DnsGroup::External) + .await + .internal_context( + "fetching external DNS version for blueprint planning", + )? + .version; - let service_external_ips = datastore - .external_ip_list_service_all_batched(opctx) - .await? - .into_iter() - .filter_map(|external_ip| { - if !external_ip.is_service { - error!( - opctx.log, - "non-service external IP returned by service IP query"; - "external-ip" => ?external_ip, - ); - return None; - } - let Some(service_id) = external_ip.parent_id else { - error!( - opctx.log, - "service external IP with no parent ID set"; - "external-ip" => ?external_ip, - ); - return None; - }; - Some(( - TypedUuid::::from_untyped_uuid(service_id), - ExternalIp::from(external_ip), - )) - }) - .collect(); - let service_nics = datastore - .service_network_interfaces_all_list_batched(opctx) - .await? - .into_iter() - .map(|nic| { - ( - TypedUuid::::from_untyped_uuid( - nic.service_id, - ), - ServiceNetworkInterface::from(nic), - ) - }) - .collect(); - let planning_input = - PlanningInput { policy, service_external_ips, service_nics }; + let planning_input = PlanningInputFromDb { + sled_rows: &sled_rows, + zpool_rows: &zpool_rows, + ip_pool_range_rows: &ip_pool_range_rows, + external_ip_rows: &external_ip_rows, + service_nic_rows: &service_nic_rows, + target_nexus_zone_count: NEXUS_REDUNDANCY, + log: &opctx.log, + internal_dns_version, + external_dns_version, + } + .build()?; // The choice of which inventory collection to use here is not // necessarily trivial. Inventory collections may be incomplete due to @@ -217,29 +193,7 @@ impl super::Nexus { "fetching latest inventory collection for blueprint planner", )?; - // Fetch the current DNS versions. This could be made part of - // inventory, but it's enough of a one-off that there's no particular - // advantage to doing that work now. 
- let internal_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::Internal) - .await - .internal_context( - "fetching internal DNS version for blueprint planning", - )?; - let external_dns_version = datastore - .dns_group_latest_version(opctx, DnsGroup::External) - .await - .internal_context( - "fetching external DNS version for blueprint planning", - )?; - - Ok(PlanningContext { - planning_input, - creator, - inventory, - internal_dns_version: *internal_dns_version.version, - external_dns_version: *external_dns_version.version, - }) + Ok(PlanningContext { planning_input, creator, inventory }) } async fn blueprint_add( @@ -262,9 +216,9 @@ impl super::Nexus { let planning_context = self.blueprint_planning_context(opctx).await?; let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, - planning_context.internal_dns_version, - planning_context.external_dns_version, - &planning_context.planning_input.policy, + planning_context.planning_input.internal_dns_version(), + planning_context.planning_input.external_dns_version(), + planning_context.planning_input.all_sled_ids(SledFilter::All), &planning_context.creator, ) .map_err(|error| { @@ -298,8 +252,6 @@ impl super::Nexus { let planner = Planner::new_based_on( opctx.log.clone(), &parent_blueprint, - planning_context.internal_dns_version, - planning_context.external_dns_version, &planning_context.planning_input, &planning_context.creator, &inventory, diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index bed66adaca..26d213c53e 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -12,8 +12,6 @@ //! nexus/db-model, but nexus/reconfigurator/planning does not currently know //! about nexus/db-model and it's convenient to separate these concerns.) -use crate::external_api::views::SledPolicy; -use crate::external_api::views::SledState; use crate::internal_api::params::DnsConfigParams; use crate::inventory::Collection; pub use crate::inventory::OmicronZoneConfig; @@ -22,20 +20,12 @@ pub use crate::inventory::OmicronZoneType; pub use crate::inventory::OmicronZonesConfig; pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; -use ipnetwork::IpNetwork; -use newtype_uuid::TypedUuid; -use omicron_common::address::IpRange; -use omicron_common::address::Ipv6Subnet; -use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; -use omicron_common::api::external::MacAddr; -use omicron_uuid_kinds::OmicronZoneKind; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use sled_agent_client::ZoneKind; use std::collections::BTreeMap; -use std::collections::BTreeSet; use std::collections::HashMap; use std::fmt; use strum::EnumIter; @@ -43,109 +33,17 @@ use strum::IntoEnumIterator; use thiserror::Error; use uuid::Uuid; -/// Fleet-wide deployment policy -/// -/// The **policy** represents the deployment controls that people (operators and -/// support engineers) can modify directly under normal operation. In the -/// limit, this would include things like: which sleds are supposed to be part -/// of the system, how many CockroachDB nodes should be part of the cluster, -/// what system version the system should be running, etc. It would _not_ -/// include things like which services should be running on which sleds or which -/// host OS version should be on each sled because that's up to the control -/// plane to decide. 
(To be clear, the intent is that for extenuating -/// circumstances, people could exercise control over such things, but that -/// would not be part of normal operation.) -/// -/// The current policy is pretty limited. It's aimed primarily at supporting -/// the add/remove sled use case. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct Policy { - /// set of sleds that are supposed to be part of the control plane, along - /// with information about resources available to the planner - pub sleds: BTreeMap, - - /// ranges specified by the IP pool for externally-visible control plane - /// services (e.g., external DNS, Nexus, boundary NTP) - pub service_ip_pool_ranges: Vec, - - /// desired total number of deployed Nexus zones - pub target_nexus_zone_count: usize, -} - -/// Describes the resources available on each sled for the planner -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct SledResources { - /// current sled policy - pub policy: SledPolicy, - - /// current sled state - pub state: SledState, +mod planning_input; - /// zpools on this sled - /// - /// (used to allocate storage for control plane zones with persistent - /// storage) - pub zpools: BTreeSet, - - /// the IPv6 subnet of this sled on the underlay network - /// - /// (implicitly specifies the whole range of addresses that the planner can - /// use for control plane components) - pub subnet: Ipv6Subnet, -} - -impl SledResources { - /// Returns true if the sled can have services provisioned on it that - /// aren't required to be on every sled. - /// - /// For example, NTP must exist on every sled, but Nexus does not have to. - pub fn is_eligible_for_discretionary_services(&self) -> bool { - self.policy.is_provisionable() - && self.state.is_eligible_for_discretionary_services() - } -} - -/// Policy and database inputs to the Reconfigurator planner -/// -/// The primary inputs to the planner are the parent (either a parent blueprint -/// or an inventory collection) and this structure. This type holds the -/// fleet-wide policy as well as any additional information fetched from CRDB -/// that the planner needs to make decisions. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct PlanningInput { - /// fleet-wide policy - pub policy: Policy, - - /// external IPs allocated to services - pub service_external_ips: BTreeMap, ExternalIp>, - - /// vNICs allocated to services - pub service_nics: - BTreeMap, ServiceNetworkInterface>, -} - -/// External IP allocated to a service -/// -/// This is a slimmer `nexus_db_model::ExternalIp` that only stores the fields -/// necessary for blueprint planning. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ExternalIp { - pub id: Uuid, - pub ip: IpNetwork, -} - -/// Network interface allocated to a service -/// -/// This is a slimmer `nexus_db_model::ServiceNetworkInterface` that only stores -/// the fields necessary for blueprint planning. 
-#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ServiceNetworkInterface { - pub id: Uuid, - pub mac: MacAddr, - pub ip: IpNetwork, - pub slot: u8, - pub primary: bool, -} +pub use planning_input::ExternalIp; +pub use planning_input::PlanningInput; +pub use planning_input::PlanningInputBuildError; +pub use planning_input::PlanningInputBuilder; +pub use planning_input::Policy; +pub use planning_input::ServiceNetworkInterface; +pub use planning_input::SledDetails; +pub use planning_input::SledFilter; +pub use planning_input::SledResources; /// Describes a complete set of software and configuration for the system // Blueprints are a fundamental part of how the system modifies itself. Each @@ -1024,7 +922,7 @@ impl DiffZoneCommon { /// backwards-compatibility guarantees.** #[derive(Debug, Clone, Serialize, Deserialize)] pub struct UnstableReconfiguratorState { - pub policy: Policy, + pub planning_input: PlanningInput, pub collections: Vec, pub blueprints: Vec, pub internal_dns: BTreeMap, diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs new file mode 100644 index 0000000000..1a0e7abd7a --- /dev/null +++ b/nexus/types/src/deployment/planning_input.rs @@ -0,0 +1,397 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types describing inputs the Reconfigurator needs to plan and produce new +//! blueprints. + +use crate::external_api::views::SledPolicy; +use crate::external_api::views::SledState; +use crate::inventory::ZpoolName; +use ipnetwork::IpNetwork; +use omicron_common::address::IpRange; +use omicron_common::address::Ipv6Subnet; +use omicron_common::address::SLED_PREFIX; +use omicron_common::api::external::Generation; +use omicron_common::api::external::MacAddr; +use omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::SledKind; +use omicron_uuid_kinds::TypedUuid; +use serde::Deserialize; +use serde::Serialize; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use uuid::Uuid; + +/// Describes the resources available on each sled for the planner +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SledResources { + /// zpools on this sled + /// + /// (used to allocate storage for control plane zones with persistent + /// storage) + pub zpools: BTreeSet, + + /// the IPv6 subnet of this sled on the underlay network + /// + /// (implicitly specifies the whole range of addresses that the planner can + /// use for control plane components) + pub subnet: Ipv6Subnet, +} + +/// External IP allocated to a service +/// +/// This is a slimmer `nexus_db_model::ExternalIp` that only stores the fields +/// necessary for blueprint planning. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExternalIp { + pub id: Uuid, + pub ip: IpNetwork, +} + +/// Network interface allocated to a service +/// +/// This is a slimmer `nexus_db_model::ServiceNetworkInterface` that only stores +/// the fields necessary for blueprint planning. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ServiceNetworkInterface { + pub id: Uuid, + pub mac: MacAddr, + pub ip: IpNetwork, + pub slot: u8, + pub primary: bool, +} + +/// Filters that apply to sleds. +/// +/// This logic lives here rather than within the individual components making +/// decisions, so that this is easier to read. 
+/// +/// The meaning of a particular filter should not be overloaded -- each time a +/// new use case wants to make a decision based on the zone disposition, a new +/// variant should be added to this enum. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum SledFilter { + // --- + // Prefer to keep this list in alphabetical order. + // --- + /// All sleds. + All, + + /// Sleds that are eligible for discretionary services. + EligibleForDiscretionaryServices, + + /// Sleds that are in service (even if they might not be eligible for + /// discretionary services). + InService, +} + +/// Fleet-wide deployment policy +/// +/// The **policy** represents the deployment controls that people (operators and +/// support engineers) can modify directly under normal operation. In the +/// limit, this would include things like: how many CockroachDB nodes should be +/// part of the cluster, what system version the system should be running, etc. +/// It would _not_ include things like which services should be running on which +/// sleds or which host OS version should be on each sled because that's up to +/// the control plane to decide. (To be clear, the intent is that for +/// extenuating circumstances, people could exercise control over such things, +/// but that would not be part of normal operation.) +/// +/// Conceptually the policy should also include the set of sleds that are +/// supposed to be part of the system and their individual [`SledPolicy`]s; +/// however, those are tracked as a separate part of [`PlanningInput`] as each +/// sled additionally has non-policy [`SledResources`] needed for planning. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct Policy { + /// ranges specified by the IP pool for externally-visible control plane + /// services (e.g., external DNS, Nexus, boundary NTP) + pub service_ip_pool_ranges: Vec, + + /// desired total number of deployed Nexus zones + pub target_nexus_zone_count: usize, +} + +/// Policy and database inputs to the Reconfigurator planner +/// +/// The primary inputs to the planner are the parent (either a parent blueprint +/// or an inventory collection) and this structure. This type holds the +/// fleet-wide policy as well as any additional information fetched from CRDB +/// that the planner needs to make decisions. +/// +/// +/// The current policy is pretty limited. It's aimed primarily at supporting +/// the add/remove sled use case. 
+#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PlanningInput { + /// fleet-wide policy + policy: Policy, + + /// current internal DNS version + internal_dns_version: Generation, + + /// current external DNS version + external_dns_version: Generation, + + /// per-sled policy and resources + sleds: BTreeMap, SledDetails>, + + /// external IPs allocated to Omicron zones + omicron_zone_external_ips: BTreeMap, ExternalIp>, + + /// vNICs allocated to Omicron zones + omicron_zone_nics: + BTreeMap, ServiceNetworkInterface>, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SledDetails { + /// current sled policy + pub policy: SledPolicy, + /// current sled state + pub state: SledState, + /// current resources allocated to this sled + pub resources: SledResources, +} + +impl PlanningInput { + pub fn internal_dns_version(&self) -> Generation { + self.internal_dns_version + } + + pub fn external_dns_version(&self) -> Generation { + self.external_dns_version + } + + pub fn target_nexus_zone_count(&self) -> usize { + self.policy.target_nexus_zone_count + } + + pub fn service_ip_pool_ranges(&self) -> &[IpRange] { + &self.policy.service_ip_pool_ranges + } + + pub fn all_sleds( + &self, + filter: SledFilter, + ) -> impl Iterator, &SledDetails)> + '_ { + self.sleds.iter().filter_map(move |(&sled_id, details)| match filter { + SledFilter::All => Some((sled_id, details)), + SledFilter::EligibleForDiscretionaryServices => { + if details.policy.is_provisionable() + && details.state.is_eligible_for_discretionary_services() + { + Some((sled_id, details)) + } else { + None + } + } + SledFilter::InService => { + if details.policy.is_in_service() { + // Check for illegal states; we shouldn't be able to have a + // policy+state combo where the policy says the sled is in + // service but the state is decommissioned, for example, but + // the two separate types let us represent that, so we'll + // guard against it here. + match details.state { + SledState::Active => Some((sled_id, details)), + SledState::Decommissioned => None, + } + } else { + None + } + } + }) + } + + pub fn all_sled_ids( + &self, + filter: SledFilter, + ) -> impl Iterator> + '_ { + self.all_sleds(filter).map(|(sled_id, _)| sled_id) + } + + pub fn all_sled_resources( + &self, + filter: SledFilter, + ) -> impl Iterator, &SledResources)> + '_ { + self.all_sleds(filter) + .map(|(sled_id, details)| (sled_id, &details.resources)) + } + + pub fn sled_policy( + &self, + sled_id: &TypedUuid, + ) -> Option { + self.sleds.get(sled_id).map(|details| details.policy) + } + + pub fn sled_resources( + &self, + sled_id: &TypedUuid, + ) -> Option<&SledResources> { + self.sleds.get(sled_id).map(|details| &details.resources) + } + + // Convert this `PlanningInput` back into a [`PlanningInputBuilder`] + // + // This is primarily useful for tests that want to mutate an existing + // `PlanningInput`. 
+ pub fn into_builder(self) -> PlanningInputBuilder { + PlanningInputBuilder { + policy: self.policy, + internal_dns_version: self.internal_dns_version, + external_dns_version: self.external_dns_version, + sleds: self.sleds, + omicron_zone_external_ips: self.omicron_zone_external_ips, + omicron_zone_nics: self.omicron_zone_nics, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum PlanningInputBuildError { + #[error("duplicate sled ID: {0}")] + DuplicateSledId(TypedUuid), + #[error("Omicron zone {zone_id} already has an external IP ({ip:?})")] + DuplicateOmicronZoneExternalIp { + zone_id: TypedUuid, + ip: ExternalIp, + }, + #[error("Omicron zone {zone_id} already has a NIC ({nic:?})")] + DuplicateOmicronZoneNic { + zone_id: TypedUuid, + nic: ServiceNetworkInterface, + }, +} + +/// Constructor for [`PlanningInput`]. +#[derive(Clone, Debug)] +pub struct PlanningInputBuilder { + policy: Policy, + internal_dns_version: Generation, + external_dns_version: Generation, + sleds: BTreeMap, SledDetails>, + omicron_zone_external_ips: BTreeMap, ExternalIp>, + omicron_zone_nics: + BTreeMap, ServiceNetworkInterface>, +} + +impl PlanningInputBuilder { + pub const fn empty_input() -> PlanningInput { + PlanningInput { + policy: Policy { + service_ip_pool_ranges: Vec::new(), + target_nexus_zone_count: 0, + }, + internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), + sleds: BTreeMap::new(), + omicron_zone_external_ips: BTreeMap::new(), + omicron_zone_nics: BTreeMap::new(), + } + } + + pub fn new( + policy: Policy, + internal_dns_version: Generation, + external_dns_version: Generation, + ) -> Self { + Self { + policy, + internal_dns_version, + external_dns_version, + sleds: BTreeMap::new(), + omicron_zone_external_ips: BTreeMap::new(), + omicron_zone_nics: BTreeMap::new(), + } + } + + pub fn add_sled( + &mut self, + sled_id: TypedUuid, + details: SledDetails, + ) -> Result<(), PlanningInputBuildError> { + match self.sleds.entry(sled_id) { + Entry::Vacant(slot) => { + slot.insert(details); + Ok(()) + } + Entry::Occupied(_) => { + Err(PlanningInputBuildError::DuplicateSledId(sled_id)) + } + } + } + + pub fn add_omicron_zone_external_ip( + &mut self, + zone_id: TypedUuid, + ip: ExternalIp, + ) -> Result<(), PlanningInputBuildError> { + match self.omicron_zone_external_ips.entry(zone_id) { + Entry::Vacant(slot) => { + slot.insert(ip); + Ok(()) + } + Entry::Occupied(prev) => { + Err(PlanningInputBuildError::DuplicateOmicronZoneExternalIp { + zone_id, + ip: prev.get().clone(), + }) + } + } + } + + pub fn add_omicron_zone_nic( + &mut self, + zone_id: TypedUuid, + nic: ServiceNetworkInterface, + ) -> Result<(), PlanningInputBuildError> { + match self.omicron_zone_nics.entry(zone_id) { + Entry::Vacant(slot) => { + slot.insert(nic); + Ok(()) + } + Entry::Occupied(prev) => { + Err(PlanningInputBuildError::DuplicateOmicronZoneNic { + zone_id, + nic: prev.get().clone(), + }) + } + } + } + + pub fn policy_mut(&mut self) -> &mut Policy { + &mut self.policy + } + + pub fn sleds(&mut self) -> &BTreeMap, SledDetails> { + &self.sleds + } + + pub fn sleds_mut( + &mut self, + ) -> &mut BTreeMap, SledDetails> { + &mut self.sleds + } + + pub fn set_internal_dns_version(&mut self, new_version: Generation) { + self.internal_dns_version = new_version; + } + + pub fn set_external_dns_version(&mut self, new_version: Generation) { + self.external_dns_version = new_version; + } + + pub fn build(self) -> PlanningInput { + PlanningInput { + policy: self.policy, + internal_dns_version: 
self.internal_dns_version, + external_dns_version: self.external_dns_version, + sleds: self.sleds, + omicron_zone_external_ips: self.omicron_zone_external_ips, + omicron_zone_nics: self.omicron_zone_nics, + } + } +} diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index f8997d6ff9..2ffe508b9a 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -604,6 +604,17 @@ impl SledPolicy { ] } + /// Returns true if the sled is in-service. + /// + /// Note that a sled being in service does not mean it's provisionable; most + /// consumers probably want `is_provisionable` instead. + pub fn is_in_service(&self) -> bool { + match self { + Self::InService { .. } => true, + Self::Expunged => false, + } + } + /// Returns true if the sled can have services provisioned on it. pub fn is_provisionable(&self) -> bool { match self { diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 17aa803d13..b698b2a44a 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -49,6 +49,7 @@ impl_typed_uuid_kind! { DownstairsRegionKind => "downstairs_region", LoopbackAddressKind => "loopback_address", OmicronZoneKind => "service", + SledKind => "sled", TufRepoKind => "tuf_repo", UpstairsKind => "upstairs", UpstairsRepairKind => "upstairs_repair", From d2e5756ad1ea3bb601589614cd71b02c7000a2fb Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Thu, 4 Apr 2024 13:55:35 -0700 Subject: [PATCH 063/334] remove cargo registry patch for samael (#5421) Fixes the following warning introduced by #5408: ``` warning: Patch `samael v0.0.14 (https://github.com/oxidecomputer/samael?branch=oxide/omicron#9e609a8f)` was not used in the crate graph. Check that the patched package version and available features are compatible with the dependency requirements. If the patch has a different version from what is locked in the Cargo.lock file, run `cargo update` to use the new version. This may also occur with an optional dependency that is not enabled. ``` #5408 bumped samael to v0.0.15, which brought in the upstream fix for #4920, which is why we had the patch in the first place. --- Cargo.lock | 5 ----- Cargo.toml | 5 ----- 2 files changed, 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f8f52010f1..5c2d448f6c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11544,8 +11544,3 @@ dependencies = [ "quote", "syn 1.0.109", ] - -[[patch.unused]] -name = "samael" -version = "0.0.14" -source = "git+https://github.com/oxidecomputer/samael?branch=oxide/omicron#9e609a8f6fa0dd84e3bb8f579f46bd780c8be62b" diff --git a/Cargo.toml b/Cargo.toml index 75e6120f9f..c74a43b961 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -660,11 +660,6 @@ branch = "oxide/omicron" [patch.crates-io.omicron-workspace-hack] path = "workspace-hack" -# Pulls in https://github.com/njaremko/samael/pull/41 -[patch.crates-io.samael] -git = "https://github.com/oxidecomputer/samael" -branch = "oxide/omicron" - # Several crates such as crucible and propolis have have a Git dependency on # this repo. Omicron itself depends on these crates, which can lead to two # copies of these crates in the dependency graph. 
(As a Git dependency, and as From 95c9b3d665b63d057c5811e88fbfeecb87999f6a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 4 Apr 2024 15:07:33 -0700 Subject: [PATCH 064/334] chore(deps): update rust crate tokio to 1.37.0 (#5414) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5c2d448f6c..ad95fe27ca 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9769,9 +9769,9 @@ dependencies = [ [[package]] name = "tokio" -version = "1.36.0" +version = "1.37.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61285f6515fa018fb2d1e46eb21223fff441ee8db5d0f1435e8ab4f5cdb80931" +checksum = "1adbebffeca75fcfd058afa480fb6c0b81e165a0323f9c9d39c9697e37c46787" dependencies = [ "backtrace", "bytes", diff --git a/Cargo.toml b/Cargo.toml index c74a43b961..e03bd2ecbe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -421,7 +421,7 @@ textwrap = "0.16.1" test-strategy = "0.3.1" thiserror = "1.0" tofino = { git = "http://github.com/oxidecomputer/tofino", branch = "main" } -tokio = "1.36.0" +tokio = "1.37.0" tokio-postgres = { version = "0.7", features = [ "with-chrono-0_4", "with-uuid-1" ] } tokio-stream = "0.1.15" tokio-tungstenite = "0.20" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 29e1db8e4f..2261eb24df 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -101,7 +101,7 @@ subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.52", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.34", features = ["formatting", "local-offset", "macros", "parsing"] } -tokio = { version = "1.36.0", features = ["full", "test-util"] } +tokio = { version = "1.37.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } @@ -208,7 +208,7 @@ syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extr syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.52", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.34", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.17", default-features = false, features = ["formatting", "parsing"] } -tokio = { version = "1.36.0", features = ["full", "test-util"] } +tokio = { version = "1.37.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } tokio-stream = { version = "0.1.15", features = ["net"] } tokio-util = { version = "0.7.10", features = ["codec", "io-util"] } From d7d70d8366ee106d49cc0c7bfae2c6aa499ecf3b Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 4 Apr 2024 17:55:31 -0700 Subject: [PATCH 065/334] [nexus] fix up some uses of blueprints that were using the Debug impl (#5413) Before we added the `display()` method, the way to get nicely formatted blueprints was to use the Debug impl. While working on #5211, I noticed that these callers were still using the old way to do things. Fix this up. 
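The shape of the fix, as a minimal illustrative sketch (the real call sites are in the diff below; `blueprint` stands for whatever `Blueprint` value the caller already fetched):

```rust
// Before: leaned on the Debug impl for human-readable output.
println!("{:?}", blueprint);

// After: use the display() helper, which renders the tabular form
// captured in the expected-output files updated by this change.
println!("{}", blueprint.display());
```
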
I tested this by making the `Debug` impl for `Blueprint` panic, then saw which tests failed. I also realized that we didn't have tests for omdb's blueprints. I believe at the time it wasn't possible to write a test for this because we didn't have an initial blueprint -- but now we do, so include a test. (I also wanted to ensure that the actual blueprint ID was what we expected, so I expanded the scope of `redact_variable` a bit. Introduce `ExtraRedactions` so we can handle both fixed- and variable-length redactions, and use the same logic for UUIDs.) --- dev-tools/omdb/src/bin/omdb/nexus.rs | 2 +- dev-tools/omdb/tests/env.out | 12 +- dev-tools/omdb/tests/successes.out | 91 +++++++++---- dev-tools/omdb/tests/test_all_output.rs | 48 ++++--- .../tests/output/cmd-stdout | 50 +++---- .../reconfigurator-cli/tests/test_basic.rs | 2 +- nexus/reconfigurator/execution/src/dns.rs | 4 +- nexus/test-utils/src/lib.rs | 6 + test-utils/src/dev/test_cmds.rs | 127 +++++++++++++++--- 9 files changed, 250 insertions(+), 92 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index bdcfe0cdc4..64b1ef7276 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -974,7 +974,7 @@ async fn cmd_nexus_blueprints_show( .blueprint_view(&args.blueprint_id) .await .with_context(|| format!("fetching blueprint {}", args.blueprint_id))?; - println!("{:?}", blueprint); + println!("{}", blueprint.display()); Ok(()) } diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 512c05fc86..0e0a198f34 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -3,8 +3,8 @@ termination: Exited(0) --------------------------------------------- stdout: SERIAL IP ROLE ID -sim-039be560 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED -sim-b6d65341 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED +sim-039be560 [::1]:REDACTED_PORT scrimlet ..................... +sim-b6d65341 [::1]:REDACTED_PORT scrimlet ..................... --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -293,8 +293,8 @@ termination: Exited(0) --------------------------------------------- stdout: SERIAL IP ROLE ID -sim-039be560 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED -sim-b6d65341 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED +sim-039be560 [::1]:REDACTED_PORT scrimlet ..................... +sim-b6d65341 [::1]:REDACTED_PORT scrimlet ..................... --------------------------------------------- stderr: note: database URL not specified. Will search DNS. @@ -307,8 +307,8 @@ termination: Exited(0) --------------------------------------------- stdout: SERIAL IP ROLE ID -sim-039be560 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED -sim-b6d65341 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED +sim-039be560 [::1]:REDACTED_PORT scrimlet ..................... +sim-b6d65341 [::1]:REDACTED_PORT scrimlet ..................... --------------------------------------------- stderr: note: database URL not specified. Will search DNS. 
diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 6e25f7b3a3..ff19bbb9a7 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -26,7 +26,7 @@ termination: Exited(0) stdout: DNS zone: oxide-dev.test (External) requested version: 2 (created at ) -version created by Nexus: REDACTED_UUID_REDACTED_UUID_REDACTED +version created by Nexus: ..................... version created because: create silo: "test-suite-silo" changes: names added: 1, names removed: 0 @@ -58,7 +58,7 @@ stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () ============================================= -EXECUTING COMMAND: omdb ["db", "reconfigurator-save", ""] +EXECUTING COMMAND: omdb ["db", "reconfigurator-save", ""] termination: Exited(0) --------------------------------------------- stdout: @@ -67,19 +67,19 @@ stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable note: database schema version matches expected () assembling reconfigurator state ... done -wrote +wrote ============================================= EXECUTING COMMAND: omdb ["db", "services", "list-instances"] termination: Exited(0) --------------------------------------------- stdout: SERVICE INSTANCE_ID ADDR SLED_SERIAL -CruciblePantry REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -ExternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 -Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT sim-b6d65341 -Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-039be560 -Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT sim-b6d65341 +CruciblePantry ..................... [::1]:REDACTED_PORT sim-b6d65341 +ExternalDns ..................... [::1]:REDACTED_PORT sim-b6d65341 +InternalDns ..................... [::1]:REDACTED_PORT sim-b6d65341 +Nexus ..................... [::ffff:127.0.0.1]:REDACTED_PORT sim-b6d65341 +Mgd ..................... [::1]:REDACTED_PORT sim-039be560 +Mgd ..................... [::1]:REDACTED_PORT sim-b6d65341 --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -89,19 +89,19 @@ EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] termination: Exited(0) --------------------------------------------- stdout: -sled: sim-039be560 (id REDACTED_UUID_REDACTED_UUID_REDACTED) +sled: sim-039be560 (id .....................) SERVICE INSTANCE_ID ADDR - Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT + Mgd ..................... [::1]:REDACTED_PORT -sled: sim-b6d65341 (id REDACTED_UUID_REDACTED_UUID_REDACTED) +sled: sim-b6d65341 (id .....................) SERVICE INSTANCE_ID ADDR - CruciblePantry REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - ExternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - InternalDns REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT - Nexus REDACTED_UUID_REDACTED_UUID_REDACTED [::ffff:127.0.0.1]:REDACTED_PORT - Mgd REDACTED_UUID_REDACTED_UUID_REDACTED [::1]:REDACTED_PORT + CruciblePantry ..................... [::1]:REDACTED_PORT + ExternalDns ..................... [::1]:REDACTED_PORT + InternalDns ..................... [::1]:REDACTED_PORT + Nexus ..................... 
[::ffff:127.0.0.1]:REDACTED_PORT + Mgd ..................... [::1]:REDACTED_PORT --------------------------------------------- stderr: @@ -113,8 +113,8 @@ termination: Exited(0) --------------------------------------------- stdout: SERIAL IP ROLE ID -sim-039be560 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED -sim-b6d65341 [::1]:REDACTED_PORT scrimlet REDACTED_UUID_REDACTED_UUID_REDACTED +sim-039be560 [::1]:REDACTED_PORT scrimlet ..................... +sim-b6d65341 [::1]:REDACTED_PORT scrimlet ..................... --------------------------------------------- stderr: note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable @@ -405,12 +405,12 @@ task: "external_endpoints" external API endpoints: 2 ('*' below marks default) SILO_ID DNS_NAME - REDACTED_UUID_REDACTED_UUID_REDACTED default-silo.sys.oxide-dev.test - * REDACTED_UUID_REDACTED_UUID_REDACTED test-suite-silo.sys.oxide-dev.test + ..................... default-silo.sys.oxide-dev.test + * ..................... test-suite-silo.sys.oxide-dev.test warnings: 2 - warning: silo REDACTED_UUID_REDACTED_UUID_REDACTED with DNS name "default-silo.sys.oxide-dev.test" has no usable certificates - warning: silo REDACTED_UUID_REDACTED_UUID_REDACTED with DNS name "test-suite-silo.sys.oxide-dev.test" has no usable certificates + warning: silo ..................... with DNS name "default-silo.sys.oxide-dev.test" has no usable certificates + warning: silo ..................... with DNS name "test-suite-silo.sys.oxide-dev.test" has no usable certificates TLS certificates: 0 @@ -419,7 +419,7 @@ task: "inventory_collection" currently executing: no last completed activation: iter 3, triggered by an explicit signal started at (s ago) and ran for ms - last collection id: REDACTED_UUID_REDACTED_UUID_REDACTED + last collection id: ..................... last collection started: last collection done: @@ -464,3 +464,46 @@ warning: unknown background task: "switch_port_config_manager" (don't know how t stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ ============================================= +EXECUTING COMMAND: omdb ["nexus", "blueprints", "list"] +termination: Exited(0) +--------------------------------------------- +stdout: +T ENA ID PARENT TIME_CREATED +* no ............. +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "blueprints", "show", "............."] +termination: Exited(0) +--------------------------------------------- +stdout: +blueprint ............. +parent: + + ----------------------------------------------------------------------------------------- + zone type zone ID disposition underlay IP + ----------------------------------------------------------------------------------------- + + sled .....................: zones at generation 2 + (no zones) + + sled .....................: zones at generation 2 + clickhouse ..................... in service ::1 + cockroach_db ..................... in service ::1 + crucible_pantry ..................... in service ::1 + external_dns ..................... in service ::1 + internal_dns ..................... in service ::1 + nexus ..................... 
in service ::ffff:127.0.0.1 + +METADATA: + created by: nexus-test-utils + created at: + comment: initial test blueprint + internal DNS version: 1 + external DNS version: 2 + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 2c16cc1482..ca24637040 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -12,8 +12,9 @@ use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::SledFilter; use nexus_types::deployment::UnstableReconfiguratorState; use omicron_test_utils::dev::test_cmds::path_to_executable; -use omicron_test_utils::dev::test_cmds::redact_variable; +use omicron_test_utils::dev::test_cmds::redact_extra; use omicron_test_utils::dev::test_cmds::run_command; +use omicron_test_utils::dev::test_cmds::ExtraRedactions; use slog_error_chain::InlineErrorChain; use std::fmt::Write; use std::path::Path; @@ -57,7 +58,7 @@ async fn test_omdb_usage_errors() { ]; for args in invocations { - do_run(&mut output, |exec| exec, &cmd_path, args, &[]).await; + do_run(&mut output, |exec| exec, &cmd_path, args).await; } assert_contents("tests/usage_errors.out", &output); @@ -78,7 +79,10 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let tmpdir = camino_tempfile::tempdir() .expect("failed to create temporary directory"); let tmppath = tmpdir.path().join("reconfigurator-save.out"); + let initial_blueprint_id = cptestctx.initial_blueprint_id.to_string(); + let mut output = String::new(); + let invocations: &[&[&str]] = &[ &["db", "disks", "list"], &["db", "dns", "show"], @@ -92,6 +96,8 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["mgs", "inventory"], &["nexus", "background-tasks", "doc"], &["nexus", "background-tasks", "show"], + &["nexus", "blueprints", "list"], + &["nexus", "blueprints", "show", &initial_blueprint_id], // We can't easily test the sled agent output because that's only // provided by a real sled agent, which is not available in the // ControlPlaneTestContext. 
@@ -102,7 +108,7 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { let p = postgres_url.to_string(); let u = nexus_internal_url.clone(); let g = mgs_url.clone(); - do_run( + do_run_extra( &mut output, move |exec| { exec.env("OMDB_DB_URL", &p) @@ -111,7 +117,9 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { }, &cmd_path, args, - &[tmppath.as_str()], + ExtraRedactions::new() + .variable_length("tmp_path", tmppath.as_str()) + .fixed_length("blueprint_id", &initial_blueprint_id), ) .await; } @@ -170,7 +178,7 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { // Database URL // Case 1: specified on the command line let args = &["db", "--db-url", &postgres_url, "sleds"]; - do_run(&mut output, |exec| exec, &cmd_path, args, &[]).await; + do_run(&mut output, |exec| exec, &cmd_path, args).await; // Case 2: specified in multiple places (command-line argument wins) let args = &["db", "--db-url", "junk", "sleds"]; @@ -180,7 +188,6 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { move |exec| exec.env("OMDB_DB_URL", &p), &cmd_path, args, - &[], ) .await; @@ -193,7 +200,7 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { "background-tasks", "doc", ]; - do_run(&mut output, |exec| exec, &cmd_path.clone(), args, &[]).await; + do_run(&mut output, |exec| exec, &cmd_path.clone(), args).await; // Case 2: specified in multiple places (command-line argument wins) let args = @@ -204,7 +211,6 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { move |exec| exec.env("OMDB_NEXUS_URL", &n), &cmd_path, args, - &[], ) .await; @@ -217,7 +223,6 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { move |exec| exec.env("OMDB_DNS_SERVER", dns_sockaddr.to_string()), &cmd_path, args, - &[], ) .await; @@ -228,7 +233,7 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { "background-tasks", "doc", ]; - do_run(&mut output, move |exec| exec, &cmd_path, args, &[]).await; + do_run(&mut output, move |exec| exec, &cmd_path, args).await; let args = &["db", "sleds"]; do_run( @@ -236,12 +241,11 @@ async fn test_omdb_env_settings(cptestctx: &ControlPlaneTestContext) { move |exec| exec.env("OMDB_DNS_SERVER", dns_sockaddr.to_string()), &cmd_path, args, - &[], ) .await; let args = &["--dns-server", &dns_sockaddr.to_string(), "db", "sleds"]; - do_run(&mut output, move |exec| exec, &cmd_path, args, &[]).await; + do_run(&mut output, move |exec| exec, &cmd_path, args).await; assert_contents("tests/env.out", &output); } @@ -251,7 +255,19 @@ async fn do_run( modexec: F, cmd_path: &Path, args: &[&str], - extra_redactions: &[&str], +) where + F: FnOnce(Exec) -> Exec + Send + 'static, +{ + do_run_extra(output, modexec, cmd_path, args, &ExtraRedactions::new()) + .await; +} + +async fn do_run_extra( + output: &mut String, + modexec: F, + cmd_path: &Path, + args: &[&str], + extra_redactions: &ExtraRedactions<'_>, ) where F: FnOnce(Exec) -> Exec + Send + 'static, { @@ -261,7 +277,7 @@ async fn do_run( "EXECUTING COMMAND: {} {:?}\n", cmd_path.file_name().expect("missing command").to_string_lossy(), args.iter() - .map(|r| redact_variable(r, extra_redactions)) + .map(|r| redact_extra(r, extra_redactions)) .collect::>(), ) .unwrap(); @@ -294,9 +310,9 @@ async fn do_run( write!(output, "termination: {:?}\n", exit_status).unwrap(); write!(output, "---------------------------------------------\n").unwrap(); write!(output, "stdout:\n").unwrap(); - 
output.push_str(&redact_variable(&stdout_text, extra_redactions)); + output.push_str(&redact_extra(&stdout_text, extra_redactions)); write!(output, "---------------------------------------------\n").unwrap(); write!(output, "stderr:\n").unwrap(); - output.push_str(&redact_variable(&stderr_text, extra_redactions)); + output.push_str(&redact_extra(&stderr_text, extra_redactions)); write!(output, "=============================================\n").unwrap(); } diff --git a/dev-tools/reconfigurator-cli/tests/output/cmd-stdout b/dev-tools/reconfigurator-cli/tests/output/cmd-stdout index 10b158f218..7bedb54bf9 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmd-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmd-stdout @@ -9,50 +9,50 @@ ID > -> sled-show REDACTED_UUID_REDACTED_UUID_REDACTED -error: no sled with id REDACTED_UUID_REDACTED_UUID_REDACTED +> sled-show ..................... +error: no sled with id ..................... -> sled-add REDACTED_UUID_REDACTED_UUID_REDACTED +> sled-add ..................... added sled > sled-list ID NZPOOLS SUBNET -REDACTED_UUID_REDACTED_UUID_REDACTED 10 fd00:1122:3344:101::/64 +..................... 10 fd00:1122:3344:101::/64 -> sled-show REDACTED_UUID_REDACTED_UUID_REDACTED -sled REDACTED_UUID_REDACTED_UUID_REDACTED +> sled-show ..................... +sled ..................... subnet fd00:1122:3344:101::/64 zpools (10): - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - ZpoolName("oxp_REDACTED_UUID_REDACTED_UUID_REDACTED") - - -> sled-add REDACTED_UUID_REDACTED_UUID_REDACTED + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + ZpoolName("oxp_.....................") + + +> sled-add ..................... added sled -> sled-add REDACTED_UUID_REDACTED_UUID_REDACTED +> sled-add ..................... added sled > sled-list ID NZPOOLS SUBNET -REDACTED_UUID_REDACTED_UUID_REDACTED 10 fd00:1122:3344:103::/64 -REDACTED_UUID_REDACTED_UUID_REDACTED 10 fd00:1122:3344:102::/64 -REDACTED_UUID_REDACTED_UUID_REDACTED 10 fd00:1122:3344:101::/64 +..................... 10 fd00:1122:3344:103::/64 +..................... 10 fd00:1122:3344:102::/64 +..................... 10 fd00:1122:3344:101::/64 > > inventory-generate -generated inventory collection REDACTED_UUID_REDACTED_UUID_REDACTED from configured sleds +generated inventory collection ..................... from configured sleds > inventory-list ID NERRORS TIME_DONE -REDACTED_UUID_REDACTED_UUID_REDACTED 0 +..................... 
0 diff --git a/dev-tools/reconfigurator-cli/tests/test_basic.rs b/dev-tools/reconfigurator-cli/tests/test_basic.rs index 5502d954b4..87450181ec 100644 --- a/dev-tools/reconfigurator-cli/tests/test_basic.rs +++ b/dev-tools/reconfigurator-cli/tests/test_basic.rs @@ -42,7 +42,7 @@ fn test_basic() { let exec = Exec::cmd(path_to_cli()).arg("tests/input/cmds.txt"); let (exit_status, stdout_text, stderr_text) = run_command(exec); assert_exit_code(exit_status, EXIT_SUCCESS, &stderr_text); - let stdout_text = redact_variable(&stdout_text, &[]); + let stdout_text = redact_variable(&stdout_text); assert_contents("tests/output/cmd-stdout", &stdout_text); assert_contents("tests/output/cmd-stderr", &stderr_text); } diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 3cb963ae62..13b21f9961 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -1139,7 +1139,7 @@ mod test { .await .expect("failed to read current target blueprint") .expect("no target blueprint set"); - eprintln!("blueprint: {:?}", blueprint); + eprintln!("blueprint: {}", blueprint.display()); // Now, execute the initial blueprint. let overrides = Overridables::for_test(cptestctx); @@ -1239,7 +1239,7 @@ mod test { .unwrap(); assert_eq!(rv, EnsureMultiple::Added(1)); let blueprint2 = builder.build(); - eprintln!("blueprint2: {:?}", blueprint2); + eprintln!("blueprint2: {}", blueprint2.display()); // Figure out the id of the new zone. let zones_before = blueprint .all_omicron_zones() diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 81814efda3..c1acdc1848 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -114,6 +114,7 @@ pub struct ControlPlaneTestContext { pub external_dns_zone_name: String, pub external_dns: dns_server::TransientServer, pub internal_dns: dns_server::TransientServer, + pub initial_blueprint_id: Uuid, pub silo_name: Name, pub user_name: UserId, } @@ -299,6 +300,7 @@ pub struct ControlPlaneTestContextBuilder<'a, N: NexusServer> { pub external_dns: Option, pub internal_dns: Option, dns_config: Option, + initial_blueprint_id: Option, omicron_zones: Vec, omicron_zones2: Vec, @@ -343,6 +345,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { external_dns: None, internal_dns: None, dns_config: None, + initial_blueprint_id: None, omicron_zones: Vec::new(), omicron_zones2: Vec::new(), silo_name: None, @@ -836,6 +839,8 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { } }; + self.initial_blueprint_id = Some(blueprint.id); + // Handoff all known service information to Nexus let server = N::start( self.nexus_internal @@ -1124,6 +1129,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { external_dns_zone_name: self.external_dns_zone_name.unwrap(), external_dns: self.external_dns.unwrap(), internal_dns: self.internal_dns.unwrap(), + initial_blueprint_id: self.initial_blueprint_id.unwrap(), silo_name: self.silo_name.unwrap(), user_name: self.user_name.unwrap(), } diff --git a/test-utils/src/dev/test_cmds.rs b/test-utils/src/dev/test_cmds.rs index 6500eaddfd..5ef2da672b 100644 --- a/test-utils/src/dev/test_cmds.rs +++ b/test-utils/src/dev/test_cmds.rs @@ -125,21 +125,12 @@ pub fn error_for_enoent() -> String { /// invocation to invocation (e.g., assigned TCP port numbers, timestamps) /// /// This allows use to use expectorate to verify the shape of the CLI output. 
-pub fn redact_variable(input: &str, extra_redactions: &[&str]) -> String { - // Perform extra redactions at the beginning, not the end. This is because - // some of the built-in redactions below might match a substring of - // something that should be handled by extra_redactions (e.g. a temporary - // path). - let mut s = input.to_owned(); - for r in extra_redactions { - s = s.replace(r, ""); - } - +pub fn redact_variable(input: &str) -> String { // Replace TCP port numbers. We include the localhost characters to avoid // catching any random sequence of numbers. let s = regex::Regex::new(r"\[::1\]:\d{4,5}") .unwrap() - .replace_all(&s, "[::1]:REDACTED_PORT") + .replace_all(&input, "[::1]:REDACTED_PORT") .to_string(); let s = regex::Regex::new(r"\[::ffff:127.0.0.1\]:\d{4,5}") .unwrap() @@ -151,12 +142,16 @@ pub fn redact_variable(input: &str, extra_redactions: &[&str]) -> String { .to_string(); // Replace uuids. + // + // The length of a UUID is 32 nibbles for the hex encoding of a u128 + 4 + // dashes = 36. + const UUID_LEN: usize = 36; let s = regex::Regex::new( "[a-zA-Z0-9]{8}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-\ [a-zA-Z0-9]{4}-[a-zA-Z0-9]{12}", ) .unwrap() - .replace_all(&s, "REDACTED_UUID_REDACTED_UUID_REDACTED") + .replace_all(&s, fill_redaction_text("uuid", UUID_LEN)) .to_string(); // Replace timestamps. @@ -196,15 +191,113 @@ pub fn redact_variable(input: &str, extra_redactions: &[&str]) -> String { s } +/// Redact text from a string, allowing for extra redactions to be specified. +pub fn redact_extra( + input: &str, + extra_redactions: &ExtraRedactions<'_>, +) -> String { + // Perform extra redactions at the beginning, not the end. This is because + // some of the built-in redactions in redact_variable might match a + // substring of something that should be handled by extra_redactions (e.g. + // a temporary path). + let mut s = input.to_owned(); + for (name, replacement) in &extra_redactions.redactions { + s = s.replace(name, replacement); + } + redact_variable(&s) +} + +/// Represents a list of extra redactions for [`redact_variable`]. +/// +/// Extra redactions are applied in-order, before any builtin redactions. +#[derive(Clone, Debug, Default)] +pub struct ExtraRedactions<'a> { + // A pair of redaction and replacement strings. + redactions: Vec<(&'a str, String)>, +} + +impl<'a> ExtraRedactions<'a> { + pub fn new() -> Self { + Self { redactions: Vec::new() } + } + + pub fn fixed_length( + &mut self, + name: &str, + text_to_redact: &'a str, + ) -> &mut Self { + // Use the same number of chars as the number of bytes in + // text_to_redact. We're almost entirely in ASCII-land so they're the + // same, and getting the length right is nice but doesn't matter for + // correctness. + // + // A technically more correct impl would use unicode-width, but ehhh. + let replacement = fill_redaction_text(name, text_to_redact.len()); + self.redactions.push((text_to_redact, replacement)); + self + } + + pub fn variable_length( + &mut self, + name: &str, + text_to_redact: &'a str, + ) -> &mut Self { + let gen = format!("<{}_REDACTED>", name.to_uppercase()); + let replacement = gen.to_string(); + + self.redactions.push((text_to_redact, replacement)); + self + } +} + +fn fill_redaction_text(name: &str, text_to_redact_len: usize) -> String { + // The overall plan is to generate a string of the form + // ------, depending on the length of the text to + // redact. + // + // * Always include the < > signs for clarity, and either shorten the + // text or add dashes to compensate for the length. 
+ + let base = format!("REDACTED_{}", name.to_uppercase()); + + let text_len_minus_2 = text_to_redact_len.saturating_sub(2); + + let replacement = if text_len_minus_2 <= base.len() { + // Shorten the base string to fit the text. + format!("<{:.width$}>", base, width = text_len_minus_2) + } else { + // Add dashes on both sides to make up the difference. + let dash_len = text_len_minus_2 - base.len(); + format!( + "{}<{base}>{}", + ".".repeat(dash_len / 2), + ".".repeat(dash_len - dash_len / 2) + ) + }; + replacement +} + #[cfg(test)] mod tests { use super::*; #[test] - fn test_redact_variable() { - // Ens - let input = "time: 123ms, path: /var/tmp/tmp.456ms123s"; - let actual = redact_variable(input, &["/var/tmp/tmp.456ms123s"]); - assert_eq!(actual, "time: ms, path: "); + fn test_redact_extra() { + let input = "time: 123ms, path: /var/tmp/tmp.456ms123s, \ + path2: /short, \ + path3: /variable-length/path"; + let actual = redact_extra( + input, + ExtraRedactions::new() + .fixed_length("tp", "/var/tmp/tmp.456ms123s") + .fixed_length("short_redact", "/short") + .variable_length("variable", "/variable-length/path"), + ); + assert_eq!( + actual, + "time: ms, path: ........., \ + path2: , \ + path3: " + ); } } From 03432f6b8f662317abb5e9b9a9b56d3537b3f05c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Apr 2024 04:55:38 +0000 Subject: [PATCH 066/334] chore(deps): update taiki-e/install-action digest to 882330f (#5424) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`834a7b9` -> `882330f`](https://togithub.com/taiki-e/install-action/compare/834a7b9...882330f) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index f1114ee128..5443582cc0 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@834a7b93e0c678fb40309ee0e36546336d5c6ea7 # v2 + uses: taiki-e/install-action@882330fb2472a0660b5c990be7c5ccd3cbdf3282 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 245ec2b7bfb067e517fff6ca6177fb9fd92ed620 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 5 Apr 2024 09:32:27 -0700 Subject: [PATCH 067/334] Remove image population bash scripts (#5428) These are wrappers around the CLI, but they pull from catacomb. Though these APIs are still supported, I recall this largely being a helper mechanism for demos. I propose removing them. 
--- tools/populate/populate-alpine.sh | 29 -------- tools/populate/populate-images.sh | 110 ------------------------------ 2 files changed, 139 deletions(-) delete mode 100755 tools/populate/populate-alpine.sh delete mode 100755 tools/populate/populate-images.sh diff --git a/tools/populate/populate-alpine.sh b/tools/populate/populate-alpine.sh deleted file mode 100755 index 63a3400ee8..0000000000 --- a/tools/populate/populate-alpine.sh +++ /dev/null @@ -1,29 +0,0 @@ -#!/usr/bin/env bash -# Simple script to install the alpine image included with propolis. - -if ! oxide api /v1/images > /dev/null; then - echo "Problem detected running the oxide CLI" - echo "Please install, set path, or setup authorization" - exit 1 -fi - -oxide api /v1/images --method POST --input - < \"tail \\\$(svcs -L nexus)\"" - exit 1 -fi diff --git a/tools/populate/populate-images.sh b/tools/populate/populate-images.sh deleted file mode 100755 index 6580341aae..0000000000 --- a/tools/populate/populate-images.sh +++ /dev/null @@ -1,110 +0,0 @@ -#!/usr/bin/env bash -# Populate an Oxide host running Omicron with images from server catacomb. -# -# Note that the default tunnel IP of `fd00:...` will only be available _after_ -# launching the control plane with `omicron-package install`, since Omicron -# creates that address. - -set -eu -CATACOMB_TUNNEL="${CATACOMB_TUNNEL:-"[fd00:1122:3344:101::1]:54321"}" -echo "Populating debian" -oxide api /v1/images --method POST --input - < Date: Fri, 5 Apr 2024 10:13:44 -0700 Subject: [PATCH 068/334] Refactor xtask commands into separate files (#5422) This is a no-op PR in preparation for https://github.com/oxidecomputer/omicron/pull/5423 . I want to split xtasks into separate files so it's easier to see what's what. --- dev-tools/xtask/src/check_workspace_deps.rs | 124 ++++++++++ dev-tools/xtask/src/clippy.rs | 77 +++++++ dev-tools/xtask/src/main.rs | 211 ++---------------- .../src/{illumos.rs => verify_libraries.rs} | 2 +- 4 files changed, 216 insertions(+), 198 deletions(-) create mode 100644 dev-tools/xtask/src/check_workspace_deps.rs create mode 100644 dev-tools/xtask/src/clippy.rs rename dev-tools/xtask/src/{illumos.rs => verify_libraries.rs} (99%) diff --git a/dev-tools/xtask/src/check_workspace_deps.rs b/dev-tools/xtask/src/check_workspace_deps.rs new file mode 100644 index 0000000000..6e84380c69 --- /dev/null +++ b/dev-tools/xtask/src/check_workspace_deps.rs @@ -0,0 +1,124 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Subcommand: cargo xtask check-workspace-deps + +use anyhow::{bail, Context, Result}; +use camino::Utf8Path; +use cargo_toml::{Dependency, Manifest}; +use fs_err as fs; +use std::collections::BTreeMap; + +const WORKSPACE_HACK_PACKAGE_NAME: &str = "omicron-workspace-hack"; + +pub fn run_cmd() -> Result<()> { + // Ignore issues with "pq-sys". See the omicron-rpaths package for details. + const EXCLUDED: &[&'static str] = &["pq-sys"]; + + // Collect a list of all packages used in any workspace package as a + // workspace dependency. + let mut workspace_dependencies = BTreeMap::new(); + + // Collect a list of all packages used in any workspace package as a + // NON-workspace dependency. + let mut non_workspace_dependencies = BTreeMap::new(); + + // Load information about the Cargo workspace. 
+ let workspace = crate::load_workspace()?; + let mut nwarnings = 0; + let mut nerrors = 0; + + // Iterate the workspace packages and fill out the maps above. + for pkg_info in workspace.workspace_packages() { + if pkg_info.name == WORKSPACE_HACK_PACKAGE_NAME { + // Skip over workspace-hack because hakari doesn't yet support + // workspace deps: https://github.com/guppy-rs/guppy/issues/7 + continue; + } + + let manifest_path = &pkg_info.manifest_path; + let manifest = read_cargo_toml(manifest_path)?; + for tree in [ + &manifest.dependencies, + &manifest.dev_dependencies, + &manifest.build_dependencies, + ] { + for (name, dep) in tree { + if let Dependency::Inherited(inherited) = dep { + if inherited.workspace { + workspace_dependencies + .entry(name.to_owned()) + .or_insert_with(Vec::new) + .push(pkg_info.name.clone()); + + if !inherited.features.is_empty() { + eprintln!( + "warning: package is used as a workspace dep \ + with extra features: {:?} (in {:?})", + name, pkg_info.name, + ); + nwarnings += 1; + } + + continue; + } + } + + non_workspace_dependencies + .entry(name.to_owned()) + .or_insert_with(Vec::new) + .push(pkg_info.name.clone()); + } + } + } + + // Look for any packages that are used as both a workspace dependency and a + // non-workspace dependency. Generally, the non-workspace dependency should + // be replaced with a workspace dependency. + for (pkgname, ws_examples) in &workspace_dependencies { + if let Some(non_ws_examples) = non_workspace_dependencies.get(pkgname) { + eprintln!( + "error: package is used as both a workspace dep and a \ + non-workspace dep: {:?}", + pkgname + ); + eprintln!(" workspace dep: {}", ws_examples.join(", ")); + eprintln!(" non-workspace dep: {}", non_ws_examples.join(", ")); + nerrors += 1; + } + } + + // Look for any packages used as non-workspace dependencies by more than one + // workspace package. These should generally be moved to a workspace + // dependency. + for (pkgname, examples) in + non_workspace_dependencies.iter().filter(|(pkgname, examples)| { + examples.len() > 1 && !EXCLUDED.contains(&pkgname.as_str()) + }) + { + eprintln!( + "error: package is used by multiple workspace packages without \ + a workspace dependency: {:?}", + pkgname + ); + eprintln!(" used in: {}", examples.join(", ")); + nerrors += 1; + } + + eprintln!( + "check-workspace-deps: errors: {}, warnings: {}", + nerrors, nwarnings + ); + + if nerrors != 0 { + bail!("errors with workspace dependencies"); + } + + Ok(()) +} + +fn read_cargo_toml(path: &Utf8Path) -> Result { + let bytes = fs::read(path)?; + Manifest::from_slice(&bytes).with_context(|| format!("parse {:?}", path)) +} diff --git a/dev-tools/xtask/src/clippy.rs b/dev-tools/xtask/src/clippy.rs new file mode 100644 index 0000000000..02a71dc5de --- /dev/null +++ b/dev-tools/xtask/src/clippy.rs @@ -0,0 +1,77 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Subcommand: cargo xtask clippy + +use anyhow::{bail, Context, Result}; +use clap::Parser; +use std::process::Command; + +#[derive(Parser)] +pub struct ClippyArgs { + /// Automatically apply lint suggestions. 
+ #[clap(long)] + fix: bool, +} + +pub fn run_cmd(args: ClippyArgs) -> Result<()> { + let cargo = + std::env::var("CARGO").unwrap_or_else(|_| String::from("cargo")); + let mut command = Command::new(&cargo); + command.arg("clippy"); + + if args.fix { + command.arg("--fix"); + } + + command + // Make sure we check everything. + .arg("--all-targets") + .arg("--") + // For a list of lints, see + // https://rust-lang.github.io/rust-clippy/master. + // + // We disallow warnings by default. + .arg("--deny") + .arg("warnings") + // Clippy's style nits are useful, but not worth keeping in CI. This + // override belongs in src/lib.rs, and it is there, but that doesn't + // reliably work due to rust-lang/rust-clippy#6610. + .arg("--allow") + .arg("clippy::style") + // But continue to warn on anything in the "disallowed_" namespace. + // (These will be turned into errors by `--deny warnings` above.) + .arg("--warn") + .arg("clippy::disallowed_macros") + .arg("--warn") + .arg("clippy::disallowed_methods") + .arg("--warn") + .arg("clippy::disallowed_names") + .arg("--warn") + .arg("clippy::disallowed_script_idents") + .arg("--warn") + .arg("clippy::disallowed_types"); + + eprintln!( + "running: {:?} {}", + &cargo, + command + .get_args() + .map(|arg| format!("{:?}", arg.to_str().unwrap())) + .collect::>() + .join(" ") + ); + + let exit_status = command + .spawn() + .context("failed to spawn child process")? + .wait() + .context("failed to wait for child process")?; + + if !exit_status.success() { + bail!("clippy failed: {}", exit_status); + } + + Ok(()) +} diff --git a/dev-tools/xtask/src/main.rs b/dev-tools/xtask/src/main.rs index c682fc247e..c0d8a6aa64 100644 --- a/dev-tools/xtask/src/main.rs +++ b/dev-tools/xtask/src/main.rs @@ -6,18 +6,14 @@ //! //! See . -use anyhow::{bail, Context, Result}; -use camino::Utf8Path; +use anyhow::{Context, Result}; use cargo_metadata::Metadata; -use cargo_toml::{Dependency, Manifest}; use clap::{Parser, Subcommand}; -use fs_err as fs; -use std::{collections::BTreeMap, process::Command}; +mod check_workspace_deps; +mod clippy; #[cfg(target_os = "illumos")] -mod illumos; -#[cfg(target_os = "illumos")] -use illumos::cmd_verify_libraries; +mod verify_libraries; #[derive(Parser)] #[command(name = "cargo xtask", about = "Workspace-related developer tools")] @@ -32,208 +28,29 @@ enum Cmds { /// workspace CheckWorkspaceDeps, /// Run configured clippy checks - Clippy(ClippyArgs), + Clippy(clippy::ClippyArgs), /// Verify we are not leaking library bindings outside of intended /// crates VerifyLibraries, } -#[derive(Parser)] -struct ClippyArgs { - /// Automatically apply lint suggestions. - #[clap(long)] - fix: bool, -} - fn main() -> Result<()> { let args = Args::parse(); match args.cmd { - Cmds::Clippy(args) => cmd_clippy(args), - Cmds::CheckWorkspaceDeps => cmd_check_workspace_deps(), - Cmds::VerifyLibraries => cmd_verify_libraries(), - } -} - -fn cmd_clippy(args: ClippyArgs) -> Result<()> { - let cargo = - std::env::var("CARGO").unwrap_or_else(|_| String::from("cargo")); - let mut command = Command::new(&cargo); - command.arg("clippy"); - - if args.fix { - command.arg("--fix"); - } - - command - // Make sure we check everything. - .arg("--all-targets") - .arg("--") - // For a list of lints, see - // https://rust-lang.github.io/rust-clippy/master. - // - // We disallow warnings by default. - .arg("--deny") - .arg("warnings") - // Clippy's style nits are useful, but not worth keeping in CI. 
This - // override belongs in src/lib.rs, and it is there, but that doesn't - // reliably work due to rust-lang/rust-clippy#6610. - .arg("--allow") - .arg("clippy::style") - // But continue to warn on anything in the "disallowed_" namespace. - // (These will be turned into errors by `--deny warnings` above.) - .arg("--warn") - .arg("clippy::disallowed_macros") - .arg("--warn") - .arg("clippy::disallowed_methods") - .arg("--warn") - .arg("clippy::disallowed_names") - .arg("--warn") - .arg("clippy::disallowed_script_idents") - .arg("--warn") - .arg("clippy::disallowed_types"); - - eprintln!( - "running: {:?} {}", - &cargo, - command - .get_args() - .map(|arg| format!("{:?}", arg.to_str().unwrap())) - .collect::>() - .join(" ") - ); - - let exit_status = command - .spawn() - .context("failed to spawn child process")? - .wait() - .context("failed to wait for child process")?; - - if !exit_status.success() { - bail!("clippy failed: {}", exit_status); - } - - Ok(()) -} - -const WORKSPACE_HACK_PACKAGE_NAME: &str = "omicron-workspace-hack"; - -fn cmd_check_workspace_deps() -> Result<()> { - // Ignore issues with "pq-sys". See the omicron-rpaths package for details. - const EXCLUDED: &[&'static str] = &["pq-sys"]; - - // Collect a list of all packages used in any workspace package as a - // workspace dependency. - let mut workspace_dependencies = BTreeMap::new(); - - // Collect a list of all packages used in any workspace package as a - // NON-workspace dependency. - let mut non_workspace_dependencies = BTreeMap::new(); - - // Load information about the Cargo workspace. - let workspace = load_workspace()?; - let mut nwarnings = 0; - let mut nerrors = 0; - - // Iterate the workspace packages and fill out the maps above. - for pkg_info in workspace.workspace_packages() { - if pkg_info.name == WORKSPACE_HACK_PACKAGE_NAME { - // Skip over workspace-hack because hakari doesn't yet support - // workspace deps: https://github.com/guppy-rs/guppy/issues/7 - continue; - } - - let manifest_path = &pkg_info.manifest_path; - let manifest = read_cargo_toml(manifest_path)?; - for tree in [ - &manifest.dependencies, - &manifest.dev_dependencies, - &manifest.build_dependencies, - ] { - for (name, dep) in tree { - if let Dependency::Inherited(inherited) = dep { - if inherited.workspace { - workspace_dependencies - .entry(name.to_owned()) - .or_insert_with(Vec::new) - .push(pkg_info.name.clone()); - - if !inherited.features.is_empty() { - eprintln!( - "warning: package is used as a workspace dep \ - with extra features: {:?} (in {:?})", - name, pkg_info.name, - ); - nwarnings += 1; - } - - continue; - } - } - - non_workspace_dependencies - .entry(name.to_owned()) - .or_insert_with(Vec::new) - .push(pkg_info.name.clone()); - } - } - } - - // Look for any packages that are used as both a workspace dependency and a - // non-workspace dependency. Generally, the non-workspace dependency should - // be replaced with a workspace dependency. - for (pkgname, ws_examples) in &workspace_dependencies { - if let Some(non_ws_examples) = non_workspace_dependencies.get(pkgname) { - eprintln!( - "error: package is used as both a workspace dep and a \ - non-workspace dep: {:?}", - pkgname + Cmds::Clippy(args) => clippy::run_cmd(args), + Cmds::CheckWorkspaceDeps => check_workspace_deps::run_cmd(), + Cmds::VerifyLibraries => { + #[cfg(target_os = "illumos")] + return verify_libraries::run_cmd(); + #[cfg(not(target_os = "illumos"))] + unimplemented!( + "Library verification is only available on illumos!" 
); - eprintln!(" workspace dep: {}", ws_examples.join(", ")); - eprintln!(" non-workspace dep: {}", non_ws_examples.join(", ")); - nerrors += 1; } } - - // Look for any packages used as non-workspace dependencies by more than one - // workspace package. These should generally be moved to a workspace - // dependency. - for (pkgname, examples) in - non_workspace_dependencies.iter().filter(|(pkgname, examples)| { - examples.len() > 1 && !EXCLUDED.contains(&pkgname.as_str()) - }) - { - eprintln!( - "error: package is used by multiple workspace packages without \ - a workspace dependency: {:?}", - pkgname - ); - eprintln!(" used in: {}", examples.join(", ")); - nerrors += 1; - } - - eprintln!( - "check-workspace-deps: errors: {}, warnings: {}", - nerrors, nwarnings - ); - - if nerrors != 0 { - bail!("errors with workspace dependencies"); - } - - Ok(()) -} - -#[cfg(not(target_os = "illumos"))] -fn cmd_verify_libraries() -> Result<()> { - unimplemented!("Library verification is only available on illumos!") -} - -fn read_cargo_toml(path: &Utf8Path) -> Result { - let bytes = fs::read(path)?; - Manifest::from_slice(&bytes).with_context(|| format!("parse {:?}", path)) } -fn load_workspace() -> Result { +pub fn load_workspace() -> Result { cargo_metadata::MetadataCommand::new() .exec() .context("loading cargo metadata") diff --git a/dev-tools/xtask/src/illumos.rs b/dev-tools/xtask/src/verify_libraries.rs similarity index 99% rename from dev-tools/xtask/src/illumos.rs rename to dev-tools/xtask/src/verify_libraries.rs index a2daab2c9e..72aa622a07 100644 --- a/dev-tools/xtask/src/illumos.rs +++ b/dev-tools/xtask/src/verify_libraries.rs @@ -83,7 +83,7 @@ fn verify_executable( Ok(()) } -pub fn cmd_verify_libraries() -> Result<()> { +pub fn run_cmd() -> Result<()> { let metadata = load_workspace()?; let mut config_path = metadata.workspace_root; config_path.push(".cargo/xtask.toml"); From c60858fe39bfeb5d958842d73f28e77c5cff41a4 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 5 Apr 2024 11:08:36 -0700 Subject: [PATCH 069/334] [nexus] use the SledFilter type in the database as well (#5430) Now that we have the `SledFilter` type, we should be able to use it for database queries as well. I spent some time trying to figure out the most ergonomic way to use the `sled_filter` method, and this is the one I settled on. This also serves as an example for how we're going to do this for blueprint zone filters. --- nexus/db-model/src/sled.rs | 64 ++++++++ nexus/db-queries/src/db/datastore/sled.rs | 54 +++---- nexus/types/src/deployment/planning_input.rs | 145 +++++++++++++++---- nexus/types/src/external_api/views.rs | 63 ++------ 4 files changed, 222 insertions(+), 104 deletions(-) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index a603f28d57..1fa436c992 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -347,3 +347,67 @@ impl SledReservationConstraintBuilder { self.constraints } } + +mod diesel_util { + use crate::{ + schema::sled::{sled_policy, sled_state}, + sled_policy::DbSledPolicy, + to_db_sled_policy, + }; + use diesel::{ + helper_types::{And, EqAny}, + prelude::*, + query_dsl::methods::FilterDsl, + }; + use nexus_types::{ + deployment::SledFilter, + external_api::views::{SledPolicy, SledState}, + }; + + /// An extension trait to apply a [`SledFilter`] to a Diesel expression. + /// + /// This is applicable to any Diesel expression which includes the `sled` + /// table. 
+ /// + /// This needs to live here, rather than in `nexus-db-queries`, because it + /// names the `DbSledPolicy` type which is private to this crate. + pub trait ApplySledFilterExt { + type Output; + + /// Applies a [`SledFilter`] to a Diesel expression. + fn sled_filter(self, filter: SledFilter) -> Self::Output; + } + + impl ApplySledFilterExt for E + where + E: FilterDsl, + { + type Output = E::Output; + + fn sled_filter(self, filter: SledFilter) -> Self::Output { + use crate::schema::sled::dsl as sled_dsl; + + // These are only boxed for ease of reference above. + let all_matching_policies: BoxedIterator = Box::new( + SledPolicy::all_matching(filter).map(to_db_sled_policy), + ); + let all_matching_states: BoxedIterator = + Box::new(SledState::all_matching(filter).map(Into::into)); + + FilterDsl::filter( + self, + sled_dsl::sled_policy + .eq_any(all_matching_policies) + .and(sled_dsl::sled_state.eq_any(all_matching_states)), + ) + } + } + + type BoxedIterator = Box>; + type SledFilterQuery = And< + EqAny>, + EqAny>, + >; +} + +pub use diesel_util::ApplySledFilterExt; diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index ad9edc063c..667516fe23 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -24,6 +24,8 @@ use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_model::ApplySledFilterExt; +use nexus_types::deployment::SledFilter; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::identity::Asset; @@ -198,26 +200,22 @@ impl DataStore { // Generate a query describing all of the sleds that have space // for this reservation. - let mut sled_targets = - sled_dsl::sled - .left_join( - resource_dsl::sled_resource - .on(resource_dsl::sled_id.eq(sled_dsl::id)), - ) - .group_by(sled_dsl::id) - .having( - sled_has_space_for_threads - .and(sled_has_space_for_rss) - .and(sled_has_space_in_reservoir), - ) - .filter(sled_dsl::time_deleted.is_null()) - // Ensure that the sled is in-service and active. - .filter(sled_dsl::sled_policy.eq( - to_db_sled_policy(SledPolicy::provisionable()), - )) - .filter(sled_dsl::sled_state.eq(SledState::Active)) - .select(sled_dsl::id) - .into_boxed(); + let mut sled_targets = sled_dsl::sled + .left_join( + resource_dsl::sled_resource + .on(resource_dsl::sled_id.eq(sled_dsl::id)), + ) + .group_by(sled_dsl::id) + .having( + sled_has_space_for_threads + .and(sled_has_space_for_rss) + .and(sled_has_space_in_reservoir), + ) + .filter(sled_dsl::time_deleted.is_null()) + // Ensure that reservations can be created on the sled. + .sled_filter(SledFilter::ReservationCreate) + .select(sled_dsl::id) + .into_boxed(); // Further constrain the sled IDs according to any caller- // supplied constraints. @@ -484,8 +482,8 @@ impl DataStore { /// # Errors /// /// This method returns an error if the sled policy is not a state that is - /// valid to decommission from (i.e. if, for the current sled policy, - /// [`SledPolicy::is_decommissionable`] returns `false`). + /// valid to decommission from (i.e. if [`SledPolicy::is_decommissionable`] + /// returns `false`). pub async fn sled_set_state_to_decommissioned( &self, opctx: &OpContext, @@ -1147,7 +1145,9 @@ mod test { ( // In-service and active sleds can be marked as expunged. 
Before::new( - predicate::in_iter(SledPolicy::all_in_service()), + predicate::in_iter(SledPolicy::all_matching( + SledFilter::InService, + )), predicate::eq(SledState::Active), ), SledTransition::Policy(SledPolicy::Expunged), @@ -1156,7 +1156,9 @@ mod test { // The provision policy of in-service sleds can be changed, or // kept the same (1 of 2). Before::new( - predicate::in_iter(SledPolicy::all_in_service()), + predicate::in_iter(SledPolicy::all_matching( + SledFilter::InService, + )), predicate::eq(SledState::Active), ), SledTransition::Policy(SledPolicy::InService { @@ -1166,7 +1168,9 @@ mod test { ( // (2 of 2) Before::new( - predicate::in_iter(SledPolicy::all_in_service()), + predicate::in_iter(SledPolicy::all_matching( + SledFilter::InService, + )), predicate::eq(SledState::Active), ), SledTransition::Policy(SledPolicy::InService { diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 1a0e7abd7a..a90aa61db3 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -6,6 +6,7 @@ //! blueprints. use crate::external_api::views::SledPolicy; +use crate::external_api::views::SledProvisionPolicy; use crate::external_api::views::SledState; use crate::inventory::ZpoolName; use ipnetwork::IpNetwork; @@ -22,6 +23,7 @@ use serde::Serialize; use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::collections::BTreeSet; +use strum::IntoEnumIterator; use uuid::Uuid; /// Describes the resources available on each sled for the planner @@ -85,6 +87,119 @@ pub enum SledFilter { /// Sleds that are in service (even if they might not be eligible for /// discretionary services). InService, + + /// Sleds on which reservations can be created. + ReservationCreate, +} + +impl SledFilter { + /// Returns true if self matches the provided policy and state. + pub fn matches_policy_and_state( + self, + policy: SledPolicy, + state: SledState, + ) -> bool { + policy.matches(self) && state.matches(self) + } +} + +impl SledPolicy { + /// Returns true if self matches the filter. + /// + /// Any users of this must also compare against the [`SledState`], if + /// relevant: a sled filter is fully matched when it matches both the + /// policy and the state. See [`SledFilter::matches_policy_and_state`]. + pub fn matches(self, filter: SledFilter) -> bool { + // Some notes: + // + // # Match style + // + // This code could be written in three ways: + // + // 1. match self { match filter { ... } } + // 2. match filter { match self { ... } } + // 3. match (self, filter) { ... } + // + // We choose 1 here because we expect many filters and just a few + // policies, and 1 is the easiest form to represent that. + // + // # Illegal states + // + // Some of the code that checks against both policies and filters is + // effectively checking for illegal states. We shouldn't be able to + // have a policy+state combo where the policy says the sled is in + // service but the state is decommissioned, for example, but the two + // separate types let us represent that. Code that ANDs + // policy.matches(filter) and state.matches(filter) naturally guards + // against those states. 
+ match self { + SledPolicy::InService { + provision_policy: SledProvisionPolicy::Provisionable, + } => match filter { + SledFilter::All => true, + SledFilter::EligibleForDiscretionaryServices => true, + SledFilter::InService => true, + SledFilter::ReservationCreate => true, + }, + SledPolicy::InService { + provision_policy: SledProvisionPolicy::NonProvisionable, + } => match filter { + SledFilter::All => true, + SledFilter::EligibleForDiscretionaryServices => false, + SledFilter::InService => true, + SledFilter::ReservationCreate => false, + }, + SledPolicy::Expunged => match filter { + SledFilter::All => true, + SledFilter::EligibleForDiscretionaryServices => false, + SledFilter::InService => false, + SledFilter::ReservationCreate => false, + }, + } + } + + /// Returns all policies matching the given filter. + /// + /// This is meant for database access, and is generally paired with + /// [`SledState::all_matching`]. See `ApplySledFilterExt` in + /// nexus-db-model. + pub fn all_matching(filter: SledFilter) -> impl Iterator { + Self::iter().filter(move |policy| policy.matches(filter)) + } +} + +impl SledState { + /// Returns true if self matches the filter. + /// + /// Any users of this must also compare against the [`SledPolicy`], if + /// relevant: a sled filter is fully matched when both the policy and the + /// state match. See [`SledFilter::matches_policy_and_state`]. + pub fn matches(self, filter: SledFilter) -> bool { + // See `SledFilter::matches` above for some notes. + match self { + SledState::Active => match filter { + SledFilter::All => true, + SledFilter::EligibleForDiscretionaryServices => true, + SledFilter::InService => true, + SledFilter::ReservationCreate => true, + }, + SledState::Decommissioned => match filter { + SledFilter::All => true, + SledFilter::EligibleForDiscretionaryServices => false, + SledFilter::InService => false, + SledFilter::ReservationCreate => false, + }, + } + } + + /// Returns all policies matching the given filter. + /// + /// This is meant for database access, and is generally paired with + /// [`SledPolicy::all_matching`]. See `ApplySledFilterExt` in + /// nexus-db-model. + pub fn all_matching(filter: SledFilter) -> impl Iterator { + Self::iter().filter(move |state| state.matches(filter)) + } } /// Fleet-wide deployment policy @@ -176,32 +291,10 @@ impl PlanningInput { &self, filter: SledFilter, ) -> impl Iterator, &SledDetails)> + '_ { - self.sleds.iter().filter_map(move |(&sled_id, details)| match filter { - SledFilter::All => Some((sled_id, details)), - SledFilter::EligibleForDiscretionaryServices => { - if details.policy.is_provisionable() - && details.state.is_eligible_for_discretionary_services() - { - Some((sled_id, details)) - } else { - None - } - } - SledFilter::InService => { - if details.policy.is_in_service() { - // Check for illegal states; we shouldn't be able to have a - // policy+state combo where the policy says the sled is in - // service but the state is decommissioned, for example, but - // the two separate types let us represent that, so we'll - // guard against it here. 
- match details.state { - SledState::Active => Some((sled_id, details)), - SledState::Decommissioned => None, - } - } else { - None - } - } + self.sleds.iter().filter_map(move |(&sled_id, details)| { + filter + .matches_policy_and_state(details.policy, details.state) + .then_some((sled_id, details)) }) } diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 2ffe508b9a..feed319692 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -592,42 +592,6 @@ impl SledPolicy { Self::InService { provision_policy: SledProvisionPolicy::Provisionable } } - /// Returns the list of all in-service policies. - pub fn all_in_service() -> &'static [Self] { - &[ - Self::InService { - provision_policy: SledProvisionPolicy::Provisionable, - }, - Self::InService { - provision_policy: SledProvisionPolicy::NonProvisionable, - }, - ] - } - - /// Returns true if the sled is in-service. - /// - /// Note that a sled being in service does not mean it's provisionable; most - /// consumers probably want `is_provisionable` instead. - pub fn is_in_service(&self) -> bool { - match self { - Self::InService { .. } => true, - Self::Expunged => false, - } - } - - /// Returns true if the sled can have services provisioned on it. - pub fn is_provisionable(&self) -> bool { - match self { - Self::InService { - provision_policy: SledProvisionPolicy::Provisionable, - } => true, - Self::InService { - provision_policy: SledProvisionPolicy::NonProvisionable, - } - | Self::Expunged => false, - } - } - /// Returns the provision policy, if the sled is in service. pub fn provision_policy(&self) -> Option { match self { @@ -636,9 +600,13 @@ impl SledPolicy { } } - /// Returns true if the sled can be decommissioned in this state. + /// Returns true if the sled can be decommissioned with this policy + /// + /// This is a method here, rather than being a variant on `SledFilter`, + /// because the "decommissionable" condition only has meaning for policies, + /// not states. pub fn is_decommissionable(&self) -> bool { - // This should be kept in sync with decommissionable_states below. + // This should be kept in sync with `all_decommissionable` below. match self { Self::InService { .. } => false, Self::Expunged => true, @@ -647,6 +615,10 @@ impl SledPolicy { /// Returns all the possible policies a sled can have for it to be /// decommissioned. + /// + /// This is a method here, rather than being a variant on `SledFilter`, + /// because the "decommissionable" condition only has meaning for policies, + /// not states. pub fn all_decommissionable() -> &'static [Self] { &[Self::Expunged] } @@ -691,21 +663,6 @@ pub enum SledState { Decommissioned, } -impl SledState { - /// Returns true if the sled state makes it eligible for services that - /// aren't required to be on every sled. - /// - /// For example, NTP must exist on every sled, but Nexus does not have to. - pub fn is_eligible_for_discretionary_services(&self) -> bool { - // (Explicit match, so that this fails to compile if a new state is - // added.) 
- match self { - SledState::Active => true, - SledState::Decommissioned => false, - } - } -} - impl fmt::Display for SledState { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { From fa85588c2d468803919b4209e4e837b28190a24c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Apr 2024 19:11:34 +0000 Subject: [PATCH 070/334] chore(deps): update rust crate walkdir to 2.5 (#5418) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ad95fe27ca..bd663e5d40 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10790,9 +10790,9 @@ dependencies = [ [[package]] name = "walkdir" -version = "2.4.0" +version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71d857dc86794ca4c280d616f7da00d2dbfd8cd788846559a6813e6aa4b54ee" +checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b" dependencies = [ "same-file", "winapi-util", diff --git a/Cargo.toml b/Cargo.toml index e03bd2ecbe..06597147af 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -443,7 +443,7 @@ update-common = { path = "update-common" } update-engine = { path = "update-engine" } usdt = "0.5.0" uuid = { version = "1.7.0", features = ["serde", "v4"] } -walkdir = "2.4" +walkdir = "2.5" whoami = "1.5" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } From a83e044612b4035c2c35c25286debc71090bc62c Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 5 Apr 2024 15:35:00 -0400 Subject: [PATCH 071/334] Fix `CAST(... AS UUID)` error reporting gadget for newer CRDB (#5434) See the comment on the expanded CTE in `nexus/db-queries/src/db/datastore/deployment.rs` for details. --- nexus/db-queries/src/db/datastore/deployment.rs | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 4b9b473bbd..d956dffb31 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -945,7 +945,17 @@ impl From for Error { /// AND "parent_blueprint_id" IS NULL /// AND NOT EXISTS (SELECT version FROM current_target) /// ) = 1, -/// , +/// -- Sometime between v22.1.9 and v22.2.19, Cockroach's type checker +/// -- became too smart for our `CAST(... as UUID)` error checking +/// -- gadget: it can infer that `` must be a UUID, so +/// -- then tries to parse 'parent-not-target' and 'no-such-blueprint' +/// -- as UUIDs _during typechecking_, which causes the query to always +/// -- fail. We can defeat this by casting the UUID to text here, which +/// -- will allow the 'parent-not-target' and 'no-such-blueprint' +/// -- sentinels to survive type checking, making it to query execution +/// -- where they will only be cast to UUIDs at runtime in the failure +/// -- cases they're supposed to catch. 
+/// CAST( AS text), /// 'parent-not-target' /// ) /// ) AS UUID) @@ -1109,8 +1119,9 @@ impl QueryFragment for InsertTargetQuery { SELECT version FROM current_target) \ ) = 1, ", ); + out.push_sql(" CAST("); out.push_bind_param::(&self.target_id)?; - out.push_sql(", "); + out.push_sql(" AS text), "); out.push_bind_param::( &PARENT_NOT_TARGET_SENTINEL, )?; From e217030b30e4e9ccd6c0576ef57f7da00a6edb50 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 5 Apr 2024 13:45:05 -0700 Subject: [PATCH 072/334] [nexus-db-queries] make vpc_resolve_to_sled be aware of expunged and decommissioned sleds (#5433) We shouldn't be sending firewall rules to sleds that are gone. This is also a nice demonstration of the new `ApplySledFilterExt` added in #5430. Depends on #5430. --- nexus/db-queries/src/db/datastore/vpc.rs | 41 ++++++++++++++++++-- nexus/types/src/deployment/planning_input.rs | 8 ++++ 2 files changed, 45 insertions(+), 4 deletions(-) diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index dd05498038..d73b71a09b 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -14,6 +14,7 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::fixed_data::vpc::SERVICES_VPC_ID; use crate::db::identity::Resource; +use crate::db::model::ApplySledFilterExt; use crate::db::model::IncompleteVpc; use crate::db::model::InstanceNetworkInterface; use crate::db::model::Name; @@ -45,6 +46,7 @@ use diesel::result::Error as DieselError; use ipnetwork::IpNetwork; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::SledFilter; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; @@ -766,6 +768,7 @@ impl DataStore { let mut sleds = sled::table .select(Sled::as_select()) .filter(sled::time_deleted.is_null()) + .sled_filter(SledFilter::VpcFirewall) .into_boxed(); if !sleds_filter.is_empty() { sleds = sleds.filter(sled::id.eq_any(sleds_filter.to_vec())); @@ -1284,6 +1287,7 @@ mod tests { use crate::db::datastore::test::sled_baseboard_for_test; use crate::db::datastore::test::sled_system_hardware_for_test; use crate::db::datastore::test_utils::datastore_test; + use crate::db::datastore::test_utils::IneligibleSleds; use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use crate::db::model::Project; use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; @@ -1673,8 +1677,8 @@ mod tests { service_sled_ids }; - // Create four sleds. - let harness = Harness::new(4); + // Create five sleds. + let harness = Harness::new(5); for sled in harness.db_sleds() { datastore.sled_upsert(sled).await.expect("failed to upsert sled"); } @@ -1859,8 +1863,18 @@ mod tests { fetch_service_sled_ids().await ); - // Finally, create a blueprint that includes our third and fourth sleds, - // make it the target, and ensure we resolve to all four sleds. + // --- + + // Add a vNIC record for our fifth sled's Nexus, then create a blueprint + // that includes sleds with indexes 2, 3, and 4. Make it the target, + // and ensure we resolve to all five sleds. 
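To make the intent concrete, here is a rough, Diesel-free sketch of what the new filter expresses; the flattened enums and sled names below are illustrative stand-ins for the real nexus-types definitions, not code from this change. A sled only receives VPC firewall rules when both its policy and its state match the filter:

    fn main() {
        #[derive(Clone, Copy, PartialEq)]
        enum Policy { Provisionable, NonProvisionable, Expunged }
        #[derive(Clone, Copy, PartialEq)]
        enum State { Active, Decommissioned }

        // The idea behind SledFilter::VpcFirewall: expunged or decommissioned
        // sleds are gone, so they should not be sent firewall rules.
        let matches_vpc_firewall =
            |p: Policy, s: State| p != Policy::Expunged && s == State::Active;

        let sleds = [
            ("sled-a", Policy::Provisionable, State::Active),
            ("sled-b", Policy::NonProvisionable, State::Active),
            ("sled-c", Policy::Expunged, State::Active),
            ("sled-d", Policy::Provisionable, State::Decommissioned),
        ];

        // Analogous to `.sled_filter(SledFilter::VpcFirewall)` on the query:
        // keep a sled only when policy and state both match.
        let firewall_targets: Vec<_> = sleds
            .iter()
            .filter(|(_, p, s)| matches_vpc_firewall(*p, *s))
            .map(|(name, _, _)| *name)
            .collect();

        assert_eq!(firewall_targets, ["sled-a", "sled-b"]);
    }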
+ datastore + .service_create_network_interface_raw( + &opctx, + harness.db_services().nth(4).unwrap().1, + ) + .await + .expect("failed to insert service VNIC"); let bp4_zones = { let mut zones = BTreeMap::new(); for (sled_id, zone_config) in @@ -1904,6 +1918,25 @@ mod tests { .expect("failed to set blueprint target"); assert_eq!(harness.sled_ids, fetch_service_sled_ids().await); + // --- + + // Mark some sleds as ineligible. Only the non-provisionable and + // in-service sleds should be returned. + let ineligible = IneligibleSleds { + expunged: harness.sled_ids[0], + decommissioned: harness.sled_ids[1], + illegal_decommissioned: harness.sled_ids[2], + non_provisionable: harness.sled_ids[3], + }; + ineligible + .setup(&opctx, &datastore) + .await + .expect("failed to set up ineligible sleds"); + + assert_eq!(&harness.sled_ids[3..=4], fetch_service_sled_ids().await); + + // --- + db.cleanup().await.unwrap(); logctx.cleanup_successful(); } diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index a90aa61db3..da5ae07eef 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -90,6 +90,9 @@ pub enum SledFilter { /// Sleds on which reservations can be created. ReservationCreate, + + /// Sleds which should be sent VPC firewall rules. + VpcFirewall, } impl SledFilter { @@ -140,6 +143,7 @@ impl SledPolicy { SledFilter::EligibleForDiscretionaryServices => true, SledFilter::InService => true, SledFilter::ReservationCreate => true, + SledFilter::VpcFirewall => true, }, SledPolicy::InService { provision_policy: SledProvisionPolicy::NonProvisionable, @@ -148,12 +152,14 @@ impl SledPolicy { SledFilter::EligibleForDiscretionaryServices => false, SledFilter::InService => true, SledFilter::ReservationCreate => false, + SledFilter::VpcFirewall => true, }, SledPolicy::Expunged => match filter { SledFilter::All => true, SledFilter::EligibleForDiscretionaryServices => false, SledFilter::InService => false, SledFilter::ReservationCreate => false, + SledFilter::VpcFirewall => false, }, } } @@ -182,12 +188,14 @@ impl SledState { SledFilter::EligibleForDiscretionaryServices => true, SledFilter::InService => true, SledFilter::ReservationCreate => true, + SledFilter::VpcFirewall => true, }, SledState::Decommissioned => match filter { SledFilter::All => true, SledFilter::EligibleForDiscretionaryServices => false, SledFilter::InService => false, SledFilter::ReservationCreate => false, + SledFilter::VpcFirewall => false, }, } } From 7620b5ab5877cba39367e06a51db61c54af79c96 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 5 Apr 2024 15:27:35 -0700 Subject: [PATCH 073/334] [xtask] add some style lints to clippy (#5437) I noticed that in a few spots we were doing things like `.iter().skip(2).next()` or `.len() == 0` -- turns out the clippy lints for them were disabled. Turn some of the style lints on as they make sense. 
--- dev-tools/omdb/src/bin/omdb/nexus.rs | 4 ++-- .../reconfigurator-cli/tests/test_basic.rs | 2 +- dev-tools/xtask/src/clippy.rs | 18 +++++++++++++++++- illumos-utils/src/opte/firewall_rules.rs | 6 +++--- nexus/db-queries/src/authn/external/mod.rs | 2 +- nexus/db-queries/src/authn/silos.rs | 2 +- .../db-queries/src/db/datastore/address_lot.rs | 2 +- nexus/db-queries/src/db/datastore/inventory.rs | 4 ++-- nexus/test-utils-macros/src/lib.rs | 2 +- nexus/test-utils/src/http_testing.rs | 2 +- nexus/tests/integration_tests/basic.rs | 10 +++++----- nexus/types/src/internal_api/views.rs | 2 +- oximeter/db/src/client/oxql.rs | 2 +- oximeter/db/src/oxql/ast/table_ops/filter.rs | 2 +- oximeter/db/src/oxql/ast/table_ops/group_by.rs | 2 +- oximeter/oximeter/src/histogram.rs | 11 ++++++++--- package/src/dot.rs | 2 +- test-utils/src/dev/clickhouse.rs | 6 +++--- test-utils/src/dev/db.rs | 2 +- update-engine/src/engine.rs | 2 +- 20 files changed, 53 insertions(+), 32 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 64b1ef7276..4bee664c71 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -589,7 +589,7 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { ); let server_results = &details.server_results; - if server_results.len() != 0 { + if !server_results.is_empty() { let rows = server_results.iter().map(|(addr, result)| { DnsPropRow { dns_server_addr: addr, @@ -713,7 +713,7 @@ fn print_task_details(bgtask: &BackgroundTask, details: &serde_json::Value) { println!(""); println!(" TLS certificates: {}", tls_cert_rows.len()); - if tls_cert_rows.len() > 0 { + if !tls_cert_rows.is_empty() { let table = tabled::Table::new(tls_cert_rows) .with(tabled::settings::Style::empty()) .with(tabled::settings::Padding::new(0, 1, 0, 0)) diff --git a/dev-tools/reconfigurator-cli/tests/test_basic.rs b/dev-tools/reconfigurator-cli/tests/test_basic.rs index 87450181ec..38c6e5a3c5 100644 --- a/dev-tools/reconfigurator-cli/tests/test_basic.rs +++ b/dev-tools/reconfigurator-cli/tests/test_basic.rs @@ -135,7 +135,7 @@ async fn test_blueprint_edit(cptestctx: &ControlPlaneTestContext) { assert!(!state1.external_dns.is_empty()); // unwrap: we checked above that this list was non-empty. - let blueprint = state1.blueprints.iter().next().unwrap(); + let blueprint = state1.blueprints.first().unwrap(); // Write a reconfigurator-cli script to load the file, edit the // blueprint, and save the entire state to a new file. diff --git a/dev-tools/xtask/src/clippy.rs b/dev-tools/xtask/src/clippy.rs index 02a71dc5de..babb86cdaf 100644 --- a/dev-tools/xtask/src/clippy.rs +++ b/dev-tools/xtask/src/clippy.rs @@ -51,7 +51,23 @@ pub fn run_cmd(args: ClippyArgs) -> Result<()> { .arg("--warn") .arg("clippy::disallowed_script_idents") .arg("--warn") - .arg("clippy::disallowed_types"); + .arg("clippy::disallowed_types") + // Warn on some more style lints that are relatively stable and make + // sense. 
+ .arg("--warn") + .arg("clippy::iter_cloned_collect") + .arg("--warn") + .arg("clippy::iter_next_slice") + .arg("--warn") + .arg("clippy::iter_nth") + .arg("--warn") + .arg("clippy::iter_nth_zero") + .arg("--warn") + .arg("clippy::iter_skip_next") + .arg("--warn") + .arg("clippy::len_zero") + .arg("--warn") + .arg("clippy::redundant_field_names"); eprintln!( "running: {:?} {}", diff --git a/illumos-utils/src/opte/firewall_rules.rs b/illumos-utils/src/opte/firewall_rules.rs index 78d2ec0b73..02882a226b 100644 --- a/illumos-utils/src/opte/firewall_rules.rs +++ b/illumos-utils/src/opte/firewall_rules.rs @@ -62,7 +62,7 @@ impl FromVpcFirewallRule for VpcFirewallRule { fn hosts(&self) -> Vec
{ match self.filter_hosts { - Some(ref hosts) if hosts.len() > 0 => hosts + Some(ref hosts) if !hosts.is_empty() => hosts .iter() .map(|host| match host { HostIdentifier::Ip(IpNet::V4(net)) @@ -98,7 +98,7 @@ impl FromVpcFirewallRule for VpcFirewallRule { fn ports(&self) -> Ports { match self.filter_ports { - Some(ref ports) if ports.len() > 0 => Ports::PortList( + Some(ref ports) if !ports.is_empty() => Ports::PortList( ports .iter() .flat_map(|range| { @@ -117,7 +117,7 @@ impl FromVpcFirewallRule for VpcFirewallRule { fn protos(&self) -> Vec { match self.filter_protocols { - Some(ref protos) if protos.len() > 0 => protos + Some(ref protos) if !protos.is_empty() => protos .iter() .map(|proto| { ProtoFilter::Proto(match proto { diff --git a/nexus/db-queries/src/authn/external/mod.rs b/nexus/db-queries/src/authn/external/mod.rs index 051db35ebf..623544d38c 100644 --- a/nexus/db-queries/src/authn/external/mod.rs +++ b/nexus/db-queries/src/authn/external/mod.rs @@ -94,7 +94,7 @@ where }), Err(source) => Err(authn::Error { reason: Reason::LoadSiloAuthnPolicy { source }, - schemes_tried: schemes_tried, + schemes_tried, }), }; } diff --git a/nexus/db-queries/src/authn/silos.rs b/nexus/db-queries/src/authn/silos.rs index ff1ae71133..fc5068fc3c 100644 --- a/nexus/db-queries/src/authn/silos.rs +++ b/nexus/db-queries/src/authn/silos.rs @@ -413,7 +413,7 @@ impl SamlIdentityProvider { group.trim().to_string(); // Skip empty groups - if group.len() == 0 { + if group.is_empty() { continue; } diff --git a/nexus/db-queries/src/db/datastore/address_lot.rs b/nexus/db-queries/src/db/datastore/address_lot.rs index 9c75c6fd1b..459c2a4c36 100644 --- a/nexus/db-queries/src/db/datastore/address_lot.rs +++ b/nexus/db-queries/src/db/datastore/address_lot.rs @@ -384,7 +384,7 @@ pub(crate) async fn try_reserve_block( address_lot_id: lot_id, first_address: inet, last_address: inet, - anycast: anycast, + anycast, }; diesel::insert_into(rsvd_block_dsl::address_lot_rsvd_block) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 1a6d59337c..0b815b686c 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -1049,8 +1049,8 @@ impl DataStore { .rev() .find(|(_i, (_collection_id, nerrors))| *nerrors == 0); let candidate = match last_completed_idx { - Some((0, _)) => candidates.iter().skip(1).next(), - _ => candidates.iter().next(), + Some((0, _)) => candidates.get(1), + _ => candidates.first(), } .map(|(collection_id, _nerrors)| *collection_id); if let Some(c) = candidate { diff --git a/nexus/test-utils-macros/src/lib.rs b/nexus/test-utils-macros/src/lib.rs index ac21768641..2b87b7a030 100644 --- a/nexus/test-utils-macros/src/lib.rs +++ b/nexus/test-utils-macros/src/lib.rs @@ -88,7 +88,7 @@ pub fn nexus_test(attrs: TokenStream, input: TokenStream) -> TokenStream { syn::ReturnType::Default => true, syn::ReturnType::Type(_, ref t) => { if let syn::Type::Tuple(syn::TypeTuple { elems, .. }) = &**t { - elems.len() == 0 + elems.is_empty() } else { false } diff --git a/nexus/test-utils/src/http_testing.rs b/nexus/test-utils/src/http_testing.rs index ae62218c93..89fc99f4c9 100644 --- a/nexus/test-utils/src/http_testing.rs +++ b/nexus/test-utils/src/http_testing.rs @@ -424,7 +424,7 @@ impl<'a> RequestBuilder<'a> { // the body. 
if status == http::StatusCode::NO_CONTENT { ensure!( - response_body.len() == 0, + response_body.is_empty(), "expected empty response for 204 status code" ) } diff --git a/nexus/tests/integration_tests/basic.rs b/nexus/tests/integration_tests/basic.rs index 282ec0cd96..cd23b7dd87 100644 --- a/nexus/tests/integration_tests/basic.rs +++ b/nexus/tests/integration_tests/basic.rs @@ -205,13 +205,13 @@ async fn test_projects_basic(cptestctx: &ControlPlaneTestContext) { assert_eq!(initial_projects.len(), 3); assert_eq!(initial_projects[0].identity.id, new_project_ids[0]); assert_eq!(initial_projects[0].identity.name, "simproject1"); - assert!(initial_projects[0].identity.description.len() > 0); + assert!(!initial_projects[0].identity.description.is_empty()); assert_eq!(initial_projects[1].identity.id, new_project_ids[1]); assert_eq!(initial_projects[1].identity.name, "simproject2"); - assert!(initial_projects[1].identity.description.len() > 0); + assert!(!initial_projects[1].identity.description.is_empty()); assert_eq!(initial_projects[2].identity.id, new_project_ids[2]); assert_eq!(initial_projects[2].identity.name, "simproject3"); - assert!(initial_projects[2].identity.description.len() > 0); + assert!(!initial_projects[2].identity.description.is_empty()); // Basic test of out-of-the-box GET project let project = project_get(&client, "/v1/projects/simproject2").await; @@ -219,7 +219,7 @@ async fn test_projects_basic(cptestctx: &ControlPlaneTestContext) { assert_eq!(project.identity.id, expected.identity.id); assert_eq!(project.identity.name, expected.identity.name); assert_eq!(project.identity.description, expected.identity.description); - assert!(project.identity.description.len() > 0); + assert!(!project.identity.description.is_empty()); // Delete "simproject2", but first delete: // - The default subnet within the default VPC @@ -440,7 +440,7 @@ async fn test_projects_basic(cptestctx: &ControlPlaneTestContext) { assert_eq!(projects[1].identity.name, "lil-lightnin"); assert_eq!(projects[1].identity.description, "little lightning"); assert_eq!(projects[2].identity.name, "simproject1"); - assert!(projects[2].identity.description.len() > 0); + assert!(!projects[2].identity.description.is_empty()); } #[nexus_test] diff --git a/nexus/types/src/internal_api/views.rs b/nexus/types/src/internal_api/views.rs index b7a097431b..fde2d07072 100644 --- a/nexus/types/src/internal_api/views.rs +++ b/nexus/types/src/internal_api/views.rs @@ -141,7 +141,7 @@ impl From for SagaState { }, .. } => SagaState::Failed { - error_node_name: error_node_name, + error_node_name, error_info: SagaErrorInfo::from(error_source), }, } diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index 9da4abd007..7816d5c25f 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -662,7 +662,7 @@ impl Client { // Push the predicate that selects the timeseries keys, which // are unique to this group. - let maybe_key_set = if group.consistent_keys.len() > 0 { + let maybe_key_set = if !group.consistent_keys.is_empty() { let mut chunk = String::from("timeseries_key IN ("); let keys = group .consistent_keys diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs index e97673c8f8..d363ec58f0 100644 --- a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -286,7 +286,7 @@ impl Filter { // Apply the filtering table operation. 
pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { anyhow::ensure!( - tables.len() >= 1, + !tables.is_empty(), "Filtering operations require at least one table", ); let mut output_tables = Vec::with_capacity(tables.len()); diff --git a/oximeter/db/src/oxql/ast/table_ops/group_by.rs b/oximeter/db/src/oxql/ast/table_ops/group_by.rs index da2b1413db..3284c70c1f 100644 --- a/oximeter/db/src/oxql/ast/table_ops/group_by.rs +++ b/oximeter/db/src/oxql/ast/table_ops/group_by.rs @@ -48,7 +48,7 @@ impl GroupBy { } fn check_input_timeseries(input: &Timeseries) -> Result<(), Error> { - anyhow::ensure!(input.points.len() > 0, "Timeseries cannot be empty"); + anyhow::ensure!(!input.points.is_empty(), "Timeseries cannot be empty"); // For now, we can only apply this to 1-D timeseries. anyhow::ensure!( diff --git a/oximeter/oximeter/src/histogram.rs b/oximeter/oximeter/src/histogram.rs index aaf9297ca4..82b9916153 100644 --- a/oximeter/oximeter/src/histogram.rs +++ b/oximeter/oximeter/src/histogram.rs @@ -513,6 +513,11 @@ where self.bins.iter() } + /// Get the bin at the given index. + pub fn get(&self, index: usize) -> Option<&Bin> { + self.bins.get(index) + } + /// Generate paired arrays with the left bin edges and the counts, for each bin. /// /// The returned edges are always left-inclusive, by construction of the histogram. @@ -993,9 +998,9 @@ mod tests { let mut hist = Histogram::with_bins(&[(0..1).into()]).unwrap(); assert!(hist.sample(i64::MIN).is_ok()); assert!(hist.sample(i64::MAX).is_ok()); - assert_eq!(hist.iter().nth(0).unwrap().count, 1); - assert_eq!(hist.iter().nth(1).unwrap().count, 0); - assert_eq!(hist.iter().nth(2).unwrap().count, 1); + assert_eq!(hist.get(0).unwrap().count, 1); + assert_eq!(hist.get(1).unwrap().count, 0); + assert_eq!(hist.get(2).unwrap().count, 1); let mut hist = Histogram::with_bins(&[(0.0..1.0).into()]).unwrap(); assert!(hist.sample(f64::MIN).is_ok()); diff --git a/package/src/dot.rs b/package/src/dot.rs index 3307d100ba..141adcf368 100644 --- a/package/src/dot.rs +++ b/package/src/dot.rs @@ -196,7 +196,7 @@ pub fn do_dot( // Similarly, regardless of the type of local package, create // a node showing any local paths that get included in the // package. 
- if paths.len() > 0 { + if !paths.is_empty() { let paths = paths .iter() .map(|mapping| { diff --git a/test-utils/src/dev/clickhouse.rs b/test-utils/src/dev/clickhouse.rs index 8c415d949e..01ba402f62 100644 --- a/test-utils/src/dev/clickhouse.rs +++ b/test-utils/src/dev/clickhouse.rs @@ -125,7 +125,7 @@ impl ClickHouseInstance { data_dir: Some(data_dir), data_path, port, - address: address, + address, args, child: Some(child), }) @@ -197,7 +197,7 @@ impl ClickHouseInstance { data_dir: Some(data_dir), data_path, port, - address: address, + address, args, child: Some(child), }), @@ -271,7 +271,7 @@ impl ClickHouseInstance { data_dir: Some(data_dir), data_path, port, - address: address, + address, args, child: Some(child), }), diff --git a/test-utils/src/dev/db.rs b/test-utils/src/dev/db.rs index d8b15520a4..fcb14a4f15 100644 --- a/test-utils/src/dev/db.rs +++ b/test-utils/src/dev/db.rs @@ -873,7 +873,7 @@ fn make_pg_config( let unsupported_values = check_unsupported.into_iter().flatten().collect::>(); - if unsupported_values.len() > 0 { + if !unsupported_values.is_empty() { bail!( "unsupported PostgreSQL listen URL \ (did not expect any of these fields: {}): {:?}", diff --git a/update-engine/src/engine.rs b/update-engine/src/engine.rs index 6d59a82221..685056042e 100644 --- a/update-engine/src/engine.rs +++ b/update-engine/src/engine.rs @@ -1238,7 +1238,7 @@ impl usize> StepProgressReporter { component: self.step_info.info.component.clone(), id: self.step_info.info.id.clone(), description: self.step_info.info.description.clone(), - message: message, + message, }, Err(error) => error, } From 92e18134be44711bc44cf973ce42b91a692ca4cc Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 5 Apr 2024 15:39:04 -0700 Subject: [PATCH 074/334] chore(deps): update rust crate tough to 0.17.1 (#5416) Co-authored-by: Rain --- Cargo.lock | 15 +++++++-------- Cargo.toml | 2 +- .../src/artifacts/artifacts_with_plan.rs | 2 +- update-common/src/errors.rs | 2 +- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bd663e5d40..25b78e35b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8960,11 +8960,10 @@ dependencies = [ [[package]] name = "snafu" -version = "0.7.5" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4de37ad025c587a29e8f3f5605c00f70b98715ef90b9061a815b9e59e9042d6" +checksum = "75976f4748ab44f6e5332102be424e7c2dc18daeaf7e725f2040c3ebb133512e" dependencies = [ - "doc-comment", "futures-core", "pin-project", "snafu-derive", @@ -8972,14 +8971,14 @@ dependencies = [ [[package]] name = "snafu-derive" -version = "0.7.5" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990079665f075b699031e9c08fd3ab99be5029b96f3b78dc0709e8f77e4efebf" +checksum = "b4b19911debfb8c2fb1107bc6cb2d61868aaf53a988449213959bb1b5b1ed95f" dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 1.0.109", + "syn 2.0.52", ] [[package]] @@ -9992,9 +9991,9 @@ checksum = "ea68304e134ecd095ac6c3574494fc62b909f416c4fca77e440530221e549d3d" [[package]] name = "tough" -version = "0.16.0" +version = "0.17.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49455926f64001de53ef047c2053e2f17440e412b8b1e958d4ad8a6008db7128" +checksum = "b8d7a87d51ca5a113542e1b9f5ee2b14b6864bf7f34d103740086fa9c3d57d3b" dependencies = [ "async-recursion", "async-trait", diff --git a/Cargo.toml b/Cargo.toml index 
06597147af..6fdea73083 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -428,7 +428,7 @@ tokio-tungstenite = "0.20" tokio-util = { version = "0.7.10", features = ["io", "io-util"] } toml = "0.8.12" toml_edit = "0.22.9" -tough = { version = "0.16.0", features = [ "http" ] } +tough = { version = "0.17.1", features = [ "http" ] } trust-dns-client = "0.22" trust-dns-proto = "0.22" trust-dns-resolver = "0.22" diff --git a/update-common/src/artifacts/artifacts_with_plan.rs b/update-common/src/artifacts/artifacts_with_plan.rs index c2be69e82e..950e2c5ab7 100644 --- a/update-common/src/artifacts/artifacts_with_plan.rs +++ b/update-common/src/artifacts/artifacts_with_plan.rs @@ -207,7 +207,7 @@ impl ArtifactsWithPlan { .find_target(&target_name) .map_err(|error| RepositoryError::TargetHashRead { target: artifact.target.clone(), - error, + error: Box::new(error), })? .hashes .sha256 diff --git a/update-common/src/errors.rs b/update-common/src/errors.rs index 4d992e70b2..0d65312c56 100644 --- a/update-common/src/errors.rs +++ b/update-common/src/errors.rs @@ -53,7 +53,7 @@ pub enum RepositoryError { TargetHashRead { target: String, #[source] - error: tough::schema::Error, + error: Box, }, #[error("target hash `{}` expected to be 32 bytes long, was {}", hex::encode(.0), .0.len())] From c2fdc3eab02fe21a5e3813fc6b8a938e736652b5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 6 Apr 2024 01:30:11 +0000 Subject: [PATCH 075/334] chore(deps): update rust crate uuid to 1.8.0 (#5417) Co-authored-by: Rain --- Cargo.lock | 140 +++++++++++++++++++------------------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 +- 3 files changed, 73 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 25b78e35b1..9c9dfe5364 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -691,7 +691,7 @@ dependencies = [ "slog-term", "thiserror", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", "vsss-rs", "zeroize", ] @@ -710,7 +710,7 @@ dependencies = [ "serde", "sled-hardware-types", "slog", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -1436,7 +1436,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -1635,7 +1635,7 @@ dependencies = [ "serde", "serde_json", "slog", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -1813,7 +1813,7 @@ dependencies = [ "pq-sys", "r2d2", "serde_json", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -1824,7 +1824,7 @@ dependencies = [ "diesel", "serde", "usdt 0.5.0", - "uuid 1.7.0", + "uuid 1.8.0", "version_check", ] @@ -1978,7 +1978,7 @@ dependencies = [ "trust-dns-proto", "trust-dns-resolver", "trust-dns-server", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -2056,7 +2056,7 @@ dependencies = [ "serde_json", "slog", "toml 0.8.12", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -2100,7 +2100,7 @@ dependencies = [ "tokio-rustls 0.25.0", "toml 0.8.12", "usdt 0.3.5", - "uuid 1.7.0", + "uuid 1.8.0", "version_check", "waitgroup", ] @@ -2281,7 +2281,7 @@ dependencies = [ "tokio", "toml 0.8.12", "trust-dns-resolver", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -2692,7 +2692,7 @@ dependencies = [ "termios", "tokio", "tokio-tungstenite 0.20.1", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -2710,7 +2710,7 @@ dependencies = [ "serde", "serde_json", "slog", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -2725,7 +2725,7 @@ dependencies = [ "smoltcp 0.9.1", "static_assertions", "strum_macros 0.25.2", - "uuid 1.7.0", + "uuid 1.8.0", "zerocopy 0.6.4", ] @@ -2755,7 +2755,7 @@ 
dependencies = [ "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git?branch=main)", "tokio", "usdt 0.3.5", - "uuid 1.7.0", + "uuid 1.8.0", "version_check", "zip", ] @@ -2773,7 +2773,7 @@ dependencies = [ "slog", "sp-sim", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -3515,7 +3515,7 @@ dependencies = [ "thiserror", "tokio", "toml 0.8.12", - "uuid 1.7.0", + "uuid 1.8.0", "whoami", "zone 0.3.0", ] @@ -3634,7 +3634,7 @@ dependencies = [ "tokio-stream", "tufaceous-lib", "update-engine", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -3651,7 +3651,7 @@ dependencies = [ "serde_json", "slog", "update-engine", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -3675,7 +3675,7 @@ dependencies = [ "serde_json", "slog", "subprocess", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -3733,7 +3733,7 @@ dependencies = [ "thiserror", "tokio", "trust-dns-resolver", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -3770,7 +3770,7 @@ dependencies = [ "serde", "test-strategy", "thiserror", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -3965,7 +3965,7 @@ dependencies = [ "libc", "libefi-sys", "thiserror", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4005,7 +4005,7 @@ dependencies = [ "tokio", "tokio-tungstenite 0.21.0", "toml 0.7.8", - "uuid 1.7.0", + "uuid 1.8.0", "zone 0.1.8", ] @@ -4453,7 +4453,7 @@ checksum = "6a5ff2b31594942586c1520da8f1e5c705729ec67b3c2ad0fe459f0b576e4d9a" dependencies = [ "schemars", "serde", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4484,7 +4484,7 @@ dependencies = [ "serde", "serde_json", "slog", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4504,7 +4504,7 @@ dependencies = [ "serde_with", "tokio-postgres", "toml 0.8.12", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4546,7 +4546,7 @@ dependencies = [ "strum", "thiserror", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4630,7 +4630,7 @@ dependencies = [ "thiserror", "tokio", "usdt 0.5.0", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4670,7 +4670,7 @@ dependencies = [ "thiserror", "tokio", "typed-rng", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4707,7 +4707,7 @@ dependencies = [ "slog-error-chain", "thiserror", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4722,7 +4722,7 @@ dependencies = [ "reqwest", "sled-agent-client", "slog", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4759,7 +4759,7 @@ dependencies = [ "slog", "slog-error-chain", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4786,7 +4786,7 @@ dependencies = [ "slog", "thiserror", "typed-rng", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4815,7 +4815,7 @@ dependencies = [ "omicron-common", "omicron-workspace-hack", "slog", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4859,7 +4859,7 @@ dependencies = [ "tokio", "tokio-util", "trust-dns-resolver", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -4900,7 +4900,7 @@ dependencies = [ "strum", "tabled", "thiserror", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -5228,7 +5228,7 @@ dependencies = [ "test-strategy", "thiserror", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -5253,7 +5253,7 @@ dependencies = [ "thiserror", "tokio", "toml 0.8.12", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -5330,7 +5330,7 @@ dependencies = [ "tokio-stream", "tokio-tungstenite 0.20.1", "toml 0.8.12", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -5453,7 +5453,7 @@ dependencies = [ "tufaceous", "tufaceous-lib", "update-common", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -5509,7 +5509,7 @@ dependencies = [ "textwrap 0.16.1", 
"tokio", "unicode-width", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -5661,7 +5661,7 @@ dependencies = [ "tokio-util", "toml 0.8.12", "usdt 0.5.0", - "uuid 1.7.0", + "uuid 1.8.0", "zeroize", "zone 0.3.0", ] @@ -5700,7 +5700,7 @@ dependencies = [ "tokio", "tokio-postgres", "usdt 0.5.0", - "uuid 1.7.0", + "uuid 1.8.0", "walkdir", ] @@ -5822,7 +5822,7 @@ dependencies = [ "unicode-normalization", "usdt 0.3.5", "usdt-impl 0.5.0", - "uuid 1.7.0", + "uuid 1.8.0", "yasna", "zerocopy 0.7.32", "zeroize", @@ -6047,7 +6047,7 @@ dependencies = [ "thiserror", "tokio", "trust-dns-resolver", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -6084,7 +6084,7 @@ dependencies = [ "strum", "thiserror", "trybuild", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -6099,7 +6099,7 @@ dependencies = [ "reqwest", "serde", "slog", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -6139,7 +6139,7 @@ dependencies = [ "thiserror", "tokio", "toml 0.8.12", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -6186,7 +6186,7 @@ dependencies = [ "thiserror", "tokio", "usdt 0.5.0", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -6207,7 +6207,7 @@ dependencies = [ "slog-term", "thiserror", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -6238,7 +6238,7 @@ dependencies = [ "slog-dtrace", "thiserror", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -6251,7 +6251,7 @@ dependencies = [ "clap 4.5.1", "omicron-workspace-hack", "sigpipe", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -6809,7 +6809,7 @@ dependencies = [ "postgres-protocol", "serde", "serde_json", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -7042,7 +7042,7 @@ dependencies = [ "thiserror", "tokio", "usdt 0.5.0", - "uuid 1.7.0", + "uuid 1.8.0", "viona_api", ] @@ -7064,7 +7064,7 @@ dependencies = [ "thiserror", "tokio", "tokio-tungstenite 0.20.1", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -7085,7 +7085,7 @@ dependencies = [ "thiserror", "tokio", "tokio-tungstenite 0.20.1", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -7115,7 +7115,7 @@ dependencies = [ "thiserror", "tokio", "tokio-tungstenite 0.20.1", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -7452,7 +7452,7 @@ dependencies = [ "swrite", "tabled", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -8144,7 +8144,7 @@ dependencies = [ "serde", "thiserror", "url", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -8187,7 +8187,7 @@ dependencies = [ "serde", "serde_json", "uuid 0.8.2", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -8701,7 +8701,7 @@ dependencies = [ "schemars", "serde", "slog", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -8731,7 +8731,7 @@ dependencies = [ "thiserror", "tofino", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -8773,7 +8773,7 @@ dependencies = [ "slog", "thiserror", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -9149,7 +9149,7 @@ dependencies = [ "slog", "thiserror", "tokio", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -10316,7 +10316,7 @@ dependencies = [ "rand 0.8.5", "rand_core 0.6.4", "rand_seeder", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -10529,7 +10529,7 @@ dependencies = [ "tokio", "tokio-stream", "unicode-width", - "uuid 1.7.0", + "uuid 1.8.0", ] [[package]] @@ -10688,9 +10688,9 @@ checksum = "bc5cf98d8186244414c848017f0e2676b3fcb46807f6668a97dfe67359a3c4b7" [[package]] name = "uuid" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" +checksum = 
"a183cf7feeba97b4dd1c0d46788634f6221d87fa961b305bed08c851829efcc0" dependencies = [ "getrandom 0.2.12", "serde", @@ -11093,7 +11093,7 @@ dependencies = [ "tufaceous-lib", "update-common", "update-engine", - "uuid 1.7.0", + "uuid 1.8.0", "wicket", "wicket-common", "wicketd-client", @@ -11115,7 +11115,7 @@ dependencies = [ "serde_json", "slog", "update-engine", - "uuid 1.7.0", + "uuid 1.8.0", "wicket-common", ] diff --git a/Cargo.toml b/Cargo.toml index 6fdea73083..b291d40fd0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -442,7 +442,7 @@ unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } usdt = "0.5.0" -uuid = { version = "1.7.0", features = ["serde", "v4"] } +uuid = { version = "1.8.0", features = ["serde", "v4"] } walkdir = "2.5" whoami = "1.5" wicket = { path = "wicket" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 2261eb24df..4fbd218cf4 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -113,7 +113,7 @@ unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } -uuid = { version = "1.7.0", features = ["serde", "v4"] } +uuid = { version = "1.8.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.32", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } @@ -220,7 +220,7 @@ unicode-bidi = { version = "0.3.15" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } -uuid = { version = "1.7.0", features = ["serde", "v4"] } +uuid = { version = "1.8.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.32", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } From a9b7bcf52f6875c8ebbf73d9d059f91659ce45d7 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 6 Apr 2024 01:36:31 +0000 Subject: [PATCH 076/334] chore(deps): update rust crate sqlparser to 0.44.0 (#5412) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- oximeter/db/src/sql/mod.rs | 5 ++++- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9c9dfe5364..79ad8bbaa7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9089,9 +9089,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.43.1" +version = "0.44.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4" +checksum = "aaf9c7ff146298ffda83a200f8d5084f08dcee1edfc135fcc1d646a45d50ffd6" dependencies = [ "log", "sqlparser_derive", diff --git a/Cargo.toml b/Cargo.toml index b291d40fd0..870727fdff 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -399,7 +399,7 @@ sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "7 sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sqlformat = "0.2.3" -sqlparser = { version = "0.43.1", features = [ "visitor" ] } +sqlparser = { 
version = "0.44.0", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. It makes it # harder than expected to make breaking changes (even if you specify a specific diff --git a/oximeter/db/src/sql/mod.rs b/oximeter/db/src/sql/mod.rs index 8a5bd20bde..f3082dcaa5 100644 --- a/oximeter/db/src/sql/mod.rs +++ b/oximeter/db/src/sql/mod.rs @@ -588,10 +588,11 @@ impl RestrictedQuery { having: None, named_window: vec![], qualify: None, + value_table_mode: None, }; let mut query = Self::select_to_query(top_level_select); query.order_by = order_by; - Cte { alias, query, from: None } + Cte { alias, query, from: None, materialized: None } } // Create a SQL parser `Ident` with a the given name. @@ -716,6 +717,7 @@ impl RestrictedQuery { having: None, named_window: vec![], qualify: None, + value_table_mode: None, } } @@ -786,6 +788,7 @@ impl RestrictedQuery { having: None, named_window: vec![], qualify: None, + value_table_mode: None, } } From a01caf9cb8f34d78177720f191e5c335bbf07eee Mon Sep 17 00:00:00 2001 From: bnaecker Date: Fri, 5 Apr 2024 19:41:33 -0700 Subject: [PATCH 077/334] Not an error to filter an empty table (#5444) Fixes #5439 --- oximeter/db/src/oxql/ast/table_ops/filter.rs | 28 ++++++++++++++++---- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs index d363ec58f0..c76a4e713f 100644 --- a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -18,7 +18,6 @@ use crate::oxql::query::special_idents; use crate::oxql::Error; use crate::oxql::Table; use crate::oxql::Timeseries; -use anyhow::Context; use chrono::DateTime; use chrono::Utc; use oximeter::FieldType; @@ -293,10 +292,10 @@ impl Filter { // Ensure that all the identifiers in this filter apply to the // input timeseries. We can do this once at the beginning, because all // the timeseries in a table have the same set of fields. - let first_timeseries = tables[0] - .iter() - .next() - .context("Table contains no timeseries to filter")?; + let Some(first_timeseries) = tables[0].iter().next() else { + // You give nothing, you get nothing. 
+ return Ok(tables.to_vec()); + }; let ident_names = self.ident_names(); // There are extra, implied names that depend on the data type of the @@ -1089,10 +1088,13 @@ impl SimpleFilter { mod tests { use crate::oxql::ast::grammar::query_parser; use crate::oxql::ast::logical_op::LogicalOp; + use crate::oxql::point::DataType; use crate::oxql::point::MetricType; use crate::oxql::point::Points; use crate::oxql::point::ValueArray; use crate::oxql::point::Values; + use crate::oxql::Table; + use crate::oxql::Timeseries; use chrono::Utc; use oximeter::FieldValue; use std::time::Duration; @@ -1280,4 +1282,20 @@ mod tests { "Should fail for extremely deep logical expressions" ); } + + #[test] + fn test_filter_empty_timeseries() { + let ts = Timeseries::new( + std::iter::once((String::from("foo"), FieldValue::U8(0))), + DataType::Double, + MetricType::Gauge, + ) + .unwrap(); + let table = Table::from_timeseries("foo", std::iter::once(ts)).unwrap(); + let filt = query_parser::filter_expr("timestamp > @now()").unwrap(); + assert!( + filt.apply(&[table]).is_ok(), + "It's not an error to filter an empty table" + ); + } } From 88f327d2a093936ffcdf0e27615cfb56a305c08c Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 5 Apr 2024 19:53:36 -0700 Subject: [PATCH 078/334] [nexus] add BlueprintZoneDisposition::Expunged, make disposition a column in bp_omicron_zone (#5438) This PR (finally) adds a `BlueprintZoneDisposition::Expunged` enum, along with database support for it by making the disposition a column. We also drop the `bp_omicron_zones_not_in_service` table -- we don't bother to check for its contents because we aren't ever setting any zones as quiesced on customer sites currently. I've also included a Diesel extension to allow easily filtering against `BlueprintZoneDisposition`s, along with an example query for it. 
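As a rough illustration (not part of this patch), a self-contained sketch of how the new
Expunged variant interacts with a zone filter such as VpcFirewall; the types here are
simplified stand-ins that mirror the match arms added below:

    // Simplified stand-ins for the real BlueprintZoneDisposition and
    // BlueprintZoneFilter types; the behavior mirrors the match logic added
    // in nexus/types/src/deployment.rs.
    #[derive(Clone, Copy, Debug, PartialEq)]
    enum Disposition { InService, Quiesced, Expunged }

    #[derive(Clone, Copy, Debug)]
    enum Filter { All, VpcFirewall }

    fn matches(disposition: Disposition, filter: Filter) -> bool {
        match (disposition, filter) {
            // Every disposition is visible to the `All` filter.
            (_, Filter::All) => true,
            // Expunged zones are permanently gone and get no firewall rules.
            (Disposition::Expunged, Filter::VpcFirewall) => false,
            // In-service and quiesced zones still receive firewall rules.
            (_, Filter::VpcFirewall) => true,
        }
    }

    fn main() {
        assert!(matches(Disposition::Quiesced, Filter::VpcFirewall));
        assert!(!matches(Disposition::Expunged, Filter::VpcFirewall));
    }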
--- nexus/db-model/src/deployment.rs | 130 ++++++++++++-- nexus/db-model/src/schema.rs | 10 +- nexus/db-model/src/schema_versions.rs | 3 +- .../db-queries/src/db/datastore/deployment.rs | 125 +------------- .../db-queries/src/db/datastore/test_utils.rs | 62 +++++++ nexus/db-queries/src/db/datastore/vpc.rs | 160 +++++++++++++----- nexus/db-queries/src/db/pool_connection.rs | 1 + nexus/types/src/deployment.rs | 10 ++ openapi/nexus-internal.json | 7 + .../crdb/blueprint-disposition-column/up1.sql | 6 + .../crdb/blueprint-disposition-column/up2.sql | 9 + .../crdb/blueprint-disposition-column/up3.sql | 2 + .../crdb/blueprint-disposition-column/up4.sql | 3 + schema/crdb/dbinit.sql | 35 ++-- 14 files changed, 352 insertions(+), 211 deletions(-) create mode 100644 schema/crdb/blueprint-disposition-column/up1.sql create mode 100644 schema/crdb/blueprint-disposition-column/up2.sql create mode 100644 schema/crdb/blueprint-disposition-column/up3.sql create mode 100644 schema/crdb/blueprint-disposition-column/up4.sql diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index e56c8bff54..d425f0ac34 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -8,10 +8,12 @@ use crate::inventory::ZoneType; use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic}; use crate::schema::{ - blueprint, bp_omicron_zone, bp_omicron_zone_nic, - bp_omicron_zones_not_in_service, bp_sled_omicron_zones, bp_target, + blueprint, bp_omicron_zone, bp_omicron_zone_nic, bp_sled_omicron_zones, + bp_target, +}; +use crate::{ + impl_enum_type, ipv6, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8, }; -use crate::{ipv6, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8}; use chrono::{DateTime, Utc}; use ipnetwork::IpNetwork; use nexus_types::deployment::BlueprintTarget; @@ -141,15 +143,17 @@ pub struct BpOmicronZone { pub snat_ip: Option, pub snat_first_port: Option, pub snat_last_port: Option, + + disposition: DbBpZoneDisposition, } impl BpOmicronZone { pub fn new( blueprint_id: Uuid, sled_id: Uuid, - zone: &BlueprintZoneConfig, + blueprint_zone: &BlueprintZoneConfig, ) -> Result { - let zone = OmicronZone::new(sled_id, &zone.config)?; + let zone = OmicronZone::new(sled_id, &blueprint_zone.config)?; Ok(Self { blueprint_id, sled_id: zone.sled_id, @@ -172,13 +176,13 @@ impl BpOmicronZone { snat_ip: zone.snat_ip, snat_first_port: zone.snat_first_port, snat_last_port: zone.snat_last_port, + disposition: to_db_bp_zone_disposition(blueprint_zone.disposition), }) } pub fn into_blueprint_zone_config( self, nic_row: Option, - disposition: BlueprintZoneDisposition, ) -> Result { let zone = OmicronZone { sled_id: self.sled_id, @@ -204,7 +208,52 @@ impl BpOmicronZone { }; let config = zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from))?; - Ok(BlueprintZoneConfig { config, disposition }) + Ok(BlueprintZoneConfig { config, disposition: self.disposition.into() }) + } +} + +impl_enum_type!( + #[derive(Clone, SqlType, Debug, QueryId)] + #[diesel(postgres_type(name = "bp_zone_disposition", schema = "public"))] + pub struct DbBpZoneDispositionEnum; + + /// This type is not actually public, because [`BlueprintZoneDisposition`] + /// interacts with external logic. + /// + /// However, it must be marked `pub` to avoid errors like `crate-private + /// type `BpZoneDispositionEnum` in public interface`. Marking this type `pub`, + /// without actually making it public, tricks rustc in a desirable way. 
+ #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] + #[diesel(sql_type = DbBpZoneDispositionEnum)] + pub enum DbBpZoneDisposition; + + // Enum values + InService => b"in_service" + Quiesced => b"quiesced" + Expunged => b"expunged" +); + +/// Converts a [`BlueprintZoneDisposition`] to a version that can be inserted +/// into a database. +pub fn to_db_bp_zone_disposition( + disposition: BlueprintZoneDisposition, +) -> DbBpZoneDisposition { + match disposition { + BlueprintZoneDisposition::InService => DbBpZoneDisposition::InService, + BlueprintZoneDisposition::Quiesced => DbBpZoneDisposition::Quiesced, + BlueprintZoneDisposition::Expunged => DbBpZoneDisposition::Expunged, + } +} + +impl From for BlueprintZoneDisposition { + fn from(disposition: DbBpZoneDisposition) -> Self { + match disposition { + DbBpZoneDisposition::InService => { + BlueprintZoneDisposition::InService + } + DbBpZoneDisposition::Quiesced => BlueprintZoneDisposition::Quiesced, + DbBpZoneDisposition::Expunged => BlueprintZoneDisposition::Expunged, + } } } @@ -265,12 +314,63 @@ impl BpOmicronZoneNic { } } -/// Nexus wants to think in terms of "zones in service", but since most zones of -/// most blueprints are in service, we store the zones NOT in service in the -/// database. We handle that inversion internally in the db-queries layer. -#[derive(Queryable, Clone, Debug, Selectable, Insertable)] -#[diesel(table_name = bp_omicron_zones_not_in_service)] -pub struct BpOmicronZoneNotInService { - pub blueprint_id: Uuid, - pub bp_omicron_zone_id: Uuid, +mod diesel_util { + use crate::{ + schema::bp_omicron_zone::disposition, to_db_bp_zone_disposition, + DbBpZoneDisposition, + }; + use diesel::{ + helper_types::EqAny, prelude::*, query_dsl::methods::FilterDsl, + }; + use nexus_types::deployment::{ + BlueprintZoneDisposition, BlueprintZoneFilter, + }; + + /// An extension trait to apply a [`BlueprintZoneFilter`] to a Diesel + /// expression. + /// + /// This is applicable to any Diesel expression which includes the + /// `bp_omicron_zone` table. + /// + /// This needs to live here, rather than in `nexus-db-queries`, because it + /// names the `DbBpZoneDisposition` type which is private to this crate. + pub trait ApplyBlueprintZoneFilterExt { + type Output; + + /// Applies a [`BlueprintZoneFilter`] to a Diesel expression. + fn blueprint_zone_filter( + self, + filter: BlueprintZoneFilter, + ) -> Self::Output; + } + + impl ApplyBlueprintZoneFilterExt for E + where + E: FilterDsl, + { + type Output = E::Output; + + fn blueprint_zone_filter( + self, + filter: BlueprintZoneFilter, + ) -> Self::Output { + // This is only boxed for ease of reference above. + let all_matching_dispositions: BoxedIterator = + Box::new( + BlueprintZoneDisposition::all_matching(filter) + .map(to_db_bp_zone_disposition), + ); + + FilterDsl::filter( + self, + disposition.eq_any(all_matching_dispositions), + ) + } + } + + type BoxedIterator = Box>; + type BlueprintZoneFilterQuery = + EqAny>; } + +pub use diesel_util::ApplyBlueprintZoneFilterExt; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index a5b217d222..64ddca2c34 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -391,6 +391,7 @@ table! { state_generation -> Int8, } } +joinable!(vmm -> sled (sled_id)); table! { sled_instance (id) { @@ -483,6 +484,7 @@ table! { is_primary -> Bool, } } +joinable!(instance_network_interface -> instance (instance_id)); table! { service_network_interface (id) { @@ -1505,6 +1507,7 @@ table! 
{ snat_ip -> Nullable, snat_first_port -> Nullable, snat_last_port -> Nullable, + disposition -> crate::DbBpZoneDispositionEnum, } } @@ -1522,13 +1525,6 @@ table! { } } -table! { - bp_omicron_zones_not_in_service (blueprint_id, bp_omicron_zone_id) { - blueprint_id -> Uuid, - bp_omicron_zone_id -> Uuid, - } -} - table! { bootstore_keys (key, generation) { key -> Text, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 853db4195a..362333c442 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(50, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(51, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. // v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(51, "blueprint-disposition-column"), KnownVersion::new(50, "add-lookup-disk-by-volume-id-index"), KnownVersion::new(49, "physical-disk-state-and-policy"), KnownVersion::new(48, "add-metrics-producers-time-modified-index"), diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index d956dffb31..38899050c6 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -37,14 +37,11 @@ use diesel::RunQueryDsl; use nexus_db_model::Blueprint as DbBlueprint; use nexus_db_model::BpOmicronZone; use nexus_db_model::BpOmicronZoneNic; -use nexus_db_model::BpOmicronZoneNotInService; use nexus_db_model::BpSledOmicronZones; use nexus_db_model::BpTarget; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; -use nexus_types::deployment::BlueprintZoneDisposition; -use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZonesConfig; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; @@ -53,7 +50,6 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; use std::collections::BTreeMap; -use std::collections::BTreeSet; use uuid::Uuid; impl DataStore { @@ -108,36 +104,6 @@ impl DataStore { let row_blueprint = DbBlueprint::from(blueprint); let blueprint_id = row_blueprint.id; - // `Blueprint` stores the policy for each zone next to the zone itself. - // This would ideally be represented as a simple column in - // bp_omicron_zone. - // - // But historically, `Blueprint` used to store the set of zones in - // service in a BTreeSet. Since most zones are expected to be in - // service, we store the set of zones NOT in service (which we expect - // to be much smaller, often empty). Build that inverted set here. - // - // This will soon be replaced with an extra column in the - // `bp_omicron_zone` table, coupled with other data migrations. 
- let omicron_zones_not_in_service = blueprint - .all_blueprint_zones(BlueprintZoneFilter::All) - .filter_map(|(_, zone)| { - // This is going to go away soon when we change the database - // representation to store the zone disposition enum next to - // each zone. For now, do an exhaustive match so that this - // fails if we add a new variant. - match zone.disposition { - BlueprintZoneDisposition::InService => None, - BlueprintZoneDisposition::Quiesced => { - Some(BpOmicronZoneNotInService { - blueprint_id, - bp_omicron_zone_id: zone.config.id, - }) - } - } - }) - .collect::>(); - let sled_omicron_zones = blueprint .blueprint_zones .iter() @@ -218,15 +184,6 @@ impl DataStore { .await?; } - { - use db::schema::bp_omicron_zones_not_in_service::dsl; - let _ = - diesel::insert_into(dsl::bp_omicron_zones_not_in_service) - .values(omicron_zones_not_in_service) - .execute_async(&conn) - .await?; - } - Ok(()) }) .await @@ -369,45 +326,6 @@ impl DataStore { omicron_zone_nics }; - // Load the list of not-in-service zones. Similar to NICs, we'll use a - // mutable set of zone IDs so we can tell if a zone we expected to be - // inactive wasn't present in the blueprint at all. - let mut omicron_zones_not_in_service = { - use db::schema::bp_omicron_zones_not_in_service::dsl; - - let mut omicron_zones_not_in_service = BTreeSet::new(); - let mut paginator = Paginator::new(SQL_BATCH_SIZE); - while let Some(p) = paginator.next() { - let batch = paginated( - dsl::bp_omicron_zones_not_in_service, - dsl::bp_omicron_zone_id, - &p.current_pagparams(), - ) - .filter(dsl::blueprint_id.eq(blueprint_id)) - .select(BpOmicronZoneNotInService::as_select()) - .load_async(&*conn) - .await - .map_err(|e| { - public_error_from_diesel(e, ErrorHandler::Server) - })?; - - paginator = p.found_batch(&batch, &|z| z.bp_omicron_zone_id); - - for z in batch { - let inserted = omicron_zones_not_in_service - .insert(z.bp_omicron_zone_id); - bail_unless!( - inserted, - "found duplicate zone ID in \ - bp_omicron_zones_not_in_service: {}", - z.bp_omicron_zone_id, - ); - } - } - - omicron_zones_not_in_service - }; - // Load all the zones for each sled. { use db::schema::bp_omicron_zone::dsl; @@ -464,14 +382,8 @@ impl DataStore { )) })?; let zone_id = z.id; - let disposition = - if omicron_zones_not_in_service.remove(&zone_id) { - BlueprintZoneDisposition::Quiesced - } else { - BlueprintZoneDisposition::InService - }; let zone = z - .into_blueprint_zone_config(nic_row, disposition) + .into_blueprint_zone_config(nic_row) .with_context(|| { format!("zone {:?}: parse from database", zone_id) }) @@ -496,11 +408,6 @@ impl DataStore { "found extra Omicron zone NICs: {:?}", omicron_zone_nics.keys() ); - bail_unless!( - omicron_zones_not_in_service.is_empty(), - "found extra Omicron zones not in service: {:?}", - omicron_zones_not_in_service, - ); Ok(Blueprint { id: blueprint_id, @@ -531,13 +438,7 @@ impl DataStore { // collection if we crash while deleting it. let conn = self.pool_connection_authorized(opctx).await?; - let ( - nblueprints, - nsled_agent_zones, - nzones, - nnics, - nzones_not_in_service, - ) = conn + let (nblueprints, nsled_agent_zones, nzones, nnics) = conn .transaction_async(|conn| async move { // Ensure that blueprint we're about to delete is not the // current target. @@ -604,23 +505,7 @@ impl DataStore { .await? 
}; - let nzones_not_in_service = { - use db::schema::bp_omicron_zones_not_in_service::dsl; - diesel::delete( - dsl::bp_omicron_zones_not_in_service - .filter(dsl::blueprint_id.eq(blueprint_id)), - ) - .execute_async(&conn) - .await? - }; - - Ok(( - nblueprints, - nsled_agent_zones, - nzones, - nnics, - nzones_not_in_service, - )) + Ok((nblueprints, nsled_agent_zones, nzones, nnics)) }) .await .map_err(|error| match error { @@ -636,7 +521,6 @@ impl DataStore { "nsled_agent_zones" => nsled_agent_zones, "nzones" => nzones, "nnics" => nnics, - "nzones_not_in_service" => nzones_not_in_service, ); Ok(()) @@ -1193,6 +1077,8 @@ mod tests { use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::blueprint_builder::Ensure; use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::BlueprintZoneDisposition; + use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; @@ -1242,7 +1128,6 @@ mod tests { query_count!(blueprint, id), query_count!(bp_omicron_zone, blueprint_id), query_count!(bp_omicron_zone_nic, blueprint_id), - query_count!(bp_omicron_zones_not_in_service, blueprint_id), ] { let count: i64 = result.unwrap(); assert_eq!( diff --git a/nexus/db-queries/src/db/datastore/test_utils.rs b/nexus/db-queries/src/db/datastore/test_utils.rs index a678bccd49..13b0a017e7 100644 --- a/nexus/db-queries/src/db/datastore/test_utils.rs +++ b/nexus/db-queries/src/db/datastore/test_utils.rs @@ -14,6 +14,7 @@ use anyhow::ensure; use anyhow::Context; use anyhow::Result; use dropshot::test_util::LogContext; +use futures::future::try_join_all; use nexus_db_model::SledState; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; @@ -190,6 +191,67 @@ impl IneligibleSleds { Ok(()) } + + /// Brings all of the sleds back to being in-service and provisionable. + /// + /// This is never going to happen in production, but it's easier to do this + /// in many tests than to set up a new set of sleds. + /// + /// Note: there's no memory of the previous state stored here -- this just + /// resets the sleds to the default state. 
+ pub async fn undo( + &self, + opctx: &OpContext, + datastore: &DataStore, + ) -> Result<()> { + async fn undo_single( + opctx: &OpContext, + datastore: &DataStore, + sled_id: Uuid, + kind: IneligibleSledKind, + ) -> Result<()> { + sled_set_policy( + &opctx, + &datastore, + sled_id, + SledPolicy::provisionable(), + ValidateTransition::No, + Expected::Ignore, + ) + .await + .with_context(|| { + format!( + "failed to set provisionable policy for sled {} ({:?})", + sled_id, kind, + ) + })?; + + sled_set_state( + &opctx, + &datastore, + sled_id, + SledState::Active, + ValidateTransition::No, + Expected::Ignore, + ) + .await + .with_context(|| { + format!( + "failed to set active state for sled {} ({:?})", + sled_id, kind, + ) + })?; + + Ok(()) + } + + _ = try_join_all(self.iter().map(|(kind, sled_id)| { + undo_single(opctx, datastore, sled_id, kind) + })) + .await?; + + Ok(()) + } } pub(super) async fn sled_set_policy( diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index d73b71a09b..1651719f7e 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -14,6 +14,7 @@ use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::fixed_data::vpc::SERVICES_VPC_ID; use crate::db::identity::Resource; +use crate::db::model::ApplyBlueprintZoneFilterExt; use crate::db::model::ApplySledFilterExt; use crate::db::model::IncompleteVpc; use crate::db::model::InstanceNetworkInterface; @@ -44,7 +45,6 @@ use diesel::prelude::*; use diesel::result::DatabaseErrorKind; use diesel::result::Error as DieselError; use ipnetwork::IpNetwork; -use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::SledFilter; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -665,11 +665,7 @@ impl DataStore { ); let instance_query = instance_network_interface::table - .inner_join( - instance::table - .on(instance::id - .eq(instance_network_interface::instance_id)), - ) + .inner_join(instance::table) .inner_join( vmm::table .on(vmm::id.nullable().eq(instance::active_propolis_id)), @@ -699,43 +695,6 @@ impl DataStore { // ... and we also need to query for the current target blueprint to // support systems that _are_ under Reconfigurator control. - { - // Ideally this would do something like: - // - // .filter(bp_omicron_zone::disposition.eq_any( - // BlueprintZoneDisposition::all_matching( - // BlueprintZoneFilter::VpcFirewall, - // ), - // ) - // - // But that doesn't quite work today because we currently don't - // store the disposition enum next to each zone. Instead, this code - // makes its decision to select which sleds to return by just - // ignoring the zones_in_service table today. - // - // The purpose of this otherwise pointless block is to ensure that - // it is correct to ensure that the expressed logic by - // `BlueprintZoneFilter::VpcFirewall` matches the actual - // implementation. It will hopefully soon be replaced with storing - // the disposition in the bp_omicron_zone table and using the - // filter directly. 
- - let mut matching = BlueprintZoneDisposition::all_matching( - BlueprintZoneFilter::VpcFirewall, - ) - .collect::>(); - matching.sort(); - let mut all = BlueprintZoneDisposition::all_matching( - BlueprintZoneFilter::All, - ) - .collect::>(); - all.sort(); - debug_assert_eq!( - matching, all, - "vpc firewall dispositions should match all dispositions" - ); - } - let reconfig_service_query = service_network_interface::table .inner_join(bp_omicron_zone::table.on( bp_omicron_zone::id.eq(service_network_interface::service_id), @@ -761,6 +720,9 @@ impl DataStore { .limit(1), ), ) + // Filter out services that are expunged and shouldn't be resolved + // here. + .blueprint_zone_filter(BlueprintZoneFilter::VpcFirewall) .filter(service_network_interface::vpc_id.eq(vpc_id)) .filter(service_network_interface::time_deleted.is_null()) .select(Sled::as_select()); @@ -1291,6 +1253,8 @@ mod tests { use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use crate::db::model::Project; use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; + use async_bb8_diesel::AsyncConnection; + use async_bb8_diesel::AsyncSimpleConnection; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_db_model::SledUpdate; use nexus_test_utils::db::test_setup_database; @@ -1937,6 +1901,116 @@ mod tests { // --- + // Bring the sleds marked above back to life. + ineligible + .undo(&opctx, &datastore) + .await + .expect("failed to undo ineligible sleds"); + + // Clear out the service table entirely so we're only testing + // blueprints. (The services table is going to go away soon so this is + // an easy workaround for now.) + { + use db::schema::service::dsl; + + let conn = datastore + .pool_connection_authorized(&opctx) + .await + .expect("getting a connection succeeded"); + conn.transaction_async(|conn| async move { + // Need to do a full table scan for a full delete. + conn.batch_execute_async( + nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL, + ) + .await + .expect("allowing full table scan succeeded"); + + diesel::delete(dsl::service) + .execute_async(&conn) + .await + .expect("failed to delete services"); + + Ok::<_, DieselError>(()) + }) + .await + .expect("transaction succeed"); + } + + // Make a new blueprint marking one of the zones as quiesced and one as + // expunged. Ensure that the sled with *quiesced* zone is returned by + // vpc_resolve_to_sleds, but the sled with the *expunged* zone is not. + // (But other services are still running.) + let bp5_zones = { + let mut zones = BTreeMap::new(); + // Skip over sled index 0 (should be excluded). + let mut iter = harness.blueprint_zone_configs().skip(1); + + // Sled index 1's zone is active (should be included). + let (sled_id, zone_config) = iter.next().unwrap(); + zones.insert( + sled_id, + BlueprintZonesConfig { + generation: Generation::new(), + zones: vec![zone_config], + }, + ); + + // Sled index 2's zone is quiesced (should be included). + let (sled_id, mut zone_config) = iter.next().unwrap(); + zone_config.disposition = BlueprintZoneDisposition::Quiesced; + zones.insert( + sled_id, + BlueprintZonesConfig { + generation: Generation::new(), + zones: vec![zone_config], + }, + ); + + // Sled index 3's zone is expunged (should be excluded). + let (sled_id, mut zone_config) = iter.next().unwrap(); + zone_config.disposition = BlueprintZoneDisposition::Expunged; + zones.insert( + sled_id, + BlueprintZonesConfig { + generation: Generation::new(), + zones: vec![zone_config], + }, + ); + + // Sled index 4's zone is not in the blueprint (should be excluded). 
+ + zones + }; + + let bp5_id = Uuid::new_v4(); + let bp5 = Blueprint { + id: bp5_id, + blueprint_zones: bp5_zones, + parent_blueprint_id: Some(bp4_id), + internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), + time_created: Utc::now(), + creator: "test".to_string(), + comment: "test".to_string(), + }; + + datastore + .blueprint_insert(&opctx, &bp5) + .await + .expect("failed to insert blueprint"); + datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: bp5_id, + enabled: true, + time_made_target: Utc::now(), + }, + ) + .await + .expect("failed to set blueprint target"); + assert_eq!(&harness.sled_ids[1..=2], fetch_service_sled_ids().await); + db.cleanup().await.unwrap(); logctx.cleanup_successful(); } diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index bb455cbf2d..b3311c540a 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -41,6 +41,7 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ "authentication_mode", "bfd_mode", "block_size", + "bp_zone_disposition", "caboose_which", "dataset_kind", "dns_group", diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 26d213c53e..b973c0b6c8 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -381,6 +381,9 @@ pub enum BlueprintZoneDisposition { /// The zone is not in service. Quiesced, + + /// The zone is permanently gone. + Expunged, } impl BlueprintZoneDisposition { @@ -413,6 +416,12 @@ impl BlueprintZoneDisposition { // Quiesced zones should get firewall rules. BlueprintZoneFilter::VpcFirewall => true, }, + Self::Expunged => match filter { + BlueprintZoneFilter::All => true, + BlueprintZoneFilter::InternalDns => false, + BlueprintZoneFilter::SledAgentPut => false, + BlueprintZoneFilter::VpcFirewall => false, + }, } } @@ -431,6 +440,7 @@ impl fmt::Display for BlueprintZoneDisposition { // and alignment (used above), but this does. BlueprintZoneDisposition::InService => "in service".fmt(f), BlueprintZoneDisposition::Quiesced => "quiesced".fmt(f), + BlueprintZoneDisposition::Expunged => "expunged".fmt(f), } } } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 28d82976c7..dd8238fbf6 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2787,6 +2787,13 @@ "enum": [ "quiesced" ] + }, + { + "description": "The zone is permanently gone.", + "type": "string", + "enum": [ + "expunged" + ] } ] }, diff --git a/schema/crdb/blueprint-disposition-column/up1.sql b/schema/crdb/blueprint-disposition-column/up1.sql new file mode 100644 index 0000000000..6426d80142 --- /dev/null +++ b/schema/crdb/blueprint-disposition-column/up1.sql @@ -0,0 +1,6 @@ +-- Add the disposition enum. +CREATE TYPE IF NOT EXISTS omicron.public.bp_zone_disposition AS ENUM ( + 'in_service', + 'quiesced', + 'expunged' +); diff --git a/schema/crdb/blueprint-disposition-column/up2.sql b/schema/crdb/blueprint-disposition-column/up2.sql new file mode 100644 index 0000000000..cf5ae38a4d --- /dev/null +++ b/schema/crdb/blueprint-disposition-column/up2.sql @@ -0,0 +1,9 @@ +-- Add the disposition column to the bp_omicron_zone table. 
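+-- (Uses the bp_zone_disposition enum created in up1.sql.)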
+ALTER TABLE omicron.public.bp_omicron_zone + ADD COLUMN IF NOT EXISTS disposition omicron.public.bp_zone_disposition + NOT NULL + -- The only currently-representable zones are in-service and quiesced + -- (represented by bp_omicron_zones_not_in_service, which we're going to + -- drop in the next statement). We don't actually have any quiesced zones + -- yet, so it's fine to just do this. + DEFAULT 'in_service'; diff --git a/schema/crdb/blueprint-disposition-column/up3.sql b/schema/crdb/blueprint-disposition-column/up3.sql new file mode 100644 index 0000000000..8848a5f2e6 --- /dev/null +++ b/schema/crdb/blueprint-disposition-column/up3.sql @@ -0,0 +1,2 @@ +-- Drop the not-in-service table. +DROP TABLE IF EXISTS omicron.public.bp_omicron_zones_not_in_service; diff --git a/schema/crdb/blueprint-disposition-column/up4.sql b/schema/crdb/blueprint-disposition-column/up4.sql new file mode 100644 index 0000000000..e3b2cd9948 --- /dev/null +++ b/schema/crdb/blueprint-disposition-column/up4.sql @@ -0,0 +1,3 @@ +-- Drop the default for the disposition now that in_service is set. +ALTER TABLE omicron.public.bp_omicron_zone + ALTER COLUMN disposition DROP DEFAULT; diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 9f28efbd16..1fb1c6f3f3 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3196,18 +3196,14 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone_nic ( * `bp_sled_omicron_zones`, `bp_omicron_zone`, and `bp_omicron_zone_nic` are * nearly identical to their `inv_*` counterparts, and record the * `OmicronZonesConfig` for each sled. - * - * `bp_omicron_zones_not_in_service` stores a list of Omicron zones (present in - * `bp_omicron_zone`) that are NOT in service; e.g., should not appear in - * internal DNS. Nexus's in-memory `Blueprint` representation stores the set of - * zones that ARE in service. We invert that logic at this layer because we - * expect most blueprints to have a relatively large number of omicron zones, - * almost all of which will be in service. This is a minor and perhaps - * unnecessary optimization at the database layer, but it's also relatively - * simple and hidden by the relevant read and insert queries in - * `nexus-db-queries`. */ +CREATE TYPE IF NOT EXISTS omicron.public.bp_zone_disposition AS ENUM ( + 'in_service', + 'quiesced', + 'expunged' +); + -- list of all blueprints CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( id UUID PRIMARY KEY, @@ -3337,6 +3333,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( snat_last_port INT4 CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + -- Zone disposition + disposition omicron.public.bp_zone_disposition NOT NULL, + PRIMARY KEY (blueprint_id, id) ); @@ -3354,20 +3353,6 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( PRIMARY KEY (blueprint_id, id) ); --- list of omicron zones that are considered NOT in-service for a blueprint --- --- In Rust code, we generally want to deal with "zones in service", which means --- they should appear in DNS. However, almost all zones in almost all blueprints --- will be in service, so we can induce considerably less database work by --- storing the zones _not_ in service. Our DB wrapper layer handles this --- inversion, so the rest of our Rust code can ignore it. 
-CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zones_not_in_service ( - blueprint_id UUID NOT NULL, - bp_omicron_zone_id UUID NOT NULL, - - PRIMARY KEY (blueprint_id, bp_omicron_zone_id) -); - /*******************************************************************/ /* @@ -3775,7 +3760,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '50.0.0', NULL) + ( TRUE, NOW(), NOW(), '51.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From f3c87ffe6b8f204abd28714d46d11c31822a1df8 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 5 Apr 2024 20:27:23 -0700 Subject: [PATCH 079/334] [deps] update slog_term to 2.9.1 (#5446) 2.9.0 depends on atty which is deprecated. --- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 79ad8bbaa7..0f9bc0ccf5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8890,11 +8890,11 @@ dependencies = [ [[package]] name = "slog-term" -version = "2.9.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87d29185c55b7b258b4f120eab00f48557d4d9bc814f41713f449d35b0f8977c" +checksum = "b6e022d0b998abfe5c3782c1f03551a596269450ccd677ea51c56f8b214610e8" dependencies = [ - "atty", + "is-terminal", "slog", "term", "thread_local", diff --git a/Cargo.toml b/Cargo.toml index 870727fdff..0a4f9bab60 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -391,7 +391,7 @@ slog-bunyan = "2.5" slog-dtrace = "0.3" slog-envlogger = "2.2" slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] } -slog-term = "2.9" +slog-term = "2.9.1" smf = "0.2" socket2 = { version = "0.5", features = ["all"] } sp-sim = { path = "sp-sim" } From 8bc9010003673506af2309dd83612bdb427287c1 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 6 Apr 2024 05:44:21 +0000 Subject: [PATCH 080/334] chore(deps): update taiki-e/install-action digest to e4ef34d (#5447) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`882330f` -> `e4ef34d`](https://togithub.com/taiki-e/install-action/compare/882330f...e4ef34d) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 5443582cc0..a54321fad6 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@882330fb2472a0660b5c990be7c5ccd3cbdf3282 # v2 + uses: taiki-e/install-action@e4ef34df890c5af6027f55257634401a93b14dc7 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 5b95eaa98bc06ccf0868df4b7c82beeb29aac7b5 Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 5 Apr 2024 23:00:38 -0700 Subject: [PATCH 081/334] [dns-server] tempdir -> camino-tempfile (#5445) tempdir is unmaintained -- tempfile replaces it, and we can use the camino wrapper around it because we're in the camino world anyway. --- Cargo.lock | 64 +------------------------------ Cargo.toml | 1 - dns-server/Cargo.toml | 2 +- dns-server/src/storage.rs | 10 ++--- dns-server/tests/basic_test.rs | 8 ++-- dns-server/tests/commands_test.rs | 7 ++-- 6 files changed, 13 insertions(+), 79 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0f9bc0ccf5..cdb34e49ce 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1949,6 +1949,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", + "camino-tempfile", "chrono", "clap 4.5.1", "dns-service-client", @@ -1969,7 +1970,6 @@ dependencies = [ "slog-envlogger", "slog-term", "subprocess", - "tempdir", "tempfile", "thiserror", "tokio", @@ -2556,12 +2556,6 @@ dependencies = [ "winapi", ] -[[package]] -name = "fuchsia-cprng" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" - [[package]] name = "funty" version = "2.0.0" @@ -7236,19 +7230,6 @@ dependencies = [ "nibble_vec", ] -[[package]] -name = "rand" -version = "0.4.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "552840b97013b1a26992c11eac34bdd778e464601a4c2054b5f0bff7c6761293" -dependencies = [ - "fuchsia-cprng", - "libc", - "rand_core 0.3.1", - "rdrand", - "winapi", -] - [[package]] name = "rand" version = "0.7.3" @@ -7293,21 +7274,6 @@ dependencies = [ "rand_core 0.6.4", ] -[[package]] -name = "rand_core" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a6fdeb83b075e8266dcc8762c22776f6877a63111121f5f8c7411e5be7eed4b" -dependencies = [ - "rand_core 0.4.2", -] - -[[package]] -name = "rand_core" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c33a3c44ca05fa6f1807d8e6743f3824e8509beca625669633be0acbdf509dc" - [[package]] name = "rand_core" version = "0.5.1" @@ -7405,15 +7371,6 @@ dependencies = [ "yasna", ] -[[package]] -name = "rdrand" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "678054eb77286b51581ba43620cc911abf02758c91f93f479767aed0f90458b2" -dependencies = [ - "rand_core 0.3.1", -] - [[package]] name = "reconfigurator-cli" version = "0.1.0" @@ -7590,15 +7547,6 @@ version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c707298afce11da2efef2f600116fa93ffa7a032b5d7b628aa17711ec81383ca" -[[package]] -name = "remove_dir_all" -version = "0.5.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7" -dependencies = [ - "winapi", -] - [[package]] name = "reqwest" version = "0.11.24" @@ -9487,16 +9435,6 @@ dependencies = [ "unicode-ident", ] -[[package]] -name = "tempdir" -version = "0.3.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15f2b5fb00ccdf689e0149d1b1b3c03fead81c2b37735d812fa8bddbbf41b6d8" -dependencies = [ - "rand 0.4.6", - "remove_dir_all", -] - [[package]] name = "tempfile" version = "3.10.0" diff --git a/Cargo.toml b/Cargo.toml index 0a4f9bab60..bf6ca9ccc9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -413,7 +413,6 @@ libsw = { version = "3.3.1", features = ["tokio"] } syn = { version = "2.0" } tabled = "0.15.0" tar = "0.4" -tempdir = "0.3" tempfile = "3.10" term = "0.7" termios = "0.3" diff --git a/dns-server/Cargo.toml b/dns-server/Cargo.toml index f91cbfafdb..3440ebcfe8 100644 --- a/dns-server/Cargo.toml +++ b/dns-server/Cargo.toml @@ -33,11 +33,11 @@ uuid.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] +camino-tempfile.workspace = true expectorate.workspace = true omicron-test-utils.workspace = true openapiv3.workspace = true openapi-lint.workspace = true serde_json.workspace = true subprocess.workspace = true -tempdir.workspace = true trust-dns-resolver.workspace = true diff --git a/dns-server/src/storage.rs b/dns-server/src/storage.rs index 270cc500d1..21fb9ebdc6 100644 --- a/dns-server/src/storage.rs +++ b/dns-server/src/storage.rs @@ -783,6 +783,7 @@ mod test { use crate::storage::QueryError; use anyhow::Context; use camino::Utf8PathBuf; + use camino_tempfile::Utf8TempDir; use omicron_test_utils::dev::test_setup_log; use std::collections::BTreeSet; use std::collections::HashMap; @@ -796,7 +797,7 @@ mod test { /// our tests and helps make sure they get cleaned up properly. struct TestContext { logctx: dropshot::test_util::LogContext, - tmpdir: tempdir::TempDir, + tmpdir: Utf8TempDir, store: Store, db: Arc, } @@ -804,12 +805,9 @@ mod test { impl TestContext { fn new(test_name: &str) -> TestContext { let logctx = test_setup_log(test_name); - let tmpdir = tempdir::TempDir::new("dns-server-storage-test") + let tmpdir = Utf8TempDir::with_prefix("dns-server-storage-test") .expect("failed to create tmp directory for test"); - let storage_path = - Utf8PathBuf::from_path_buf(tmpdir.path().to_path_buf()).expect( - "failed to create Utf8PathBuf for test temporary directory", - ); + let storage_path = tmpdir.path().to_path_buf(); let db = Arc::new( sled::open(&storage_path).context("creating db").unwrap(), diff --git a/dns-server/tests/basic_test.rs b/dns-server/tests/basic_test.rs index 98cd1487ab..19666e82c1 100644 --- a/dns-server/tests/basic_test.rs +++ b/dns-server/tests/basic_test.rs @@ -3,6 +3,7 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
use anyhow::{Context, Result}; +use camino_tempfile::Utf8TempDir; use dns_service_client::{ types::{DnsConfigParams, DnsConfigZone, DnsRecord, Srv}, Client, @@ -332,7 +333,7 @@ struct TestContext { resolver: TokioAsyncResolver, dns_server: dns_server::dns_server::ServerHandle, dropshot_server: dropshot::HttpServer, - tmp: tempdir::TempDir, + tmp: Utf8TempDir, logctx: LogContext, } @@ -401,7 +402,7 @@ fn test_config( test_name: &str, ) -> Result< ( - tempdir::TempDir, + Utf8TempDir, dns_server::storage::Config, dropshot::ConfigDropshot, LogContext, @@ -409,10 +410,9 @@ fn test_config( anyhow::Error, > { let logctx = test_setup_log(test_name); - let tmp_dir = tempdir::TempDir::new("dns-server-test")?; + let tmp_dir = Utf8TempDir::with_prefix("dns-server-test")?; let mut storage_path = tmp_dir.path().to_path_buf(); storage_path.push("test"); - let storage_path = storage_path.to_str().unwrap().into(); let config_storage = dns_server::storage::Config { storage_path, keep_old_generations: 3 }; let config_dropshot = dropshot::ConfigDropshot { diff --git a/dns-server/tests/commands_test.rs b/dns-server/tests/commands_test.rs index 85a6f0e07e..8c812cb2aa 100644 --- a/dns-server/tests/commands_test.rs +++ b/dns-server/tests/commands_test.rs @@ -2,7 +2,7 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use camino::Utf8PathBuf; +use camino_tempfile::Utf8TempDir; use dns_server::storage::Store; use omicron_test_utils::dev::test_cmds::assert_exit_code; use omicron_test_utils::dev::test_cmds::path_to_executable; @@ -17,10 +17,9 @@ const CMD_DNSADM: &str = env!("CARGO_BIN_EXE_dnsadm"); async fn test_dnsadm() { // Start a DNS server with some sample data. let logctx = test_setup_log("test_dnsadm"); - let tmpdir = tempdir::TempDir::new("test_dnsadm") + let tmpdir = Utf8TempDir::with_prefix("test_dnsadm") .expect("failed to create tmp directory for test"); - let storage_path = Utf8PathBuf::from_path_buf(tmpdir.path().to_path_buf()) - .expect("failed to create Utf8PathBuf for test temporary directory"); + let storage_path = tmpdir.path().to_path_buf(); let store = Store::new( logctx.log.clone(), From 3b6f7f86277f64c63f0a8ae50060404171b3849b Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 6 Apr 2024 11:29:56 -0700 Subject: [PATCH 082/334] chore(deps): update rust crate rustyline to v14 (#5451) --- Cargo.lock | 19 +++++++++++++------ Cargo.toml | 2 +- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cdb34e49ce..7f3416a2fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -931,6 +931,12 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "cfg_aliases" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd16c4719339c4530435d38e511904438d07cce7950afa3718a84ac36c10e89e" + [[package]] name = "chacha20" version = "0.9.1" @@ -4921,12 +4927,13 @@ dependencies = [ [[package]] name = "nix" -version = "0.27.1" +version = "0.28.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" +checksum = "ab2156c4fce2f8df6c499cc1c763e4394b7482525bf2a9701c9d79d215f519e4" dependencies = [ "bitflags 2.4.2", "cfg-if", + "cfg_aliases", "libc", ] @@ -8031,9 +8038,9 @@ 
dependencies = [ [[package]] name = "rustyline" -version = "13.0.0" +version = "14.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02a2d683a4ac90aeef5b1013933f6d977bd37d51ff3f4dad829d4931a7e6be86" +checksum = "7803e8936da37efd9b6d4478277f4b2b9bb5cdb37a113e8d63222e58da647e63" dependencies = [ "bitflags 2.4.2", "cfg-if", @@ -8043,12 +8050,12 @@ dependencies = [ "libc", "log", "memchr", - "nix 0.27.1", + "nix 0.28.0", "radix_trie", "unicode-segmentation", "unicode-width", "utf8parse", - "winapi", + "windows-sys 0.52.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index bf6ca9ccc9..976ee02f9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -361,7 +361,7 @@ rstest = "0.18.2" rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.1.1" -rustyline = "13.0.0" +rustyline = "14.0.0" samael = { version = "0.0.15", features = ["xmlsec"] } schemars = "0.8.16" secrecy = "0.8.0" From 29404c2d7be4e290762b6d1871b92bf4d0bd802e Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sun, 7 Apr 2024 18:27:13 -0700 Subject: [PATCH 083/334] chore(deps): update rust crate rustls-pemfile to 2.1.2 (#5452) --- Cargo.lock | 13 +++++++------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 2 ++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7f3416a2fc..a8869eb9b4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2090,7 +2090,7 @@ dependencies = [ "percent-encoding", "proc-macro2", "rustls 0.22.2", - "rustls-pemfile 2.1.1", + "rustls-pemfile 2.1.2", "schemars", "serde", "serde_json", @@ -5425,7 +5425,7 @@ dependencies = [ "reqwest", "ring 0.17.8", "rustls 0.22.2", - "rustls-pemfile 2.1.1", + "rustls-pemfile 2.1.2", "samael", "schemars", "semver 1.0.22", @@ -5721,6 +5721,7 @@ dependencies = [ "aho-corasick", "anyhow", "base16ct", + "base64 0.22.0", "bit-set", "bit-vec", "bitflags 1.3.2", @@ -7948,7 +7949,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f1fb85efa936c42c6d5fc28d2629bb51e4b2f4b8a5211e297d599cc5a093792" dependencies = [ "openssl-probe", - "rustls-pemfile 2.1.1", + "rustls-pemfile 2.1.2", "rustls-pki-types", "schannel", "security-framework", @@ -7965,11 +7966,11 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f48172685e6ff52a556baa527774f61fcaa884f59daf3375c62a3f1cd2549dab" +checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" dependencies = [ - "base64 0.21.7", + "base64 0.22.0", "rustls-pki-types", ] diff --git a/Cargo.toml b/Cargo.toml index 976ee02f9c..cdb5d8f64c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -360,7 +360,7 @@ rpassword = "7.3.1" rstest = "0.18.2" rustfmt-wrapper = "0.2" rustls = "0.22.2" -rustls-pemfile = "2.1.1" +rustls-pemfile = "2.1.2" rustyline = "14.0.0" samael = { version = "0.0.15", features = ["xmlsec"] } schemars = "0.8.16" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 4fbd218cf4..f4845196b8 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -18,6 +18,7 @@ ahash = { version = "0.8.8" } aho-corasick = { version = "1.1.2" } anyhow = { version = "1.0.79", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } +base64 = { version = "0.22.0" } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version 
= "1.3.2" } @@ -124,6 +125,7 @@ ahash = { version = "0.8.8" } aho-corasick = { version = "1.1.2" } anyhow = { version = "1.0.79", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } +base64 = { version = "0.22.0" } bit-set = { version = "0.5.3" } bit-vec = { version = "0.6.3" } bitflags-dff4ba8e3ae991db = { package = "bitflags", version = "1.3.2" } From e093f564516ae707d1c6bf063850e49793bb0183 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 8 Apr 2024 10:09:46 -0700 Subject: [PATCH 084/334] Xtask virtual hardware (#5423) Adds an xtask to create and destroy virtual hardware. For simplicity, and to avoid breaking external repos, this PR does not remove the `./tools/create_virtual_hardware.sh` scripts. However, that is the goal. Depends on https://github.com/oxidecomputer/omicron/pull/5422 Part of https://github.com/oxidecomputer/omicron/issues/3939 Fixes https://github.com/oxidecomputer/omicron/issues/5401 --- Cargo.lock | 1 + dev-tools/xtask/Cargo.toml | 1 + dev-tools/xtask/src/main.rs | 9 + dev-tools/xtask/src/virtual_hardware.rs | 875 +++++++++++++++++++ dev-tools/xtask/src/virtual_hardware_stub.rs | 13 + docs/boundary-services-a-to-z.adoc | 4 +- docs/how-to-run.adoc | 30 +- illumos-utils/src/opte/illumos.rs | 2 +- package/src/target.rs | 2 +- smf/sled-agent/non-gimlet/config.toml | 2 +- tools/install_runner_prerequisites.sh | 2 +- 11 files changed, 917 insertions(+), 24 deletions(-) create mode 100644 dev-tools/xtask/src/virtual_hardware.rs create mode 100644 dev-tools/xtask/src/virtual_hardware_stub.rs diff --git a/Cargo.lock b/Cargo.lock index a8869eb9b4..1dccdbc719 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11310,6 +11310,7 @@ dependencies = [ "cargo_toml", "clap 4.5.1", "fs-err", + "macaddr", "serde", "swrite", "toml 0.8.12", diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index 73bfe0b37a..6941406df1 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -10,6 +10,7 @@ camino.workspace = true cargo_toml = "0.19" cargo_metadata = "0.18" clap.workspace = true +macaddr.workspace = true serde.workspace = true toml.workspace = true fs-err.workspace = true diff --git a/dev-tools/xtask/src/main.rs b/dev-tools/xtask/src/main.rs index c0d8a6aa64..dd090943a2 100644 --- a/dev-tools/xtask/src/main.rs +++ b/dev-tools/xtask/src/main.rs @@ -15,6 +15,12 @@ mod clippy; #[cfg(target_os = "illumos")] mod verify_libraries; +#[cfg(target_os = "illumos")] +mod virtual_hardware; +#[cfg(not(target_os = "illumos"))] +#[path = "virtual_hardware_stub.rs"] +mod virtual_hardware; + #[derive(Parser)] #[command(name = "cargo xtask", about = "Workspace-related developer tools")] struct Args { @@ -32,6 +38,8 @@ enum Cmds { /// Verify we are not leaking library bindings outside of intended /// crates VerifyLibraries, + /// Manage virtual hardware + VirtualHardware(virtual_hardware::Args), } fn main() -> Result<()> { @@ -47,6 +55,7 @@ fn main() -> Result<()> { "Library verification is only available on illumos!" ); } + Cmds::VirtualHardware(args) => virtual_hardware::run_cmd(args), } } diff --git a/dev-tools/xtask/src/virtual_hardware.rs b/dev-tools/xtask/src/virtual_hardware.rs new file mode 100644 index 0000000000..c98d350c73 --- /dev/null +++ b/dev-tools/xtask/src/virtual_hardware.rs @@ -0,0 +1,875 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Subcommand: cargo xtask virtual-hardware + +use anyhow::{anyhow, bail, Context, Result}; +use camino::{Utf8Path, Utf8PathBuf}; +use clap::{Parser, Subcommand}; +use macaddr::MacAddr; +use serde::Deserialize; +use std::process::{Command, Output}; +use std::str::FromStr; + +#[derive(Subcommand)] +enum Commands { + /// Create virtual hardware to simulate a Gimlet + Create { + /// The physical link over which Chelsio links are simulated + /// + /// Will be inferred by `dladm show-phys` if unsupplied. + #[clap(long)] + physical_link: Option, + + /// Sets `promisc-filtered` off for the sc0_1 vnic. + /// + /// Won't do anything if unsupplied. + #[clap(long)] + promiscuous_filter_off: bool, + + /// The gateway IP address of your local network + /// + /// Will be inferred via `netstat` if unsupplied. + #[clap(long)] + gateway_ip: Option, + + /// The MAC address of your gateway IP + /// + /// Will be inferred via `arp` if unsupplied. + #[clap(long)] + gateway_mac: Option, + + #[command(flatten)] + pxa: Pxa, + + #[clap(long, default_value = PXA_MAC_DEFAULT)] + pxa_mac: String, + }, + /// Destroy virtual hardware which was initialized with "Create" + Destroy, +} + +/// Describes which objects should be manipulated by these commands. +#[derive(clap::ValueEnum, Clone, Copy, Debug)] +pub enum Scope { + /// Everything (this is the default). + All, + /// Only storage (e.g. vdevs). + Disks, + /// Only networking (e.g. SoftNPU). + Network, +} + +#[derive(clap::Args)] +#[group(multiple = true)] +pub struct Pxa { + /// The first IP address your Oxide cluster can use. + /// + /// Requires `pxa-end`. + #[clap(long = "pxa-start", requires = "end")] + start: Option, + + /// The last IP address your Oxide cluster can use + /// + /// Requires `pxa-start`. + #[clap(long = "pxa-end", requires = "start")] + end: Option, +} + +#[derive(Parser)] +pub struct Args { + #[clap(long, value_enum, default_value_t = Scope::All)] + scope: Scope, + + /// The directory in which virtual devices are stored + #[clap(long, default_value = "/var/tmp")] + vdev_dir: Utf8PathBuf, + + #[command(subcommand)] + command: Commands, +} + +static NO_INSTALL_MARKER: &'static str = "/etc/opt/oxide/NO_INSTALL"; +const GB: u64 = 1 << 30; +const VDEV_SIZE: u64 = 20 * GB; + +const ARP: &'static str = "/usr/sbin/arp"; +const DLADM: &'static str = "/usr/sbin/dladm"; +const IPADM: &'static str = "/usr/sbin/ipadm"; +const MODINFO: &'static str = "/usr/sbin/modinfo"; +const MODUNLOAD: &'static str = "/usr/sbin/modunload"; +const NETSTAT: &'static str = "/usr/bin/netstat"; +const PFEXEC: &'static str = "/usr/bin/pfexec"; +const PING: &'static str = "/usr/sbin/ping"; +const SWAP: &'static str = "/usr/sbin/swap"; +const ZFS: &'static str = "/usr/sbin/zfs"; +const ZLOGIN: &'static str = "/usr/sbin/zlogin"; +const ZPOOL: &'static str = "/usr/sbin/zpool"; +const ZONEADM: &'static str = "/usr/sbin/zoneadm"; + +const SIDECAR_LITE_COMMIT: &'static str = + "e3ea4b495ba0a71801ded0776ae4bbd31df57e26"; +const SOFTNPU_COMMIT: &'static str = "dbab082dfa89da5db5ca2325c257089d2f130092"; +const PXA_MAC_DEFAULT: &'static str = "a8:e1:de:01:70:1d"; + +const PXA_WARNING: &'static str = r#" You have not set up the proxy-ARP environment variables + PXA_START and PXA_END. These variables are necessary to allow + SoftNPU to respond to ARP requests for the portion of the + network you've dedicated to Omicron. 
+ You must either destroy / recreate the Omicron environment + with these variables or run `scadm standalon add-proxy-arp` + in the SoftNPU zone later"#; + +pub fn run_cmd(args: Args) -> Result<()> { + if Utf8Path::new(NO_INSTALL_MARKER).exists() { + bail!("This system has the marker file {NO_INSTALL_MARKER}, aborting"); + } + + let workspace_root = match crate::load_workspace() { + Ok(metadata) => metadata.workspace_root, + Err(_err) => { + let pwd = Utf8PathBuf::try_from(std::env::current_dir()?)?; + eprintln!( + "Couldn't find Cargo.toml, using {pwd} as workspace root" + ); + pwd + } + }; + + let smf_path = "smf/sled-agent/non-gimlet/config.toml"; + let sled_agent_config = workspace_root.join(smf_path); + if !sled_agent_config.exists() { + bail!("Could not find {smf_path}. We need it to configure vdevs"); + } + + let npuzone_path = "out/npuzone/npuzone"; + let npu_zone = workspace_root.join(npuzone_path); + if !npu_zone.exists() { + bail!("Could not find {npuzone_path}. We need it to configure SoftNPU"); + } + + match args.command { + Commands::Create { + physical_link, + promiscuous_filter_off, + gateway_ip, + gateway_mac, + pxa, + pxa_mac, + } => { + let physical_link = if let Some(l) = physical_link { + l + } else { + default_physical_link()? + }; + + println!("creating virtual hardware"); + if matches!(args.scope, Scope::All | Scope::Disks) { + ensure_vdevs(&sled_agent_config, &args.vdev_dir)?; + } + if matches!(args.scope, Scope::All | Scope::Network) { + ensure_simulated_links(&physical_link, promiscuous_filter_off)?; + ensure_softnpu_zone(&npu_zone)?; + initialize_softnpu_zone(gateway_ip, gateway_mac, pxa, pxa_mac)?; + } + println!("created virtual hardware"); + } + Commands::Destroy => { + println!("destroying virtual hardware"); + verify_omicron_uninstalled()?; + demount_backingfs()?; + if matches!(args.scope, Scope::All | Scope::Network) { + unload_xde_driver()?; + remove_softnpu_zone(&npu_zone)?; + remove_vnics()?; + } + if matches!(args.scope, Scope::All | Scope::Disks) { + destroy_vdevs(&sled_agent_config, &args.vdev_dir)?; + } + println!("destroyed virtual hardware"); + } + } + + Ok(()) +} + +fn verify_omicron_uninstalled() -> Result<()> { + let mut cmd = Command::new("svcs"); + cmd.arg("svc:/oxide/sled-agent:default"); + if let Ok(_) = execute(cmd) { + bail!("Omicron is still installed, please run `omicron-package uninstall` first"); + } + Ok(()) +} + +// Some services have their working data overlaid by backing mounts from the +// internal boot disk. Before we can destroy the ZFS pools, we need to unmount +// these. +fn demount_backingfs() -> Result<()> { + const BACKED_SERVICES: &str = "svc:/system/fmd:default"; + println!("Disabling {BACKED_SERVICES}"); + svcadm_temporary_toggle(BACKED_SERVICES, false)?; + for dataset in zfs_list_internal("yes", "noauto")? { + println!("unmounting: {dataset}"); + zfs_umount(&dataset)?; + } + println!("Re-enabling {BACKED_SERVICES}"); + svcadm_temporary_toggle(BACKED_SERVICES, true)?; + Ok(()) +} + +fn unload_xde_driver() -> Result<()> { + let cmd = Command::new(MODINFO); + let output = execute(cmd)?; + + let id = String::from_utf8(output.stdout) + .context("Invalid modinfo output")? 
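+        // Each `modinfo` line starts with the numeric module id; we want the
+        // id of the module whose description mentions "xde".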
+ .lines() + .find_map(|line| { + let mut cols = line.trim().splitn(2, ' '); + let id = cols.next()?; + let desc = cols.next()?; + if !desc.contains("xde") { + return None; + } + return Some(id.to_string()); + }); + + let Some(id) = id else { + println!("xde driver already unloaded"); + return Ok(()); + }; + println!("unloading xde driver"); + + let mut cmd = Command::new(PFEXEC); + cmd.arg(MODUNLOAD); + cmd.arg("-i"); + cmd.arg(id); + execute(cmd)?; + Ok(()) +} + +fn remove_softnpu_zone(npu_zone: &Utf8Path) -> Result<()> { + println!("ensuring softnpu zone destroyed"); + let mut cmd = Command::new(PFEXEC); + cmd.arg(npu_zone); + cmd.args([ + "destroy", + "sidecar", + "--omicron-zone", + "--ports", + "sc0_0,tfportrear0_0", + "--ports", + "sc0_1,tfportqsfp0_0", + ]); + execute(cmd)?; + Ok(()) +} + +fn remove_vnics() -> Result<()> { + delete_address("lo0/underlay")?; + delete_interface("sc0_1")?; + delete_vnic("sc0_1")?; + + for i in 0..=1 { + let net = format!("net{i}"); + let sc = format!("sc{i}_0"); + + delete_interface(&net)?; + delete_simnet(&net)?; + delete_simnet(&sc)?; + } + + Ok(()) +} + +fn ensure_simulated_links( + physical_link: &str, + promiscuous_filter_off: bool, +) -> Result<()> { + for i in 0..=1 { + let net = format!("net{i}"); + let sc = format!("sc{i}_0"); + if !simnet_exists(&net) { + create_simnet(&net)?; + create_simnet(&sc)?; + modify_simnet(&sc, &net)?; + set_linkprop(&sc, "mtu", "9000")?; + } + println!("Simnet {net}/{sc} exists"); + } + + let sc = "sc0_1".to_string(); + if !vnic_exists(&sc) { + create_vnic(&sc, physical_link, PXA_MAC_DEFAULT)?; + if promiscuous_filter_off { + set_linkprop(&sc, "promisc-filtered", "off")?; + } + } + println!("Vnic {sc} exists"); + Ok(()) +} + +fn ensure_softnpu_zone(npu_zone: &Utf8Path) -> Result<()> { + let zones = zoneadm_list()?; + if !zones.iter().any(|z| z == "sidecar_softnpu") { + if !npu_zone.exists() { + bail!("npu binary is not installed. 
Please re-run ./tools/install_prerequisites.sh"); + } + + let mut cmd = Command::new(PFEXEC); + cmd.arg(npu_zone); + cmd.args([ + "create", + "sidecar", + "--omicron-zone", + "--ports", + "sc0_0,tfportrear0_0", + "--ports", + "sc0_1,tfportqsfp0_0", + "--sidecar-lite-commit", + SIDECAR_LITE_COMMIT, + "--softnpu-commit", + SOFTNPU_COMMIT, + ]); + execute(cmd)?; + } + + Ok(()) +} + +fn initialize_softnpu_zone( + gateway_ip: Option, + gateway_mac: Option, + pxa: Pxa, + pxa_mac: String, +) -> Result<()> { + let gateway_ip = match gateway_ip { + Some(ip) => ip, + None => default_gateway_ip()?, + }; + println!("Using {gateway_ip} as gateway ip"); + + let gateway_mac = get_gateway_mac(gateway_mac, &gateway_ip)?.to_string(); + println!("using {gateway_mac} as gateway mac"); + + // Configure upstream network gateway ARP entry + println!("configuring SoftNPU ARP entry"); + run_scadm_command(vec!["add-arp-entry", &gateway_ip, &gateway_mac])?; + + match (pxa.start, pxa.end) { + (Some(start), Some(end)) => { + println!("configuring SoftNPU proxy ARP"); + run_scadm_command(vec!["add-proxy-arp", &start, &end, &pxa_mac])?; + } + _ => { + eprintln!("{PXA_WARNING}"); + } + } + + let output = run_scadm_command(vec!["dump-state"])?; + let stdout = String::from_utf8(output.stdout) + .context("Invalid dump-state output")?; + println!("SoftNPU state:"); + for line in stdout.lines() { + println!(" {line}"); + } + + Ok(()) +} + +fn run_scadm_command(args: Vec<&str>) -> Result { + let mut cmd = Command::new(PFEXEC); + cmd.args([ + ZLOGIN, + "sidecar_softnpu", + "/softnpu/scadm", + "--server", + "/softnpu/server", + "--client", + "/softnpu/client", + "standalone", + ]); + for arg in &args { + cmd.arg(arg); + } + Ok(execute(cmd)?) +} + +fn default_gateway_ip() -> Result { + let mut cmd = Command::new(NETSTAT); + cmd.args(["-rn", "-f", "inet"]); + let output = execute(cmd)?; + + String::from_utf8(output.stdout) + .context("Invalid netstat output")? + .lines() + .find_map(|line| { + let mut columns = line.trim().split_whitespace(); + let dst = columns.next()?; + let gateway = columns.next()?; + + if dst == "default" { + return Some(gateway.to_owned()); + } + None + }) + .ok_or_else(|| anyhow!("No default gateway found")) +} + +fn get_gateway_mac( + gateway_mac: Option, + gateway_ip: &str, +) -> Result { + match gateway_mac { + Some(mac) => Ok(MacAddr::from_str(&mac)?), + None => { + let attempts = 3; + for i in 0..=attempts { + println!( + "Pinging {gateway_ip} and sleeping ({i} / {attempts})" + ); + let mut cmd = Command::new(PING); + cmd.arg(&gateway_ip); + execute(cmd)?; + std::thread::sleep(std::time::Duration::from_secs(1)); + } + + let mut cmd = Command::new(ARP); + cmd.arg("-an"); + let output = execute(cmd)?; + + let mac = String::from_utf8(output.stdout) + .context("Invalid arp output")? + .lines() + .find_map(|line| { + let mut columns = line.trim().split_whitespace().skip(1); + let ip = columns.next()?; + let mac = columns.last()?; + if ip == gateway_ip { + return Some(mac.to_string()); + } + None + }) + .ok_or_else(|| anyhow!("No gateway MAC found"))?; + Ok(MacAddr::from_str(&mac)?) + } + } +} + +/// This is a subset of omicron-sled-agent's "config/Config" structure. +/// +/// We don't depend on it directly to avoid rebuilding whenever the +/// Sled Agent changes, though it's important for us to stay in sync +/// to parse these fields correctly. +#[derive(Clone, Debug, Deserialize)] +struct SledAgentConfig { + /// Optional list of virtual devices to be used as "discovered disks". 
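+    ///
+    /// Relative paths are resolved against the `--vdev-dir` argument (see
+    /// `ensure_vdevs`/`destroy_vdevs` below).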
+ pub vdevs: Option>, +} + +impl SledAgentConfig { + fn read(path: &Utf8Path) -> Result { + let config = std::fs::read_to_string(path)?; + Ok(toml::from_str(&config) + .context("Could not parse sled agent config as toml")?) + } +} + +fn ensure_vdevs( + sled_agent_config: &Utf8Path, + vdev_dir: &Utf8Path, +) -> Result<()> { + let config = SledAgentConfig::read(sled_agent_config)?; + + let Some(vdevs) = &config.vdevs else { + bail!("No vdevs found in this configuration"); + }; + + for vdev in vdevs { + let vdev_path = if vdev.is_absolute() { + vdev.to_owned() + } else { + vdev_dir.join(vdev) + }; + + if vdev_path.exists() { + println!("{vdev_path} already exists"); + } else { + println!("creating {vdev_path}"); + let file = std::fs::File::create(&vdev_path)?; + file.set_len(VDEV_SIZE)?; + } + } + Ok(()) +} + +const ZVOL_ROOT: &str = "/dev/zvol/dsk"; + +fn destroy_vdevs( + sled_agent_config: &Utf8Path, + vdev_dir: &Utf8Path, +) -> Result<()> { + let swap_devices = swap_list()?; + let zpools = omicron_zpool_list()?; + + for zpool in &zpools { + println!("destroying: {zpool}"); + // Remove any swap devices that appear used by this zpool + for swap_device in &swap_devices { + if swap_device + .starts_with(Utf8PathBuf::from(ZVOL_ROOT).join(&zpool)) + { + println!("Removing {swap_device} from {zpool}"); + swap_delete(&swap_device)?; + } + } + + // Then remove the zpool itself + zpool_destroy(zpool)?; + println!("destroyed: {zpool}"); + } + + // Remove the vdev files themselves, if they are regular files + let config = SledAgentConfig::read(sled_agent_config)?; + if let Some(vdevs) = &config.vdevs { + for vdev in vdevs { + let vdev_path = if vdev.is_absolute() { + vdev.to_owned() + } else { + vdev_dir.join(vdev) + }; + + if !vdev_path.exists() { + continue; + } + + let metadata = std::fs::metadata(&vdev_path)?; + + if metadata.file_type().is_file() { + std::fs::remove_file(&vdev_path)?; + println!("deleted {vdev_path}"); + } + } + } + + Ok(()) +} + +fn execute(mut cmd: Command) -> Result { + let output = cmd + .output() + .context(format!("Could not start command: {:?}", cmd.get_program()))?; + if !output.status.success() { + let stderr = + String::from_utf8(output.stderr).unwrap_or_else(|_| String::new()); + + bail!( + "{:?} failed: {} (stderr: {stderr})", + cmd.get_program(), + output.status + ) + } + + Ok(output) +} + +// Lists all files used for swap +fn swap_list() -> Result> { + let mut cmd = Command::new(SWAP); + cmd.arg("-l"); + + let output = cmd.output().context(format!("Could not start swap"))?; + if !output.status.success() { + if let Ok(stderr) = String::from_utf8(output.stderr) { + // This is an exceptional case - if there are no swap devices, + // we treat this error case as an "empty result". + if stderr.trim() == "No swap devices configured" { + return Ok(vec![]); + } + eprint!("{}", stderr); + } + bail!("swap failed: {}", output.status); + } + + Ok(String::from_utf8(output.stdout) + .context("Invalid swap output")? + .lines() + .skip(1) + .filter_map(|line| { + line.split_whitespace().next().map(|s| Utf8PathBuf::from(s)) + }) + .collect()) +} + +// Deletes a specific swap file +fn swap_delete(file: &Utf8Path) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.arg(SWAP); + cmd.arg("-d"); + cmd.arg(file); + execute(cmd)?; + Ok(()) +} + +static ZPOOL_PREFIXES: [&'static str; 2] = ["oxp_", "oxi_"]; + +// Lists all zpools managed by omicron. 
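+//
+// A pool counts as Omicron-managed if its name starts with one of the
+// `ZPOOL_PREFIXES` ("oxp_" or "oxi_").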
+fn omicron_zpool_list() -> Result> { + let mut cmd = Command::new(ZPOOL); + cmd.args(["list", "-Hpo", "name"]); + let output = execute(cmd)?; + + Ok(String::from_utf8(output.stdout) + .context("Invalid zpool list output")? + .lines() + .filter_map(|line| { + let pool = line.trim().to_string(); + if ZPOOL_PREFIXES.iter().any(|pfx| pool.starts_with(pfx)) { + Some(pool) + } else { + None + } + }) + .collect()) +} + +fn svcadm_temporary_toggle(svc: &str, enable: bool) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.arg("svcadm"); + if enable { + cmd.arg("enable"); + } else { + cmd.arg("disable"); + } + cmd.arg("-st"); + cmd.arg(svc); + execute(cmd)?; + Ok(()) +} + +fn zfs_list_internal(canmount: &str, mounted: &str) -> Result> { + let mut cmd = Command::new(ZFS); + cmd.args(["list", "-rHpo", "name,canmount,mounted"]); + let output = execute(cmd)?; + + Ok(String::from_utf8(output.stdout) + .context("Invalid zfs list output")? + .lines() + .filter_map(|line| { + let mut cols = line.trim().split_whitespace(); + let dataset = cols.next()?; + if !dataset.starts_with("oxi_") { + return None; + } + if canmount != cols.next()? { + return None; + } + if mounted != cols.next()? { + return None; + } + return Some(dataset.to_string()); + }) + .collect()) +} + +fn zfs_umount(dataset: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.args([ZFS, "umount"]); + cmd.arg(dataset); + execute(cmd)?; + Ok(()) +} + +fn zpool_destroy(pool: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.args([ZFS, "destroy", "-r"]); + cmd.arg(pool); + execute(cmd)?; + + // This can fail with an "already unmounted" error, which we opt to ignore. + // + // If it was important, then the zpool destroy command should fail below + // anyway. + let mut cmd = Command::new(PFEXEC); + cmd.args([ZFS, "unmount"]); + cmd.arg(pool); + if let Err(err) = execute(cmd) { + eprintln!( + "Failed to unmount {pool}: {err}, attempting to destroy anyway" + ); + } + + let mut cmd = Command::new(PFEXEC); + cmd.args([ZPOOL, "destroy"]); + cmd.arg(pool); + execute(cmd)?; + + Ok(()) +} + +fn delete_address(addr: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.arg(IPADM); + cmd.arg("delete-addr"); + cmd.arg(addr); + + let output = cmd.output().context("Failed to start ipadm")?; + if !output.status.success() { + let stderr = String::from_utf8(output.stderr)?; + if stderr.contains("Object not found") { + return Ok(()); + } + bail!("ipadm delete-addr failed: {} (stderr: {stderr})", output.status); + } + + Ok(()) +} + +fn delete_interface(iface: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.arg(IPADM); + cmd.arg("delete-if"); + cmd.arg(iface); + + let output = cmd.output().context("Failed to start ipadm")?; + if !output.status.success() { + let stderr = String::from_utf8(output.stderr)?; + if stderr.contains("Interface does not exist") { + return Ok(()); + } + bail!("ipadm delete-if failed: {} (stderr: {stderr})", output.status); + } + + Ok(()) +} + +fn delete_vnic(vnic: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.arg(DLADM); + cmd.arg("delete-vnic"); + cmd.arg(vnic); + + let output = cmd.output().context("Failed to start dladm")?; + if !output.status.success() { + let stderr = String::from_utf8(output.stderr)?; + if stderr.contains("invalid link name") { + return Ok(()); + } + bail!("dladm delete-vnic failed: {} (stderr: {stderr})", output.status); + } + + Ok(()) +} + +fn delete_simnet(simnet: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + 
cmd.arg(DLADM); + cmd.arg("delete-simnet"); + cmd.arg("-t"); + cmd.arg(simnet); + + let output = cmd.output().context("Failed to start dladm")?; + if !output.status.success() { + let stderr = String::from_utf8(output.stderr)?; + if stderr.contains("not found") { + return Ok(()); + } + bail!( + "dleadm delete-simnet failed: {} (stderr: {stderr})", + output.status + ); + } + + Ok(()) +} + +fn default_physical_link() -> Result { + let mut cmd = Command::new(DLADM); + cmd.args(["show-phys", "-p", "-o", "LINK"]); + let output = execute(cmd)?; + + Ok(String::from_utf8(output.stdout) + .context("Invalid dladm output")? + .lines() + .next() + .ok_or_else(|| anyhow!("Empty dladm output"))? + .to_string()) +} + +// Returns "true" if the VNIC exists. +// +// Returns false if it does not exist, or if we cannot tell. +fn vnic_exists(vnic: &str) -> bool { + let mut cmd = Command::new(DLADM); + cmd.args(["show-vnic", "-p", "-o", "LINK"]); + cmd.arg(vnic); + match execute(cmd) { + Ok(_) => true, + Err(_) => false, + } +} + +fn create_vnic(vnic: &str, physical_link: &str, mac: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.args([DLADM, "create-vnic", "-t"]); + cmd.arg(vnic); + cmd.arg("-l"); + cmd.arg(physical_link); + cmd.arg("-m"); + cmd.arg(mac); + execute(cmd)?; + Ok(()) +} + +fn create_simnet(simnet: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.args([DLADM, "create-simnet", "-t"]); + cmd.arg(simnet); + execute(cmd)?; + Ok(()) +} + +fn modify_simnet(simnet: &str, peer: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.args([DLADM, "modify-simnet", "-t", "-p"]); + cmd.arg(peer); + cmd.arg(simnet); + execute(cmd)?; + Ok(()) +} + +fn set_linkprop(link: &str, key: &str, value: &str) -> Result<()> { + let mut cmd = Command::new(PFEXEC); + cmd.args([DLADM, "set-linkprop", "-p"]); + cmd.arg(format!("{key}={value}")); + cmd.arg(link); + execute(cmd)?; + Ok(()) +} + +// Returns "true" if the simnet exists. +// +// Returns false if it does not exist, or if we cannot tell. +fn simnet_exists(simnet: &str) -> bool { + let mut cmd = Command::new(DLADM); + cmd.args(["show-simnet", "-p", "-o", "LINK"]); + cmd.arg(simnet); + match execute(cmd) { + Ok(_) => true, + Err(_) => false, + } +} + +fn zoneadm_list() -> Result> { + let mut cmd = Command::new(ZONEADM); + cmd.arg("list"); + let output = execute(cmd)?; + + Ok(String::from_utf8(output.stdout) + .context("Invalid zoneadm output")? + .lines() + .map(|line| line.trim().to_owned()) + .collect()) +} diff --git a/dev-tools/xtask/src/virtual_hardware_stub.rs b/dev-tools/xtask/src/virtual_hardware_stub.rs new file mode 100644 index 0000000000..62c0d0b030 --- /dev/null +++ b/dev-tools/xtask/src/virtual_hardware_stub.rs @@ -0,0 +1,13 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use anyhow::{bail, Result}; +use clap::Parser; + +#[derive(Parser)] +pub struct Args {} + +pub fn run_cmd(_args: Args) -> Result<()> { + bail!("Virtual hardware only available on illumos") +} diff --git a/docs/boundary-services-a-to-z.adoc b/docs/boundary-services-a-to-z.adoc index e4c47ac7f9..05b7ab61fb 100644 --- a/docs/boundary-services-a-to-z.adoc +++ b/docs/boundary-services-a-to-z.adoc @@ -7,8 +7,8 @@ The virtual hardware making up SoftNPU is depicted in the diagram below. 
image::plumbing.png[] -The `softnpu` zone will be configured and launched during the -`create_virtual_hardware.sh` script. +The `softnpu` zone will be configured and launched during +`cargo xtask virtual-hardware create`. Once the control plane is running, `softnpu` can be configured via `dendrite` using the `swadm` binary located in the `oxz_switch` zone. This is not necessary diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index e286fe3730..0cf2bd0899 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -199,46 +199,40 @@ The rest of these instructions assume that you're building and running Omicron o The Sled Agent supports operation on both: * a Gimlet (i.e., real Oxide hardware), and -* an ordinary PC that's been set up to look like a Gimlet using the `./tools/create_virtual_hardware.sh` script (described next). +* an ordinary PC running illumos that's been set up to look like a Gimlet using `cargo xtask virtual-hardware create` (described next). This script also sets up a "softnpu" zone to implement Boundary Services. SoftNPU simulates the Tofino device that's used in real systems. Just like Tofino, it can implement sled-to-sled networking, but that's beyond the scope of this doc. -If you're running on a PC and using either of the networking configurations mentioned above, you can usually just run this script with a few environment vaiables set. These environment variables tell SoftNPU about your local network. You will need the gateway for your network as well as the whole range of IPs that you've carved out for the Oxide system (see <<_external_networking>> above): +If you're running on a PC and using either of the networking configurations mentioned above, you can usually just run this script with a few argumnets set. These arguments tell SoftNPU about your local network. You will need the gateway for your network as well as the whole range of IPs that you've carved out for the Oxide system (see <<_external_networking>> above): [source,bash] ---- -export GATEWAY_IP=192.168.1.199 # The gateway IP address for your local network (see above) -export PXA_START=192.168.1.20 # The first IP address your Oxide cluster can use (see above) -export PXA_END=192.168.1.40 # The last IP address your Oxide cluster can use (see above) +cargo xtask virtual-hardware create + --gateway-ip 192.168.1.199 # The gateway IP address for your local network (see above) + --pxa-start 192.168.1.20 # The first IP address your Oxide cluster can use (see above) + --pxa-end 192.168.1.40 # The last IP address your Oxide cluster can use (see above) ---- -If you're using the fake sled-local external network mentioned above, then you'll need to set PHYSICAL_LINK: +If you're using the fake sled-local external network mentioned above, then you'll need to set `--physical-link`: [source,bash] ---- -export PHYSICAL_LINK=fake_external_stub0 # The etherstub for the fake external network + --physical-link fake_external_stub0 # The etherstub for the fake external network ---- If you're using an existing external network, you likely don't need to specify anything here because the script will choose one. You can specify a particular one if you want, though: [source,bash] ---- -export PHYSICAL_LINK=igb0 # The physical link for your external network. + --physical-link igb0 # The physical link for your external network. 
---- -Having set those variables, you're ready to run: +If you're running on a bench Gimlet, you may not need (or want) most of what `cargo xtask virtual-hardware create` does, but you do still need SoftNPU. You can tweak what resources are created with the `--scope` flag. -[source,bash] ----- -$ pfexec ./tools/create_virtual_hardware.sh ----- - -If you're running on a Gimlet, you don't need (or want) most of what `create_virtual_hardware.sh` does, but you do still need SoftNPU. You'll have to look at the script and run that part by hand. - -Later, you can clean up the resources created by `create_virtual_hardware.sh` with: +Later, you can clean up the resources created by `cargo xtask virtual-hardware create` with: ---- -$ pfexec ./tools/destroy_virtual_hardware.sh +$ cargo xtask virtual-hardware destroy ---- If you've done all this before and Omicron is still running, these resources will be in use and this script will fail. Uninstall Omicron (see below) before running this script. diff --git a/illumos-utils/src/opte/illumos.rs b/illumos-utils/src/opte/illumos.rs index 527172b976..90bf0bb16a 100644 --- a/illumos-utils/src/opte/illumos.rs +++ b/illumos-utils/src/opte/illumos.rs @@ -92,7 +92,7 @@ pub fn initialize_xde_driver( const MESSAGE: &str = concat!( "There must be at least two underlay NICs for the xde ", "driver to operate. These are currently created by ", - "`./tools/create_virtual_hardware.sh`. Please ensure that ", + "`cargo xtask virtual-hardware create`. Please ensure that ", "script has been run, and that two VNICs named `net{0,1}` ", "exist on the system." ); diff --git a/package/src/target.rs b/package/src/target.rs index d5d5e92c46..589dba7870 100644 --- a/package/src/target.rs +++ b/package/src/target.rs @@ -32,7 +32,7 @@ pub enum Machine { /// Use sled agent configuration for a device emulating a Gimlet /// /// Note that this configuration can actually work on real gimlets, - /// it just relies on the "./tools/create_virtual_hardware.sh" script. + /// it just relies on "cargo xtask virtual-hardware create". NonGimlet, } diff --git a/smf/sled-agent/non-gimlet/config.toml b/smf/sled-agent/non-gimlet/config.toml index 9efdcfbb93..77ca52a647 100644 --- a/smf/sled-agent/non-gimlet/config.toml +++ b/smf/sled-agent/non-gimlet/config.toml @@ -24,7 +24,7 @@ skip_timesync = false # # truncate -s 10GB .vdev # # Note that you'll need to create one such file for each disk below. -# The `create_virtual_hardware.sh` script does this for you. +# `cargo xtask virtual-hardware create` does this for you. # # These paths have the prefix of either "u2" or "m2", followed by an underscore, # followed by a string that is embedded into their fake serial values. diff --git a/tools/install_runner_prerequisites.sh b/tools/install_runner_prerequisites.sh index 2b86c4c5f4..7cf8722447 100755 --- a/tools/install_runner_prerequisites.sh +++ b/tools/install_runner_prerequisites.sh @@ -153,7 +153,7 @@ if [[ "${HOST_OS}" == "SunOS" ]]; then # Grab the SoftNPU machinery (ASIC simulator, scadm, P4 program, etc.) 
# - # create_virtual_hardware.sh will use those to setup the softnpu zone + # "cargo xtask virtual-hardware create" will use those to setup the softnpu zone retry ./tools/ci_download_softnpu_machinery fi From 5a396488e8342a0d7fdb800fe90dabe881a9cc03 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 19:59:09 +0000 Subject: [PATCH 085/334] Bump h2 from 0.3.24 to 0.3.26 (#5435) --- Cargo.lock | 7 ++++--- workspace-hack/Cargo.toml | 2 ++ 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1dccdbc719..09b4991449 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2922,9 +2922,9 @@ checksum = "92620684d99f750bae383ecb3be3748142d6095760afd5cbcf2261e9a279d780" [[package]] name = "h2" -version = "0.3.24" +version = "0.3.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9" +checksum = "81fe527a889e1532da5c525686d96d4c2e74cdd345badf8dfef9f6b39dd5f5e8" dependencies = [ "bytes", "fnv", @@ -3299,7 +3299,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.5", + "socket2 0.4.9", "tokio", "tower-service", "tracing", @@ -5803,6 +5803,7 @@ dependencies = [ "sha2", "similar", "slog", + "socket2 0.5.5", "spin 0.9.8", "string_cache", "subtle", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index f4845196b8..231c4f86ef 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -96,6 +96,7 @@ serde_json = { version = "1.0.115", features = ["raw_value", "unbounded_depth"] sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -203,6 +204,7 @@ serde_json = { version = "1.0.115", features = ["raw_value", "unbounded_depth"] sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } From c03b1cfe66b13c4ca8800a529090e5b6d8ef9755 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Mon, 8 Apr 2024 15:23:45 -0500 Subject: [PATCH 086/334] Bump web console (window.oxql, longer error toast) (#5455) https://github.com/oxidecomputer/console/compare/2a0693f3...7e34c118 * [7e34c118](https://github.com/oxidecomputer/console/commit/7e34c118) fix logging of error messages containing newlines * [238c82bd](https://github.com/oxidecomputer/console/commit/238c82bd) oxidecomputer/console#2132 * [6ae209fb](https://github.com/oxidecomputer/console/commit/6ae209fb) bump omicron to latest (only doc comment changes) * [44515515](https://github.com/oxidecomputer/console/commit/44515515) remove my weird tmux tutorial from the readme * [7383152d](https://github.com/oxidecomputer/console/commit/7383152d) oxidecomputer/console#2131 * [277d8ea0](https://github.com/oxidecomputer/console/commit/277d8ea0) oxql: log timeseries count in the right spot * 
[d0e90ae1](https://github.com/oxidecomputer/console/commit/d0e90ae1) oxql tweaks * [29b4f879](https://github.com/oxidecomputer/console/commit/29b4f879) oxidecomputer/console#2126 * [fb12f93c](https://github.com/oxidecomputer/console/commit/fb12f93c) regen API client with array response fix * [897eb23d](https://github.com/oxidecomputer/console/commit/897eb23d) oxidecomputer/console#2125 * [a7a11762](https://github.com/oxidecomputer/console/commit/a7a11762) oxql: roll schemasTable into schemas * [b4ef27ce](https://github.com/oxidecomputer/console/commit/b4ef27ce) stick oxql helpers on window.oxql * [c5c8b83d](https://github.com/oxidecomputer/console/commit/c5c8b83d) shrink huge gap under floating IPs page title --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index b625290165..97dd5f60c2 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="2a0693f3a5555b6e26130ca5a0e13ec93aa96035" -SHA2="e14f63eec8e4027e72815274deb30082a45888ba6ecaa1d521a1bc053d6239ff" +COMMIT="7e34c118e6e3687c7d2a3931328083a397a06d35" +SHA2="219ee83e8b71bc844203df1ac2cfb3369320c8ad74b393c4229ab8e0d18be8b5" From 534a940c2bd2074331b51440e43ecf517a4162bc Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 8 Apr 2024 13:59:27 -0700 Subject: [PATCH 087/334] [deps] update zerocopy to 0.6.6 (#5443) Resolves some unsoundness. --- Cargo.lock | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 09b4991449..2c615596ba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2726,7 +2726,7 @@ dependencies = [ "static_assertions", "strum_macros 0.25.2", "uuid 1.8.0", - "zerocopy 0.6.4", + "zerocopy 0.6.6", ] [[package]] @@ -3272,7 +3272,7 @@ dependencies = [ "tlvc-text", "toml 0.7.8", "x509-cert", - "zerocopy 0.6.4", + "zerocopy 0.6.6", "zip", ] @@ -4175,7 +4175,7 @@ dependencies = [ "sha2", "thiserror", "x509-cert", - "zerocopy 0.6.4", + "zerocopy 0.6.6", ] [[package]] @@ -9677,7 +9677,7 @@ source = "git+https://github.com/oxidecomputer/tlvc.git?branch=main#e644a21a7ca9 dependencies = [ "byteorder", "crc", - "zerocopy 0.6.4", + "zerocopy 0.6.6", ] [[package]] @@ -9687,7 +9687,7 @@ source = "git+https://github.com/oxidecomputer/tlvc.git#e644a21a7ca973ed31499106 dependencies = [ "byteorder", "crc", - "zerocopy 0.6.4", + "zerocopy 0.6.6", ] [[package]] @@ -9698,7 +9698,7 @@ dependencies = [ "ron 0.8.1", "serde", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git)", - "zerocopy 0.6.4", + "zerocopy 0.6.6", ] [[package]] @@ -11346,12 +11346,12 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.6.4" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20707b61725734c595e840fb3704378a0cd2b9c74cc9e6e20724838fc6a1e2f9" +checksum = "854e949ac82d619ee9a14c66a1b674ac730422372ccb759ce0c39cabcf2bf8e6" dependencies = [ "byteorder", - "zerocopy-derive 0.6.4", + "zerocopy-derive 0.6.6", ] [[package]] @@ -11377,9 +11377,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.6.4" +version = "0.6.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56097d5b91d711293a42be9289403896b68654625021732067eac7a4ca388a1f" +checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" dependencies = [ "proc-macro2", "quote", From a4eb301c6ed92c89b2dc2f826b997e452ef104b8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" 
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 8 Apr 2024 16:02:53 -0700 Subject: [PATCH 088/334] chore(deps): update rust to v1.77.1 (#5429) --- bootstore/src/schemes/v0/peer_networking.rs | 8 +++++--- common/src/ledger.rs | 2 +- dev-tools/reconfigurator-cli/tests/test_basic.rs | 7 ++----- nexus/db-queries/src/db/datastore/disk.rs | 4 ++-- nexus/db-queries/src/db/datastore/instance.rs | 2 +- rust-toolchain.toml | 2 +- sled-agent/src/bin/zone-bundle.rs | 12 ++---------- sled-agent/src/zone_bundle.rs | 14 ++------------ sled-storage/src/keyfile.rs | 6 +----- test-utils/src/dev/clickhouse.rs | 2 +- 10 files changed, 18 insertions(+), 41 deletions(-) diff --git a/bootstore/src/schemes/v0/peer_networking.rs b/bootstore/src/schemes/v0/peer_networking.rs index 13afd27fa2..d5e3e3fa71 100644 --- a/bootstore/src/schemes/v0/peer_networking.rs +++ b/bootstore/src/schemes/v0/peer_networking.rs @@ -599,9 +599,11 @@ fn read_frame_size(buf: [u8; FRAME_HEADER_SIZE]) -> usize { #[derive(Debug, From)] enum HandshakeError { - Serialization(ciborium::ser::Error), - Deserialization(ciborium::de::Error), - Io(tokio::io::Error), + // Rust 1.77 warns on tuple variants not being used, but in reality these are + // used for their Debug impl. + Serialization(#[allow(dead_code)] ciborium::ser::Error), + Deserialization(#[allow(dead_code)] ciborium::de::Error), + Io(#[allow(dead_code)] tokio::io::Error), UnsupportedScheme, UnsupportedVersion, Timeout, diff --git a/common/src/ledger.rs b/common/src/ledger.rs index ed5f0b57cf..a52c2441ca 100644 --- a/common/src/ledger.rs +++ b/common/src/ledger.rs @@ -311,7 +311,7 @@ mod test { let log = &logctx.log; // Create the ledger, initialize contents. - let config_dirs = vec![ + let config_dirs = [ camino_tempfile::Utf8TempDir::new().unwrap(), camino_tempfile::Utf8TempDir::new().unwrap(), ]; diff --git a/dev-tools/reconfigurator-cli/tests/test_basic.rs b/dev-tools/reconfigurator-cli/tests/test_basic.rs index 38c6e5a3c5..675fa10fc1 100644 --- a/dev-tools/reconfigurator-cli/tests/test_basic.rs +++ b/dev-tools/reconfigurator-cli/tests/test_basic.rs @@ -247,11 +247,8 @@ fn write_json( path: &Utf8Path, obj: &T, ) -> Result<(), anyhow::Error> { - let file = std::fs::OpenOptions::new() - .write(true) - .create(true) - .open(path) - .with_context(|| format!("open {:?}", path))?; + let file = std::fs::File::create(path) + .with_context(|| format!("create {:?}", path))?; let bufwrite = BufWriter::new(file); serde_json::to_writer_pretty(bufwrite, obj) .with_context(|| format!("write {:?}", path))?; diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index 2916573322..2788558a0b 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -170,7 +170,7 @@ impl DataStore { opctx.authorize(authz::Action::Modify, authz_instance).await?; opctx.authorize(authz::Action::Modify, authz_disk).await?; - let ok_to_attach_disk_states = vec![ + let ok_to_attach_disk_states = [ api::external::DiskState::Creating, api::external::DiskState::Detached, ]; @@ -311,7 +311,7 @@ impl DataStore { opctx.authorize(authz::Action::Modify, authz_disk).await?; let ok_to_detach_disk_states = - vec![api::external::DiskState::Attached(authz_instance.id())]; + [api::external::DiskState::Attached(authz_instance.id())]; let ok_to_detach_disk_state_labels: Vec<_> = ok_to_detach_disk_states.iter().map(|s| s.label()).collect(); diff --git a/nexus/db-queries/src/db/datastore/instance.rs 
b/nexus/db-queries/src/db/datastore/instance.rs index acea7bb4e3..731f7b4c06 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -407,7 +407,7 @@ impl DataStore { let detached_label = api::external::DiskState::Detached.label(); let ok_to_detach_disk_states = - vec![api::external::DiskState::Attached(authz_instance.id())]; + [api::external::DiskState::Attached(authz_instance.id())]; let ok_to_detach_disk_state_labels: Vec<_> = ok_to_detach_disk_states.iter().map(|s| s.label()).collect(); diff --git a/rust-toolchain.toml b/rust-toolchain.toml index 2e3f4c137b..f8a9f2db4f 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -4,5 +4,5 @@ # # We choose a specific toolchain (rather than "stable") for repeatability. The # intent is to keep this up-to-date with recently-released stable Rust. -channel = "1.76.0" +channel = "1.77.1" profile = "default" diff --git a/sled-agent/src/bin/zone-bundle.rs b/sled-agent/src/bin/zone-bundle.rs index d49e22d80a..82433edaf5 100644 --- a/sled-agent/src/bin/zone-bundle.rs +++ b/sled-agent/src/bin/zone-bundle.rs @@ -449,11 +449,7 @@ async fn main() -> anyhow::Result<()> { .await .context("failed to get zone bundle")? .into_inner(); - let mut f = tokio::fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&output) + let mut f = tokio::fs::File::create(&output) .await .context("failed to open output file")?; let mut stream = bundle.into_inner(); @@ -654,11 +650,7 @@ async fn main() -> anyhow::Result<()> { } // Open megabundle output file. - let f = tokio::fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&output) + let f = tokio::fs::File::create(&output) .await .context("failed to open output file")? .into_std() diff --git a/sled-agent/src/zone_bundle.rs b/sled-agent/src/zone_bundle.rs index 57d3cb1049..16147e5957 100644 --- a/sled-agent/src/zone_bundle.rs +++ b/sled-agent/src/zone_bundle.rs @@ -987,13 +987,7 @@ async fn create( let zone_metadata = ZoneBundleMetadata::new(zone.name(), context.cause); let filename = format!("{}.tar.gz", zone_metadata.id.bundle_id); let full_path = zone_bundle_dirs[0].join(&filename); - let file = match tokio::fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&full_path) - .await - { + let file = match tokio::fs::File::create(&full_path).await { Ok(f) => f.into_std().await, Err(e) => { error!( @@ -2676,11 +2670,7 @@ mod illumos_tests { let path = zone_dir.join(format!("{}.tar.gz", metadata.id.bundle_id)); // Create a tarball at the path with this fake metadata. - let file = tokio::fs::OpenOptions::new() - .read(true) - .write(true) - .create(true) - .open(&path) + let file = tokio::fs::File::create(&path) .await .context("failed to open zone bundle path")? .into_std() diff --git a/sled-storage/src/keyfile.rs b/sled-storage/src/keyfile.rs index 2c0524aec7..190dfb9c26 100644 --- a/sled-storage/src/keyfile.rs +++ b/sled-storage/src/keyfile.rs @@ -29,11 +29,7 @@ impl KeyFile { ) -> std::io::Result { info!(log, "About to create keyfile"; "path" => ?path); // We want to overwrite any existing contents. 
- let mut file = tokio::fs::OpenOptions::new() - .create(true) - .write(true) - .open(&path.0) - .await?; + let mut file = tokio::fs::File::create(&path.0).await?; file.write_all(key).await?; info!(log, "Created keyfile"; "path" => ?path); Ok(KeyFile { diff --git a/test-utils/src/dev/clickhouse.rs b/test-utils/src/dev/clickhouse.rs index 01ba402f62..54483bb433 100644 --- a/test-utils/src/dev/clickhouse.rs +++ b/test-utils/src/dev/clickhouse.rs @@ -418,7 +418,7 @@ impl ClickHouseDataDir { /// /// Removes all files except those in any of the log directories. fn close_unclean(self) -> Result<(), anyhow::Error> { - let keep_prefixes = vec![ + let keep_prefixes = [ self.log_path(), self.err_log_path(), self.keeper_log_path(), From a2cd486ba0d7e1557145e9bd0da9053d43536cc8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 00:37:20 +0000 Subject: [PATCH 089/334] chore(deps): update rust crate anyhow to v1.0.81 (#5457) --- Cargo.lock | 4 ++-- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c615596ba..053d91bdb3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -160,9 +160,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.79" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "080e9890a082662b09c1ad45f567faeeb47f22b5fb23895fbe1e651e718e25ca" +checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" dependencies = [ "backtrace", ] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 231c4f86ef..218b4ac1e1 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -16,7 +16,7 @@ publish = false [dependencies] ahash = { version = "0.8.8" } aho-corasick = { version = "1.1.2" } -anyhow = { version = "1.0.79", features = ["backtrace"] } +anyhow = { version = "1.0.81", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.0" } bit-set = { version = "0.5.3" } @@ -124,7 +124,7 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla [build-dependencies] ahash = { version = "0.8.8" } aho-corasick = { version = "1.1.2" } -anyhow = { version = "1.0.79", features = ["backtrace"] } +anyhow = { version = "1.0.81", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.0" } bit-set = { version = "0.5.3" } From 197591f43153b5c5d682f49158f754b602b54fae Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 04:24:12 +0000 Subject: [PATCH 090/334] chore(deps): update rust crate chrono to v0.4.37 (#5458) --- Cargo.lock | 4 ++-- nexus/db-queries/src/db/collection_insert.rs | 18 +++++------------- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 9 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 053d91bdb3..f8677f3040 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -963,9 +963,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.34" +version = "0.4.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" +checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" dependencies = [ "android-tzdata", "iana-time-zone", diff --git 
a/nexus/db-queries/src/db/collection_insert.rs b/nexus/db-queries/src/db/collection_insert.rs index ef2a4a4d48..69906e6498 100644 --- a/nexus/db-queries/src/db/collection_insert.rs +++ b/nexus/db-queries/src/db/collection_insert.rs @@ -409,7 +409,7 @@ mod test { use async_bb8_diesel::{ AsyncRunQueryDsl, AsyncSimpleConnection, ConnectionManager, }; - use chrono::{NaiveDateTime, TimeZone, Utc}; + use chrono::{DateTime, Utc}; use db_macros::Resource; use diesel::expression_methods::ExpressionMethods; use diesel::pg::Pg; @@ -498,12 +498,8 @@ mod test { let resource_id = uuid::Uuid::parse_str("223cb7f7-0d3a-4a4e-a5e1-ad38ecb785d8") .unwrap(); - let create_time = Utc.from_utc_datetime( - &NaiveDateTime::from_timestamp_opt(0, 0).unwrap(), - ); - let modify_time = Utc.from_utc_datetime( - &NaiveDateTime::from_timestamp_opt(1, 0).unwrap(), - ); + let create_time = DateTime::from_timestamp(0, 0).unwrap(); + let modify_time = DateTime::from_timestamp(1, 0).unwrap(); let insert = Collection::insert_resource( collection_id, diesel::insert_into(resource::table).values(vec![( @@ -615,12 +611,8 @@ mod test { .await .unwrap(); - let create_time = Utc.from_utc_datetime( - &NaiveDateTime::from_timestamp_opt(0, 0).unwrap(), - ); - let modify_time = Utc.from_utc_datetime( - &NaiveDateTime::from_timestamp_opt(1, 0).unwrap(), - ); + let create_time = DateTime::from_timestamp(0, 0).unwrap(); + let modify_time = DateTime::from_timestamp(1, 0).unwrap(); let resource = Collection::insert_resource( collection_id, diesel::insert_into(resource::table).values(vec![( diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 218b4ac1e1..67a9a8af00 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -27,7 +27,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.6.0", features = ["serde"] } -chrono = { version = "0.4.34", features = ["serde"] } +chrono = { version = "0.4.37", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.1", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.1", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } @@ -135,7 +135,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.6.0", features = ["serde"] } -chrono = { version = "0.4.34", features = ["serde"] } +chrono = { version = "0.4.37", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.1", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.1", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } From a0dca2b9966dcb66773b482adc789dfdec9f6caa Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 04:59:11 +0000 Subject: [PATCH 091/334] chore(deps): update rust crate cookie to v0.18.1 (#5460) --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f8677f3040..3244a40fc5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1195,9 +1195,9 @@ 
dependencies = [ [[package]] name = "cookie" -version = "0.18.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3cd91cf61412820176e137621345ee43b3f4423e589e7ae4e50d601d93e35ef8" +checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" dependencies = [ "time", "version_check", @@ -4564,7 +4564,7 @@ dependencies = [ "camino-tempfile", "chrono", "const_format", - "cookie 0.18.0", + "cookie 0.18.1", "db-macros", "diesel", "diesel-dtrace", From d9751eb25d352396b348dc35191ad38d92d57ef3 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 06:10:36 +0000 Subject: [PATCH 092/334] chore(deps): update rust crate clap to v4.5.4 (#5459) --- Cargo.lock | 78 +++++++++++++++++++-------------------- workspace-hack/Cargo.toml | 8 ++-- 2 files changed, 43 insertions(+), 43 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3244a40fc5..95ce972641 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1042,9 +1042,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.1" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c918d541ef2913577a0f9566e9ce27cb35b6df072075769e0b26cb5a554520da" +checksum = "90bc066a67923782aa8515dbaea16946c5bcc5addbd668bb80af688e53e548a0" dependencies = [ "clap_builder", "clap_derive", @@ -1052,9 +1052,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.1" +version = "4.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f3e7391dad68afb0c2ede1bf619f579a3dc9c2ec67f089baa397123a2f3d1eb" +checksum = "ae129e2e766ae0ec03484e609954119f123cc1fe650337e155d03b022f24f7b4" dependencies = [ "anstream", "anstyle", @@ -1065,11 +1065,11 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.0" +version = "4.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "307bc0538d5f0f83b8248db3087aa92fe504e4691294d0c96c0eabc33f47ba47" +checksum = "528131438037fd55894f62d6e9f068b8f45ac57ffa77517819645d10aed04f64" dependencies = [ - "heck 0.4.1", + "heck 0.5.0", "proc-macro2", "quote", "syn 2.0.52", @@ -1314,7 +1314,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.1", + "clap 4.5.4", "criterion-plot", "futures", "is-terminal", @@ -1957,7 +1957,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "clap 4.5.1", + "clap 4.5.4", "dns-service-client", "dropshot", "expectorate", @@ -2264,7 +2264,7 @@ dependencies = [ "async-trait", "base64 0.22.0", "chrono", - "clap 4.5.1", + "clap 4.5.4", "colored", "dhcproto", "http 0.2.12", @@ -2677,7 +2677,7 @@ name = "gateway-cli" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.1", + "clap 4.5.4", "futures", "gateway-client", "gateway-messages", @@ -3599,7 +3599,7 @@ dependencies = [ "bytes", "camino", "cancel-safe-futures", - "clap 4.5.1", + "clap 4.5.4", "display-error-chain", "futures", "hex", @@ -3660,7 +3660,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "expectorate", "hyper 0.14.28", @@ -3741,7 +3741,7 @@ name = "internal-dns-cli" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "internal-dns", "omicron-common", @@ -3982,7 +3982,7 @@ dependencies = [ "anstyle", "anyhow", "camino", - "clap 4.5.1", + "clap 4.5.4", "colored", "futures", "libc", @@ -4092,7 +4092,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "6d8de370f98a6cb8a4606618e53e802f93b094ddec0f96988eaec2c27e6e9ce7" dependencies = [ - "clap 4.5.1", + "clap 4.5.4", "termcolor", "threadpool", ] @@ -4148,7 +4148,7 @@ version = "0.2.4" source = "git+https://github.com/oxidecomputer/lpc55_support#96f064eaae5e95930efaab6c29fd1b2e22225dac" dependencies = [ "bitfield", - "clap 4.5.1", + "clap 4.5.4", "packed_struct", "serde", ] @@ -5264,7 +5264,7 @@ dependencies = [ "anyhow", "camino", "camino-tempfile", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "expectorate", "futures", @@ -5298,7 +5298,7 @@ dependencies = [ "anyhow", "base64 0.22.0", "camino", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "expectorate", "futures", @@ -5349,7 +5349,7 @@ dependencies = [ "camino-tempfile", "cancel-safe-futures", "chrono", - "clap 4.5.1", + "clap 4.5.4", "criterion", "crucible-agent-client", "crucible-pantry-client", @@ -5466,7 +5466,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "clap 4.5.1", + "clap 4.5.4", "crossterm", "crucible-agent-client", "csv", @@ -5519,7 +5519,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", - "clap 4.5.1", + "clap 4.5.4", "expectorate", "futures", "hex", @@ -5586,7 +5586,7 @@ dependencies = [ "cancel-safe-futures", "cfg-if", "chrono", - "clap 4.5.1", + "clap 4.5.4", "crucible-agent-client", "derive_more", "display-error-chain", @@ -5732,7 +5732,7 @@ dependencies = [ "bytes", "chrono", "cipher", - "clap 4.5.1", + "clap 4.5.4", "clap_builder", "console", "const-oid", @@ -6112,7 +6112,7 @@ dependencies = [ "anyhow", "camino", "chrono", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "expectorate", "futures", @@ -6156,7 +6156,7 @@ dependencies = [ "bytes", "camino", "chrono", - "clap 4.5.1", + "clap 4.5.4", "crossterm", "dropshot", "expectorate", @@ -6229,7 +6229,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "nexus-client", "omicron-common", @@ -6251,7 +6251,7 @@ dependencies = [ "anyhow", "camino", "chrono", - "clap 4.5.1", + "clap 4.5.4", "omicron-workspace-hack", "sigpipe", "uuid 1.8.0", @@ -7099,7 +7099,7 @@ dependencies = [ "anyhow", "atty", "base64 0.21.7", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "futures", "hyper 0.14.28", @@ -7388,7 +7388,7 @@ dependencies = [ "assert_matches", "camino", "camino-tempfile", - "clap 4.5.1", + "clap 4.5.4", "dns-service-client", "dropshot", "expectorate", @@ -8964,7 +8964,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "futures", "gateway-messages", @@ -10146,7 +10146,7 @@ dependencies = [ "assert_cmd", "camino", "chrono", - "clap 4.5.1", + "clap 4.5.4", "console", "datatest-stable", "fs-err", @@ -10422,7 +10422,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "clap 4.5.1", + "clap 4.5.4", "debug-ignore", "display-error-chain", "dropshot", @@ -10453,7 +10453,7 @@ dependencies = [ "camino", "camino-tempfile", "cancel-safe-futures", - "clap 4.5.1", + "clap 4.5.4", "debug-ignore", "derive-where", "either", @@ -10897,7 +10897,7 @@ dependencies = [ "buf-list", "camino", "ciborium", - "clap 4.5.1", + "clap 4.5.4", "crossterm", "futures", "humantime", @@ -10958,7 +10958,7 @@ dependencies = [ "bytes", "camino", "ciborium", - "clap 4.5.1", + "clap 4.5.4", "crossterm", "omicron-workspace-hack", "reedline", @@ -10983,7 +10983,7 @@ dependencies = [ "bytes", "camino", "camino-tempfile", - "clap 4.5.1", + "clap 4.5.4", "debug-ignore", "display-error-chain", "dpd-client", @@ -11309,7 +11309,7 @@ dependencies = [ "camino", "cargo_metadata", 
"cargo_toml", - "clap 4.5.1", + "clap 4.5.4", "fs-err", "macaddr", "serde", @@ -11457,7 +11457,7 @@ name = "zone-network-setup" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.1", + "clap 4.5.4", "dropshot", "illumos-utils", "omicron-common", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 67a9a8af00..0bbeaaf75a 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -29,8 +29,8 @@ byteorder = { version = "1.5.0" } bytes = { version = "1.6.0", features = ["serde"] } chrono = { version = "0.4.37", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.1", features = ["cargo", "derive", "env", "wrap_help"] } -clap_builder = { version = "4.5.1", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } +clap = { version = "4.5.4", features = ["cargo", "derive", "env", "wrap_help"] } +clap_builder = { version = "4.5.2", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.18" } @@ -137,8 +137,8 @@ byteorder = { version = "1.5.0" } bytes = { version = "1.6.0", features = ["serde"] } chrono = { version = "0.4.37", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } -clap = { version = "4.5.1", features = ["cargo", "derive", "env", "wrap_help"] } -clap_builder = { version = "4.5.1", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } +clap = { version = "4.5.4", features = ["cargo", "derive", "env", "wrap_help"] } +clap_builder = { version = "4.5.2", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } console = { version = "0.15.8" } const-oid = { version = "0.9.6", default-features = false, features = ["db", "std"] } crossbeam-epoch = { version = "0.9.18" } From 5ee8b622d94256e8a94cbc2731f7e6b52a69c237 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 00:45:12 -0700 Subject: [PATCH 093/334] chore(deps): update rust crate openssl to v0.10.64 (#5464) --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 95ce972641..9bdf0b57e1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5907,9 +5907,9 @@ dependencies = [ [[package]] name = "openssl" -version = "0.10.60" +version = "0.10.64" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79a4c6c3a2b158f7f8f2a2fc5a969fa3a068df6fc9dbb4a43845436e3af7c800" +checksum = "95a0481286a310808298130d22dd1fef0fa571e05a8f44ec801801e84b216b1f" dependencies = [ "bitflags 2.4.2", "cfg-if", @@ -5939,9 +5939,9 @@ checksum = "ff011a302c396a5197692431fc1948019154afc178baf7d8e37367442a4601cf" [[package]] name = "openssl-sys" -version = "0.9.96" +version = "0.9.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3812c071ba60da8b5677cc12bcb1d42989a65553772897a7e0355545a819838f" +checksum = "c597637d56fbc83893a35eb0dd04b2b8e7a50c91e64e9493e398b5df4fb45fa2" dependencies = [ "cc", "libc", From d22227667e564de0eed4aae3e6178d259bf7490f Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" 
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 00:45:32 -0700 Subject: [PATCH 094/334] chore(deps): update rust crate mockall to v0.12.1 (#5463) --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9bdf0b57e1..a43e1818b0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4345,9 +4345,9 @@ dependencies = [ [[package]] name = "mockall" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a978c8292954bcb9347a4e28772c0a0621166a1598fc1be28ac0076a4bb810e" +checksum = "43766c2b5203b10de348ffe19f7e54564b64f3d6018ff7648d1e2d6d3a0f0a48" dependencies = [ "cfg-if", "downcast", @@ -4360,9 +4360,9 @@ dependencies = [ [[package]] name = "mockall_derive" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad2765371d0978ba4ace4ebef047baa62fc068b431e468444b5610dd441c639b" +checksum = "af7cbce79ec385a1d4f54baa90a76401eb15d9cab93685f62e7e9f942aa00ae2" dependencies = [ "cfg-if", "proc-macro2", From 013e966d0465421fcfb341dcf5688777867fef3e Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 07:56:56 +0000 Subject: [PATCH 095/334] chore(deps): update rust crate datatest-stable to 0.2.5 (#5462) --- Cargo.lock | 21 +++++++++++++++------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 2 -- 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a43e1818b0..d8ad9463b8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1604,9 +1604,9 @@ checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" [[package]] name = "datatest-stable" -version = "0.2.3" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22a384d02609f0774f4dbf0c38fc57eb2769b24c30b9185911ff657ec14837da" +checksum = "d89ee5ef93235f39066e9fbd304b19fd876ec89570e4d7894b3e28ec749400fa" dependencies = [ "camino", "libtest-mimic", @@ -2363,6 +2363,15 @@ version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "281e452d3bad4005426416cdba5ccfd4f5c1280e10099e21db27f7c1c28347fc" +[[package]] +name = "escape8259" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba4f4911e3666fcd7826997b4745c8224295a6f3072f1418c3067b97a67557ee" +dependencies = [ + "rustversion", +] + [[package]] name = "expectorate" version = "1.1.0" @@ -3299,7 +3308,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.9", + "socket2 0.5.5", "tokio", "tower-service", "tracing", @@ -4088,11 +4097,12 @@ dependencies = [ [[package]] name = "libtest-mimic" -version = "0.6.1" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d8de370f98a6cb8a4606618e53e802f93b094ddec0f96988eaec2c27e6e9ce7" +checksum = "7f0f4c6f44ecfd52e8b443f2ad18f2b996540135771561283c2352ce56a1c70b" dependencies = [ "clap 4.5.4", + "escape8259", "termcolor", "threadpool", ] @@ -5803,7 +5813,6 @@ dependencies = [ "sha2", "similar", "slog", - "socket2 0.5.5", "spin 0.9.8", "string_cache", "subtle", diff --git a/Cargo.toml b/Cargo.toml index cdb5d8f64c..e21061adf6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -202,7 +202,7 @@ crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", re crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = 
"4661c23b248da18862012cf55af21b17b79a468e" } csv = "1.3.0" curve25519-dalek = "4" -datatest-stable = "0.2.3" +datatest-stable = "0.2.5" display-error-chain = "0.2.0" omicron-ddm-admin-client = { path = "clients/ddm-admin-client" } db-macros = { path = "nexus/db-macros" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 0bbeaaf75a..1b0e85b85a 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -96,7 +96,6 @@ serde_json = { version = "1.0.115", features = ["raw_value", "unbounded_depth"] sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } -socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -204,7 +203,6 @@ serde_json = { version = "1.0.115", features = ["raw_value", "unbounded_depth"] sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } -socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } From e8c530f8113f0ec69a426a6987c129693baf58aa Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 08:58:28 +0000 Subject: [PATCH 096/334] chore(deps): update rust crate oso to v0.27.3 (#5466) --- Cargo.lock | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d8ad9463b8..395a293e4c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6010,9 +6010,9 @@ checksum = "04744f49eae99ab78e0d5c0b603ab218f515ea8cfe5a456d7629ad883a3b6e7d" [[package]] name = "oso" -version = "0.27.0" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fceecc04a9e9dcb63a42d937a4249557da8d2695cf83eb5ee78015473ab12ae2" +checksum = "eeabb069616e6a494420f5ab27dbad46efa8dd4b45d30a0302857a7bcdea4293" dependencies = [ "impl-trait-for-tuples", "lazy_static", @@ -6025,9 +6025,9 @@ dependencies = [ [[package]] name = "oso-derive" -version = "0.27.0" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1766857f83748ce5596ab98e1a57d64ccfe3259e71b7b53289c8c32c2cfef9a8" +checksum = "a2f5236d7c60cce1bcd76146bcbc4b2a5fb1234894fb84b1ec751863e8399e9c" dependencies = [ "quote", "syn 1.0.109", @@ -6723,9 +6723,9 @@ dependencies = [ [[package]] name = "polar-core" -version = "0.27.0" +version = "0.27.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d1b77e852bec994296c8a1dddc231ab3f112bfa0a0399fc8a7fd8bddfb46b4e" +checksum = "b3aa6f61d235de56ccffbca8627377ebe6ff0052a419f67b098f319a5f32e06d" dependencies = [ "indoc 1.0.9", "js-sys", From 6cb6c3ce9aa8adc56abdd532fa9ec2cccd1c56c1 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 09:19:23 +0000 Subject: [PATCH 097/334] chore(deps): update rust crate pem to v3.0.4 (#5467) --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 395a293e4c..b7a17d2689 
100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6515,11 +6515,11 @@ checksum = "36bae92c60fa2398ce4678b98b2c4b5a7c61099961ca1fa305aec04a9ad28922" [[package]] name = "pem" -version = "3.0.2" +version = "3.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3163d2912b7c3b52d651a055f2c7eec9ba5cd22d26ef75b8dd3a59980b185923" +checksum = "8e459365e590736a54c3fa561947c84837534b8e9af6fc5bf781307e82658fae" dependencies = [ - "base64 0.21.7", + "base64 0.22.0", "serde", ] From 120b2ee38dae7b5b8cf3c52d341ad4549d648cd9 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 09:55:44 -0700 Subject: [PATCH 098/334] chore(deps): update rust crate strum to v0.26.2 (#5473) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b7a17d2689..a44815cdd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9241,9 +9241,9 @@ dependencies = [ [[package]] name = "strum" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "723b93e8addf9aa965ebe2d11da6d7540fa2283fcea14b3371ff055f7ba13f5f" +checksum = "5d8cec3501a5194c432b2b7976db6b7d10ec95c253208b45f83f7136aa985e29" dependencies = [ "strum_macros 0.26.1", ] From 343715aa7dd313ff00ba93809f4b473e55e8d19a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:37:10 -0700 Subject: [PATCH 099/334] chore(deps): update rust crate smf to v0.2.3 (#5471) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a44815cdd2..cedf1ef47e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8890,9 +8890,9 @@ checksum = "f67ad224767faa3c7d8b6d91985b78e70a1324408abcb1cfcc2be4c06bc06043" [[package]] name = "smf" -version = "0.2.2" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e6015a9bbf269b84c928dc68e11680bbdfa6f065f1c6d5383ec134f55bab188b" +checksum = "4a491bfc47dffa70a3c267bc379e9de9f4b0a7195e474a94498189b177f8d18c" dependencies = [ "thiserror", ] From e800a0040c28c21aae1b723e7590534e25d3744a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 11:38:18 -0700 Subject: [PATCH 100/334] chore(deps): update rust crate rustfmt-wrapper to v0.2.1 (#5470) --- Cargo.lock | 32 +++++++------------------------- 1 file changed, 7 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cedf1ef47e..4aef3053c1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7902,14 +7902,14 @@ dependencies = [ [[package]] name = "rustfmt-wrapper" -version = "0.2.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed729e3bee08ec2befd593c27e90ca9fdd25efdc83c94c3b82eaef16e4f7406e" +checksum = "f1adc9dfed5cc999077978cc7163b9282c5751c8d39827c4ea8c8c220ca5a440" dependencies = [ "serde", "tempfile", "thiserror", - "toml 0.5.11", + "toml 0.8.12", "toolchain_find", ] @@ -8256,15 +8256,6 @@ version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4f410fedcf71af0345d7607d246e7ad15faaadd49d240ee3b24e5dc21a820ac" -[[package]] -name = "semver" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f301af10236f6df4160f7c3f04eec6dbc70ace82d23326abad5edee88801c6b6" -dependencies = [ - "semver-parser", -] - 
[[package]] name = "semver" version = "1.0.22" @@ -8274,15 +8265,6 @@ dependencies = [ "serde", ] -[[package]] -name = "semver-parser" -version = "0.10.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00b0bef5b7f9e0df16536d3961cfb6e84331c065b4066afb39768d0e319411f7" -dependencies = [ - "pest", -] - [[package]] name = "serde" version = "1.0.197" @@ -9928,14 +9910,14 @@ dependencies = [ [[package]] name = "toolchain_find" -version = "0.2.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e85654a10e7a07a47c6f19d93818f3f343e22927f2fa280c84f7c8042743413" +checksum = "ebc8c9a7f0a2966e1acdaf0461023d0b01471eeead645370cf4c3f5cff153f2a" dependencies = [ "home", - "lazy_static", + "once_cell", "regex", - "semver 0.11.0", + "semver 1.0.22", "walkdir", ] From 0c9753098ef828c2935335aa624e559c7f1ac5f9 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Tue, 9 Apr 2024 15:28:08 -0700 Subject: [PATCH 101/334] Break OxQL queries for huge key sets into chunks (#5441) - Fixes #5405 - Limits the number of consistent keys used in one measurement query, to ensure we always fit within ClickHouse's SQL query size limit (256KiB). If the number of consistent keys in a single group exceeds some fraction of that, break those into chunks to stay under the limit. - Adds tests for method chunking up the consistent key groups. --- oximeter/db/src/client/oxql.rs | 295 ++++++++++++++++++++++++++++++--- 1 file changed, 272 insertions(+), 23 deletions(-) diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index 7816d5c25f..4f4d1daeeb 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -102,6 +102,7 @@ pub const MAX_DATABASE_ROWS: u64 = 1_000_000; // // This type stores the predicates used to generate the keys, and the keys // consistent with it. +#[derive(Clone, Debug, PartialEq)] struct ConsistentKeyGroup { predicates: Option, consistent_keys: BTreeMap, @@ -476,7 +477,7 @@ impl Client { // organized by timeseries key. That's because we fetch all consistent // samples at once, so we get many concrete _timeseries_ in the returned // response, even though they're all from the same schema. - let (summary, timeseries_by_key) = self + let (summaries, timeseries_by_key) = self .select_matching_samples( query_log, &schema, @@ -484,7 +485,7 @@ impl Client { total_rows_fetched, ) .await?; - query_summaries.push(summary); + query_summaries.extend(summaries); // At this point, let's construct a set of tables and run the results // through the transformation pipeline. @@ -536,24 +537,40 @@ impl Client { schema: &TimeseriesSchema, consistent_key_groups: &[ConsistentKeyGroup], total_rows_fetched: &mut u64, - ) -> Result<(QuerySummary, BTreeMap), Error> - { + ) -> Result< + (Vec, BTreeMap), + Error, + > { // We'll create timeseries for each key on the fly. To enable computing // deltas, we need to track the last measurement we've seen as well. let mut measurements_by_key: BTreeMap<_, Vec<_>> = BTreeMap::new(); - let measurements_query = self.measurements_query( - schema, - consistent_key_groups, - total_rows_fetched, - )?; + + // If the set of consistent keys is quite large, we may run into + // ClickHouse's SQL query size limit, which is 256KiB by default. + // See https://clickhouse.com/docs/en/operations/settings/settings#max_query_size + // for that limit. + // + // To avoid this, we have to split large groups of keys into pages, and + // concatenate the results ourself. 
let mut n_measurements: u64 = 0; - let (summary, body) = - self.execute_with_body(&measurements_query).await?; - for line in body.lines() { - let (key, measurement) = - model::parse_measurement_from_row(line, schema.datum_type); - measurements_by_key.entry(key).or_default().push(measurement); - n_measurements += 1; + let mut summaries = Vec::new(); + for key_group_chunk in + chunk_consistent_key_groups(consistent_key_groups) + { + let measurements_query = self.measurements_query( + schema, + &key_group_chunk, + total_rows_fetched, + )?; + let (summary, body) = + self.execute_with_body(&measurements_query).await?; + summaries.push(summary); + for line in body.lines() { + let (key, measurement) = + model::parse_measurement_from_row(line, schema.datum_type); + measurements_by_key.entry(key).or_default().push(measurement); + n_measurements += 1; + } } debug!( query_log, @@ -621,7 +638,7 @@ impl Client { ); out.insert(key, timeseries); } - Ok((summary, out)) + Ok((summaries, out)) } fn measurements_query( @@ -881,6 +898,113 @@ impl Client { } } +// Split the list of consistent key groups, ensuring none exceeds ClickHouse's +// query limit. +// +// The set of consistent keys for an OxQL query can be quite large. When stuffed +// into a giant list of keys and used in a SQL query like so: +// +// ``` +// timeseries_key IN (list, of, many, keys) +// ``` +// +// this can hit ClickHouse's SQL query size limit (defaulting to 256KiB, see +// https://clickhouse.com/docs/en/operations/settings/settings#max_query_size). +// +// This function chunks the list of consistent keys, ensuring that each group is +// small enough to fit within that query limit. +// +// Note that this unfortunately needs to chunk and reallocate the groups, +// because it may entail splitting each key group. That requires a copy of the +// internal map, to split it at a particular size. +fn chunk_consistent_key_groups( + consistent_key_groups: &[ConsistentKeyGroup], +) -> Vec> { + // The max number of keys allowed in each measurement query. + // + // Keys are u64s, so their max is 18446744073709551615, which has 20 base-10 + // digits. We also separate the keys by a `,`, so let's call it 21 digits. + // + // ClickHouse's max query size is 256KiB, but we allow for 6KiB of overhead + // for the other parts of the query (select, spaces, column names, etc). + // That's very conservative. + const MAX_QUERY_SIZE_FOR_KEYS: usize = 250 * 1024; + const DIGITS_PER_KEY: usize = 21; + const MAX_KEYS_PER_MEASUREMENT_QUERY: usize = + MAX_QUERY_SIZE_FOR_KEYS / DIGITS_PER_KEY; + chunk_consistent_key_groups_impl( + consistent_key_groups, + MAX_KEYS_PER_MEASUREMENT_QUERY, + ) +} + +fn chunk_consistent_key_groups_impl( + consistent_key_groups: &[ConsistentKeyGroup], + chunk_size: usize, +) -> Vec> { + // Create the output vec-of-vec of key groups. We'll always push to the last + // one, so grab a reference to it. + let mut out = vec![vec![]]; + let mut current_chunk = out.last_mut().unwrap(); + let mut room = chunk_size; + 'group: for next_group in consistent_key_groups.iter().cloned() { + // If we have room for it in this chunk, push it onto the current chunk, + // and then continue to the next group. + let group_size = next_group.consistent_keys.len(); + if room >= group_size { + current_chunk.push(next_group); + room -= group_size; + continue; + } + + // If we don't have enough room for this entire group, then we need to + // split it up and push whatever we can. It's actually possible that the + // next group needs to be split multiple times. 
So we'll do that until + // it's empty, possibly adding new chunks to the output array. + // + // It's tricky to iterate over a map by the index / count, and since + // we're operating on a clone anyway, convert this to a vec. + let predicates = next_group.predicates; + let mut group_keys: Vec<_> = + next_group.consistent_keys.into_iter().collect(); + while !group_keys.is_empty() { + // On a previous pass through this loop, we may have exhausted all + // the remaining room. As we have re-entered it, we still have items + // in this current group of keys. So "close" the last chunk and push + // a new one, onto which we'll start adding the remaining items. + if room == 0 { + out.push(vec![]); + current_chunk = out.last_mut().unwrap(); + room = chunk_size; + } + + // Fetch up to the remaining set of keys. + let ix = room.min(group_keys.len()); + let consistent_keys: BTreeMap<_, _> = + group_keys.drain(..ix).collect(); + + // There are no more keys in this group, we need to continue to the + // next one. + if consistent_keys.is_empty() { + continue 'group; + } + + // We need to update the amount of room we have left, to be sure we + // don't push this whole group if the chunk boundary falls in the + // middle of it. + room -= consistent_keys.len(); + + // Push this set of keys onto the current chunk. + let this_group_chunk = ConsistentKeyGroup { + predicates: predicates.clone(), + consistent_keys, + }; + current_chunk.push(this_group_chunk); + } + } + out +} + // Helper to update the number of total rows fetched so far, and check it's // still under the limit. fn update_total_rows_and_check( @@ -909,20 +1033,22 @@ fn update_total_rows_and_check( #[cfg(test)] mod tests { + use super::ConsistentKeyGroup; + use crate::client::oxql::chunk_consistent_key_groups_impl; + use crate::{ + oxql::{point::Points, Table, Timeseries}, + Client, DbWrite, + }; + use crate::{Metric, Target}; use chrono::{DateTime, Utc}; use dropshot::test_util::LogContext; use omicron_test_utils::dev::clickhouse::ClickHouseInstance; use omicron_test_utils::dev::test_setup_log; - use oximeter::Sample; use oximeter::{types::Cumulative, FieldValue}; + use oximeter::{DatumType, Sample}; use std::collections::BTreeMap; use std::time::Duration; - use crate::{ - oxql::{point::Points, Table, Timeseries}, - Client, DbWrite, - }; - #[derive( Clone, Debug, Eq, PartialEq, PartialOrd, Ord, oximeter::Target, )] @@ -1278,4 +1404,127 @@ mod tests { } None } + + fn make_consistent_key_group(size: u64) -> ConsistentKeyGroup { + let consistent_keys = (0..size) + .map(|key| { + let target = Target { name: "foo".to_string(), fields: vec![] }; + let metric = Metric { + name: "bar".to_string(), + fields: vec![], + datum_type: DatumType::U8, + }; + (key, (target, metric)) + }) + .collect(); + ConsistentKeyGroup { predicates: None, consistent_keys } + } + + #[test] + fn test_chunk_consistent_key_groups_all_in_one_chunk() { + // Create two key groups, each with 5 keys. + // + // With a chunk size of 12, these should all be in the same chunk, so + // we're really just cloning the inputs. They do go into an outer vec + // though, because we can have multiple chunks in theory. 
+ let keys = + vec![make_consistent_key_group(5), make_consistent_key_group(5)]; + let chunks = chunk_consistent_key_groups_impl(&keys, 12); + assert_eq!( + chunks.len(), + 1, + "All key groups should fit into one chunk when their \ + total size is less than the chunk size" + ); + assert_eq!( + keys, chunks[0], + "All key groups should fit into one chunk when their \ + total size is less than the chunk size" + ); + } + + #[test] + fn test_chunk_consistent_key_groups_split_middle_of_key_group() { + // Create one key group, with 10 keys. + // + // With a chunk size of 5, this should be split in half across two + // chunks. + let keys = vec![make_consistent_key_group(10)]; + let chunks = chunk_consistent_key_groups_impl(&keys, 5); + assert_eq!( + chunks.len(), + 2, + "Consistent key group should be split into two chunks", + ); + + let first = keys[0] + .consistent_keys + .range(..5) + .map(|(k, v)| (*k, v.clone())) + .collect(); + assert_eq!( + chunks[0][0].consistent_keys, first, + "The first chunk of the consistent keys should be \ + the first half of the input keys" + ); + + let second = keys[0] + .consistent_keys + .range(5..) + .map(|(k, v)| (*k, v.clone())) + .collect(); + assert_eq!( + chunks[1][0].consistent_keys, second, + "The second chunk of the consistent keys should be \ + the second half of the input keys" + ); + } + + #[test] + fn test_chunk_consistent_key_groups_split_key_group_multiple_times() { + // Create one key group, with 10 keys. + // + // With a chunk size of 4, this should be split 3 times, with the first + // two having 4 items and the last the remaining 2. + let keys = vec![make_consistent_key_group(10)]; + let chunks = chunk_consistent_key_groups_impl(&keys, 4); + assert_eq!( + chunks.len(), + 3, + "Consistent key group should be split into three chunks", + ); + + let first = keys[0] + .consistent_keys + .range(..4) + .map(|(k, v)| (*k, v.clone())) + .collect(); + assert_eq!( + chunks[0][0].consistent_keys, first, + "The first chunk of the consistent keys should be \ + the first 4 input keys" + ); + + let second = keys[0] + .consistent_keys + .range(4..8) + .map(|(k, v)| (*k, v.clone())) + .collect(); + assert_eq!( + chunks[1][0].consistent_keys, second, + "The second chunk of the consistent keys should be \ + the next 4 input keys", + ); + + let third = keys[0] + .consistent_keys + .range(8..) 
+ .map(|(k, v)| (*k, v.clone())) + .collect(); + assert_eq!( + chunks[2][0].consistent_keys, third, + "The second chunk of the consistent keys should be \ + the remaining 2 input keys", + ); + } } From 2c9fae525e45994fe59e0f3b5bc9e54faa79acf3 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 9 Apr 2024 22:29:33 +0000 Subject: [PATCH 102/334] chore(deps): update rust crate socket2 to v0.5.6 (#5472) --- Cargo.lock | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4aef3053c1..eafcfcbacc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2283,7 +2283,7 @@ dependencies = [ "russh-keys", "serde", "serde_json", - "socket2 0.5.5", + "socket2 0.5.6", "tokio", "toml 0.8.12", "trust-dns-resolver", @@ -2758,7 +2758,7 @@ dependencies = [ "serde", "serde-big-array 0.5.1", "slog", - "socket2 0.5.5", + "socket2 0.5.6", "string_cache", "thiserror", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git?branch=main)", @@ -3308,7 +3308,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.5.5", + "socket2 0.5.6", "tokio", "tower-service", "tracing", @@ -3411,7 +3411,7 @@ dependencies = [ "http-body 1.0.0", "hyper 1.1.0", "pin-project-lite", - "socket2 0.5.5", + "socket2 0.5.6", "tokio", "tower", "tower-service", @@ -3788,7 +3788,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b58db92f96b720de98181bbbe63c831e87005ab460c1bf306eb2622b4707997f" dependencies = [ - "socket2 0.5.5", + "socket2 0.5.6", "widestring", "windows-sys 0.48.0", "winreg", @@ -8941,12 +8941,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.5" +version = "0.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b5fac59a5cb5dd637972e5fca70daf0523c9067fcdc4842f053dae04a18f8e9" +checksum = "05ffd9c0a93b7543e062e759284fcf5f5e3b098501104bfbdde4d404db792871" dependencies = [ "libc", - "windows-sys 0.48.0", + "windows-sys 0.52.0", ] [[package]] @@ -9718,7 +9718,7 @@ dependencies = [ "parking_lot 0.12.1", "pin-project-lite", "signal-hook-registry", - "socket2 0.5.5", + "socket2 0.5.6", "tokio-macros", "windows-sys 0.48.0", ] @@ -9764,7 +9764,7 @@ dependencies = [ "postgres-protocol", "postgres-types", "rand 0.8.5", - "socket2 0.5.5", + "socket2 0.5.6", "tokio", "tokio-util", "whoami", From dafbb772b4427d7f41d2b07344c67143b8c2679f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Tue, 9 Apr 2024 16:10:31 -0700 Subject: [PATCH 103/334] use virtual-hardware xtask in buildomat (#5425) Depends on https://github.com/oxidecomputer/omicron/pull/5423 --- .github/buildomat/jobs/deploy.sh | 13 +++++++++---- .github/buildomat/jobs/package.sh | 7 ++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index 9f0629d4c1..8d3e94cd5e 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -201,11 +201,16 @@ routeadm -e ipv4-forwarding -u PXA_START="$EXTRA_IP_START" PXA_END="$EXTRA_IP_END" -# These variables are used by softnpu_init, so export them. 
-export GATEWAY_IP GATEWAY_MAC PXA_START PXA_END - pfexec zpool create -f scratch c1t1d0 c2t1d0 -VDEV_DIR=/scratch ptime -m pfexec ./tools/create_virtual_hardware.sh + +ptime -m \ + pfexec ./target/release/xtask virtual-hardware \ + --vdev-dir /scratch \ + create \ + --gateway-ip "$GATEWAY_IP" \ + --gateway-mac "$GATEWAY_MAC" \ + --pxa-start "$PXA_START" \ + --pxa-end "$PXA_END" # # Generate a self-signed certificate to use as the initial TLS certificate for diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index d290976d9f..566c345f76 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -50,6 +50,9 @@ ptime -m cargo run --locked --release --bin omicron-package -- \ ptime -m cargo run --locked --release --bin omicron-package -- \ -t test package +# Build the xtask binary used by the deploy job +ptime -m cargo build --locked --release -p xtask + # Assemble some utilities into a tarball that can be used by deployment # phases of buildomat. @@ -60,9 +63,7 @@ files=( package-manifest.toml smf/sled-agent/non-gimlet/config.toml target/release/omicron-package - tools/create_virtual_hardware.sh - tools/virtual_hardware.sh - tools/scrimlet/* + target/release/xtask ) pfexec mkdir -p /work && pfexec chown $USER /work From 371d7079bc012423c96abeb69a94ddf87b12eb3f Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 04:40:06 +0000 Subject: [PATCH 104/334] chore(deps): update taiki-e/install-action digest to f6e0e17 (#5483) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`e4ef34d` -> `f6e0e17`](https://togithub.com/taiki-e/install-action/compare/e4ef34d...f6e0e17) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index a54321fad6..02eeb44d45 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@e4ef34df890c5af6027f55257634401a93b14dc7 # v2 + uses: taiki-e/install-action@f6e0e17ee402584b4db04cdcf15775bffd443d9b # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From a1086bae9c2c8c403407c1273f39f537c9963bd6 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 00:07:40 -0700 Subject: [PATCH 105/334] chore(deps): update rust crate quote to v1.0.36 (#5486) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index eafcfcbacc..cf8c93c71e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7214,9 +7214,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.35" +version = "1.0.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "291ec9ab5efd934aaf503a6466c5d5251535d108ee747472c3977cc5acc868ef" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" dependencies = [ "proc-macro2", ] From bd57178f08ce82dd61db96b7383d75aed695bb49 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 00:07:48 -0700 Subject: [PATCH 106/334] chore(deps): update rust crate anyhow to v1.0.82 (#5485) --- Cargo.lock | 4 ++-- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cf8c93c71e..91e82fd4b1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -160,9 +160,9 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.81" +version = "1.0.82" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0952808a6c2afd1aa8947271f3a60f1a6763c7b912d210184c5149b5cf147247" +checksum = "f538837af36e6f6a9be0faa67f9a314f8119e4e4b5867c6ab40ed60360142519" dependencies = [ "backtrace", ] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 1b0e85b85a..7214074775 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -16,7 +16,7 @@ publish = false [dependencies] ahash = { version = "0.8.8" } aho-corasick = { version = "1.1.2" } -anyhow = { version = "1.0.81", features = ["backtrace"] } +anyhow = { version = "1.0.82", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.0" } bit-set = { version = "0.5.3" } @@ -123,7 +123,7 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla [build-dependencies] ahash = { version = "0.8.8" } aho-corasick = { version = "1.1.2" } -anyhow = { version = "1.0.81", features = ["backtrace"] } +anyhow = { version = "1.0.82", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } base64 = { version = "0.22.0" } bit-set = { version = "0.5.3" } From a760df2dd90b12a347e2e96ae14ca7e21621fe17 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 00:07:54 -0700 Subject: [PATCH 107/334] 
chore(deps): update rust crate syn to v2.0.58 (#5487) --- Cargo.lock | 136 +++++++++++++++++++------------------- workspace-hack/Cargo.toml | 4 +- 2 files changed, 70 insertions(+), 70 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 91e82fd4b1..04fd46f623 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -174,7 +174,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -281,7 +281,7 @@ checksum = "30c5ef0ede93efbf733c1a727f3b6b5a1060bbedd5600183e66f6e4be4af0ec5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -303,7 +303,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -314,7 +314,7 @@ checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -367,7 +367,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -526,7 +526,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.52", + "syn 2.0.58", "which", ] @@ -1072,7 +1072,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1558,7 +1558,7 @@ checksum = "83fdaf97f4804dcebfa5862639bc9ce4121e82140bec2a987ac5140294865b5b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1582,7 +1582,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1593,7 +1593,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1627,7 +1627,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1670,7 +1670,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1703,7 +1703,7 @@ checksum = "5fe87ce4529967e0ba1dcf8450bab64d97dfd5010a6256187ffe2e43e6f0e049" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1724,7 +1724,7 @@ checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1745,7 +1745,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1755,7 +1755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1779,7 +1779,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1843,7 +1843,7 @@ dependencies = [ "diesel_table_macro_syntax", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -1852,7 +1852,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5" dependencies = [ - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -2120,7 +2120,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -2522,7 +2522,7 @@ checksum = 
"1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -2633,7 +2633,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -3899,7 +3899,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" dependencies = [ "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -4377,7 +4377,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -4690,7 +4690,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -4878,7 +4878,7 @@ version = "0.1.0" dependencies = [ "omicron-workspace-hack", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -5044,7 +5044,7 @@ checksum = "9e6a0fd4f737c707bd9086cc16c925f294943eb62eb71499e9fd4cf71f8b9f4e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -5817,7 +5817,7 @@ dependencies = [ "string_cache", "subtle", "syn 1.0.109", - "syn 2.0.52", + "syn 2.0.58", "time", "time-macros", "tokio", @@ -5937,7 +5937,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -6229,7 +6229,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -6395,7 +6395,7 @@ dependencies = [ "regex", "regex-syntax 0.8.2", "structmeta 0.3.0", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -6569,7 +6569,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -6639,7 +6639,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -6909,7 +6909,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" dependencies = [ "proc-macro2", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -7005,7 +7005,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "syn 2.0.52", + "syn 2.0.58", "thiserror", "typify", "unicode-ident", @@ -7025,7 +7025,7 @@ dependencies = [ "serde_json", "serde_tokenstream 0.2.0", "serde_yaml", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -7505,7 +7505,7 @@ checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -7761,7 +7761,7 @@ dependencies = [ "regex", "relative-path", "rustc_version 0.4.0", - "syn 2.0.52", + "syn 2.0.58", "unicode-ident", ] @@ -8191,7 +8191,7 @@ checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -8320,7 +8320,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -8381,7 +8381,7 @@ checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -8413,7 +8413,7 @@ dependencies = [ "proc-macro2", 
"quote", "serde", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -8455,7 +8455,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -8799,7 +8799,7 @@ source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f6 dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -8926,7 +8926,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9053,7 +9053,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9160,7 +9160,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive 0.2.0", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9172,7 +9172,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive 0.3.0", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9183,7 +9183,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9194,7 +9194,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9253,7 +9253,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9266,7 +9266,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9313,9 +9313,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.52" +version = "2.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b699d15b36d1f02c3e7c69f8ffef53de37aefae075d8488d4ba1a7788d574a07" +checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" dependencies = [ "proc-macro2", "quote", @@ -9501,7 +9501,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta 0.2.0", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9541,7 +9541,7 @@ checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -9731,7 +9731,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -10009,7 +10009,7 @@ checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -10284,7 +10284,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.52", + "syn 2.0.58", "thiserror", "unicode-ident", ] @@ -10300,7 +10300,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream 0.2.0", - "syn 2.0.52", + "syn 2.0.58", "typify-impl", ] @@ -10534,7 +10534,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream 0.2.0", - "syn 2.0.52", + "syn 2.0.58", "usdt-impl 0.5.0", ] @@ -10572,7 +10572,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.52", + "syn 2.0.58", "thiserror", "thread-id", "version_check", @@ -10602,7 +10602,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream 0.2.0", - "syn 2.0.52", + "syn 2.0.58", "usdt-impl 0.5.0", ] @@ -10783,7 +10783,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", "wasm-bindgen-shared", ] @@ -10817,7 +10817,7 @@ checksum = 
"54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -11374,7 +11374,7 @@ checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -11385,7 +11385,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] @@ -11405,7 +11405,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.52", + "syn 2.0.58", ] [[package]] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 7214074775..a1e2c6ece8 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -100,7 +100,7 @@ spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.52", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.58", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.34", features = ["formatting", "local-offset", "macros", "parsing"] } tokio = { version = "1.37.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } @@ -207,7 +207,7 @@ spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.52", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.58", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.34", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.17", default-features = false, features = ["formatting", "parsing"] } tokio = { version = "1.37.0", features = ["full", "test-util"] } From 3d08b0b68dd6c7db91012539135b460efc3d92e5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 00:55:41 -0700 Subject: [PATCH 108/334] chore(deps): update rust crate datatest-stable to 0.2.6 (#5484) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04fd46f623..4e80f330e4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1604,9 +1604,9 @@ checksum = "c2e66c9d817f1720209181c316d28635c050fa304f9c79e47a520882661b7308" [[package]] name = "datatest-stable" -version = "0.2.5" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89ee5ef93235f39066e9fbd304b19fd876ec89570e4d7894b3e28ec749400fa" +checksum = "0d08bd225143f03456cf3dc42ecd1254c623c0f6e47f2033c32a0a1236876a13" dependencies = [ "camino", "libtest-mimic", @@ -4097,9 +4097,9 @@ dependencies = [ [[package]] name = "libtest-mimic" -version = "0.7.0" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "7f0f4c6f44ecfd52e8b443f2ad18f2b996540135771561283c2352ce56a1c70b" +checksum = "fefdf21230d6143476a28adbee3d930e2b68a3d56443c777cae3fe9340eebff9" dependencies = [ "clap 4.5.4", "escape8259", diff --git a/Cargo.toml b/Cargo.toml index e21061adf6..251b507633 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -202,7 +202,7 @@ crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", re crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "4661c23b248da18862012cf55af21b17b79a468e" } csv = "1.3.0" curve25519-dalek = "4" -datatest-stable = "0.2.5" +datatest-stable = "0.2.6" display-error-chain = "0.2.0" omicron-ddm-admin-client = { path = "clients/ddm-admin-client" } db-macros = { path = "nexus/db-macros" } From 216ad612be85e261aeaf27e252d19dcdc0a3c3cf Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:35:38 +0000 Subject: [PATCH 109/334] chore(deps): update rust crate proc-macro2 to v1.0.79 (#5468) --- Cargo.lock | 4 ++-- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4e80f330e4..83dfcb5a28 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6957,9 +6957,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.78" +version = "1.0.79" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" +checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" dependencies = [ "unicode-ident", ] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a1e2c6ece8..3086078ca7 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -81,7 +81,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.78" } +proc-macro2 = { version = "1.0.79" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.4" } @@ -188,7 +188,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.78" } +proc-macro2 = { version = "1.0.79" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.4" } From abb78e31282fad648e313851ec4978e5265bd8ab Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 09:41:41 +0000 Subject: [PATCH 110/334] chore(deps): update rust crate tempfile to v3.10.1 (#5489) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 83dfcb5a28..7421be1559 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9437,9 +9437,9 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.0" +version = "3.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a365e8cd18e44762ef95d87f284f4b5cd04107fec2ff3052bd6a3e6069669e67" +checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", "fastrand", From 164eafea6319c13c300831ec3adcab2d265d337a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 10:08:01 +0000 Subject: [PATCH 111/334] chore(deps): update rust crate thiserror to v1.0.58 (#5490) --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7421be1559..84aece2182 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9526,18 +9526,18 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.57" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" +checksum = "03468839009160513471e86a034bb2c5c0e4baae3b43f79ffc55c4a5427b3297" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.57" +version = "1.0.58" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" +checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", From 134e34e696c94c215d6cc3dad7cab1ffbe89c6eb Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Wed, 10 Apr 2024 10:41:07 -0700 Subject: [PATCH 112/334] Update Propolis and Crucible versions. (#5497) Propolis changes: Rework storage for Accessors Crucible Changes Update oximeter dependency (#1244) Dummy downstairs cleanup (#1247) Some DTrace updates. (#1246) HEY! LISTEN! HEY! LISTEN! (#1215) Fix clippy lints (#1245) Co-authored-by: Alan Hanson --- Cargo.lock | 26 +++++++++++++------------- Cargo.toml | 12 ++++++------ package-manifest.toml | 12 ++++++------ 3 files changed, 25 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 84aece2182..2ac54c0a55 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -482,9 +482,9 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" +source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" dependencies = [ - "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", + "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", "libc", "strum", ] @@ -501,7 +501,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" +source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" dependencies = [ "libc", "strum", @@ -1415,7 +1415,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=4661c23b248da18862012cf55af21b17b79a468e#4661c23b248da18862012cf55af21b17b79a468e" +source = "git+https://github.com/oxidecomputer/crucible?rev=5677c7be81b60d9ba9c30991d10376f279a1d3b7#5677c7be81b60d9ba9c30991d10376f279a1d3b7" dependencies = [ "anyhow", 
"chrono", @@ -1431,7 +1431,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=4661c23b248da18862012cf55af21b17b79a468e#4661c23b248da18862012cf55af21b17b79a468e" +source = "git+https://github.com/oxidecomputer/crucible?rev=5677c7be81b60d9ba9c30991d10376f279a1d3b7#5677c7be81b60d9ba9c30991d10376f279a1d3b7" dependencies = [ "anyhow", "chrono", @@ -1448,7 +1448,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=4661c23b248da18862012cf55af21b17b79a468e#4661c23b248da18862012cf55af21b17b79a468e" +source = "git+https://github.com/oxidecomputer/crucible?rev=5677c7be81b60d9ba9c30991d10376f279a1d3b7#5677c7be81b60d9ba9c30991d10376f279a1d3b7" dependencies = [ "crucible-workspace-hack", "libc", @@ -3499,7 +3499,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", + "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", "byteorder", "camino", "camino-tempfile", @@ -5427,7 +5427,7 @@ dependencies = [ "pq-sys", "pretty_assertions", "progenitor-client", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", "rand 0.8.5", "rcgen", "ref-cast", @@ -5639,7 +5639,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "pretty_assertions", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", "propolis-mock-server", "rand 0.8.5", "rcgen", @@ -7082,7 +7082,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" +source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" dependencies = [ "async-trait", "base64 0.21.7", @@ -7103,7 +7103,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" +source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" dependencies = [ "anyhow", "atty", @@ -7113,7 +7113,7 @@ dependencies = [ "futures", "hyper 0.14.28", "progenitor", - "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f)", + "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", "rand 0.8.5", "reqwest", "schemars", @@ -7154,7 +7154,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=84e423bfd3bf84ebb04acb95cf7600731e9f361f#84e423bfd3bf84ebb04acb95cf7600731e9f361f" +source = 
"git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index 251b507633..dc502a1ff7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -197,9 +197,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "4661c23b248da18862012cf55af21b17b79a468e" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "4661c23b248da18862012cf55af21b17b79a468e" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "4661c23b248da18862012cf55af21b17b79a468e" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.6" @@ -339,9 +339,9 @@ prettyplease = { version = "0.2.17", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "84e423bfd3bf84ebb04acb95cf7600731e9f361f" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "84e423bfd3bf84ebb04acb95cf7600731e9f361f" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "84e423bfd3bf84ebb04acb95cf7600731e9f361f" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index b8d1727432..2819010335 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -492,10 +492,10 @@ only_for_targets.image = "standard" # 3. 
Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "4661c23b248da18862012cf55af21b17b79a468e" +source.commit = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "14e607d04234a6749e981c8049437523dbc75494938541822e31ea61090800bf" +source.sha256 = "5341c5572f80b8d1763f6563412dc03d9604d8c7af4022fc5da55338ee60d35c" output.type = "zone" output.intermediate_only = true @@ -504,10 +504,10 @@ service_name = "crucible_pantry_prebuilt" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "4661c23b248da18862012cf55af21b17b79a468e" +source.commit = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "9a2181b43d7581468d075e37b5286e478ff008de65dd73b7f49a6e72bc9a43f5" +source.sha256 = "bf281bae1331279109dac23328ff86756331d7776e69396b02c77a4d08a225c7" output.type = "zone" output.intermediate_only = true @@ -519,10 +519,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "84e423bfd3bf84ebb04acb95cf7600731e9f361f" +source.commit = "8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "db72c83b4c0a09e0759ec52e48a5589e9d732c3f390fb4c084f820d173b4f058" +source.sha256 = "35c5956b14d3b0a843351ce8ea7e8cb52e631a96a89041810fe0f91cc4072638" output.type = "zone" [package.mg-ddm-gz] From 02810f83e100ae8fa620c48d3a34a80794c738f6 Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Wed, 10 Apr 2024 20:00:55 +0100 Subject: [PATCH 113/334] support user is missing PATH entries (#5476) The switch zone image contains .bashrc and .profile files that set up the environment based on the zone name, but these are only there for the root user. We can copy these over for the support user as part of switch zone setup. Fixes #5474 --- smf/switch_zone_setup/switch_zone_setup | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/smf/switch_zone_setup/switch_zone_setup b/smf/switch_zone_setup/switch_zone_setup index ca38b15547..ad12677866 100755 --- a/smf/switch_zone_setup/switch_zone_setup +++ b/smf/switch_zone_setup/switch_zone_setup @@ -6,9 +6,9 @@ set -ex -o pipefail # set up the users required for wicket and support. USERS=( - (user=wicket group=wicket gecos='Wicket User' nopasswd=1) - (user=support group=support gecos='Oxide Support' - profiles=('Primary Administrator') + (user=wicket group=wicket gecos='Wicket User' nopasswd=1 shell='/bin/sh') + (user=support group=support gecos='Oxide Support' homedir='/home/support' + shell='/bin/bash' profiles=('Primary Administrator') ) ) @@ -18,8 +18,9 @@ for i in "${!USERS[@]}"; do # Add a new group for the user. getent group "${u.group}" >/dev/null 2>&1 || groupadd "${u.group}" # Add the user. 
- getent passwd "${u.user}" >/dev/null 2>&1 \ - || useradd -m -g "${u.group}" -c "${u.gecos}" "${u.user}" + getent passwd "${u.user}" >/dev/null 2>&1 || \ + useradd -m -s "${u.shell}" -g "${u.group}" -c "${u.gecos}" \ + "${u.user}" # Either enable passwordless login (wicket) or disable password-based logins # completely (support, which logs in via ssh key). @@ -35,6 +36,14 @@ for i in "${!USERS[@]}"; do else usermod -P '' "${u.user}" fi + + if [[ -n "${u.homedir}" ]]; then + mkdir -p "${u.homedir}" + for f in .bashrc .profile; do + cp "/root/$f" "${u.homedir}/$f" + done + chown -R "${u.user}" "${u.homedir}" + fi done exit $SMF_EXIT_OK From 20299e0b472586014054bc2ebc325e0f4d44b511 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 10 Apr 2024 13:09:05 -0700 Subject: [PATCH 114/334] [nexus-types] use TypedUuid in blueprint-internal code (#5488) Most APIs aren't changing, nor is the OpenAPI schema for now -- just code internal to the blueprint. --- Cargo.lock | 1 + dev-tools/reconfigurator-cli/src/main.rs | 6 +- .../db-queries/src/db/datastore/deployment.rs | 2 - .../planning/src/blueprint_builder.rs | 67 ++++++++------- nexus/reconfigurator/planning/src/example.rs | 20 +++-- nexus/reconfigurator/planning/src/planner.rs | 67 +++++++-------- nexus/reconfigurator/planning/src/system.rs | 25 +++--- nexus/types/src/deployment.rs | 85 ++++++++++++++----- typed-rng/Cargo.toml | 1 + typed-rng/src/lib.rs | 8 ++ 10 files changed, 168 insertions(+), 114 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ac54c0a55..3bf2713d86 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10250,6 +10250,7 @@ checksum = "4a90726108dab678edab76459751e1cc7c597c3484a6384d6423191255fa641b" name = "typed-rng" version = "0.1.0" dependencies = [ + "newtype-uuid", "omicron-workspace-hack", "rand 0.8.5", "rand_core 0.6.4", diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 24174b9e4f..b065e9586b 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -403,7 +403,7 @@ enum Commands { #[derive(Debug, Args)] struct SledAddArgs { /// id of the new sled - sled_id: Option, + sled_id: Option>, } #[derive(Debug, Args)] @@ -451,7 +451,7 @@ enum BlueprintEditCommands { /// add a Nexus instance to a particular sled AddNexus { /// sled on which to deploy the new instance - sled_id: Uuid, + sled_id: TypedUuid, }, } @@ -1178,7 +1178,7 @@ fn cmd_load( ); let result = sim.system.sled_full( - *sled_id.as_untyped_uuid(), + sled_id, sled_details.policy, sled_details.resources.clone(), inventory_sp, diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 38899050c6..22b602c71d 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1385,8 +1385,6 @@ mod tests { }; let new_sled_zpools = &planning_input.sled_resources(&new_sled_id).unwrap().zpools; - // TODO-cleanup use `TypedUuid` everywhere - let new_sled_id = *new_sled_id.as_untyped_uuid(); // Create a builder for a child blueprint. 
let mut builder = BlueprintBuilder::new_based_on( diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index efc5c9ff39..677df2201f 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -45,6 +45,7 @@ use rand::rngs::StdRng; use rand::SeedableRng; use slog::o; use slog::Logger; +use std::borrow::Cow; use std::collections::BTreeMap; use std::collections::HashSet; use std::hash::Hash; @@ -60,7 +61,7 @@ use uuid::Uuid; #[derive(Debug, Error)] pub enum Error { #[error("sled {sled_id}: ran out of available addresses for sled")] - OutOfAddresses { sled_id: Uuid }, + OutOfAddresses { sled_id: TypedUuid }, #[error("no Nexus zones exist in parent blueprint")] NoNexusZonesInParentBlueprint, #[error("no external service IP addresses are available")] @@ -117,7 +118,7 @@ pub struct BlueprintBuilder<'a> { // These fields are used to allocate resources from sleds. input: &'a PlanningInput, - sled_ip_allocators: BTreeMap, + sled_ip_allocators: BTreeMap, IpAllocator>, // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. @@ -217,7 +218,8 @@ impl<'a> BlueprintBuilder<'a> { })?; Ok(( - *sled_id.as_untyped_uuid(), + // TODO-cleanup use `TypedUuid` everywhere + sled_id.into_untyped_uuid(), BlueprintZonesConfig::initial_from_collection(&zones), )) }) @@ -405,7 +407,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_ntp( &mut self, - sled_id: Uuid, + sled_id: TypedUuid, ) -> Result { // If there's already an NTP zone on this sled, do nothing. let has_ntp = self @@ -469,7 +471,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_crucible( &mut self, - sled_id: Uuid, + sled_id: TypedUuid, pool_name: ZpoolName, ) -> Result { // If this sled already has a Crucible zone on this pool, do nothing. @@ -520,7 +522,7 @@ impl<'a> BlueprintBuilder<'a> { /// /// This value may change before a blueprint is actually generated if /// further changes are made to the builder. - pub fn sled_num_nexus_zones(&self, sled_id: Uuid) -> usize { + pub fn sled_num_nexus_zones(&self, sled_id: TypedUuid) -> usize { self.zones .current_sled_zones(sled_id) .filter(|z| z.config.zone_type.is_nexus()) @@ -529,7 +531,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_multiple_nexus( &mut self, - sled_id: Uuid, + sled_id: TypedUuid, desired_zone_count: usize, ) -> Result { // Whether Nexus should use TLS and what the external DNS servers it @@ -565,7 +567,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_multiple_nexus_with_config( &mut self, - sled_id: Uuid, + sled_id: TypedUuid, desired_zone_count: usize, external_tls: bool, external_dns_servers: Vec, @@ -653,7 +655,7 @@ impl<'a> BlueprintBuilder<'a> { fn sled_add_zone( &mut self, - sled_id: Uuid, + sled_id: TypedUuid, zone: BlueprintZoneConfig, ) -> Result<(), Error> { // Check the sled id and return an appropriate error if it's invalid. 
@@ -674,7 +676,10 @@ impl<'a> BlueprintBuilder<'a> { /// Returns a newly-allocated underlay address suitable for use by Omicron /// zones - fn sled_alloc_ip(&mut self, sled_id: Uuid) -> Result { + fn sled_alloc_ip( + &mut self, + sled_id: TypedUuid, + ) -> Result { let sled_subnet = self.sled_resources(sled_id)?.subnet; let allocator = self.sled_ip_allocators.entry(sled_id).or_insert_with(|| { @@ -711,9 +716,10 @@ impl<'a> BlueprintBuilder<'a> { allocator.alloc().ok_or(Error::OutOfAddresses { sled_id }) } - fn sled_resources(&self, sled_id: Uuid) -> Result<&SledResources, Error> { - // TODO-cleanup use `TypedUuid` everywhere - let sled_id = TypedUuid::from_untyped_uuid(sled_id); + fn sled_resources( + &self, + sled_id: TypedUuid, + ) -> Result<&SledResources, Error> { self.input.sled_resources(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( "attempted to use sled that is not in service: {}", @@ -767,15 +773,18 @@ impl BlueprintBuilderRng { /// that we've changed and a _reference_ to the parent blueprint's zones. This /// struct makes it easy for callers iterate over the right set of zones. struct BlueprintZonesBuilder<'a> { - changed_zones: BTreeMap, - parent_zones: &'a BTreeMap, + changed_zones: BTreeMap, BlueprintZonesConfig>, + // Temporarily make a clone of the parent blueprint's zones so we can use + // typed UUIDs everywhere. Once we're done migrating, this `Cow` can be + // removed. + parent_zones: Cow<'a, BTreeMap, BlueprintZonesConfig>>, } impl<'a> BlueprintZonesBuilder<'a> { pub fn new(parent_blueprint: &'a Blueprint) -> BlueprintZonesBuilder { BlueprintZonesBuilder { changed_zones: BTreeMap::new(), - parent_zones: &parent_blueprint.blueprint_zones, + parent_zones: Cow::Owned(parent_blueprint.typed_blueprint_zones()), } } @@ -785,7 +794,7 @@ impl<'a> BlueprintZonesBuilder<'a> { /// do that if no changes are being made. pub fn change_sled_zones( &mut self, - sled_id: Uuid, + sled_id: TypedUuid, ) -> &mut BlueprintZonesConfig { self.changed_zones.entry(sled_id).or_insert_with(|| { if let Some(old_sled_zones) = self.parent_zones.get(&sled_id) { @@ -809,7 +818,7 @@ impl<'a> BlueprintZonesBuilder<'a> { /// sled in the blueprint that's being built pub fn current_sled_zones( &self, - sled_id: Uuid, + sled_id: TypedUuid, ) -> Box + '_> { if let Some(sled_zones) = self .changed_zones @@ -829,8 +838,6 @@ impl<'a> BlueprintZonesBuilder<'a> { ) -> BTreeMap { sled_ids .map(|sled_id| { - // TODO-cleanup use `TypedUuid` everywhere - let sled_id = *sled_id.as_untyped_uuid(); // Start with self.changed_zones, which contains entries for any // sled whose zones config is changing in this blueprint. 
let mut zones = self @@ -849,7 +856,8 @@ impl<'a> BlueprintZonesBuilder<'a> { zones.sort(); - (sled_id, zones) + // TODO-cleanup use `TypedUuid` everywhere + (sled_id.into_untyped_uuid(), zones) }) .collect() } @@ -969,12 +977,10 @@ pub mod test { for (sled_id, sled_resources) in example.input.all_sled_resources(SledFilter::All) { - // TODO-cleanup use `TypedUuid` everywhere - let sled_id = sled_id.as_untyped_uuid(); - builder.sled_ensure_zone_ntp(*sled_id).unwrap(); + builder.sled_ensure_zone_ntp(sled_id).unwrap(); for pool_name in &sled_resources.zpools { builder - .sled_ensure_zone_crucible(*sled_id, pool_name.clone()) + .sled_ensure_zone_crucible(sled_id, pool_name.clone()) .unwrap(); } } @@ -1004,9 +1010,7 @@ pub mod test { .expect("failed to create builder"); builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); // TODO-cleanup use `TypedUuid` everywhere - let new_sled_resources = input - .sled_resources(&TypedUuid::from_untyped_uuid(new_sled_id)) - .unwrap(); + let new_sled_resources = input.sled_resources(&new_sled_id).unwrap(); for pool_name in &new_sled_resources.zpools { builder .sled_ensure_zone_crucible(new_sled_id, pool_name.clone()) @@ -1121,7 +1125,7 @@ pub mod test { .omicron_zones .keys() .next() - .copied() + .map(|sled_id| TypedUuid::from_untyped_uuid(*sled_id)) .expect("no sleds present"), 1, ) @@ -1161,7 +1165,10 @@ pub mod test { break; } } - selected_sled_id.expect("found no sleds with Nexus zone") + let sled_id = + selected_sled_id.expect("found no sleds with Nexus zone"); + // TODO-cleanup use `TypedUuid` everywhere + TypedUuid::from_untyped_uuid(sled_id) }; let parent = BlueprintBuilder::build_initial_from_collection_seeded( diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 85e9d52ee8..dd47f0f17b 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -17,9 +17,10 @@ use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::TypedUuid; use sled_agent_client::types::OmicronZonesConfig; -use typed_rng::UuidRng; +use typed_rng::TypedUuidRng; use uuid::Uuid; pub struct ExampleSystem { @@ -34,7 +35,7 @@ pub struct ExampleSystem { // This is currently only used for tests, so it looks unused in normal // builds. But in the future it could be used by other consumers, too. 
#[allow(dead_code)] - pub(crate) sled_rng: UuidRng, + pub(crate) sled_rng: TypedUuidRng, } impl ExampleSystem { @@ -44,7 +45,7 @@ impl ExampleSystem { nsleds: usize, ) -> ExampleSystem { let mut system = SystemDescription::new(); - let mut sled_rng = UuidRng::from_seed(test_name, "ExampleSystem"); + let mut sled_rng = TypedUuidRng::from_seed(test_name, "ExampleSystem"); let sled_ids: Vec<_> = (0..nsleds).map(|_| sled_rng.next()).collect(); for sled_id in &sled_ids { let _ = system.sled(SledBuilder::new().id(*sled_id)).unwrap(); @@ -64,7 +65,8 @@ impl ExampleSystem { inventory_builder .found_sled_omicron_zones( "fake sled agent", - *sled_id, + // TODO-cleanup use `TypedUuid` everywhere + sled_id.into_untyped_uuid(), OmicronZonesConfig { generation: Generation::new(), zones: vec![], @@ -97,8 +99,6 @@ impl ExampleSystem { for (sled_id, sled_resources) in base_input.all_sled_resources(SledFilter::All) { - // TODO-cleanup use `TypedUuid` everywhere - let sled_id = *sled_id.as_untyped_uuid(); let _ = builder.sled_ensure_zone_ntp(sled_id).unwrap(); let _ = builder .sled_ensure_zone_multiple_nexus_with_config( @@ -121,7 +121,10 @@ impl ExampleSystem { builder.set_rng_seed((test_name, "ExampleSystem collection")); for sled_id in blueprint.sleds() { - let Some(zones) = blueprint.blueprint_zones.get(&sled_id) else { + // TODO-cleanup use `TypedUuid` everywhere + let Some(zones) = + blueprint.blueprint_zones.get(sled_id.as_untyped_uuid()) + else { continue; }; for zone in zones.zones.iter().map(|z| &z.config) { @@ -153,7 +156,8 @@ impl ExampleSystem { builder .found_sled_omicron_zones( "fake sled agent", - sled_id, + // TODO-cleanup use `TypedUuid` everywhere + sled_id.into_untyped_uuid(), zones.to_omicron_zones_config( BlueprintZoneFilter::SledAgentPut, ), diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index afd32be7d4..7a7fb941b1 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -21,7 +21,6 @@ use slog::{info, warn, Logger}; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::hash::Hash; -use uuid::Uuid; pub struct Planner<'a> { log: Logger, @@ -99,14 +98,11 @@ impl<'a> Planner<'a> { for (sled_id, sled_info) in self.input.all_sled_resources(SledFilter::InService) { - // TODO-cleanup use `TypedUuid` everywhere - let sled_id = sled_id.as_untyped_uuid(); - // Check for an NTP zone. Every sled should have one. If it's not // there, all we can do is provision that one zone. We have to wait // for that to succeed and synchronize the clock before we can // provision anything else. - if self.blueprint.sled_ensure_zone_ntp(*sled_id)? == Ensure::Added { + if self.blueprint.sled_ensure_zone_ntp(sled_id)? == Ensure::Added { info!( &self.log, "found sled missing NTP zone (will add one)"; @@ -117,7 +113,7 @@ impl<'a> Planner<'a> { // Don't make any other changes to this sled. However, this // change is compatible with any other changes to other sleds, // so we can "continue" here rather than "break". 
- sleds_waiting_for_ntp_zones.insert(*sled_id); + sleds_waiting_for_ntp_zones.insert(sled_id); continue; } @@ -141,7 +137,7 @@ impl<'a> Planner<'a> { let has_ntp_inventory = self .inventory .omicron_zones - .get(sled_id) + .get(sled_id.as_untyped_uuid()) .map(|sled_zones| { sled_zones.zones.zones.iter().any(|z| z.zone_type.is_ntp()) }) @@ -161,7 +157,7 @@ impl<'a> Planner<'a> { for zpool_name in &sled_info.zpools { if self .blueprint - .sled_ensure_zone_crucible(*sled_id, zpool_name.clone())? + .sled_ensure_zone_crucible(sled_id, zpool_name.clone())? == Ensure::Added { info!( @@ -195,15 +191,13 @@ impl<'a> Planner<'a> { fn ensure_correct_number_of_nexus_zones( &mut self, - sleds_waiting_for_ntp_zone: &BTreeSet, + sleds_waiting_for_ntp_zone: &BTreeSet>, ) -> Result<(), Error> { // Count the number of Nexus zones on all in-service sleds. This will // include sleds that are in service but not eligible for new services, // but will not include sleds that have been expunged or decommissioned. let mut num_total_nexus = 0; for sled_id in self.input.all_sled_ids(SledFilter::InService) { - // TODO-cleanup use `TypedUuid` everywhere - let sled_id = *sled_id.as_untyped_uuid(); let num_nexus = self.blueprint.sled_num_nexus_zones(sled_id); num_total_nexus += num_nexus; } @@ -233,13 +227,9 @@ impl<'a> Planner<'a> { for sled_id in self .input .all_sled_ids(SledFilter::EligibleForDiscretionaryServices) - .filter(|sled_id| { - // TODO-cleanup use `TypedUuid` everywhere - !sleds_waiting_for_ntp_zone.contains(sled_id.as_untyped_uuid()) - }) + .filter(|sled_id| !sleds_waiting_for_ntp_zone.contains(sled_id)) { - let num_nexus = - self.blueprint.sled_num_nexus_zones(*sled_id.as_untyped_uuid()); + let num_nexus = self.blueprint.sled_num_nexus_zones(sled_id); sleds_by_num_nexus.entry(num_nexus).or_default().push(sled_id); } @@ -293,10 +283,10 @@ impl<'a> Planner<'a> { // For each sled we need to change, actually do so. let mut total_added = 0; for (sled_id, new_nexus_count) in sleds_to_change { - match self.blueprint.sled_ensure_zone_multiple_nexus( - *sled_id.as_untyped_uuid(), - new_nexus_count, - )? { + match self + .blueprint + .sled_ensure_zone_multiple_nexus(sled_id, new_nexus_count)? 
+ { EnsureMultiple::Added(n) => { info!( self.log, "will add {n} Nexus zone(s) to sled"; @@ -471,14 +461,17 @@ mod test { assert!(collection .omicron_zones .insert( - new_sled_id, + // TODO-cleanup use `TypedUuid` everywhere + new_sled_id.into_untyped_uuid(), OmicronZonesFound { time_collected: now_db_precision(), source: String::from("test suite"), - sled_id: new_sled_id, + // TODO-cleanup use `TypedUuid` everywhere + sled_id: new_sled_id.into_untyped_uuid(), zones: blueprint4 .blueprint_zones - .get(&new_sled_id) + // TODO-cleanup use `TypedUuid` everywhere + .get(new_sled_id.as_untyped_uuid()) .expect("blueprint should contain zones for new sled") .to_omicron_zones_config( BlueprintZoneFilter::SledAgentPut @@ -638,7 +631,7 @@ mod test { assert_eq!(sleds.len(), 1); let (changed_sled_id, sled_changes) = sleds.pop().unwrap(); // TODO-cleanup use `TypedUuid` everywhere - assert_eq!(changed_sled_id, *sled_id.as_untyped_uuid()); + assert_eq!(changed_sled_id, sled_id); assert_eq!(sled_changes.zones_removed().len(), 0); assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); @@ -790,22 +783,19 @@ mod test { details.policy = SledPolicy::InService { provision_policy: SledProvisionPolicy::NonProvisionable, }; - // TODO-cleanup use `TypedUuid` everywhere - *sled_id.as_untyped_uuid() + *sled_id }; println!("1 -> 2: marked non-provisionable {nonprovisionable_sled_id}"); let expunged_sled_id = { let (sled_id, details) = sleds_iter.next().expect("no sleds"); details.policy = SledPolicy::Expunged; - // TODO-cleanup use `TypedUuid` everywhere - *sled_id.as_untyped_uuid() + *sled_id }; println!("1 -> 2: expunged {expunged_sled_id}"); let decommissioned_sled_id = { let (sled_id, details) = sleds_iter.next().expect("no sleds"); details.state = SledState::Decommissioned; - // TODO-cleanup use `TypedUuid` everywhere - *sled_id.as_untyped_uuid() + *sled_id }; println!("1 -> 2: decommissioned {decommissioned_sled_id}"); @@ -905,7 +895,8 @@ mod test { // Leave the non-provisionable sled's generation alone. 
let zones = &mut blueprint2a .blueprint_zones - .get_mut(&nonprovisionable_sled_id) + // TODO-cleanup use `TypedUuid` everywhere + .get_mut(nonprovisionable_sled_id.as_untyped_uuid()) .unwrap() .zones; @@ -945,12 +936,18 @@ mod test { } }); - let expunged_zones = - blueprint2a.blueprint_zones.get_mut(&expunged_sled_id).unwrap(); + let expunged_zones = blueprint2a + .blueprint_zones + // TODO-cleanup use `TypedUuid` everywhere + .get_mut(expunged_sled_id.as_untyped_uuid()) + .unwrap(); expunged_zones.zones.clear(); expunged_zones.generation = expunged_zones.generation.next(); - blueprint2a.blueprint_zones.remove(&decommissioned_sled_id); + blueprint2a + .blueprint_zones + // TODO-cleanup use `TypedUuid` everywhere + .remove(decommissioned_sled_id.as_untyped_uuid()); blueprint2a.external_dns_version = blueprint2a.external_dns_version.next(); diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index bbe5353b48..fcaa096ff9 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -32,6 +32,7 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::TypedUuid; use std::collections::BTreeSet; use std::fmt::Debug; @@ -64,7 +65,7 @@ impl SubnetIterator for T where #[derive(Debug)] pub struct SystemDescription { collector: Option, - sleds: IndexMap, + sleds: IndexMap, Sled>, sled_subnets: Box, available_non_scrimlet_slots: BTreeSet, available_scrimlet_slots: BTreeSet, @@ -180,7 +181,7 @@ impl SystemDescription { /// Add a sled to the system, as described by a SledBuilder pub fn sled(&mut self, sled: SledBuilder) -> anyhow::Result<&mut Self> { - let sled_id = sled.id.unwrap_or_else(Uuid::new_v4); + let sled_id = sled.id.unwrap_or_else(TypedUuid::new_v4); ensure!( !self.sleds.contains_key(&sled_id), "attempted to add sled with the same id as an existing one: {}", @@ -231,7 +232,7 @@ impl SystemDescription { /// database of an existing system pub fn sled_full( &mut self, - sled_id: Uuid, + sled_id: TypedUuid, sled_policy: SledPolicy, sled_resources: SledResources, inventory_sp: Option>, @@ -312,9 +313,7 @@ impl SystemDescription { subnet: sled.sled_subnet, }, }; - // TODO-cleanup use `TypedUuid` everywhere - let sled_id = TypedUuid::from_untyped_uuid(sled.sled_id); - builder.add_sled(sled_id, sled_details)?; + builder.add_sled(sled.sled_id, sled_details)?; } Ok(builder) @@ -331,7 +330,7 @@ pub enum SledHardware { #[derive(Clone, Debug)] pub struct SledBuilder { - id: Option, + id: Option>, unique: Option, hardware: SledHardware, hardware_slot: Option, @@ -355,7 +354,7 @@ impl SledBuilder { /// Set the id of the sled /// /// Default: randomly generated - pub fn id(mut self, id: Uuid) -> Self { + pub fn id(mut self, id: TypedUuid) -> Self { self.id = Some(id); self } @@ -418,7 +417,7 @@ pub struct SledHwInventory<'a> { /// Collection. 
#[derive(Clone, Debug)] struct Sled { - sled_id: Uuid, + sled_id: TypedUuid, sled_subnet: Ipv6Subnet, inventory_sp: Option<(u16, SpState)>, inventory_sled_agent: sled_agent_client::types::Inventory, @@ -429,7 +428,7 @@ struct Sled { impl Sled { /// Create a `Sled` using faked-up information based on a `SledBuilder` fn new_simulated( - sled_id: Uuid, + sled_id: TypedUuid, sled_subnet: Ipv6Subnet, sled_role: SledRole, unique: Option, @@ -496,7 +495,7 @@ impl Sled { reservoir_size: ByteCount::from(1024), sled_role, sled_agent_address, - sled_id, + sled_id: sled_id.into_untyped_uuid(), usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), disks: vec![], @@ -519,7 +518,7 @@ impl Sled { /// Create a `Sled` based on real information from another `Policy` and /// inventory `Collection` fn new_full( - sled_id: Uuid, + sled_id: TypedUuid, sled_policy: SledPolicy, sled_resources: SledResources, inventory_sp: Option>, @@ -580,7 +579,7 @@ impl Sled { reservoir_size: inv_sled_agent.reservoir_size, sled_role: inv_sled_agent.sled_role, sled_agent_address: inv_sled_agent.sled_agent_address.to_string(), - sled_id, + sled_id: sled_id.into_untyped_uuid(), usable_hardware_threads: inv_sled_agent.usable_hardware_threads, usable_physical_ram: inv_sled_agent.usable_physical_ram, disks: vec![], diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index b973c0b6c8..ecd9d3aa49 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -20,7 +20,10 @@ pub use crate::inventory::OmicronZoneType; pub use crate::inventory::OmicronZonesConfig; pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; +use newtype_uuid::GenericUuid; +use newtype_uuid::TypedUuid; use omicron_common::api::external::Generation; +use omicron_uuid_kinds::SledKind; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -153,9 +156,24 @@ impl Blueprint { }) } + // Temporary method that provides the list of Omicron zones using + // `TypedUuid`. + // + // In the future, `all_omicron_zones` will return `TypedUuid`, + // and this method will go away. 
+ pub fn all_omicron_zones_typed( + &self, + ) -> impl Iterator, &OmicronZoneConfig)> { + self.blueprint_zones.iter().flat_map(|(sled_id, z)| { + z.zones.iter().map(move |z| { + (TypedUuid::from_untyped_uuid(*sled_id), &z.config) + }) + }) + } + /// Iterate over the ids of all sleds in the blueprint - pub fn sleds(&self) -> impl Iterator + '_ { - self.blueprint_zones.keys().copied() + pub fn sleds(&self) -> impl Iterator> + '_ { + self.blueprint_zones.keys().copied().map(TypedUuid::from_untyped_uuid) } /// Summarize the difference between sleds and zones between two @@ -170,9 +188,9 @@ impl Blueprint { ) -> Result { BlueprintDiff::new( DiffBeforeMetadata::Blueprint(Box::new(before.metadata())), - before.blueprint_zones.clone(), + before.typed_blueprint_zones(), self.metadata(), - self.blueprint_zones.clone(), + self.typed_blueprint_zones(), ) } @@ -210,7 +228,7 @@ impl Blueprint { generation: zones_found.zones.generation, zones, }; - (*sled_id, zones) + (TypedUuid::from_untyped_uuid(*sled_id), zones) }) .collect(); @@ -218,7 +236,7 @@ impl Blueprint { DiffBeforeMetadata::Collection { id: before.id }, before_zones, self.metadata(), - self.blueprint_zones.clone(), + self.typed_blueprint_zones(), ) } @@ -227,6 +245,21 @@ impl Blueprint { pub fn display(&self) -> BlueprintDisplay<'_> { BlueprintDisplay { blueprint: self } } + + /// Temporary method that returns `self.blueprint_zones`, except the keys + /// are `TypedUuid`. + /// + /// TODO-cleanup use `TypedUuid` everywhere + pub fn typed_blueprint_zones( + &self, + ) -> BTreeMap, BlueprintZonesConfig> { + self.blueprint_zones + .iter() + .map(|(sled_id, zones)| { + (TypedUuid::from_untyped_uuid(*sled_id), zones.clone()) + }) + .collect() + } } /// Wrapper to allow a [`Blueprint`] to be displayed with information. @@ -536,9 +569,9 @@ impl BlueprintDiff { /// data is valid. fn new( before_meta: DiffBeforeMetadata, - before_zones: BTreeMap, + before_zones: BTreeMap, BlueprintZonesConfig>, after_meta: BlueprintMetadata, - after_zones: BTreeMap, + after_zones: BTreeMap, BlueprintZonesConfig>, ) -> Result { let mut errors = Vec::new(); @@ -568,14 +601,18 @@ impl BlueprintDiff { /// Iterate over sleds only present in the second blueprint of a diff pub fn sleds_added( &self, - ) -> impl ExactSizeIterator + '_ { + ) -> impl ExactSizeIterator< + Item = (TypedUuid, &BlueprintZonesConfig), + > + '_ { self.sleds.added.iter().map(|(sled_id, zones)| (*sled_id, zones)) } /// Iterate over sleds only present in the first blueprint of a diff pub fn sleds_removed( &self, - ) -> impl ExactSizeIterator + '_ { + ) -> impl ExactSizeIterator< + Item = (TypedUuid, &BlueprintZonesConfig), + > + '_ { self.sleds.removed.iter().map(|(sled_id, zones)| (*sled_id, zones)) } @@ -583,7 +620,8 @@ impl BlueprintDiff { /// changes. pub fn sleds_modified( &self, - ) -> impl ExactSizeIterator + '_ { + ) -> impl ExactSizeIterator, &DiffSledModified)> + '_ + { self.sleds.modified.iter().map(|(sled_id, sled)| (*sled_id, sled)) } @@ -591,7 +629,8 @@ impl BlueprintDiff { /// changes. 
pub fn sleds_unchanged( &self, - ) -> impl Iterator + '_ { + ) -> impl Iterator, &BlueprintZonesConfig)> + '_ + { self.sleds.unchanged.iter().map(|(sled_id, zones)| (*sled_id, zones)) } @@ -603,10 +642,10 @@ impl BlueprintDiff { #[derive(Debug)] struct DiffSleds { - added: BTreeMap, - removed: BTreeMap, - modified: BTreeMap, - unchanged: BTreeMap, + added: BTreeMap, BlueprintZonesConfig>, + removed: BTreeMap, BlueprintZonesConfig>, + modified: BTreeMap, DiffSledModified>, + unchanged: BTreeMap, BlueprintZonesConfig>, } impl DiffSleds { @@ -616,8 +655,8 @@ impl DiffSleds { /// The return value only contains the sleds that are present in both /// blueprints. fn new( - before: BTreeMap, - mut after: BTreeMap, + before: BTreeMap, BlueprintZonesConfig>, + mut after: BTreeMap, BlueprintZonesConfig>, errors: &mut Vec, ) -> Self { let mut removed = BTreeMap::new(); @@ -730,7 +769,7 @@ pub enum BlueprintDiffSingleError { /// /// For a particular zone, the type should never change. ZoneTypeChanged { - sled_id: Uuid, + sled_id: TypedUuid, zone_id: Uuid, before: ZoneKind, after: ZoneKind, @@ -776,7 +815,7 @@ impl DiffBeforeMetadata { #[derive(Clone, Debug)] pub struct DiffSledModified { /// id of the sled - pub sled_id: Uuid, + pub sled_id: TypedUuid, /// generation of the "zones" configuration on the left side pub generation_before: Generation, /// generation of the "zones" configuration on the right side @@ -788,7 +827,7 @@ pub struct DiffSledModified { impl DiffSledModified { fn new( - sled_id: Uuid, + sled_id: TypedUuid, before: BlueprintZonesConfig, after: BlueprintZonesConfig, errors: &mut Vec, @@ -1210,7 +1249,7 @@ mod table_display { } fn add_whole_sled_records( - sled_id: Uuid, + sled_id: TypedUuid, sled_zones: &BlueprintZonesConfig, kind: WholeSledKind, section: &mut StSectionBuilder, @@ -1247,7 +1286,7 @@ mod table_display { } fn add_modified_sled_records( - sled_id: Uuid, + sled_id: TypedUuid, modified: &DiffSledModified, section: &mut StSectionBuilder, ) { diff --git a/typed-rng/Cargo.toml b/typed-rng/Cargo.toml index b02a6b974a..c6f5270489 100644 --- a/typed-rng/Cargo.toml +++ b/typed-rng/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] +newtype-uuid.workspace = true omicron-workspace-hack.workspace = true rand.workspace = true rand_core.workspace = true diff --git a/typed-rng/src/lib.rs b/typed-rng/src/lib.rs index 5d5e4b1665..5269b7f1d7 100644 --- a/typed-rng/src/lib.rs +++ b/typed-rng/src/lib.rs @@ -56,6 +56,7 @@ use std::{fmt, hash::Hash, marker::PhantomData}; +use newtype_uuid::{GenericUuid, TypedUuid, TypedUuidKind}; use rand::rngs::StdRng; use rand_core::{RngCore, SeedableRng}; use uuid::Uuid; @@ -245,7 +246,14 @@ impl Generatable for Uuid { } } +impl Generatable for TypedUuid { + fn generate(rng: &mut R) -> Self { + TypedUuid::from_untyped_uuid(Uuid::generate(rng)) + } +} + pub type UuidRng = TypedRng; +pub type TypedUuidRng = TypedRng, StdRng>; #[cfg(test)] mod tests { From dbf849401c08c62bfcc64c28292f048469b10ef8 Mon Sep 17 00:00:00 2001 From: James MacMahon Date: Wed, 10 Apr 2024 18:46:15 -0400 Subject: [PATCH 115/334] Manually supply disks for non-gimlet systems (#5479) A sled-agent running on a non-gimlet will bail out of `HardwareSnapshot::new`, not returning any disks for use. Previously a hard coded `zpools` field in the sled's config.toml could supply pools to use, but that was recently removed. 
Add an option into the sled's config.toml to fill in observed disks for the sled agent to use: ```toml [[nongimlet_observed_disks]] slot = 0 variant = "U2" is_boot_disk = false [nongimlet_observed_disks.paths] devfs_path = "/devices/pci@0,0/pci1022,1483@1,1/pci15b7,5011@0/blkdev@w001B448B456F8DB8,0" [nongimlet_observed_disks.identity] vendor = "Synthetic" serial = "214664801348" model = "WDS100T1XHE-00AFY0" ``` These should be indistinguishable from what would be returned in the `HardwareSnapshot`, and should exercise the same related code that a regular gimlet would execute (with respect to partitioning, creating zpools, etc). --- installinator/src/hardware.rs | 4 +- sled-agent/src/config.rs | 5 +++ sled-agent/src/hardware_monitor.rs | 3 ++ sled-agent/src/long_running_tasks.rs | 13 ++++-- sled-hardware/src/disk.rs | 8 +++- sled-hardware/src/illumos/mod.rs | 61 ++++++++++++++++++---------- sled-hardware/src/non_illumos/mod.rs | 6 ++- sled-storage/src/resources.rs | 3 ++ 8 files changed, 72 insertions(+), 31 deletions(-) diff --git a/installinator/src/hardware.rs b/installinator/src/hardware.rs index 90859e3754..a48d816dc8 100644 --- a/installinator/src/hardware.rs +++ b/installinator/src/hardware.rs @@ -25,8 +25,8 @@ impl Hardware { .context("failed to detect whether host is a gimlet")?; ensure!(is_gimlet, "hardware scan only supported on gimlets"); - let hardware = - HardwareManager::new(log, SledMode::Auto).map_err(|err| { + let hardware = HardwareManager::new(log, SledMode::Auto, vec![]) + .map_err(|err| { anyhow!("failed to create HardwareManager: {err}") })?; diff --git a/sled-agent/src/config.rs b/sled-agent/src/config.rs index d084f5f546..c4ce421497 100644 --- a/sled-agent/src/config.rs +++ b/sled-agent/src/config.rs @@ -15,6 +15,7 @@ use illumos_utils::dladm::CHELSIO_LINK_PREFIX; use omicron_common::vlan::VlanID; use serde::Deserialize; use sled_hardware::is_gimlet; +use sled_hardware::UnparsedDisk; #[derive(Clone, Debug, Deserialize)] #[serde(rename_all = "lowercase")] @@ -66,6 +67,10 @@ pub struct Config { pub vlan: Option, /// Optional list of virtual devices to be used as "discovered disks". pub vdevs: Option>, + /// Optional list of real devices to be injected as observed disks during + /// device polling. 
+ #[serde(default)] + pub nongimlet_observed_disks: Option>, /// Optionally skip waiting for time synchronization pub skip_timesync: Option, diff --git a/sled-agent/src/hardware_monitor.rs b/sled-agent/src/hardware_monitor.rs index 3708a642f3..6dbca89d74 100644 --- a/sled-agent/src/hardware_monitor.rs +++ b/sled-agent/src/hardware_monitor.rs @@ -258,10 +258,13 @@ impl HardwareMonitor { } else { None }; + info!( self.log, "Checking current full hardware snapshot"; "underlay_network_info" => ?underlay_network, + "disks" => ?self.hardware_manager.disks(), ); + if self.hardware_manager.is_scrimlet_driver_loaded() { self.activate_switch().await; } else { diff --git a/sled-agent/src/long_running_tasks.rs b/sled-agent/src/long_running_tasks.rs index 9b0ea7ac6c..faea94f552 100644 --- a/sled-agent/src/long_running_tasks.rs +++ b/sled-agent/src/long_running_tasks.rs @@ -24,7 +24,7 @@ use crate::storage_monitor::StorageMonitor; use crate::zone_bundle::{CleanupContext, ZoneBundler}; use bootstore::schemes::v0 as bootstore; use key_manager::{KeyManager, StorageKeyRequester}; -use sled_hardware::{HardwareManager, SledMode}; +use sled_hardware::{HardwareManager, SledMode, UnparsedDisk}; use sled_storage::config::MountConfig; use sled_storage::disk::RawSyntheticDisk; use sled_storage::manager::{StorageHandle, StorageManager}; @@ -73,7 +73,11 @@ pub async fn spawn_all_longrunning_tasks( spawn_storage_monitor(log, storage_manager.clone()); - let hardware_manager = spawn_hardware_manager(log, sled_mode).await; + let nongimlet_observed_disks = + config.nongimlet_observed_disks.clone().unwrap_or(vec![]); + + let hardware_manager = + spawn_hardware_manager(log, sled_mode, nongimlet_observed_disks).await; // Start monitoring for hardware changes let (sled_agent_started_tx, service_manager_ready_tx) = @@ -145,6 +149,7 @@ fn spawn_storage_monitor(log: &Logger, storage_handle: StorageHandle) { async fn spawn_hardware_manager( log: &Logger, sled_mode: SledMode, + nongimlet_observed_disks: Vec, ) -> HardwareManager { // The `HardwareManager` does not use the the "task/handle" pattern // and spawns its worker task inside `HardwareManager::new`. Instead of returning @@ -154,10 +159,10 @@ async fn spawn_hardware_manager( // // There are pros and cons to both methods, but the reason to mention it here is that // the handle in this case is the `HardwareManager` itself. - info!(log, "Starting HardwareManager"; "sled_mode" => ?sled_mode); + info!(log, "Starting HardwareManager"; "sled_mode" => ?sled_mode, "nongimlet_observed_disks" => ?nongimlet_observed_disks); let log = log.clone(); tokio::task::spawn_blocking(move || { - HardwareManager::new(&log, sled_mode).unwrap() + HardwareManager::new(&log, sled_mode, nongimlet_observed_disks).unwrap() }) .await .unwrap() diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index adea1d182a..3730293936 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -66,7 +66,9 @@ pub enum Partition { ZfsPool, } -#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +#[derive( + Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, Deserialize, Serialize, +)] pub struct DiskPaths { // Full path to the disk under "/devices". // Should NOT end with a ":partition_letter". @@ -137,7 +139,9 @@ impl DiskPaths { /// This exists as a distinct entity from `Disk` in `sled-storage` because it /// may be desirable to monitor for hardware in one context, and conform disks /// to partition layouts in a different context. 
-#[derive(Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd)] +#[derive( + Debug, Clone, PartialEq, Eq, Hash, Ord, PartialOrd, Deserialize, Serialize, +)] pub struct UnparsedDisk { paths: DiskPaths, slot: i64, diff --git a/sled-hardware/src/illumos/mod.rs b/sled-hardware/src/illumos/mod.rs index 7dd6f9e20d..0bf2fa6e53 100644 --- a/sled-hardware/src/illumos/mod.rs +++ b/sled-hardware/src/illumos/mod.rs @@ -509,6 +509,7 @@ fn poll_blkdev_node( fn poll_device_tree( log: &Logger, inner: &Arc>, + nongimlet_observed_disks: &[UnparsedDisk], tx: &broadcast::Sender, ) -> Result<(), Error> { // Construct a view of hardware by walking the device tree. @@ -517,28 +518,36 @@ fn poll_device_tree( Err(e) => { if let Error::NotAGimlet(root_node) = &e { + let mut inner = inner.lock().unwrap(); + if root_node.as_str() == "i86pc" { // If on i86pc, generate some baseboard information before // returning this error. Each sled agent has to be uniquely // identified for multiple non-gimlets to work. - { - let mut inner = inner.lock().unwrap(); - - if inner.baseboard.is_none() { - let pc_baseboard = Baseboard::new_pc( - gethostname().into_string().unwrap_or_else( - |_| Uuid::new_v4().simple().to_string(), - ), - root_node.clone(), - ); - - info!( - log, - "Generated i86pc baseboard {:?}", pc_baseboard - ); - - inner.baseboard = Some(pc_baseboard); - } + if inner.baseboard.is_none() { + let pc_baseboard = Baseboard::new_pc( + gethostname().into_string().unwrap_or_else(|_| { + Uuid::new_v4().simple().to_string() + }), + root_node.clone(), + ); + + info!( + log, + "Generated i86pc baseboard {:?}", pc_baseboard + ); + + inner.baseboard = Some(pc_baseboard); + } + } + + // For platforms that don't support the HardwareSnapshot + // functionality, sled-agent can be supplied a fixed list of + // UnparsedDisks. Add those to the HardwareSnapshot here if they + // are missing (which they will be for non-gimlets). + for observed_disk in nongimlet_observed_disks { + if !inner.disks.contains(observed_disk) { + inner.disks.insert(observed_disk.clone()); } } } @@ -572,10 +581,11 @@ fn poll_device_tree( async fn hardware_tracking_task( log: Logger, inner: Arc>, + nongimlet_observed_disks: Vec, tx: broadcast::Sender, ) { loop { - match poll_device_tree(&log, &inner, &tx) { + match poll_device_tree(&log, &inner, &nongimlet_observed_disks, &tx) { // We've already warned about `NotAGimlet` by this point, // so let's not spam the logs. Ok(_) | Err(Error::NotAGimlet(_)) => (), @@ -604,7 +614,13 @@ impl HardwareManager { /// /// Arguments: /// - `sled_mode`: The sled's mode of operation (auto detect or force gimlet/scrimlet). - pub fn new(log: &Logger, sled_mode: SledMode) -> Result { + /// - `nongimlet_observed_disks`: For non-gimlets, inject these disks into + /// HardwareSnapshot objects. + pub fn new( + log: &Logger, + sled_mode: SledMode, + nongimlet_observed_disks: Vec, + ) -> Result { let log = log.new(o!("component" => "HardwareManager")); info!(log, "Creating HardwareManager"); @@ -650,7 +666,7 @@ impl HardwareManager { // This mitigates issues where the Sled Agent could try to propagate // an "empty" view of hardware to other consumers before the first // query. - match poll_device_tree(&log, &inner, &tx) { + match poll_device_tree(&log, &inner, &nongimlet_observed_disks, &tx) { Ok(_) => (), // Allow non-gimlet devices to proceed with a "null" view of // hardware, otherwise they won't be able to start. 
@@ -666,7 +682,8 @@ impl HardwareManager { let inner2 = inner.clone(); let tx2 = tx.clone(); tokio::task::spawn(async move { - hardware_tracking_task(log2, inner2, tx2).await + hardware_tracking_task(log2, inner2, nongimlet_observed_disks, tx2) + .await }); Ok(Self { log, inner, tx }) diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index a47bb0d2bc..7714df3fc1 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -30,7 +30,11 @@ pub enum NvmeFormattingError { pub struct HardwareManager {} impl HardwareManager { - pub fn new(_log: &Logger, _sled_mode: SledMode) -> Result { + pub fn new( + _log: &Logger, + _sled_mode: SledMode, + _nongimlet_observed_disks: Vec, + ) -> Result { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 34b30f1bfd..7e1880f2b8 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -531,8 +531,10 @@ impl StorageResources { pub(crate) fn remove_disk(&mut self, id: &DiskIdentity) { info!(self.log, "Removing disk"; "identity" => ?id); let Some(entry) = self.disks.values.get(id) else { + info!(self.log, "Disk not found by id, exiting"; "identity" => ?id); return; }; + let synthetic = match entry { ManagedDisk::ExplicitlyManaged(disk) | ManagedDisk::ImplicitlyManaged(disk) => disk.is_synthetic(), @@ -548,6 +550,7 @@ impl StorageResources { // In production, we disallow removal of synthetic disks as they // are only added once. if synthetic { + info!(self.log, "Not removing synthetic disk"; "identity" => ?id); return; } } From 705e8449a624a47bdaa0c591d170ec74ffac2712 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 16:09:46 -0700 Subject: [PATCH 116/334] fix(deps): update rust crate cargo_toml to v0.19.2 (#5494) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3bf2713d86..7e54f02941 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -859,9 +859,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.19.0" +version = "0.19.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "922d6ea3081d68b9e3e09557204bff47f9b5406a4a304dc917e187f8cafd582b" +checksum = "a98356df42a2eb1bd8f1793ae4ee4de48e384dd974ce5eac8eee802edb7492be" dependencies = [ "serde", "toml 0.8.12", From 640dc7892d626062d94ebfe95bd6f5c6c895254e Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 16:10:48 -0700 Subject: [PATCH 117/334] chore(deps): update rust crate rstest to 0.19.0 (#5496) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7e54f02941..c1560848cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7738,9 +7738,9 @@ dependencies = [ [[package]] name = "rstest" -version = "0.18.2" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97eeab2f3c0a199bc4be135c36c924b6590b88c377d416494288c14f2db30199" +checksum = "9d5316d2a1479eeef1ea21e7f9ddc67c191d497abc8fc3ba2467857abbb68330" dependencies = [ "futures", "futures-timer", @@ -7750,9 +7750,9 @@ dependencies = [ [[package]] name = "rstest_macros" -version = "0.18.2" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"d428f8247852f894ee1be110b375111b586d4fa431f6c46e64ba5a0dcccbe605" +checksum = "04a9df72cc1f67020b0d63ad9bfe4a323e459ea7eb68e03bd9824db49f9a4c25" dependencies = [ "cfg-if", "glob", diff --git a/Cargo.toml b/Cargo.toml index dc502a1ff7..12d9fe96e8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -357,7 +357,7 @@ regress = "0.9.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.8" rpassword = "7.3.1" -rstest = "0.18.2" +rstest = "0.19.0" rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.1.2" From 7e9e0f8a60b2526f8f2916af49790ca9af3cba85 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 16:11:07 -0700 Subject: [PATCH 118/334] chore(deps): update rust crate whoami to v1.5.1 (#5491) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c1560848cb..dfb03b36f4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10871,9 +10871,9 @@ dependencies = [ [[package]] name = "whoami" -version = "1.5.0" +version = "1.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fec781d48b41f8163426ed18e8fc2864c12937df9ce54c88ede7bd47270893e" +checksum = "a44ab49fad634e88f55bf8f9bb3abd2f27d7204172a112c7c9987e01c1c94ea9" dependencies = [ "redox_syscall 0.4.1", "wasite", From a5f4139cad2ef63afe1e8e8ffd72c44200882036 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 10 Apr 2024 16:35:05 -0700 Subject: [PATCH 119/334] chore(deps): update rust to v1.77.2 (#5492) --- rust-toolchain.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust-toolchain.toml b/rust-toolchain.toml index f8a9f2db4f..a2ed3895ec 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -4,5 +4,5 @@ # # We choose a specific toolchain (rather than "stable") for repeatability. The # intent is to keep this up-to-date with recently-released stable Rust. -channel = "1.77.1" +channel = "1.77.2" profile = "default" From c1da84da052ede816541cf4d29c27ddd202fa380 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 04:32:43 +0000 Subject: [PATCH 120/334] chore(deps): update taiki-e/install-action digest to 0c6ec41 (#5504) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`f6e0e17` -> `0c6ec41`](https://togithub.com/taiki-e/install-action/compare/f6e0e17...0c6ec41) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 02eeb44d45..e04d3137fe 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@f6e0e17ee402584b4db04cdcf15775bffd443d9b # v2 + uses: taiki-e/install-action@0c6ec41fd50792c0be884b73e6da4b56616c1c04 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From cc182eede5849ed74cf984d901d6673f8d9be6df Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 11 Apr 2024 10:25:23 -0700 Subject: [PATCH 121/334] Add BlueprintZoneFilter to all_omicron_zones (#5348) Queries now can be explicit about what they want to return given all zones current dispositions. --------- Co-authored-by: Andrew J. Stone --- dev-tools/omdb/src/bin/omdb/db.rs | 76 ++++++++++++++---- dev-tools/reconfigurator-cli/src/main.rs | 5 +- .../db-queries/src/db/datastore/deployment.rs | 7 +- nexus/db-queries/src/db/datastore/vpc.rs | 4 +- nexus/reconfigurator/execution/src/dns.rs | 80 +++++++++++++++---- nexus/reconfigurator/execution/src/lib.rs | 9 ++- .../execution/src/omicron_zones.rs | 12 ++- .../planning/src/blueprint_builder.rs | 14 ++-- nexus/reconfigurator/planning/src/example.rs | 2 +- nexus/reconfigurator/planning/src/planner.rs | 2 +- nexus/types/src/deployment.rs | 47 ++++++----- 11 files changed, 188 insertions(+), 70 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 5d9cb594ca..ecdb651295 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -78,6 +78,8 @@ use nexus_db_queries::db::model::ServiceKind; use nexus_db_queries::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneDisposition; +use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::OmicronZoneType; use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsRecord; @@ -686,15 +688,21 @@ async fn lookup_instance( .with_context(|| format!("loading instance {instance_id}")) } -/// Helper function to look up the kind of the service with the given ID. +#[derive(Clone, Debug)] +struct ServiceInfo { + service_kind: ServiceKind, + disposition: BlueprintZoneDisposition, +} + +/// Helper function to look up the service with the given ID. /// /// Requires the caller to first have fetched the current target blueprint, so /// we can find services that have been added by Reconfigurator. -async fn lookup_service_kind( +async fn lookup_service_info( datastore: &DataStore, service_id: Uuid, current_target_blueprint: Option<&Blueprint>, -) -> anyhow::Result> { +) -> anyhow::Result> { let conn = datastore.pool_connection_for_tests().await?; // We need to check the `service` table (populated during rack setup)... @@ -709,7 +717,11 @@ async fn lookup_service_kind( .optional() .with_context(|| format!("loading service {service_id}"))? { - return Ok(Some(kind)); + // XXX: the services table is going to go away soon! 
+ return Ok(Some(ServiceInfo { + service_kind: kind, + disposition: BlueprintZoneDisposition::InService, + })); } } @@ -719,9 +731,10 @@ async fn lookup_service_kind( return Ok(None); }; - let Some(zone_config) = - blueprint.all_omicron_zones().find_map(|(_sled_id, zone_config)| { - if zone_config.id == service_id { + let Some(zone_config) = blueprint + .all_blueprint_zones(BlueprintZoneFilter::All) + .find_map(|(_sled_id, zone_config)| { + if zone_config.config.id == service_id { Some(zone_config) } else { None @@ -731,7 +744,7 @@ async fn lookup_service_kind( return Ok(None); }; - let service_kind = match &zone_config.zone_type { + let service_kind = match &zone_config.config.zone_type { OmicronZoneType::BoundaryNtp { .. } | OmicronZoneType::InternalNtp { .. } => ServiceKind::Ntp, OmicronZoneType::Clickhouse { .. } => ServiceKind::Clickhouse, @@ -747,7 +760,7 @@ async fn lookup_service_kind( OmicronZoneType::Oximeter { .. } => ServiceKind::Oximeter, }; - Ok(Some(service_kind)) + Ok(Some(ServiceInfo { service_kind, disposition: zone_config.disposition })) } /// Helper function to looks up a probe with the given ID. @@ -1953,9 +1966,20 @@ async fn cmd_db_eips( } enum Owner { - Instance { id: Uuid, project: String, name: String }, - Service { id: Uuid, kind: String }, - Project { id: Uuid, name: String }, + Instance { + id: Uuid, + project: String, + name: String, + }, + Service { + id: Uuid, + kind: String, + disposition: Option, + }, + Project { + id: Uuid, + name: String, + }, None, } @@ -1988,6 +2012,13 @@ async fn cmd_db_eips( Self::None => "none".to_string(), } } + + fn disposition(&self) -> Option { + match self { + Self::Service { disposition, .. } => *disposition, + _ => None, + } + } } #[derive(Tabled)] @@ -2000,6 +2031,13 @@ async fn cmd_db_eips( owner_kind: &'static str, owner_id: String, owner_name: String, + #[tabled(display_with = "display_option_blank")] + owner_disposition: Option, + } + + // Display an empty cell for an Option if it's None. + fn display_option_blank(opt: &Option) -> String { + opt.as_ref().map(|x| x.to_string()).unwrap_or_else(|| "".to_string()) } if verbose { @@ -2022,17 +2060,22 @@ async fn cmd_db_eips( for ip in &ips { let owner = if let Some(owner_id) = ip.parent_id { if ip.is_service { - let kind = match lookup_service_kind( + let (kind, disposition) = match lookup_service_info( datastore, owner_id, current_target_blueprint.as_ref(), ) .await? { - Some(kind) => format!("{kind:?}"), - None => "UNKNOWN (service ID not found)".to_string(), + Some(info) => ( + format!("{:?}", info.service_kind), + Some(info.disposition), + ), + None => { + ("UNKNOWN (service ID not found)".to_string(), None) + } }; - Owner::Service { id: owner_id, kind } + Owner::Service { id: owner_id, kind, disposition } } else { let instance = match lookup_instance(datastore, owner_id).await? 
{ @@ -2096,6 +2139,7 @@ async fn cmd_db_eips( owner_kind: owner.kind(), owner_id: owner.id(), owner_name: owner.name(), + owner_disposition: owner.disposition(), }; rows.push(row); } diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index b065e9586b..6c471e19fb 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -20,6 +20,7 @@ use nexus_reconfigurator_planning::planner::Planner; use nexus_reconfigurator_planning::system::{ SledBuilder, SledHwInventory, SystemDescription, }; +use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::ExternalIp; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::ServiceNetworkInterface; @@ -146,7 +147,9 @@ impl ReconfiguratorSim { builder.set_internal_dns_version(parent_blueprint.internal_dns_version); builder.set_external_dns_version(parent_blueprint.external_dns_version); - for (_, zone) in parent_blueprint.all_omicron_zones() { + for (_, zone) in + parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) + { let zone_id = TypedUuid::::from_untyped_uuid(zone.id); if let Ok(Some(ip)) = zone.zone_type.external_ip() { diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 22b602c71d..19c1b3eda1 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1333,7 +1333,7 @@ mod tests { collection.omicron_zones.len() ); assert_eq!( - blueprint1.all_omicron_zones().count(), + blueprint1.all_omicron_zones(BlueprintZoneFilter::All).count(), collection.all_omicron_zones().count() ); // All zones should be in service. @@ -1419,8 +1419,9 @@ mod tests { blueprint2.blueprint_zones.len() ); assert_eq!( - blueprint1.all_omicron_zones().count() + num_new_sled_zones, - blueprint2.all_omicron_zones().count() + blueprint1.all_omicron_zones(BlueprintZoneFilter::All).count() + + num_new_sled_zones, + blueprint2.all_omicron_zones(BlueprintZoneFilter::All).count() ); // All zones should be in service. diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 1651719f7e..c290439d76 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -722,7 +722,9 @@ impl DataStore { ) // Filter out services that are expunged and shouldn't be resolved // here. 
- .blueprint_zone_filter(BlueprintZoneFilter::VpcFirewall) + .blueprint_zone_filter( + BlueprintZoneFilter::ShouldDeployVpcFirewallRules, + ) .filter(service_network_interface::vpc_id.eq(vpc_id)) .filter(service_network_interface::time_deleted.is_null()) .select(Sled::as_select()); diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 13b21f9961..abc716be75 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -275,8 +275,8 @@ pub fn blueprint_internal_dns_config( .map(|addr| addr.port()) } - for (_, zone) in - blueprint.all_blueprint_zones(BlueprintZoneFilter::InternalDns) + for (_, zone) in blueprint + .all_blueprint_zones(BlueprintZoneFilter::ShouldBeInInternalDns) { let context = || { format!( @@ -474,7 +474,7 @@ pub fn silo_dns_name(name: &omicron_common::api::external::Name) -> String { /// Return the Nexus external addresses according to the given blueprint pub fn blueprint_nexus_external_ips(blueprint: &Blueprint) -> Vec { blueprint - .all_omicron_zones() + .all_omicron_zones(BlueprintZoneFilter::ShouldBeExternallyReachable) .filter_map(|(_, z)| match z.zone_type { OmicronZoneType::Nexus { external_ip, .. } => Some(external_ip), _ => None, @@ -706,12 +706,16 @@ mod test { // To start, we need a mapping from underlay IP to the corresponding // Omicron zone. let mut omicron_zones_by_ip: BTreeMap<_, _> = blueprint - .all_omicron_zones() - .filter(|(_, zone)| zone.id != out_of_service_id) + .all_omicron_zones(BlueprintZoneFilter::ShouldBeInInternalDns) .map(|(_, zone)| (zone.underlay_address, zone.id)) .collect(); println!("omicron zones by IP: {:#?}", omicron_zones_by_ip); + // Check to see that the quiesced zone was actually excluded + assert!(omicron_zones_by_ip + .values() + .all(|id| *id != out_of_service_id)); + // We also want a mapping from underlay IP to the corresponding switch // zone. In this case, the value is the Scrimlet's sled id. let mut switch_sleds_by_ip: BTreeMap<_, _> = sleds_by_id @@ -856,7 +860,7 @@ mod test { let logctx = test_setup_log(TEST_NAME); let (collection, input) = example(&logctx.log, TEST_NAME, 5); let initial_external_dns_generation = Generation::new(); - let blueprint = BlueprintBuilder::build_initial_from_collection( + let mut blueprint = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), initial_external_dns_generation, @@ -902,18 +906,24 @@ mod test { .get(&silo_dns_name(my_silo.name())) .expect("missing silo DNS records"); + // Helper for converting dns records for a given silo to IpAddrs + let records_to_ips = |silo_records: &Vec<_>| { + let mut ips: Vec<_> = silo_records + .into_iter() + .map(|record| match record { + DnsRecord::A(v) => IpAddr::V4(*v), + DnsRecord::Aaaa(v) => IpAddr::V6(*v), + DnsRecord::Srv(_) => panic!("unexpected SRV record"), + }) + .collect(); + ips.sort(); + ips + }; + // Here we're hardcoding the contents of the example blueprint. It // currently puts one Nexus zone on each sled. If we change the example // blueprint, change the expected set of IPs here. 
- let mut silo_record_ips: Vec<_> = silo_records - .into_iter() - .map(|record| match record { - DnsRecord::A(v) => IpAddr::V4(*v), - DnsRecord::Aaaa(v) => IpAddr::V6(*v), - DnsRecord::Srv(_) => panic!("unexpected SRV record"), - }) - .collect(); - silo_record_ips.sort(); + let silo_record_ips: Vec<_> = records_to_ips(silo_records); assert_eq!( silo_record_ips, &[ @@ -924,6 +934,42 @@ mod test { "192.0.2.6".parse::().unwrap(), ] ); + + // Change the zone disposition to quiesced for the nexus zone on the + // first sled. This should ensure we don't get an external DNS record + // back for that sled. + let (_, bp_zones_config) = + blueprint.blueprint_zones.iter_mut().next().unwrap(); + let nexus_zone = bp_zones_config + .zones + .iter_mut() + .find(|z| z.config.zone_type.is_nexus()) + .unwrap(); + nexus_zone.disposition = BlueprintZoneDisposition::Quiesced; + + // Retrieve the DNS config based on the modified blueprint + let external_dns_zone = blueprint_external_dns_config( + &blueprint, + std::slice::from_ref(my_silo.name()), + String::from("oxide.test"), + ); + let silo_records = &external_dns_zone + .records + .get(&silo_dns_name(my_silo.name())) + .expect("missing silo DNS records"); + let silo_record_ips: Vec<_> = records_to_ips(silo_records); + + // We shouldn't see the excluded Nexus address + assert_eq!( + silo_record_ips, + &[ + "192.0.2.3".parse::().unwrap(), + "192.0.2.4".parse::().unwrap(), + "192.0.2.5".parse::().unwrap(), + "192.0.2.6".parse::().unwrap(), + ] + ); + logctx.cleanup_successful(); } @@ -1242,11 +1288,11 @@ mod test { eprintln!("blueprint2: {}", blueprint2.display()); // Figure out the id of the new zone. let zones_before = blueprint - .all_omicron_zones() + .all_omicron_zones(BlueprintZoneFilter::All) .filter_map(|(_, z)| z.zone_type.is_nexus().then_some(z.id)) .collect::>(); let zones_after = blueprint2 - .all_omicron_zones() + .all_omicron_zones(BlueprintZoneFilter::All) .filter_map(|(_, z)| z.zone_type.is_nexus().then_some(z.id)) .collect::>(); let new_zones: Vec<_> = zones_after.difference(&zones_before).collect(); diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index 30b1ab0ce6..f08de22c22 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -10,6 +10,7 @@ use anyhow::{anyhow, Context}; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::identity::Asset; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -109,7 +110,9 @@ where resource_allocation::ensure_zone_resources_allocated( &opctx, datastore, - blueprint.all_omicron_zones().map(|(_sled_id, zone)| zone), + blueprint + .all_omicron_zones(BlueprintZoneFilter::ShouldBeExternallyReachable) + .map(|(_sled_id, zone)| zone), ) .await .map_err(|err| vec![err])?; @@ -151,7 +154,9 @@ where datasets::ensure_crucible_dataset_records_exist( &opctx, datastore, - blueprint.all_omicron_zones().map(|(_sled_id, zone)| zone), + blueprint + .all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) + .map(|(_sled_id, zone)| zone), ) .await .map_err(|err| vec![err])?; diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs index 0150c40e9e..a93c9391ca 100644 --- a/nexus/reconfigurator/execution/src/omicron_zones.rs +++ b/nexus/reconfigurator/execution/src/omicron_zones.rs @@ -2,7 +2,7 @@ // 
License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Manges deployment of Omicron zones to Sled Agents +//! Manages deployment of Omicron zones to Sled Agents use crate::Sled; use anyhow::anyhow; @@ -41,7 +41,7 @@ pub(crate) async fn deploy_zones( &opctx.log, ); let omicron_zones = config - .to_omicron_zones_config(BlueprintZoneFilter::SledAgentPut); + .to_omicron_zones_config(BlueprintZoneFilter::ShouldBeRunning); let result = client .omicron_zones_put(&omicron_zones) .await @@ -274,7 +274,6 @@ mod test { zones: &mut BlueprintZonesConfig, disposition: BlueprintZoneDisposition, ) { - zones.generation = zones.generation.next(); zones.zones.push(BlueprintZoneConfig { config: OmicronZoneConfig { id: Uuid::new_v4(), @@ -292,9 +291,14 @@ mod test { // Both in-service and quiesced zones should be deployed. // - // TODO: add expunged zones to the test (should not be deployed). + // The expunged zone should not be deployed. append_zone(&mut zones1, BlueprintZoneDisposition::InService); + append_zone(&mut zones1, BlueprintZoneDisposition::Expunged); append_zone(&mut zones2, BlueprintZoneDisposition::Quiesced); + // Bump the generation for each config + zones1.generation = zones1.generation.next(); + zones2.generation = zones2.generation.next(); + let (_, blueprint) = create_blueprint(BTreeMap::from([ (sled_id1, zones1), (sled_id2, zones2), diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index 677df2201f..b1d1c09ef1 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -15,6 +15,7 @@ use nexus_inventory::now_db_precision; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; +use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; @@ -286,7 +287,9 @@ impl<'a> BlueprintBuilder<'a> { let mut used_external_ips: HashSet = HashSet::new(); let mut used_macs: HashSet = HashSet::new(); - for (_, z) in parent_blueprint.all_omicron_zones() { + for (_, z) in + parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) + { let zone_type = &z.zone_type; if let OmicronZoneType::Nexus { nic, .. } = zone_type { match nic.ip { @@ -440,7 +443,7 @@ impl<'a> BlueprintBuilder<'a> { // currently exist. let ntp_servers = self .parent_blueprint - .all_omicron_zones() + .all_omicron_zones(BlueprintZoneFilter::All) .filter_map(|(_, z)| { if matches!(z.zone_type, OmicronZoneType::BoundaryNtp { .. }) { Some(Host::for_zone(z.id, ZoneVariant::Other).fqdn()) @@ -547,7 +550,7 @@ impl<'a> BlueprintBuilder<'a> { // settings should be part of `Policy` instead? 
let (external_tls, external_dns_servers) = self .parent_blueprint - .all_omicron_zones() + .all_omicron_zones(BlueprintZoneFilter::All) .find_map(|(_, z)| match &z.zone_type { OmicronZoneType::Nexus { external_tls, @@ -870,6 +873,7 @@ pub mod test { use crate::example::ExampleSystem; use crate::system::SledBuilder; use expectorate::assert_contents; + use nexus_types::deployment::BlueprintZoneFilter; use omicron_common::address::IpRange; use omicron_test_utils::dev::test_setup_log; use sled_agent_client::types::{OmicronZoneConfig, OmicronZoneType}; @@ -881,7 +885,7 @@ pub mod test { pub fn verify_blueprint(blueprint: &Blueprint) { let mut underlay_ips: BTreeMap = BTreeMap::new(); - for (_, zone) in blueprint.all_omicron_zones() { + for (_, zone) in blueprint.all_omicron_zones(BlueprintZoneFilter::All) { if let Some(previous) = underlay_ips.insert(zone.underlay_address, zone) { @@ -1221,7 +1225,7 @@ pub mod test { // that are already in use by existing zones. Attempting to add a // Nexus with no remaining external IPs should fail. let mut used_ip_ranges = Vec::new(); - for (_, z) in parent.all_omicron_zones() { + for (_, z) in parent.all_omicron_zones(BlueprintZoneFilter::All) { if let Some(ip) = z .zone_type .external_ip() diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index dd47f0f17b..d51d144213 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -159,7 +159,7 @@ impl ExampleSystem { // TODO-cleanup use `TypedUuid` everywhere sled_id.into_untyped_uuid(), zones.to_omicron_zones_config( - BlueprintZoneFilter::SledAgentPut, + BlueprintZoneFilter::ShouldBeRunning, ), ) .unwrap(); diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 7a7fb941b1..e5cee423f0 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -474,7 +474,7 @@ mod test { .get(new_sled_id.as_untyped_uuid()) .expect("blueprint should contain zones for new sled") .to_omicron_zones_config( - BlueprintZoneFilter::SledAgentPut + BlueprintZoneFilter::ShouldBeRunning ) } ) diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index ecd9d3aa49..c06dbd310d 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -150,10 +150,10 @@ impl Blueprint { /// along with the associated sled id. pub fn all_omicron_zones( &self, + filter: BlueprintZoneFilter, ) -> impl Iterator { - self.blueprint_zones.iter().flat_map(|(sled_id, z)| { - z.zones.iter().map(|z| (*sled_id, &z.config)) - }) + self.all_blueprint_zones(filter) + .map(|(sled_id, z)| (sled_id, &z.config)) } // Temporary method that provides the list of Omicron zones using @@ -433,27 +433,33 @@ impl BlueprintZoneDisposition { match self { Self::InService => match filter { BlueprintZoneFilter::All => true, - BlueprintZoneFilter::SledAgentPut => true, - BlueprintZoneFilter::InternalDns => true, - BlueprintZoneFilter::VpcFirewall => true, + BlueprintZoneFilter::ShouldBeRunning => true, + BlueprintZoneFilter::ShouldBeExternallyReachable => true, + BlueprintZoneFilter::ShouldBeInInternalDns => true, + BlueprintZoneFilter::ShouldDeployVpcFirewallRules => true, }, Self::Quiesced => match filter { BlueprintZoneFilter::All => true, - // Quiesced zones should not be exposed in DNS. - BlueprintZoneFilter::InternalDns => false, + // Quiesced zones are still running. 
+ BlueprintZoneFilter::ShouldBeRunning => true, - // Quiesced zones are expected to be deployed by sled-agent. - BlueprintZoneFilter::SledAgentPut => true, + // Quiesced zones should not have external resources -- we do + // not want traffic to be directed to them. + BlueprintZoneFilter::ShouldBeExternallyReachable => false, + + // Quiesced zones should not be exposed in DNS. + BlueprintZoneFilter::ShouldBeInInternalDns => false, // Quiesced zones should get firewall rules. - BlueprintZoneFilter::VpcFirewall => true, + BlueprintZoneFilter::ShouldDeployVpcFirewallRules => true, }, Self::Expunged => match filter { BlueprintZoneFilter::All => true, - BlueprintZoneFilter::InternalDns => false, - BlueprintZoneFilter::SledAgentPut => false, - BlueprintZoneFilter::VpcFirewall => false, + BlueprintZoneFilter::ShouldBeRunning => false, + BlueprintZoneFilter::ShouldBeExternallyReachable => false, + BlueprintZoneFilter::ShouldBeInInternalDns => false, + BlueprintZoneFilter::ShouldDeployVpcFirewallRules => false, }, } } @@ -494,14 +500,17 @@ pub enum BlueprintZoneFilter { /// All zones. All, - /// Filter by zones that should be in internal DNS. - InternalDns, + /// Zones that are desired to be in the RUNNING state + ShouldBeRunning, - /// Filter by zones that we should tell sled-agent to deploy. - SledAgentPut, + /// Filter by zones that should have external IP and DNS resources. + ShouldBeExternallyReachable, + + /// Filter by zones that should be in internal DNS. + ShouldBeInInternalDns, /// Filter by zones that should be sent VPC firewall rules. - VpcFirewall, + ShouldDeployVpcFirewallRules, } /// Describe high-level metadata about a blueprint From 8216ca81ea0fc11675c39c40771c73f9779a3992 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Thu, 11 Apr 2024 13:35:09 -0700 Subject: [PATCH 122/334] Use TypedUuid for Zpools in Sled Agent (#5501) Starts the conversion of using "typed UUIDs" within Sled Agent. This is a thin vertical slice, but if we're happy with how this looks, I can keep propagating types to other UUIDs in this area. 
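For context, here is a minimal sketch (not part of this change) of what the typed-UUID pattern buys us. It assumes the workspace's `omicron-uuid-kinds` and `uuid` crates and is only illustrative:

```rust
// Hedged sketch only: illustrates the intent of typed UUIDs; this is not
// code from the patch below.
use omicron_uuid_kinds::{GenericUuid, ZpoolUuid};
use uuid::Uuid;

fn describe_zpool(zpool_id: ZpoolUuid) -> String {
    // Only a zpool ID can be passed here; handing over a sled or instance
    // UUID is now a compile-time type error instead of a latent bug.
    format!("zpool {}", zpool_id)
}

fn example() {
    let zpool_id = ZpoolUuid::new_v4();
    let _ = describe_zpool(zpool_id);

    // Crossing into code that still wants untyped UUIDs (for example the
    // Nexus DB models, which this PR leaves alone) is an explicit,
    // greppable conversion rather than a silent reuse of `Uuid`.
    let untyped: Uuid = zpool_id.into_untyped_uuid();
    let _roundtrip = ZpoolUuid::from_untyped_uuid(untyped);
}
```

Concretely, in this PR: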
- I tried to convert effectively all usage within the sled agent and family of crates - I intentionally did not apply any conversions to the DB schema within Nexus -- so the changes propagating up there are incidental to using Sled Agent APIs --------- Co-authored-by: Rain --- Cargo.lock | 7 +++ clients/sled-agent-client/Cargo.toml | 1 + clients/sled-agent-client/src/lib.rs | 1 + illumos-utils/Cargo.toml | 1 + illumos-utils/src/zpool.rs | 18 ++++---- nexus/db-model/src/inventory.rs | 6 ++- .../db-queries/src/db/datastore/deployment.rs | 4 +- .../reconfigurator/execution/src/datasets.rs | 15 +++++-- nexus/reconfigurator/preparation/src/lib.rs | 4 +- nexus/test-utils/Cargo.toml | 1 + nexus/test-utils/src/lib.rs | 14 +++--- nexus/test-utils/src/resource_helpers.rs | 12 +++--- nexus/tests/integration_tests/unauthorized.rs | 3 +- nexus/types/src/inventory.rs | 3 +- openapi/sled-agent.json | 19 ++++---- schema/omicron-physical-disks.json | 7 ++- schema/rss-service-plan-v3.json | 7 ++- sled-agent/Cargo.toml | 1 + sled-agent/src/params.rs | 5 ++- sled-agent/src/rack_setup/plan/service.rs | 3 +- sled-agent/src/rack_setup/service.rs | 5 ++- sled-agent/src/sim/server.rs | 13 +++--- sled-agent/src/sim/sled_agent.rs | 9 ++-- sled-agent/src/sim/storage.rs | 23 ++++++---- sled-hardware/Cargo.toml | 1 + sled-hardware/src/disk.rs | 10 ++--- sled-hardware/src/illumos/partitions.rs | 8 ++-- sled-hardware/src/non_illumos/mod.rs | 3 +- sled-storage/Cargo.toml | 1 + sled-storage/src/dataset.rs | 4 +- sled-storage/src/disk.rs | 7 +-- sled-storage/src/manager.rs | 5 ++- sled-storage/src/manager_test_harness.rs | 3 +- sled-storage/src/resources.rs | 4 +- uuid-kinds/Cargo.toml | 1 + uuid-kinds/README.adoc | 30 +++++++++++++ uuid-kinds/src/lib.rs | 43 +++++++++++-------- 37 files changed, 196 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index dfb03b36f4..1e92f4b283 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3511,6 +3511,7 @@ dependencies = [ "macaddr", "mockall", "omicron-common", + "omicron-uuid-kinds", "omicron-workspace-hack", "opte-ioctl", "oxide-vpc", @@ -4857,6 +4858,7 @@ dependencies = [ "omicron-passwords", "omicron-sled-agent", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "oximeter", "oximeter-collector", @@ -5630,6 +5632,7 @@ dependencies = [ "omicron-common", "omicron-ddm-admin-client", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "once_cell", "openapi-lint", @@ -5720,6 +5723,7 @@ name = "omicron-uuid-kinds" version = "0.1.0" dependencies = [ "newtype-uuid", + "paste", "schemars", ] @@ -8642,6 +8646,7 @@ dependencies = [ "chrono", "ipnetwork", "omicron-common", + "omicron-uuid-kinds", "omicron-workspace-hack", "progenitor", "regress", @@ -8669,6 +8674,7 @@ dependencies = [ "macaddr", "omicron-common", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "rand 0.8.5", "schemars", @@ -8712,6 +8718,7 @@ dependencies = [ "key-manager", "omicron-common", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "rand 0.8.5", "schemars", diff --git a/clients/sled-agent-client/Cargo.toml b/clients/sled-agent-client/Cargo.toml index 71b94441ed..c418caa33b 100644 --- a/clients/sled-agent-client/Cargo.toml +++ b/clients/sled-agent-client/Cargo.toml @@ -18,3 +18,4 @@ serde.workspace = true slog.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true +omicron-uuid-kinds.workspace = true diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 
d500bdca3a..ccb669af4c 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -47,6 +47,7 @@ progenitor::generate_api!( SourceNatConfig = omicron_common::api::internal::shared::SourceNatConfig, Vni = omicron_common::api::external::Vni, NetworkInterface = omicron_common::api::internal::shared::NetworkInterface, + TypedUuidForZpoolKind = omicron_uuid_kinds::ZpoolUuid, } ); diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index 39b24f7ccd..cd13e4a8a6 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -19,6 +19,7 @@ ipnetwork.workspace = true libc.workspace = true macaddr.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true oxide-vpc.workspace = true oxlog.workspace = true schemars.workspace = true diff --git a/illumos-utils/src/zpool.rs b/illumos-utils/src/zpool.rs index 27d7e0d700..f2b4df6996 100644 --- a/illumos-utils/src/zpool.rs +++ b/illumos-utils/src/zpool.rs @@ -6,11 +6,11 @@ use crate::{execute, ExecutionError, PFEXEC}; use camino::{Utf8Path, Utf8PathBuf}; +use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use std::fmt; use std::str::FromStr; -use uuid::Uuid; pub const ZPOOL_EXTERNAL_PREFIX: &str = "oxp_"; pub const ZPOOL_INTERNAL_PREFIX: &str = "oxi_"; @@ -319,7 +319,7 @@ pub enum ZpoolKind { /// when reading the structure, and validate that the UUID can be utilized. #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct ZpoolName { - id: Uuid, + id: ZpoolUuid, kind: ZpoolKind, } @@ -357,15 +357,15 @@ impl JsonSchema for ZpoolName { } impl ZpoolName { - pub fn new_internal(id: Uuid) -> Self { + pub fn new_internal(id: ZpoolUuid) -> Self { Self { id, kind: ZpoolKind::Internal } } - pub fn new_external(id: Uuid) -> Self { + pub fn new_external(id: ZpoolUuid) -> Self { Self { id, kind: ZpoolKind::External } } - pub fn id(&self) -> Uuid { + pub fn id(&self) -> ZpoolUuid { self.id } @@ -418,10 +418,10 @@ impl FromStr for ZpoolName { fn from_str(s: &str) -> Result { if let Some(s) = s.strip_prefix(ZPOOL_EXTERNAL_PREFIX) { - let id = Uuid::from_str(s).map_err(|e| e.to_string())?; + let id = ZpoolUuid::from_str(s).map_err(|e| e.to_string())?; Ok(ZpoolName::new_external(id)) } else if let Some(s) = s.strip_prefix(ZPOOL_INTERNAL_PREFIX) { - let id = Uuid::from_str(s).map_err(|e| e.to_string())?; + let id = ZpoolUuid::from_str(s).map_err(|e| e.to_string())?; Ok(ZpoolName::new_internal(id)) } else { Err(format!( @@ -525,7 +525,7 @@ mod test { #[test] fn test_parse_external_zpool_name() { - let uuid: Uuid = + let uuid: ZpoolUuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b".parse().unwrap(); let good_name = format!("{}{}", ZPOOL_EXTERNAL_PREFIX, uuid); @@ -536,7 +536,7 @@ mod test { #[test] fn test_parse_internal_zpool_name() { - let uuid: Uuid = + let uuid: ZpoolUuid = "d462a7f7-b628-40fe-80ff-4e4189e2d62b".parse().unwrap(); let good_name = format!("{}{}", ZPOOL_INTERNAL_PREFIX, uuid); diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index cde067f3e8..94306f0f97 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -31,6 +31,8 @@ use nexus_types::inventory::{ BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot, }; use omicron_common::api::internal::shared::NetworkInterface; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::ZpoolUuid; use uuid::Uuid; // See [`nexus_types::inventory::PowerState`]. 
@@ -720,7 +722,7 @@ impl InvZpool { Self { inv_collection_id, time_collected: zpool.time_collected, - id: zpool.id, + id: zpool.id.into_untyped_uuid(), sled_id, total_size: zpool.total_size.into(), } @@ -731,7 +733,7 @@ impl From for nexus_types::inventory::Zpool { fn from(pool: InvZpool) -> Self { Self { time_collected: pool.time_collected, - id: pool.id, + id: ZpoolUuid::from_untyped_uuid(pool.id), total_size: *pool.total_size, } } diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 19c1b3eda1..aaf9ba0de0 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1093,6 +1093,7 @@ mod tests { use omicron_test_utils::dev; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::TypedUuid; + use omicron_uuid_kinds::ZpoolUuid; use pretty_assertions::assert_eq; use rand::thread_rng; use rand::Rng; @@ -1144,7 +1145,8 @@ mod tests { use illumos_utils::zpool::ZpoolName; let zpools = (0..4) .map(|_| { - let name = ZpoolName::new_external(Uuid::new_v4()).to_string(); + let name = + ZpoolName::new_external(ZpoolUuid::new_v4()).to_string(); name.parse().unwrap() }) .collect(); diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index 361e23b7e6..d83ebfc4d6 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -13,6 +13,7 @@ use nexus_db_queries::db::DataStore; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneType; use nexus_types::identity::Asset; +use omicron_uuid_kinds::GenericUuid; use slog::info; use slog::warn; use slog_error_chain::InlineErrorChain; @@ -90,7 +91,12 @@ pub(crate) async fn ensure_crucible_dataset_records_exist( }; let pool_id = zpool_name.id(); - let dataset = Dataset::new(id, pool_id, addr, DatasetKind::Crucible); + let dataset = Dataset::new( + id, + pool_id.into_untyped_uuid(), + addr, + DatasetKind::Crucible, + ); let maybe_inserted = datastore .dataset_insert_if_not_exists(dataset) .await @@ -144,6 +150,7 @@ mod tests { use nexus_db_model::SledUpdate; use nexus_db_model::Zpool; use nexus_test_utils_macros::nexus_test; + use omicron_uuid_kinds::ZpoolUuid; use sled_agent_client::types::OmicronZoneDataset; use uuid::Uuid; @@ -197,7 +204,7 @@ mod tests { let zpool_name: ZpoolName = dataset.pool_name.parse().expect("invalid zpool name"); let zpool = Zpool::new( - zpool_name.id(), + zpool_name.id().into_untyped_uuid(), sled_id, Uuid::new_v4(), // physical_disk_id ); @@ -263,10 +270,10 @@ mod tests { // Create another zpool on one of the sleds, so we can add a new // crucible zone that uses it. 
- let new_zpool_id = Uuid::new_v4(); + let new_zpool_id = ZpoolUuid::new_v4(); for &sled_id in collection.omicron_zones.keys().take(1) { let zpool = Zpool::new( - new_zpool_id, + new_zpool_id.into_untyped_uuid(), sled_id, Uuid::new_v4(), // physical_disk_id ); diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index d7aeec51d8..e01d0cb7a9 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -34,6 +34,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::TypedUuid; +use omicron_uuid_kinds::ZpoolUuid; use slog::error; use slog::Logger; use std::collections::BTreeMap; @@ -77,8 +78,9 @@ impl PlanningInputFromDb<'_> { // It's unfortunate that Nexus knows how Sled Agent // constructs zpool names, but there's not currently an // alternative. + let id = ZpoolUuid::from_untyped_uuid(z.id()); let zpool_name_generated = - illumos_utils::zpool::ZpoolName::new_external(z.id()) + illumos_utils::zpool::ZpoolName::new_external(id) .to_string(); let zpool_name = ZpoolName::from_str(&zpool_name_generated) .map_err(|e| { diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 861527108b..bd066cc4df 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -30,6 +30,7 @@ omicron-common.workspace = true omicron-passwords.workspace = true omicron-sled-agent.workspace = true omicron-test-utils.workspace = true +omicron-uuid-kinds.workspace = true oximeter.workspace = true oximeter-collector.workspace = true oximeter-producer.workspace = true diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index c1acdc1848..a033fb10c7 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -55,6 +55,8 @@ use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_common::api::internal::shared::SwitchLocation; use omicron_sled_agent::sim; use omicron_test_utils::dev; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::ZpoolUuid; use oximeter_collector::Oximeter; use oximeter_producer::LogConfig; use oximeter_producer::Server as ProducerServer; @@ -246,14 +248,14 @@ impl RackInitRequestBuilder { // - The internal DNS configuration for this service fn add_dataset( &mut self, - zpool_id: Uuid, + zpool_id: ZpoolUuid, dataset_id: Uuid, address: SocketAddrV6, kind: DatasetKind, service_name: internal_dns::ServiceName, ) { self.datasets.push(DatasetCreateRequest { - zpool_id, + zpool_id: zpool_id.into_untyped_uuid(), dataset_id, request: DatasetPutRequest { address, kind }, }); @@ -418,7 +420,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .parse::() .expect("Failed to parse port"); - let zpool_id = Uuid::new_v4(); + let zpool_id = ZpoolUuid::new_v4(); let dataset_id = Uuid::new_v4(); eprintln!("DB address: {}", address); self.rack_init_builder.add_dataset( @@ -455,7 +457,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .unwrap(); let port = clickhouse.port(); - let zpool_id = Uuid::new_v4(); + let zpool_id = ZpoolUuid::new_v4(); let dataset_id = Uuid::new_v4(); let address = SocketAddrV6::new(Ipv6Addr::LOCALHOST, port, 0, 0); self.rack_init_builder.add_dataset( @@ -1041,7 +1043,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { sled_id, ); - let zpool_id = Uuid::new_v4(); + let zpool_id = ZpoolUuid::new_v4(); let pool_name = 
illumos_utils::zpool::ZpoolName::new_external(zpool_id) .to_string() .parse() @@ -1088,7 +1090,7 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { sled_id, ); - let zpool_id = Uuid::new_v4(); + let zpool_id = ZpoolUuid::new_v4(); let pool_name = illumos_utils::zpool::ZpoolName::new_external(zpool_id) .to_string() .parse() diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 942ca63f58..2aef32d37c 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -40,6 +40,8 @@ use omicron_common::disk::DiskIdentity; use omicron_sled_agent::sim::SledAgent; use omicron_test_utils::dev::poll::wait_for_condition; use omicron_test_utils::dev::poll::CondCheckError; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::ZpoolUuid; use slog::debug; use std::net::IpAddr; use std::sync::Arc; @@ -696,7 +698,7 @@ pub struct TestDataset { } pub struct TestZpool { - pub id: Uuid, + pub id: ZpoolUuid, pub size: ByteCount, pub datasets: Vec, } @@ -741,7 +743,7 @@ impl DiskTest { self.add_zpool_with_dataset_ext( cptestctx, Uuid::new_v4(), - Uuid::new_v4(), + ZpoolUuid::new_v4(), Uuid::new_v4(), Self::DEFAULT_ZPOOL_SIZE_GIB, ) @@ -752,7 +754,7 @@ impl DiskTest { &mut self, cptestctx: &ControlPlaneTestContext, physical_disk_id: Uuid, - zpool_id: Uuid, + zpool_id: ZpoolUuid, dataset_id: Uuid, gibibytes: u32, ) { @@ -783,7 +785,7 @@ impl DiskTest { let zpool_request = nexus_types::internal_api::params::ZpoolPutRequest { - id: zpool.id, + id: zpool.id.into_untyped_uuid(), physical_disk_id, sled_id: self.sled_agent.id, }; @@ -865,7 +867,7 @@ impl DiskTest { .flat_map(|sled_agent| { sled_agent.zpools.iter().map(|z| z.id) }) - .collect::>(); + .collect::>(); if all_zpools.contains(&zpool.id) { Ok(()) diff --git a/nexus/tests/integration_tests/unauthorized.rs b/nexus/tests/integration_tests/unauthorized.rs index d9f5f38c1f..4f9f75c770 100644 --- a/nexus/tests/integration_tests/unauthorized.rs +++ b/nexus/tests/integration_tests/unauthorized.rs @@ -20,6 +20,7 @@ use nexus_test_utils::http_testing::RequestBuilder; use nexus_test_utils::http_testing::TestResponse; use nexus_test_utils::resource_helpers::DiskTest; use nexus_test_utils_macros::nexus_test; +use omicron_uuid_kinds::ZpoolUuid; use once_cell::sync::Lazy; type ControlPlaneTestContext = @@ -59,7 +60,7 @@ async fn test_unauthorized(cptestctx: &ControlPlaneTestContext) { .add_zpool_with_dataset_ext( cptestctx, nexus_test_utils::PHYSICAL_DISK_UUID.parse().unwrap(), - uuid::Uuid::new_v4(), + ZpoolUuid::new_v4(), uuid::Uuid::new_v4(), DiskTest::DEFAULT_ZPOOL_SIZE_GIB, ) diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index bf2fd16971..d6b0383375 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -21,6 +21,7 @@ use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; pub use omicron_common::api::internal::shared::SourceNatConfig; +use omicron_uuid_kinds::ZpoolUuid; use serde::{Deserialize, Serialize}; use serde_with::serde_as; pub use sled_agent_client::types::OmicronZoneConfig; @@ -371,7 +372,7 @@ impl From for PhysicalDisk { #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] pub struct Zpool { pub time_collected: DateTime, - pub id: Uuid, + pub id: ZpoolUuid, pub total_size: ByteCount, } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 
07a42b461f..9fcccaa561 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -3654,12 +3654,10 @@ "type": "object", "properties": { "expected": { - "type": "string", - "format": "uuid" + "$ref": "#/components/schemas/TypedUuidForZpoolKind" }, "observed": { - "type": "string", - "format": "uuid" + "$ref": "#/components/schemas/TypedUuidForZpoolKind" } }, "required": [ @@ -5533,8 +5531,7 @@ "type": "object", "properties": { "id": { - "type": "string", - "format": "uuid" + "$ref": "#/components/schemas/TypedUuidForZpoolKind" }, "total_size": { "$ref": "#/components/schemas/ByteCount" @@ -6003,8 +6000,7 @@ "$ref": "#/components/schemas/DiskIdentity" }, "pool_id": { - "type": "string", - "format": "uuid" + "$ref": "#/components/schemas/TypedUuidForZpoolKind" } }, "required": [ @@ -7055,6 +7051,10 @@ "sync" ] }, + "TypedUuidForZpoolKind": { + "type": "string", + "format": "uuid" + }, "UpdateArtifactId": { "description": "An identifier for a single update artifact.", "type": "object", @@ -7470,8 +7470,7 @@ "$ref": "#/components/schemas/DiskType" }, "id": { - "type": "string", - "format": "uuid" + "$ref": "#/components/schemas/TypedUuidForZpoolKind" } }, "required": [ diff --git a/schema/omicron-physical-disks.json b/schema/omicron-physical-disks.json index efc1b2cdd2..60c32d98ff 100644 --- a/schema/omicron-physical-disks.json +++ b/schema/omicron-physical-disks.json @@ -65,10 +65,13 @@ "$ref": "#/definitions/DiskIdentity" }, "pool_id": { - "type": "string", - "format": "uuid" + "$ref": "#/definitions/TypedUuidForZpoolKind" } } + }, + "TypedUuidForZpoolKind": { + "type": "string", + "format": "uuid" } } } \ No newline at end of file diff --git a/schema/rss-service-plan-v3.json b/schema/rss-service-plan-v3.json index fcc672a93b..0b7a1468ff 100644 --- a/schema/rss-service-plan-v3.json +++ b/schema/rss-service-plan-v3.json @@ -339,8 +339,7 @@ "$ref": "#/definitions/DiskIdentity" }, "pool_id": { - "type": "string", - "format": "uuid" + "$ref": "#/definitions/TypedUuidForZpoolKind" } } }, @@ -832,6 +831,10 @@ } } }, + "TypedUuidForZpoolKind": { + "type": "string", + "format": "uuid" + }, "Vni": { "description": "A Geneve Virtual Network Identifier", "type": "integer", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 734055b9e5..998d83725a 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -48,6 +48,7 @@ nexus-config.workspace = true nexus-types.workspace = true omicron-common.workspace = true omicron-ddm-admin-client.workspace = true +omicron-uuid-kinds.workspace = true once_cell.workspace = true oximeter.workspace = true oximeter-instruments.workspace = true diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 12c2907f49..627fb11aa0 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -19,6 +19,7 @@ use omicron_common::api::internal::nexus::{ use omicron_common::api::internal::shared::{ NetworkInterface, SourceNatConfig, }; +use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; pub use sled_hardware::DendriteAsic; @@ -251,7 +252,7 @@ impl From for DiskType { #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] pub struct Zpool { - pub id: Uuid, + pub id: ZpoolUuid, pub disk_type: DiskType, } @@ -896,7 +897,7 @@ pub struct InventoryDisk { /// Identifies information about zpools managed by the control plane #[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] pub struct InventoryZpool { - pub id: Uuid, + pub id: ZpoolUuid, pub total_size: ByteCount, } 
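The pattern in the hunks above repeats throughout this patch: zpool identifiers become `ZpoolUuid` inside sled-agent and Nexus types, while code that still traffics in plain `Uuid` (for example the dataset and zpool registration requests) converts explicitly at the boundary through the `GenericUuid` trait. A minimal sketch of that round trip, using only conversions already present in the diff (the `main` wrapper is just for illustration):

```rust
use omicron_uuid_kinds::{GenericUuid, ZpoolUuid};
use uuid::Uuid;

fn main() {
    // A typed zpool ID, as now carried by `Zpool` and `InventoryZpool`.
    let zpool_id = ZpoolUuid::new_v4();

    // Crossing into an interface that still takes a plain `Uuid` is an
    // explicit, visible conversion rather than a silent mix-up of ID kinds.
    let untyped: Uuid = zpool_id.into_untyped_uuid();

    // And the reverse, when re-entering code that has already been migrated.
    let retyped = ZpoolUuid::from_untyped_uuid(untyped);
    assert_eq!(zpool_id, retyped);
}
```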
diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 9e0a2941c5..6bc5083717 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -29,6 +29,7 @@ use omicron_common::backoff::{ retry_notify_ext, retry_policy_internal_service_aggressive, BackoffError, }; use omicron_common::ledger::{self, Ledger, Ledgerable}; +use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_agent_client::{ @@ -353,7 +354,7 @@ impl Plan { .map(|disk| OmicronPhysicalDiskConfig { identity: disk.identity.clone(), id: Uuid::new_v4(), - pool_id: Uuid::new_v4(), + pool_id: ZpoolUuid::new_v4(), }) .collect(); sled_info.request.disks = OmicronPhysicalDisksConfig { diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 5ff6074249..6d7b8cd7c7 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -103,6 +103,7 @@ use omicron_common::backoff::{ }; use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; +use omicron_uuid_kinds::GenericUuid; use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, @@ -726,7 +727,7 @@ impl ServiceInner { zone.dataset_name_and_address() { datasets.push(NexusTypes::DatasetCreateRequest { - zpool_id: dataset_name.pool().id(), + zpool_id: dataset_name.pool().id().into_untyped_uuid(), dataset_id: zone.id, request: NexusTypes::DatasetPutRequest { address: dataset_address.to_string(), @@ -840,7 +841,7 @@ impl ServiceInner { let sled_id = id_map.get(addr).expect("Missing sled"); config.disks.disks.iter().map(|config| { NexusTypes::ZpoolPutRequest { - id: config.pool_id, + id: config.pool_id.into_untyped_uuid(), physical_disk_id: config.id, sled_id: *sled_id, } diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 3a0ab2484a..089760740a 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -33,6 +33,8 @@ use omicron_common::backoff::{ }; use omicron_common::disk::DiskIdentity; use omicron_common::FileKv; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::ZpoolUuid; use slog::{info, Drain, Logger}; use std::collections::BTreeMap; use std::collections::HashMap; @@ -165,7 +167,7 @@ impl Server { // on the physical rack. 
for zpool in &config.storage.zpools { let physical_disk_id = Uuid::new_v4(); - let zpool_id = Uuid::new_v4(); + let zpool_id = ZpoolUuid::new_v4(); let vendor = "synthetic-vendor".to_string(); let serial = format!("synthetic-serial-{zpool_id}"); let model = "synthetic-model".to_string(); @@ -188,7 +190,7 @@ impl Server { sled_agent.create_crucible_dataset(zpool_id, dataset_id).await; datasets.push(NexusTypes::DatasetCreateRequest { - zpool_id, + zpool_id: zpool_id.into_untyped_uuid(), dataset_id, request: NexusTypes::DatasetPutRequest { address: address.to_string(), @@ -363,7 +365,7 @@ pub async fn run_standalone_server( underlay_address: *http_bound.ip(), zone_type: OmicronZoneType::InternalDns { dataset: OmicronZoneDataset { - pool_name: ZpoolName::new_external(Uuid::new_v4()), + pool_name: ZpoolName::new_external(ZpoolUuid::new_v4()), }, http_address: http_bound, dns_address: match dns.dns_server.local_address() { @@ -432,7 +434,7 @@ pub async fn run_standalone_server( underlay_address: ip, zone_type: OmicronZoneType::ExternalDns { dataset: OmicronZoneDataset { - pool_name: ZpoolName::new_external(Uuid::new_v4()), + pool_name: ZpoolName::new_external(ZpoolUuid::new_v4()), }, http_address: external_dns_internal_addr, dns_address: SocketAddr::V6(external_dns_internal_addr), @@ -478,8 +480,9 @@ pub async fn run_standalone_server( let physical_disks = server.sled_agent.get_all_physical_disks().await; let zpools = server.sled_agent.get_zpools().await; for zpool in &zpools { + let zpool_id = ZpoolUuid::from_untyped_uuid(zpool.id); for (dataset_id, address) in - server.sled_agent.get_datasets(zpool.id).await + server.sled_agent.get_datasets(zpool_id).await { datasets.push(NexusTypes::DatasetCreateRequest { zpool_id: zpool.id, diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 455c2988d3..900265ad0d 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -36,6 +36,7 @@ use omicron_common::api::internal::nexus::{ InstanceRuntimeState, VmmRuntimeState, }; use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::ZpoolUuid; use propolis_client::{ types::VolumeConstructionRequest, Client as PropolisClient, }; @@ -547,7 +548,7 @@ impl SledAgent { pub async fn get_datasets( &self, - zpool_id: Uuid, + zpool_id: ZpoolUuid, ) -> Vec<(Uuid, SocketAddr)> { self.storage.lock().await.get_all_datasets(zpool_id) } @@ -555,7 +556,7 @@ impl SledAgent { /// Adds a Zpool to the simulated sled agent. pub async fn create_zpool( &self, - id: Uuid, + id: ZpoolUuid, physical_disk_id: Uuid, size: u64, ) { @@ -569,7 +570,7 @@ impl SledAgent { /// Adds a Crucible Dataset within a zpool. pub async fn create_crucible_dataset( &self, - zpool_id: Uuid, + zpool_id: ZpoolUuid, dataset_id: Uuid, ) -> SocketAddr { self.storage.lock().await.insert_dataset(zpool_id, dataset_id).await @@ -578,7 +579,7 @@ impl SledAgent { /// Returns a crucible dataset within a particular zpool. 
pub async fn get_crucible_dataset( &self, - zpool_id: Uuid, + zpool_id: ZpoolUuid, dataset_id: Uuid, ) -> Arc { self.storage.lock().await.get_dataset(zpool_id, dataset_id).await diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 13c3da4fd0..3c2c4057c1 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -20,6 +20,8 @@ use dropshot::HandlerTaskMode; use dropshot::HttpError; use futures::lock::Mutex; use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::VolumeConstructionRequest; use sled_hardware::DiskVariant; use sled_storage::resources::DiskManagementStatus; @@ -479,14 +481,14 @@ pub(crate) struct PhysicalDisk { } pub(crate) struct Zpool { - id: Uuid, + id: ZpoolUuid, physical_disk_id: Uuid, total_size: u64, datasets: HashMap, } impl Zpool { - fn new(id: Uuid, physical_disk_id: Uuid, total_size: u64) -> Self { + fn new(id: ZpoolUuid, physical_disk_id: Uuid, total_size: u64) -> Self { Zpool { id, physical_disk_id, total_size, datasets: HashMap::new() } } @@ -547,7 +549,7 @@ pub struct Storage { config: Option, physical_disks: HashMap, next_disk_slot: i64, - zpools: HashMap, + zpools: HashMap, crucible_ip: IpAddr, next_crucible_port: u16, } @@ -625,7 +627,7 @@ impl Storage { /// Adds a Zpool to the sled's simulated storage. pub async fn insert_zpool( &mut self, - zpool_id: Uuid, + zpool_id: ZpoolUuid, disk_id: Uuid, size: u64, ) { @@ -634,13 +636,13 @@ impl Storage { } /// Returns an immutable reference to all zpools - pub fn zpools(&self) -> &HashMap { + pub fn zpools(&self) -> &HashMap { &self.zpools } /// Adds a Dataset to the sled's simulated storage. pub async fn insert_dataset( &mut self, - zpool_id: Uuid, + zpool_id: ZpoolUuid, dataset_id: Uuid, ) -> SocketAddr { // Update our local data @@ -691,14 +693,17 @@ impl Storage { self.zpools .values() .map(|pool| nexus_client::types::ZpoolPutRequest { - id: pool.id, + id: pool.id.into_untyped_uuid(), sled_id: self.sled_id, physical_disk_id: pool.physical_disk_id, }) .collect() } - pub fn get_all_datasets(&self, zpool_id: Uuid) -> Vec<(Uuid, SocketAddr)> { + pub fn get_all_datasets( + &self, + zpool_id: ZpoolUuid, + ) -> Vec<(Uuid, SocketAddr)> { let zpool = self.zpools.get(&zpool_id).expect("Zpool does not exist"); zpool @@ -710,7 +715,7 @@ impl Storage { pub async fn get_dataset( &self, - zpool_id: Uuid, + zpool_id: ZpoolUuid, dataset_id: Uuid, ) -> Arc { self.zpools diff --git a/sled-hardware/Cargo.toml b/sled-hardware/Cargo.toml index 1c914e2897..e36f1f0914 100644 --- a/sled-hardware/Cargo.toml +++ b/sled-hardware/Cargo.toml @@ -15,6 +15,7 @@ illumos-utils.workspace = true libc.workspace = true macaddr.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true rand.workspace = true schemars.workspace = true serde.workspace = true diff --git a/sled-hardware/src/disk.rs b/sled-hardware/src/disk.rs index 3730293936..471f9925ca 100644 --- a/sled-hardware/src/disk.rs +++ b/sled-hardware/src/disk.rs @@ -8,11 +8,11 @@ use illumos_utils::zpool::Zpool; use illumos_utils::zpool::ZpoolKind; use illumos_utils::zpool::ZpoolName; use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use slog::Logger; use slog::{info, warn}; -use uuid::Uuid; cfg_if::cfg_if! 
{ if #[cfg(target_os = "illumos")] { @@ -35,7 +35,7 @@ pub enum PooledDiskError { #[error("Zpool UUID required to format this disk")] MissingZpoolUuid, #[error("Observed Zpool with unexpected UUID (saw: {observed}, expected: {expected})")] - UnexpectedUuid { expected: Uuid, observed: Uuid }, + UnexpectedUuid { expected: ZpoolUuid, observed: ZpoolUuid }, #[error("Unexpected disk variant")] UnexpectedVariant, #[error("Zpool does not exist")] @@ -220,7 +220,7 @@ impl PooledDisk { pub fn new( log: &Logger, unparsed_disk: UnparsedDisk, - zpool_id: Option, + zpool_id: Option, ) -> Result { let paths = &unparsed_disk.paths; let variant = unparsed_disk.variant; @@ -273,7 +273,7 @@ pub fn ensure_zpool_exists( log: &Logger, variant: DiskVariant, zpool_path: &Utf8Path, - zpool_id: Option, + zpool_id: Option, ) -> Result { let zpool_name = match Fstyp::get_zpool(&zpool_path) { Ok(zpool_name) => { @@ -312,7 +312,7 @@ pub fn ensure_zpool_exists( id } None => { - let id = Uuid::new_v4(); + let id = ZpoolUuid::new_v4(); info!(log, "Formatting zpool with generated ID"; "id" => ?id); id } diff --git a/sled-hardware/src/illumos/partitions.rs b/sled-hardware/src/illumos/partitions.rs index 32debfc3e1..0308e842c0 100644 --- a/sled-hardware/src/illumos/partitions.rs +++ b/sled-hardware/src/illumos/partitions.rs @@ -12,9 +12,9 @@ use crate::{DiskPaths, DiskVariant, Partition, PooledDiskError}; use camino::Utf8Path; use illumos_utils::zpool::ZpoolName; use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::ZpoolUuid; use slog::info; use slog::Logger; -use uuid::Uuid; #[cfg(test)] use illumos_utils::zpool::MockZpool as Zpool; @@ -148,7 +148,7 @@ pub fn ensure_partition_layout( paths: &DiskPaths, variant: DiskVariant, identity: &DiskIdentity, - zpool_id: Option, + zpool_id: Option, ) -> Result, PooledDiskError> { internal_ensure_partition_layout::( log, paths, variant, identity, zpool_id, @@ -162,7 +162,7 @@ fn internal_ensure_partition_layout( paths: &DiskPaths, variant: DiskVariant, identity: &DiskIdentity, - zpool_id: Option, + zpool_id: Option, ) -> Result, PooledDiskError> { // Open the "Whole Disk" as a raw device to be parsed by the // libefi-illumos library. 
This lets us peek at the GPT before @@ -431,7 +431,7 @@ mod test { }, DiskVariant::U2, &mock_disk_identity(), - Some(Uuid::new_v4()), + Some(ZpoolUuid::new_v4()), ) .expect("Should have succeeded partitioning disk"); diff --git a/sled-hardware/src/non_illumos/mod.rs b/sled-hardware/src/non_illumos/mod.rs index 7714df3fc1..3516962577 100644 --- a/sled-hardware/src/non_illumos/mod.rs +++ b/sled-hardware/src/non_illumos/mod.rs @@ -7,6 +7,7 @@ use crate::disk::{ }; use crate::SledMode; use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::ZpoolUuid; use sled_hardware_types::Baseboard; use slog::Logger; use std::collections::HashSet; @@ -72,7 +73,7 @@ pub fn ensure_partition_layout( _paths: &DiskPaths, _variant: DiskVariant, _identity: &DiskIdentity, - _zpool_id: Option, + _zpool_id: Option, ) -> Result, PooledDiskError> { unimplemented!("Accessing hardware unsupported on non-illumos"); } diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 839908effb..e0e4712330 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -16,6 +16,7 @@ futures.workspace = true illumos-utils.workspace = true key-manager.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true rand.workspace = true schemars = { workspace = true, features = [ "chrono", "uuid1" ] } serde.workspace = true diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index 06eea367b9..7846826ee8 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -794,11 +794,11 @@ async fn finalize_encryption_migration( #[cfg(test)] mod test { use super::*; - use uuid::Uuid; + use omicron_uuid_kinds::ZpoolUuid; #[test] fn serialize_dataset_name() { - let pool = ZpoolName::new_internal(Uuid::new_v4()); + let pool = ZpoolName::new_internal(ZpoolUuid::new_v4()); let kind = DatasetKind::Crucible; let name = DatasetName::new(pool, kind); serde_json::to_string(&name).unwrap(); diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs index 7383475cb9..cf34c689bf 100644 --- a/sled-storage/src/disk.rs +++ b/sled-storage/src/disk.rs @@ -12,6 +12,7 @@ use key_manager::StorageKeyRequester; use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; use omicron_common::ledger::Ledgerable; +use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::{ @@ -29,7 +30,7 @@ use crate::dataset; pub struct OmicronPhysicalDiskConfig { pub identity: DiskIdentity, pub id: Uuid, - pub pool_id: Uuid, + pub pool_id: ZpoolUuid, } #[derive( @@ -100,7 +101,7 @@ impl SyntheticDisk { log: &Logger, mount_config: &MountConfig, raw: RawSyntheticDisk, - zpool_id: Option, + zpool_id: Option, ) -> Self { let path = if raw.path.is_absolute() { raw.path.clone() @@ -284,7 +285,7 @@ impl Disk { log: &Logger, mount_config: &MountConfig, raw_disk: RawDisk, - pool_id: Option, + pool_id: Option, key_requester: Option<&StorageKeyRequester>, ) -> Result { let disk: Disk = match raw_disk { diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index 2cd79e6556..4f45f1771e 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -870,6 +870,7 @@ mod tests { use omicron_common::api::external::Generation; use omicron_common::ledger; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::ZpoolUuid; use std::sync::atomic::Ordering; use uuid::Uuid; @@ -1305,7 +1306,7 @@ mod tests { // First, we format the U.2s to have a zpool. 
This should work, even // without looping in the StorageManager. let first_u2 = &raw_disks[0]; - let first_pool_id = Uuid::new_v4(); + let first_pool_id = ZpoolUuid::new_v4(); let _disk = crate::disk::Disk::new( &logctx.log, &harness.mount_config(), @@ -1317,7 +1318,7 @@ mod tests { .expect("Failed to format U.2"); let second_u2 = &raw_disks[1]; - let second_pool_id = Uuid::new_v4(); + let second_pool_id = ZpoolUuid::new_v4(); let _disk = crate::disk::Disk::new( &logctx.log, &harness.mount_config(), diff --git a/sled-storage/src/manager_test_harness.rs b/sled-storage/src/manager_test_harness.rs index efdbb0b9f6..19501dd4e4 100644 --- a/sled-storage/src/manager_test_harness.rs +++ b/sled-storage/src/manager_test_harness.rs @@ -9,6 +9,7 @@ use crate::disk::{OmicronPhysicalDisksConfig, RawDisk}; use crate::manager::{StorageHandle, StorageManager}; use camino::Utf8PathBuf; use key_manager::StorageKeyRequester; +use omicron_uuid_kinds::ZpoolUuid; use slog::{info, Logger}; use std::sync::{ atomic::{AtomicBool, Ordering}, @@ -322,7 +323,7 @@ impl StorageManagerTestHarness { crate::disk::OmicronPhysicalDiskConfig { identity: identity.clone(), id: Uuid::new_v4(), - pool_id: Uuid::new_v4(), + pool_id: ZpoolUuid::new_v4(), } }) .collect(); diff --git a/sled-storage/src/resources.rs b/sled-storage/src/resources.rs index 7e1880f2b8..a2e75249b3 100644 --- a/sled-storage/src/resources.rs +++ b/sled-storage/src/resources.rs @@ -13,6 +13,7 @@ use cfg_if::cfg_if; use illumos_utils::zpool::ZpoolName; use key_manager::StorageKeyRequester; use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::ZpoolUuid; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::DiskVariant; @@ -20,7 +21,6 @@ use slog::{info, o, warn, Logger}; use std::collections::BTreeMap; use std::sync::Arc; use tokio::sync::watch; -use uuid::Uuid; // The directory within the debug dataset in which bundles are created. const BUNDLE_DIRECTORY: &str = "bundle"; @@ -35,7 +35,7 @@ pub enum DiskManagementError { NotFound, #[error("Expected zpool UUID of {expected}, but saw {observed}")] - ZpoolUuidMismatch { expected: Uuid, observed: Uuid }, + ZpoolUuidMismatch { expected: ZpoolUuid, observed: ZpoolUuid }, #[error("Failed to access keys necessary to unlock storage. This error may be transient.")] KeyManager(String), diff --git a/uuid-kinds/Cargo.toml b/uuid-kinds/Cargo.toml index 126ba8bcf8..bbe3fd3eea 100644 --- a/uuid-kinds/Cargo.toml +++ b/uuid-kinds/Cargo.toml @@ -11,6 +11,7 @@ license = "MPL-2.0" [dependencies] newtype-uuid.workspace = true schemars = { workspace = true, optional = true } +paste.workspace = true [features] default = ["std"] diff --git a/uuid-kinds/README.adoc b/uuid-kinds/README.adoc index 1a22477aad..515ae02b8c 100644 --- a/uuid-kinds/README.adoc +++ b/uuid-kinds/README.adoc @@ -14,6 +14,36 @@ mix different kinds of entities. be shared across Oxide repos. `omicron-uuid-kinds` supports no-std so the kinds can be shared with embedded code as well. +## Adding a new UUID kind + +Start by adding a new element to the invocation of the `impl_typed_uuid_kind!` macro in `src/lib.rs`. For example: + +[source,rust] +``` +impl_typed_uuid_kind! { + // ... + Widget => "widget", + // ... +} +``` + +This will: + +- Create a new `TypedUuidKind` called `WidgetKind`. This kind will become the type parameter to `TypedUuid` and other related generic types. +- Create a type alias `type WidgetUuid = TypedUuid`. + +Then, start changing your UUID types over. 
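For instance, assuming the hypothetical `Widget` kind from the macro example above, switching a field over and converting at a not-yet-migrated boundary might look like this sketch:

[source,rust]
```
use omicron_uuid_kinds::{GenericUuid, WidgetUuid};

pub struct Widget {
    // Previously `pub id: uuid::Uuid`; `WidgetUuid` is the alias generated
    // by the macro example above.
    pub id: WidgetUuid,
}

// Code that still receives a plain `uuid::Uuid` converts explicitly.
fn widget_id_from_untyped(id: uuid::Uuid) -> WidgetUuid {
    WidgetUuid::from_untyped_uuid(id)
}
```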
It's generally easiest to change the type of a UUID in a struct or enum field, and start pulling that thread. + +- If your UUID isn't used in too many places, you can usually just change all users in one go. For an example, see the conversions of several UUID types in https://github.com/oxidecomputer/omicron/pull/5135[#5135]. + +- If your UUID is widely used, you may need to break your change up across several commits. It's easiest to carve out a section of your code to make changes in, and use the `GenericUuid` conversions into and out of this code. For an example, see the ongoing conversion for sled UUIDs in https://github.com/oxidecomputer/omicron/pull/5404[#5404] and https://github.com/oxidecomputer/omicron/pull/5488[#5488]. + +Some special cases: + +. If part of your change is at an OpenAPI schema boundary, then you generally also want to have clients use the same UUID types. The best way to do that currently is to use a `replace` directive, as in https://github.com/oxidecomputer/omicron/pull/5135[#5135]. There is ongoing design work to make this work automatically, in https://github.com/oxidecomputer/typify/issues/503[typify#503] and elsewhere. + +. For Nexus database storage, `nexus-db-model` has a `DbTypedUuid` generic type which can be used. `DbTypedUuid` should not be exposed outside of `nexus-db-model`; instead, prefer to use the regular `TypedUuid` and only convert to `DbTypedUuid` within the lowest layers (i.e. using getters/setters and not making it a public field). This is because the regular `TypedUuid` has much more infrastructure built around it. + ## Determinations As part of this effort, we've made several decisions that could have gone a diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index b698b2a44a..29842cd53f 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -6,7 +6,7 @@ //! A registry for UUID kinds used in Omicron and related projects. //! -//! See this crate's `README.md` for more information. +//! See this crate's `README.adoc` for more information. // Export these types so that other users don't have to pull in newtype-uuid. #[doc(no_inline)] @@ -20,16 +20,20 @@ use schemars::JsonSchema; macro_rules! impl_typed_uuid_kind { ($($kind:ident => $tag:literal),* $(,)?) => { $( - #[cfg_attr(feature = "schemars08", derive(JsonSchema))] - pub enum $kind {} - - impl TypedUuidKind for $kind { - #[inline] - fn tag() -> TypedUuidTag { - // `const` ensures that tags are validated at compile-time. - const TAG: TypedUuidTag = TypedUuidTag::new($tag); - TAG + paste::paste! { + #[cfg_attr(feature = "schemars08", derive(JsonSchema))] + pub enum [< $kind Kind>] {} + + impl TypedUuidKind for [< $kind Kind >] { + #[inline] + fn tag() -> TypedUuidTag { + // `const` ensures that tags are validated at compile-time. + const TAG: TypedUuidTag = TypedUuidTag::new($tag); + TAG + } } + + pub type [< $kind Uuid>] = TypedUuid::<[< $kind Kind >]>; } )* }; @@ -45,13 +49,14 @@ macro_rules! impl_typed_uuid_kind { // Please keep this list in alphabetical order. impl_typed_uuid_kind! 
{ - DownstairsKind => "downstairs", - DownstairsRegionKind => "downstairs_region", - LoopbackAddressKind => "loopback_address", - OmicronZoneKind => "service", - SledKind => "sled", - TufRepoKind => "tuf_repo", - UpstairsKind => "upstairs", - UpstairsRepairKind => "upstairs_repair", - UpstairsSessionKind => "upstairs_session", + Downstairs => "downstairs", + DownstairsRegion => "downstairs_region", + LoopbackAddress => "loopback_address", + OmicronZone => "service", + Sled => "sled", + TufRepo => "tuf_repo", + Upstairs => "upstairs", + UpstairsRepair => "upstairs_repair", + UpstairsSession => "upstairs_session", + Zpool => "zpool", } From 9511b5d234c8874094c8a41b86e6c697eda3e3a7 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Thu, 11 Apr 2024 15:40:41 -0700 Subject: [PATCH 123/334] Remove flaky OxQL test (#5507) Fixes #5498 --- oximeter/db/src/oxql/ast/grammar.rs | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs index 00a0e6e0fe..23db5224da 100644 --- a/oximeter/db/src/oxql/ast/grammar.rs +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -649,7 +649,6 @@ mod tests { use crate::oxql::ast::table_ops::filter::FilterExpr; use crate::oxql::ast::table_ops::filter::SimpleFilter; use crate::oxql::ast::table_ops::group_by::Reducer; - use chrono::DateTime; use chrono::NaiveDate; use chrono::NaiveDateTime; use chrono::NaiveTime; @@ -1215,31 +1214,6 @@ mod tests { assert!(query_parser::query("get a:b | get a:b").is_err()); } - #[test] - fn test_now_with_offset() { - fn check(expr: &str, expected: DateTime) { - // Rough but still-useful bound in microseconds. - const MAX_DIFF_IN_MICROS: i64 = 1000; - let d = query_parser::now_timestamp(expr).unwrap(); - let now = Utc::now(); - let micros = d.timestamp_micros() - expected.timestamp_micros(); - assert!( - micros.abs() <= MAX_DIFF_IN_MICROS, - "Expected `{}` to be within {}us of {}, but it is {}us away", - expr, - MAX_DIFF_IN_MICROS, - now, - micros, - ); - } - check("@now() - 5m", Utc::now() - Duration::from_secs(60 * 5)); - check("@now() + 5m", Utc::now() + Duration::from_secs(60 * 5)); - check("@now() - 5s", Utc::now() - Duration::from_secs(5)); - check("@now() + 5s", Utc::now() + Duration::from_secs(5)); - check("@now() - 1d", Utc::now() - Duration::from_secs(60 * 60 * 24)); - check("@now() + 1d", Utc::now() + Duration::from_secs(60 * 60 * 24)); - } - #[test] fn test_like_only_available_for_strings() { assert!(query_parser::filter_expr("foo ~= 0").is_err()); From 4984d9eec7034a6b83d9ff605088f91044390461 Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 11 Apr 2024 20:31:06 -0700 Subject: [PATCH 124/334] [meta] TypedUuid -> SledUuid, OmicronZoneKind -> OmicronZoneUuid (#5513) Followup from #5501 -- use `SledUuid` everywhere. I wanted to try it out to see how it feels, and I think I like it a lot. Depends on #5501. 
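A quick illustrative sketch of why this is purely a renaming: `SledUuid` is the alias for `TypedUuid` with the sled kind, generated by the `impl_typed_uuid_kind!` macro above, so the two spellings in the signatures changed below are interchangeable (the map contents here are placeholders):

```rust
use omicron_uuid_kinds::{SledKind, SledUuid, TypedUuid};
use std::collections::BTreeMap;

fn main() {
    // The alias and the fully spelled-out form are one and the same type.
    let sled_id: SledUuid = SledUuid::new_v4();
    let same: TypedUuid<SledKind> = sled_id;

    // So a map keyed by `SledUuid` accepts either spelling.
    let mut zones: BTreeMap<SledUuid, &str> = BTreeMap::new();
    zones.insert(same, "placeholder zone config");
    assert!(zones.contains_key(&sled_id));
}
```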
--- dev-tools/reconfigurator-cli/src/main.rs | 16 ++--- .../reconfigurator-cli/tests/test_basic.rs | 5 +- .../db-queries/src/db/datastore/deployment.rs | 6 +- nexus/reconfigurator/execution/src/dns.rs | 4 +- .../planning/src/blueprint_builder.rs | 46 +++++++------- nexus/reconfigurator/planning/src/example.rs | 6 +- nexus/reconfigurator/planning/src/planner.rs | 10 ++- nexus/reconfigurator/planning/src/system.rs | 16 ++--- nexus/reconfigurator/preparation/src/lib.rs | 7 ++- nexus/types/src/deployment.rs | 63 +++++++++---------- nexus/types/src/deployment/planning_input.rs | 56 +++++++---------- uuid-kinds/README.adoc | 14 +++++ 12 files changed, 118 insertions(+), 131 deletions(-) diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 6c471e19fb..e9a2009df7 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -32,8 +32,9 @@ use nexus_types::inventory::OmicronZonesConfig; use nexus_types::inventory::SledRole; use omicron_common::api::external::Generation; use omicron_common::api::external::Name; -use omicron_uuid_kinds::SledKind; -use omicron_uuid_kinds::{GenericUuid, OmicronZoneKind, TypedUuid}; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::SledUuid; use reedline::{Reedline, Signal}; use std::cell::RefCell; use std::collections::BTreeMap; @@ -150,8 +151,7 @@ impl ReconfiguratorSim { for (_, zone) in parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) { - let zone_id = - TypedUuid::::from_untyped_uuid(zone.id); + let zone_id = OmicronZoneUuid::from_untyped_uuid(zone.id); if let Ok(Some(ip)) = zone.zone_type.external_ip() { let external_ip = ExternalIp { id: *self @@ -406,13 +406,13 @@ enum Commands { #[derive(Debug, Args)] struct SledAddArgs { /// id of the new sled - sled_id: Option>, + sled_id: Option, } #[derive(Debug, Args)] struct SledArgs { /// id of the sled - sled_id: TypedUuid, + sled_id: SledUuid, } #[derive(Debug, Args)] @@ -454,7 +454,7 @@ enum BlueprintEditCommands { /// add a Nexus instance to a particular sled AddNexus { /// sled on which to deploy the new instance - sled_id: TypedUuid, + sled_id: SledUuid, }, } @@ -576,7 +576,7 @@ fn cmd_sled_list( #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct Sled { - id: TypedUuid, + id: SledUuid, nzpools: usize, subnet: String, } diff --git a/dev-tools/reconfigurator-cli/tests/test_basic.rs b/dev-tools/reconfigurator-cli/tests/test_basic.rs index 675fa10fc1..a8fd91f156 100644 --- a/dev-tools/reconfigurator-cli/tests/test_basic.rs +++ b/dev-tools/reconfigurator-cli/tests/test_basic.rs @@ -20,8 +20,7 @@ use omicron_test_utils::dev::test_cmds::path_to_executable; use omicron_test_utils::dev::test_cmds::redact_variable; use omicron_test_utils::dev::test_cmds::run_command; use omicron_test_utils::dev::test_cmds::EXIT_SUCCESS; -use omicron_uuid_kinds::SledKind; -use omicron_uuid_kinds::TypedUuid; +use omicron_uuid_kinds::SledUuid; use slog::debug; use std::io::BufReader; use std::io::BufWriter; @@ -119,7 +118,7 @@ async fn test_blueprint_edit(cptestctx: &ControlPlaneTestContext) { .expect("failed to assemble reconfigurator state"); // Smoke check the initial state. 
- let sled_id: TypedUuid = SLED_AGENT_UUID.parse().unwrap(); + let sled_id: SledUuid = SLED_AGENT_UUID.parse().unwrap(); assert!(state1.planning_input.sled_resources(&sled_id).is_some()); assert!(!state1.planning_input.service_ip_pool_ranges().is_empty()); assert!(!state1.silo_names.is_empty()); diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index aaf9ba0de0..d0cdc0fc63 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1092,7 +1092,7 @@ mod tests { use omicron_common::api::external::Generation; use omicron_test_utils::dev; use omicron_uuid_kinds::GenericUuid; - use omicron_uuid_kinds::TypedUuid; + use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use pretty_assertions::assert_eq; use rand::thread_rng; @@ -1202,7 +1202,7 @@ mod tests { ); for (sled_id, agent) in &collection.sled_agents { // TODO-cleanup use `TypedUuid` everywhere - let sled_id = TypedUuid::from_untyped_uuid(*sled_id); + let sled_id = SledUuid::from_untyped_uuid(*sled_id); builder .add_sled( sled_id, @@ -1370,7 +1370,7 @@ mod tests { ); // Add a new sled. - let new_sled_id = TypedUuid::new_v4(); + let new_sled_id = SledUuid::new_v4(); // While we're at it, use a different DNS version to test that that // works. diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index abc716be75..ebe32ff10b 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -530,7 +530,7 @@ mod test { use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::GenericUuid; - use omicron_uuid_kinds::TypedUuid; + use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; @@ -627,7 +627,7 @@ mod test { Generation::new(), policy_sleds.keys().map(|sled_id| { // TODO-cleanup use `TypedUuid` everywhere - TypedUuid::from_untyped_uuid(*sled_id) + SledUuid::from_untyped_uuid(*sled_id) }), "test-suite", ) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index b1d1c09ef1..3fcc54fd04 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -40,8 +40,7 @@ use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::SledKind; -use omicron_uuid_kinds::TypedUuid; +use omicron_uuid_kinds::SledUuid; use rand::rngs::StdRng; use rand::SeedableRng; use slog::o; @@ -62,7 +61,7 @@ use uuid::Uuid; #[derive(Debug, Error)] pub enum Error { #[error("sled {sled_id}: ran out of available addresses for sled")] - OutOfAddresses { sled_id: TypedUuid }, + OutOfAddresses { sled_id: SledUuid }, #[error("no Nexus zones exist in parent blueprint")] NoNexusZonesInParentBlueprint, #[error("no external service IP addresses are available")] @@ -119,7 +118,7 @@ pub struct BlueprintBuilder<'a> { // These fields are used to allocate resources from sleds. input: &'a PlanningInput, - sled_ip_allocators: BTreeMap, IpAllocator>, + sled_ip_allocators: BTreeMap, // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. 
@@ -148,7 +147,7 @@ impl<'a> BlueprintBuilder<'a> { collection: &Collection, internal_dns_version: Generation, external_dns_version: Generation, - all_sleds: impl Iterator>, + all_sleds: impl Iterator, creator: &str, ) -> Result { Self::build_initial_impl( @@ -167,7 +166,7 @@ impl<'a> BlueprintBuilder<'a> { collection: &Collection, internal_dns_version: Generation, external_dns_version: Generation, - all_sleds: impl Iterator>, + all_sleds: impl Iterator, creator: &str, seed: H, ) -> Result { @@ -187,7 +186,7 @@ impl<'a> BlueprintBuilder<'a> { collection: &Collection, internal_dns_version: Generation, external_dns_version: Generation, - all_sleds: impl Iterator>, + all_sleds: impl Iterator, creator: &str, mut rng: BlueprintBuilderRng, ) -> Result { @@ -410,7 +409,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_ntp( &mut self, - sled_id: TypedUuid, + sled_id: SledUuid, ) -> Result { // If there's already an NTP zone on this sled, do nothing. let has_ntp = self @@ -474,7 +473,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_crucible( &mut self, - sled_id: TypedUuid, + sled_id: SledUuid, pool_name: ZpoolName, ) -> Result { // If this sled already has a Crucible zone on this pool, do nothing. @@ -525,7 +524,7 @@ impl<'a> BlueprintBuilder<'a> { /// /// This value may change before a blueprint is actually generated if /// further changes are made to the builder. - pub fn sled_num_nexus_zones(&self, sled_id: TypedUuid) -> usize { + pub fn sled_num_nexus_zones(&self, sled_id: SledUuid) -> usize { self.zones .current_sled_zones(sled_id) .filter(|z| z.config.zone_type.is_nexus()) @@ -534,7 +533,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_multiple_nexus( &mut self, - sled_id: TypedUuid, + sled_id: SledUuid, desired_zone_count: usize, ) -> Result { // Whether Nexus should use TLS and what the external DNS servers it @@ -570,7 +569,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_multiple_nexus_with_config( &mut self, - sled_id: TypedUuid, + sled_id: SledUuid, desired_zone_count: usize, external_tls: bool, external_dns_servers: Vec, @@ -658,7 +657,7 @@ impl<'a> BlueprintBuilder<'a> { fn sled_add_zone( &mut self, - sled_id: TypedUuid, + sled_id: SledUuid, zone: BlueprintZoneConfig, ) -> Result<(), Error> { // Check the sled id and return an appropriate error if it's invalid. @@ -679,10 +678,7 @@ impl<'a> BlueprintBuilder<'a> { /// Returns a newly-allocated underlay address suitable for use by Omicron /// zones - fn sled_alloc_ip( - &mut self, - sled_id: TypedUuid, - ) -> Result { + fn sled_alloc_ip(&mut self, sled_id: SledUuid) -> Result { let sled_subnet = self.sled_resources(sled_id)?.subnet; let allocator = self.sled_ip_allocators.entry(sled_id).or_insert_with(|| { @@ -721,7 +717,7 @@ impl<'a> BlueprintBuilder<'a> { fn sled_resources( &self, - sled_id: TypedUuid, + sled_id: SledUuid, ) -> Result<&SledResources, Error> { self.input.sled_resources(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( @@ -776,11 +772,11 @@ impl BlueprintBuilderRng { /// that we've changed and a _reference_ to the parent blueprint's zones. This /// struct makes it easy for callers iterate over the right set of zones. struct BlueprintZonesBuilder<'a> { - changed_zones: BTreeMap, BlueprintZonesConfig>, + changed_zones: BTreeMap, // Temporarily make a clone of the parent blueprint's zones so we can use // typed UUIDs everywhere. Once we're done migrating, this `Cow` can be // removed. 
- parent_zones: Cow<'a, BTreeMap, BlueprintZonesConfig>>, + parent_zones: Cow<'a, BTreeMap>, } impl<'a> BlueprintZonesBuilder<'a> { @@ -797,7 +793,7 @@ impl<'a> BlueprintZonesBuilder<'a> { /// do that if no changes are being made. pub fn change_sled_zones( &mut self, - sled_id: TypedUuid, + sled_id: SledUuid, ) -> &mut BlueprintZonesConfig { self.changed_zones.entry(sled_id).or_insert_with(|| { if let Some(old_sled_zones) = self.parent_zones.get(&sled_id) { @@ -821,7 +817,7 @@ impl<'a> BlueprintZonesBuilder<'a> { /// sled in the blueprint that's being built pub fn current_sled_zones( &self, - sled_id: TypedUuid, + sled_id: SledUuid, ) -> Box + '_> { if let Some(sled_zones) = self .changed_zones @@ -837,7 +833,7 @@ impl<'a> BlueprintZonesBuilder<'a> { /// Produces an owned map of zones for the requested sleds pub fn into_zones_map( mut self, - sled_ids: impl Iterator>, + sled_ids: impl Iterator, ) -> BTreeMap { sled_ids .map(|sled_id| { @@ -1129,7 +1125,7 @@ pub mod test { .omicron_zones .keys() .next() - .map(|sled_id| TypedUuid::from_untyped_uuid(*sled_id)) + .map(|sled_id| SledUuid::from_untyped_uuid(*sled_id)) .expect("no sleds present"), 1, ) @@ -1172,7 +1168,7 @@ pub mod test { let sled_id = selected_sled_id.expect("found no sleds with Nexus zone"); // TODO-cleanup use `TypedUuid` everywhere - TypedUuid::from_untyped_uuid(sled_id) + SledUuid::from_untyped_uuid(sled_id) }; let parent = BlueprintBuilder::build_initial_from_collection_seeded( diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index d51d144213..cf1b7e79b5 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -16,9 +16,8 @@ use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledKind; -use omicron_uuid_kinds::TypedUuid; use sled_agent_client::types::OmicronZonesConfig; use typed_rng::TypedUuidRng; use uuid::Uuid; @@ -128,8 +127,7 @@ impl ExampleSystem { continue; }; for zone in zones.zones.iter().map(|z| &z.config) { - let service_id = - TypedUuid::::from_untyped_uuid(zone.id); + let service_id = OmicronZoneUuid::from_untyped_uuid(zone.id); if let Ok(Some(ip)) = zone.zone_type.external_ip() { input_builder .add_omicron_zone_external_ip( diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index e5cee423f0..9430d0d10d 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -15,8 +15,7 @@ use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::SledKind; -use omicron_uuid_kinds::TypedUuid; +use omicron_uuid_kinds::SledUuid; use slog::{info, warn, Logger}; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -191,7 +190,7 @@ impl<'a> Planner<'a> { fn ensure_correct_number_of_nexus_zones( &mut self, - sleds_waiting_for_ntp_zone: &BTreeSet>, + sleds_waiting_for_ntp_zone: &BTreeSet, ) -> Result<(), Error> { // Count the number of Nexus zones on all in-service sleds. This will // include sleds that are in service but not eligible for new services, @@ -222,7 +221,7 @@ impl<'a> Planner<'a> { // by their current Nexus zone count. 
Skip sleds with a policy/state // that should be eligible for Nexus but that don't yet have an NTP // zone. - let mut sleds_by_num_nexus: BTreeMap>> = + let mut sleds_by_num_nexus: BTreeMap> = BTreeMap::new(); for sled_id in self .input @@ -244,8 +243,7 @@ impl<'a> Planner<'a> { } // Build a map of sled -> new nexus zone count. - let mut sleds_to_change: BTreeMap, usize> = - BTreeMap::new(); + let mut sleds_to_change: BTreeMap = BTreeMap::new(); 'outer: for _ in 0..nexus_to_add { // `sleds_by_num_nexus` is sorted by key already, and we want to diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index fcaa096ff9..3a03249936 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -32,7 +32,7 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::SledKind; +use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::TypedUuid; use std::collections::BTreeSet; use std::fmt::Debug; @@ -65,7 +65,7 @@ impl SubnetIterator for T where #[derive(Debug)] pub struct SystemDescription { collector: Option, - sleds: IndexMap, Sled>, + sleds: IndexMap, sled_subnets: Box, available_non_scrimlet_slots: BTreeSet, available_scrimlet_slots: BTreeSet, @@ -232,7 +232,7 @@ impl SystemDescription { /// database of an existing system pub fn sled_full( &mut self, - sled_id: TypedUuid, + sled_id: SledUuid, sled_policy: SledPolicy, sled_resources: SledResources, inventory_sp: Option>, @@ -330,7 +330,7 @@ pub enum SledHardware { #[derive(Clone, Debug)] pub struct SledBuilder { - id: Option>, + id: Option, unique: Option, hardware: SledHardware, hardware_slot: Option, @@ -354,7 +354,7 @@ impl SledBuilder { /// Set the id of the sled /// /// Default: randomly generated - pub fn id(mut self, id: TypedUuid) -> Self { + pub fn id(mut self, id: SledUuid) -> Self { self.id = Some(id); self } @@ -417,7 +417,7 @@ pub struct SledHwInventory<'a> { /// Collection. 
#[derive(Clone, Debug)] struct Sled { - sled_id: TypedUuid, + sled_id: SledUuid, sled_subnet: Ipv6Subnet, inventory_sp: Option<(u16, SpState)>, inventory_sled_agent: sled_agent_client::types::Inventory, @@ -428,7 +428,7 @@ struct Sled { impl Sled { /// Create a `Sled` using faked-up information based on a `SledBuilder` fn new_simulated( - sled_id: TypedUuid, + sled_id: SledUuid, sled_subnet: Ipv6Subnet, sled_role: SledRole, unique: Option, @@ -518,7 +518,7 @@ impl Sled { /// Create a `Sled` based on real information from another `Policy` and /// inventory `Collection` fn new_full( - sled_id: TypedUuid, + sled_id: SledUuid, sled_policy: SledPolicy, sled_resources: SledResources, inventory_sp: Option>, diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index e01d0cb7a9..f981a88235 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -33,7 +33,8 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::TypedUuid; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use slog::error; use slog::Logger; @@ -107,7 +108,7 @@ impl PlanningInputFromDb<'_> { resources: SledResources { subnet, zpools }, }; // TODO-cleanup use `TypedUuid` everywhere - let sled_id = TypedUuid::from_untyped_uuid(sled_id); + let sled_id = SledUuid::from_untyped_uuid(sled_id); builder.add_sled(sled_id, sled_details).map_err(|e| { Error::internal_error(&format!( "unexpectedly failed to add sled to planning input: {e}" @@ -127,7 +128,7 @@ impl PlanningInputFromDb<'_> { ); continue; }; - let zone_id = TypedUuid::from_untyped_uuid(zone_id); + let zone_id = OmicronZoneUuid::from_untyped_uuid(zone_id); builder .add_omicron_zone_external_ip( zone_id, diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index c06dbd310d..80765fe49e 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -21,9 +21,8 @@ pub use crate::inventory::OmicronZonesConfig; pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; use newtype_uuid::GenericUuid; -use newtype_uuid::TypedUuid; use omicron_common::api::external::Generation; -use omicron_uuid_kinds::SledKind; +use omicron_uuid_kinds::SledUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -159,21 +158,21 @@ impl Blueprint { // Temporary method that provides the list of Omicron zones using // `TypedUuid`. // - // In the future, `all_omicron_zones` will return `TypedUuid`, + // In the future, `all_omicron_zones` will return `SledUuid`, // and this method will go away. 
pub fn all_omicron_zones_typed( &self, - ) -> impl Iterator, &OmicronZoneConfig)> { + ) -> impl Iterator { self.blueprint_zones.iter().flat_map(|(sled_id, z)| { z.zones.iter().map(move |z| { - (TypedUuid::from_untyped_uuid(*sled_id), &z.config) + (SledUuid::from_untyped_uuid(*sled_id), &z.config) }) }) } /// Iterate over the ids of all sleds in the blueprint - pub fn sleds(&self) -> impl Iterator> + '_ { - self.blueprint_zones.keys().copied().map(TypedUuid::from_untyped_uuid) + pub fn sleds(&self) -> impl Iterator + '_ { + self.blueprint_zones.keys().copied().map(SledUuid::from_untyped_uuid) } /// Summarize the difference between sleds and zones between two @@ -228,7 +227,7 @@ impl Blueprint { generation: zones_found.zones.generation, zones, }; - (TypedUuid::from_untyped_uuid(*sled_id), zones) + (SledUuid::from_untyped_uuid(*sled_id), zones) }) .collect(); @@ -247,16 +246,16 @@ impl Blueprint { } /// Temporary method that returns `self.blueprint_zones`, except the keys - /// are `TypedUuid`. + /// are `SledUuid`. /// /// TODO-cleanup use `TypedUuid` everywhere pub fn typed_blueprint_zones( &self, - ) -> BTreeMap, BlueprintZonesConfig> { + ) -> BTreeMap { self.blueprint_zones .iter() .map(|(sled_id, zones)| { - (TypedUuid::from_untyped_uuid(*sled_id), zones.clone()) + (SledUuid::from_untyped_uuid(*sled_id), zones.clone()) }) .collect() } @@ -578,9 +577,9 @@ impl BlueprintDiff { /// data is valid. fn new( before_meta: DiffBeforeMetadata, - before_zones: BTreeMap, BlueprintZonesConfig>, + before_zones: BTreeMap, after_meta: BlueprintMetadata, - after_zones: BTreeMap, BlueprintZonesConfig>, + after_zones: BTreeMap, ) -> Result { let mut errors = Vec::new(); @@ -610,18 +609,16 @@ impl BlueprintDiff { /// Iterate over sleds only present in the second blueprint of a diff pub fn sleds_added( &self, - ) -> impl ExactSizeIterator< - Item = (TypedUuid, &BlueprintZonesConfig), - > + '_ { + ) -> impl ExactSizeIterator + '_ + { self.sleds.added.iter().map(|(sled_id, zones)| (*sled_id, zones)) } /// Iterate over sleds only present in the first blueprint of a diff pub fn sleds_removed( &self, - ) -> impl ExactSizeIterator< - Item = (TypedUuid, &BlueprintZonesConfig), - > + '_ { + ) -> impl ExactSizeIterator + '_ + { self.sleds.removed.iter().map(|(sled_id, zones)| (*sled_id, zones)) } @@ -629,8 +626,7 @@ impl BlueprintDiff { /// changes. pub fn sleds_modified( &self, - ) -> impl ExactSizeIterator, &DiffSledModified)> + '_ - { + ) -> impl ExactSizeIterator + '_ { self.sleds.modified.iter().map(|(sled_id, sled)| (*sled_id, sled)) } @@ -638,8 +634,7 @@ impl BlueprintDiff { /// changes. pub fn sleds_unchanged( &self, - ) -> impl Iterator, &BlueprintZonesConfig)> + '_ - { + ) -> impl Iterator + '_ { self.sleds.unchanged.iter().map(|(sled_id, zones)| (*sled_id, zones)) } @@ -651,10 +646,10 @@ impl BlueprintDiff { #[derive(Debug)] struct DiffSleds { - added: BTreeMap, BlueprintZonesConfig>, - removed: BTreeMap, BlueprintZonesConfig>, - modified: BTreeMap, DiffSledModified>, - unchanged: BTreeMap, BlueprintZonesConfig>, + added: BTreeMap, + removed: BTreeMap, + modified: BTreeMap, + unchanged: BTreeMap, } impl DiffSleds { @@ -664,8 +659,8 @@ impl DiffSleds { /// The return value only contains the sleds that are present in both /// blueprints. 
fn new( - before: BTreeMap, BlueprintZonesConfig>, - mut after: BTreeMap, BlueprintZonesConfig>, + before: BTreeMap, + mut after: BTreeMap, errors: &mut Vec, ) -> Self { let mut removed = BTreeMap::new(); @@ -778,7 +773,7 @@ pub enum BlueprintDiffSingleError { /// /// For a particular zone, the type should never change. ZoneTypeChanged { - sled_id: TypedUuid, + sled_id: SledUuid, zone_id: Uuid, before: ZoneKind, after: ZoneKind, @@ -824,7 +819,7 @@ impl DiffBeforeMetadata { #[derive(Clone, Debug)] pub struct DiffSledModified { /// id of the sled - pub sled_id: TypedUuid, + pub sled_id: SledUuid, /// generation of the "zones" configuration on the left side pub generation_before: Generation, /// generation of the "zones" configuration on the right side @@ -836,7 +831,7 @@ pub struct DiffSledModified { impl DiffSledModified { fn new( - sled_id: TypedUuid, + sled_id: SledUuid, before: BlueprintZonesConfig, after: BlueprintZonesConfig, errors: &mut Vec, @@ -1258,7 +1253,7 @@ mod table_display { } fn add_whole_sled_records( - sled_id: TypedUuid, + sled_id: SledUuid, sled_zones: &BlueprintZonesConfig, kind: WholeSledKind, section: &mut StSectionBuilder, @@ -1295,7 +1290,7 @@ mod table_display { } fn add_modified_sled_records( - sled_id: TypedUuid, + sled_id: SledUuid, modified: &DiffSledModified, section: &mut StSectionBuilder, ) { diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index da5ae07eef..8244c6b616 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -15,9 +15,8 @@ use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::MacAddr; -use omicron_uuid_kinds::OmicronZoneKind; -use omicron_uuid_kinds::SledKind; -use omicron_uuid_kinds::TypedUuid; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::SledUuid; use serde::Deserialize; use serde::Serialize; use std::collections::btree_map::Entry; @@ -258,14 +257,13 @@ pub struct PlanningInput { external_dns_version: Generation, /// per-sled policy and resources - sleds: BTreeMap, SledDetails>, + sleds: BTreeMap, /// external IPs allocated to Omicron zones - omicron_zone_external_ips: BTreeMap, ExternalIp>, + omicron_zone_external_ips: BTreeMap, /// vNICs allocated to Omicron zones - omicron_zone_nics: - BTreeMap, ServiceNetworkInterface>, + omicron_zone_nics: BTreeMap, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -298,7 +296,7 @@ impl PlanningInput { pub fn all_sleds( &self, filter: SledFilter, - ) -> impl Iterator, &SledDetails)> + '_ { + ) -> impl Iterator + '_ { self.sleds.iter().filter_map(move |(&sled_id, details)| { filter .matches_policy_and_state(details.policy, details.state) @@ -309,29 +307,23 @@ impl PlanningInput { pub fn all_sled_ids( &self, filter: SledFilter, - ) -> impl Iterator> + '_ { + ) -> impl Iterator + '_ { self.all_sleds(filter).map(|(sled_id, _)| sled_id) } pub fn all_sled_resources( &self, filter: SledFilter, - ) -> impl Iterator, &SledResources)> + '_ { + ) -> impl Iterator + '_ { self.all_sleds(filter) .map(|(sled_id, details)| (sled_id, &details.resources)) } - pub fn sled_policy( - &self, - sled_id: &TypedUuid, - ) -> Option { + pub fn sled_policy(&self, sled_id: &SledUuid) -> Option { self.sleds.get(sled_id).map(|details| details.policy) } - pub fn sled_resources( - &self, - sled_id: &TypedUuid, - ) -> Option<&SledResources> { + pub fn sled_resources(&self, sled_id: 
&SledUuid) -> Option<&SledResources> { self.sleds.get(sled_id).map(|details| &details.resources) } @@ -354,15 +346,12 @@ impl PlanningInput { #[derive(Debug, thiserror::Error)] pub enum PlanningInputBuildError { #[error("duplicate sled ID: {0}")] - DuplicateSledId(TypedUuid), + DuplicateSledId(SledUuid), #[error("Omicron zone {zone_id} already has an external IP ({ip:?})")] - DuplicateOmicronZoneExternalIp { - zone_id: TypedUuid, - ip: ExternalIp, - }, + DuplicateOmicronZoneExternalIp { zone_id: OmicronZoneUuid, ip: ExternalIp }, #[error("Omicron zone {zone_id} already has a NIC ({nic:?})")] DuplicateOmicronZoneNic { - zone_id: TypedUuid, + zone_id: OmicronZoneUuid, nic: ServiceNetworkInterface, }, } @@ -373,10 +362,9 @@ pub struct PlanningInputBuilder { policy: Policy, internal_dns_version: Generation, external_dns_version: Generation, - sleds: BTreeMap, SledDetails>, - omicron_zone_external_ips: BTreeMap, ExternalIp>, - omicron_zone_nics: - BTreeMap, ServiceNetworkInterface>, + sleds: BTreeMap, + omicron_zone_external_ips: BTreeMap, + omicron_zone_nics: BTreeMap, } impl PlanningInputBuilder { @@ -411,7 +399,7 @@ impl PlanningInputBuilder { pub fn add_sled( &mut self, - sled_id: TypedUuid, + sled_id: SledUuid, details: SledDetails, ) -> Result<(), PlanningInputBuildError> { match self.sleds.entry(sled_id) { @@ -427,7 +415,7 @@ impl PlanningInputBuilder { pub fn add_omicron_zone_external_ip( &mut self, - zone_id: TypedUuid, + zone_id: OmicronZoneUuid, ip: ExternalIp, ) -> Result<(), PlanningInputBuildError> { match self.omicron_zone_external_ips.entry(zone_id) { @@ -446,7 +434,7 @@ impl PlanningInputBuilder { pub fn add_omicron_zone_nic( &mut self, - zone_id: TypedUuid, + zone_id: OmicronZoneUuid, nic: ServiceNetworkInterface, ) -> Result<(), PlanningInputBuildError> { match self.omicron_zone_nics.entry(zone_id) { @@ -467,13 +455,11 @@ impl PlanningInputBuilder { &mut self.policy } - pub fn sleds(&mut self) -> &BTreeMap, SledDetails> { + pub fn sleds(&mut self) -> &BTreeMap { &self.sleds } - pub fn sleds_mut( - &mut self, - ) -> &mut BTreeMap, SledDetails> { + pub fn sleds_mut(&mut self) -> &mut BTreeMap { &mut self.sleds } diff --git a/uuid-kinds/README.adoc b/uuid-kinds/README.adoc index 515ae02b8c..0e67cc0e67 100644 --- a/uuid-kinds/README.adoc +++ b/uuid-kinds/README.adoc @@ -38,6 +38,20 @@ Then, start changing your UUID types over. It's generally easiest to change the - If your UUID is widely used, you may need to break your change up across several commits. It's easiest to carve out a section of your code to make changes in, and use the `GenericUuid` conversions into and out of this code. For an example, see the ongoing conversion for sled UUIDs in https://github.com/oxidecomputer/omicron/pull/5404[#5404] and https://github.com/oxidecomputer/omicron/pull/5488[#5488]. +[IMPORTANT] +.Using type aliases +==== +For `TypedUuid`, prefer to use the type aliases. For example, `SledUuid` rather than `TypedUuid`. + +Some older code is still being ported over to type aliases; see https://github.com/oxidecomputer/omicron/pull/5511[#5511] for an example. + +Other types that use the same kinds, like `nexus_db_model::DbTypedUuid` and +`typed_rng::TypedUuidRng`, don't have aliases defined for them. That's +because their frequency of use falls below the threshold at which the benefits +of type aliases outweigh the costs. + +==== + Some special cases: . If part of your change is at an OpenAPI schema boundary, then you generally also want to have clients use the same UUID types. 
The best way to do that currently is to use a `replace` directive, as in https://github.com/oxidecomputer/omicron/pull/5135[#5135]. There is ongoing design work to make this work automatically, in https://github.com/oxidecomputer/typify/issues/503[typify#503] and elsewhere. From 4c2e4b5c5d03785fe5eae91896a48dad377efa11 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Thu, 11 Apr 2024 20:58:59 -0700 Subject: [PATCH 125/334] Adding OxQL table operations for limiting data (#5478) - Resolves #5436 - Adds a `Limit` table op, for taking either the first or last `k` points from any number of timeseries. - Add OxQL grammar rules for parsing `first k` and `last k` into the table op types. - Add methods for coalescing the limit operations in a query, similar to coalescing predicates. This lets us push filters through limits (and vice versa), to implement predicate pushdown. - Include limits when fetching data from ClickHouse. This uses the `LIMIT BY` clause, which makes this extremely simple to implement in the database itself. That also reveals an unfortunate interaction between our table sort ordering and the `start_time` for cumulative metrics, specifically those which are created before NTP is synced. That can lead to incorrect results when using the `LIMIT BY` clause, since that takes the first few results. That means data with an early timestamp, and early (and random) start time, can come before the desired data with a later timestamp (but also later start time). In this one case, with a `last k` table op, we pay the cost of a CTE and an additional re-sorting of the data based on timestamp. --- oximeter/db/src/client/mod.rs | 9 +- oximeter/db/src/client/oxql.rs | 121 ++++++++- oximeter/db/src/oxql/ast/grammar.rs | 42 +++ oximeter/db/src/oxql/ast/table_ops/filter.rs | 55 ++++ oximeter/db/src/oxql/ast/table_ops/limit.rs | 263 +++++++++++++++++++ oximeter/db/src/oxql/ast/table_ops/mod.rs | 4 + oximeter/db/src/oxql/query/mod.rs | 238 +++++++++++++++-- oximeter/db/src/oxql/table.rs | 4 +- 8 files changed, 709 insertions(+), 27 deletions(-) create mode 100644 oximeter/db/src/oxql/ast/table_ops/limit.rs diff --git a/oximeter/db/src/client/mod.rs b/oximeter/db/src/client/mod.rs index e92518ae08..9a2b7b1bd3 100644 --- a/oximeter/db/src/client/mod.rs +++ b/oximeter/db/src/client/mod.rs @@ -871,7 +871,14 @@ impl Client { let response = self .client .post(&self.url) - .query(&[("output_format_json_quote_64bit_integers", "0")]) + .query(&[ + ("output_format_json_quote_64bit_integers", "0"), + // TODO-performance: This is needed to get the correct counts of + // rows/bytes accessed during a query, but implies larger memory + // consumption on the server and higher latency for the request. + // We may want to sacrifice accuracy of those counts. 
+ ("wait_end_of_query", "1"), + ]) .body(sql) .send() .await diff --git a/oximeter/db/src/client/oxql.rs b/oximeter/db/src/client/oxql.rs index 4f4d1daeeb..d1ce131581 100644 --- a/oximeter/db/src/client/oxql.rs +++ b/oximeter/db/src/client/oxql.rs @@ -12,6 +12,8 @@ use crate::model; use crate::oxql; use crate::oxql::ast::table_ops::filter; use crate::oxql::ast::table_ops::filter::Filter; +use crate::oxql::ast::table_ops::limit::Limit; +use crate::oxql::ast::table_ops::limit::LimitKind; use crate::query::field_table_name; use crate::Error; use crate::Metric; @@ -147,6 +149,7 @@ impl Client { parsed_query, &mut total_rows_fetched, None, + None, ) .await; probes::oxql__query__done!(|| (&id, &query_id)); @@ -285,6 +288,7 @@ impl Client { query: oxql::Query, total_rows_fetched: &mut u64, outer_predicates: Option, + outer_limit: Option, ) -> Result { let split = query.split(); if let oxql::ast::SplitQuery::Nested { subqueries, transformations } = @@ -299,6 +303,7 @@ impl Client { // the transformation portion of this nested query. let new_outer_predicates = query.coalesced_predicates(outer_predicates.clone()); + let new_outer_limit = query.coalesced_limits(outer_limit); // Run each subquery recursively, and extend the results // accordingly. @@ -313,6 +318,7 @@ impl Client { subq, total_rows_fetched, new_outer_predicates.clone(), + new_outer_limit, ) .await?; query_summaries.extend(res.query_summaries); @@ -371,6 +377,13 @@ impl Client { "outer_predicates" => ?&outer_predicates, "coalesced" => ?&preds, ); + let limit = query.coalesced_limits(outer_limit); + debug!( + query_log, + "coalesced limit operations from flat query"; + "outer_limit" => ?&outer_limit, + "coalesced" => ?&limit, + ); // We generally run a few SQL queries for each OxQL query: // @@ -482,6 +495,7 @@ impl Client { query_log, &schema, &consistent_key_groups, + limit, total_rows_fetched, ) .await?; @@ -536,6 +550,7 @@ impl Client { query_log: &Logger, schema: &TimeseriesSchema, consistent_key_groups: &[ConsistentKeyGroup], + limit: Option, total_rows_fetched: &mut u64, ) -> Result< (Vec, BTreeMap), @@ -560,6 +575,7 @@ impl Client { let measurements_query = self.measurements_query( schema, &key_group_chunk, + limit, total_rows_fetched, )?; let (summary, body) = @@ -645,6 +661,7 @@ impl Client { &self, schema: &TimeseriesSchema, consistent_key_groups: &[ConsistentKeyGroup], + limit: Option, total_rows_fetched: &mut u64, ) -> Result { use std::fmt::Write; @@ -722,14 +739,64 @@ impl Client { // - timestamp // // We care most about the timestamp ordering, since that is assumed (and - // asserted) by downstream table operations. We use the full sort order - // of the table, however, to make things the most efficient. + // asserted) by downstream table operations. + // + // Note that although the tables are sorted by start_time, we _omit_ + // that if the query includes a limiting operation, like `first k`. This + // is an unfortunate interaction between the `LIMIT BY` clause that + // implements this in ClickHouse and the fact that the start times for + // some metrics are not monotonic. In particular, those metrics + // collected before a sled syncs with upstream NTP servers may have + // wildly inaccurate start times. Using the `LIMIT BY` clause in + // ClickHouse along with this sort order means we may end up taking the + // latest samples from a block of metrics with an early start time, even + // if there is a sample with a globally later, and accurate, timestamp, + // but with a start_time _after_ that previous block. 
query.push_str(" ORDER BY timeseries_key"); - if schema.datum_type.is_cumulative() { + if schema.datum_type.is_cumulative() && limit.is_none() { query.push_str(", start_time"); } query.push_str(", timestamp"); + // If provided, push a `LIMIT BY` clause, which implements the `first` + // or `last` table operations directly in ClickHouse. + // + // This clause limits the number of rows _within each group_, which here + // is always the `timeseries_key`. Note that the clause is completely + // independent of the the traditional SQL `LIMIT` clause, pushed below + // to avoid selecting too many rows at once. + if let Some(limit) = limit { + // If this limit takes the _last_ samples, we need to invert the + // sorting by timestamp to be descending. + let is_last = matches!(limit.kind, LimitKind::Last); + if is_last { + query.push_str(" DESC"); + } + + // In either case, add the limit-by clause itself. + query.push_str(" LIMIT "); + write!(query, "{}", limit.count).unwrap(); + query.push_str(" BY timeseries_key"); + + // Possibly invert the timestamp ordering again. + // + // To implement a `last k` operation, above we sort by descending + // timestamps and use the `LIMIT k BY timeseries_key` clause. + // However, this inverts the ordering by timestamp that we need for + // all downstream operations to work correctly. + // + // Restore that ordering here, by putting the now-complete query + // inside a CTE and selecting from that ordered by timestamp. Note + if is_last { + query = format!( + "WITH another_sort_bites_the_dust \ + AS ({query}) \ + SELECT * FROM another_sort_bites_the_dust \ + ORDER BY timeseries_key, timestamp" + ); + } + } + // Push a limit clause, which restricts the number of records we could // return. // @@ -1527,4 +1594,52 @@ mod tests { the remaining 2 input keys", ); } + + #[tokio::test] + async fn test_limit_operations() { + let ctx = setup_oxql_test("test_limit_operations").await; + + // Specify exactly one timeseries we _want_ to fetch, by picking the + // first timeseries we inserted. 
+ let ((expected_target, expected_foo), expected_samples) = + ctx.test_data.samples_by_timeseries.first_key_value().unwrap(); + let query = format!( + "get some_target:some_metric | filter {} | first 1", + exact_filter_for(expected_target, *expected_foo) + ); + let result = ctx + .client + .oxql_query(&query) + .await + .expect("failed to run OxQL query"); + assert_eq!(result.tables.len(), 1, "Should be exactly 1 table"); + let table = result.tables.get(0).unwrap(); + assert_eq!( + table.n_timeseries(), + 1, + "Should have fetched exactly the target timeseries" + ); + assert!( + table.iter().all(|t| t.points.len() == 1), + "Should have fetched exactly 1 point for this timeseries", + ); + + let expected_timeseries = + find_timeseries_in_table(&table, expected_target, expected_foo) + .expect("Table did not contain expected timeseries"); + let measurements: Vec<_> = expected_samples + .iter() + .take(1) + .map(|s| s.measurement.clone()) + .collect(); + let expected_points = Points::delta_from_cumulative(&measurements) + .expect("failed to build expected points from measurements"); + assert_eq!( + expected_points, expected_timeseries.points, + "Did not reconstruct the correct points for the one \ + timeseries the query fetched" + ); + + ctx.cleanup_successful().await; + } } diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs index 23db5224da..c9e646e58d 100644 --- a/oximeter/db/src/oxql/ast/grammar.rs +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -26,6 +26,8 @@ peg::parser! { use crate::oxql::ast::table_ops::BasicTableOp; use crate::oxql::ast::table_ops::TableOp; use crate::oxql::ast::table_ops::group_by::Reducer; + use crate::oxql::ast::table_ops::limit::Limit; + use crate::oxql::ast::table_ops::limit::LimitKind; use crate::oxql::ast::literal::duration_consts; use oximeter::TimeseriesName; use std::time::Duration; @@ -531,12 +533,29 @@ peg::parser! { Align { method, period } } + /// Parse a limit kind + pub rule limit_kind() -> LimitKind + = "first" { LimitKind::First } + / "last" { LimitKind::Last } + + /// Parse a limit table operation + pub rule limit() -> Limit + = kind:limit_kind() _ count:integer_literal_impl() + {? + if count <= 0 || count > usize::MAX as i128 { + return Err("limit count must be a nonzero usize") + }; + let count = std::num::NonZeroUsize::new(count.try_into().unwrap()).unwrap(); + Ok(Limit { kind, count }) + } + pub(super) rule basic_table_op() -> TableOp = g:"get" _ t:timeseries_name() { TableOp::Basic(BasicTableOp::Get(t)) } / f:filter() { TableOp::Basic(BasicTableOp::Filter(f)) } / g:group_by() { TableOp::Basic(BasicTableOp::GroupBy(g)) } / join() { TableOp::Basic(BasicTableOp::Join(Join)) } / a:align() { TableOp::Basic(BasicTableOp::Align(a)) } + / l:limit() { TableOp::Basic(BasicTableOp::Limit(l)) } pub(super) rule grouped_table_op() -> TableOp = "{" _? ops:(query() ++ grouped_table_op_delim()) _? 
"}" @@ -649,6 +668,8 @@ mod tests { use crate::oxql::ast::table_ops::filter::FilterExpr; use crate::oxql::ast::table_ops::filter::SimpleFilter; use crate::oxql::ast::table_ops::group_by::Reducer; + use crate::oxql::ast::table_ops::limit::Limit; + use crate::oxql::ast::table_ops::limit::LimitKind; use chrono::NaiveDate; use chrono::NaiveDateTime; use chrono::NaiveTime; @@ -1305,4 +1326,25 @@ mod tests { .unwrap(); assert_eq!(negated, expected, "Failed to handle multiple negations"); } + + #[test] + fn test_limiting_table_ops() { + assert_eq!( + query_parser::limit("first 100").unwrap(), + Limit { kind: LimitKind::First, count: 100.try_into().unwrap() }, + ); + assert_eq!( + query_parser::limit("last 100").unwrap(), + Limit { kind: LimitKind::Last, count: 100.try_into().unwrap() }, + ); + + assert!(query_parser::limit(&format!( + "first {}", + usize::MAX as i128 + 1 + )) + .is_err()); + assert!(query_parser::limit("first 0").is_err()); + assert!(query_parser::limit("first -1").is_err()); + assert!(query_parser::limit("first \"foo\"").is_err()); + } } diff --git a/oximeter/db/src/oxql/ast/table_ops/filter.rs b/oximeter/db/src/oxql/ast/table_ops/filter.rs index c76a4e713f..e5963fe69c 100644 --- a/oximeter/db/src/oxql/ast/table_ops/filter.rs +++ b/oximeter/db/src/oxql/ast/table_ops/filter.rs @@ -10,6 +10,8 @@ use crate::oxql::ast::cmp::Comparison; use crate::oxql::ast::ident::Ident; use crate::oxql::ast::literal::Literal; use crate::oxql::ast::logical_op::LogicalOp; +use crate::oxql::ast::table_ops::limit::Limit; +use crate::oxql::ast::table_ops::limit::LimitKind; use crate::oxql::point::DataType; use crate::oxql::point::MetricType; use crate::oxql::point::Points; @@ -425,6 +427,59 @@ impl Filter { fn is_simple(&self) -> bool { matches!(self.expr, FilterExpr::Simple(_)) } + + /// Return true if this filtering expression can be reordered around a + /// `limit` table operation. + /// + /// We attempt to push filtering expressions down to the database as much as + /// possible. This involves moving filters "through" an OxQL pipeline, so + /// that we can run them as early as possible, before other operations like + /// a `group_by`. + /// + /// In some cases, but not all, filters interact with limiting table + /// operations, which take the first or last k points from a timeseries. + /// Specifically, we can move a filter around a limit if: + /// + /// - The filter does not refer to timestamps at all + /// - The filter's comparison against timestamps restricts them in the same + /// "direction" as the limit operation. A timestamp filter which takes later + /// values, e.g., `timestamp > t0` can be moved around a `last k` operation; + /// a filter which takes earlier values, e.g., `timestamp < t0` can be moved + /// around a `first k` operation. + /// + /// All other situations return false. Consider a query with `filter + /// timestamp < t0` and `last k`. Those return different results depending + /// on which is run first: + /// + /// - Running the filter then the limit returns the last values before `t0`, + /// so the "end" of that chunk of time. + /// - Running the limit then filter returns the values in the last `k` of + /// the entire timeseries where the timestamp is before `t0`. That set can + /// be empty, if all the last `k` samples have a timestamp _after_ `t0`, + /// whereas the reverse is may well _not_ be empty. + pub(crate) fn can_reorder_around(&self, limit: &Limit) -> bool { + match &self.expr { + FilterExpr::Simple(SimpleFilter { ident, cmp, .. 
}) => { + if ident.as_str() != special_idents::TIMESTAMP { + return true; + } + let is_compatible = match limit.kind { + LimitKind::First => { + matches!(cmp, Comparison::Lt | Comparison::Le) + } + LimitKind::Last => { + matches!(cmp, Comparison::Gt | Comparison::Ge) + } + }; + self.negated ^ is_compatible + } + FilterExpr::Compound(CompoundFilter { left, right, .. }) => { + let left = left.can_reorder_around(limit); + let right = right.can_reorder_around(limit); + self.negated ^ (left && right) + } + } + } } /// Return the names of the implicit fields / columns that a filter can apply diff --git a/oximeter/db/src/oxql/ast/table_ops/limit.rs b/oximeter/db/src/oxql/ast/table_ops/limit.rs new file mode 100644 index 0000000000..46d19b9cdc --- /dev/null +++ b/oximeter/db/src/oxql/ast/table_ops/limit.rs @@ -0,0 +1,263 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! An AST node apply limiting timeseries operations. + +// Copyright 2024 Oxide Computer Company + +use crate::oxql::point::Points; +use crate::oxql::point::ValueArray; +use crate::oxql::point::Values; +use crate::oxql::Error; +use crate::oxql::Table; +use crate::oxql::Timeseries; +use std::num::NonZeroUsize; + +/// The kind of limiting operation +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum LimitKind { + /// Limit the timeseries to the first points. + First, + /// Limit the timeseries to the last points. + Last, +} + +/// A table operation limiting a timeseries to a number of points. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct Limit { + /// The kind of limit + pub kind: LimitKind, + /// The number of points the timeseries is limited to. + pub count: NonZeroUsize, +} +impl Limit { + /// Apply the limit operation to the input tables. + pub(crate) fn apply(&self, tables: &[Table]) -> Result, Error> { + if tables.is_empty() { + return Ok(vec![]); + } + + tables + .iter() + .map(|table| { + let timeseries = table.iter().map(|timeseries| { + let input_points = ×eries.points; + + // Compute the slice indices for this timeseries. + let (start, end) = match self.kind { + LimitKind::First => { + // The count in the limit operation should not be + // larger than the number of data points. + let end = input_points.len().min(self.count.get()); + (0, end) + } + LimitKind::Last => { + // When taking the last k points, we need to + // subtract the count from the end of the array, + // taking care that we don't panic if the count is + // larger than the number of data points. + let start = input_points + .len() + .saturating_sub(self.count.get()); + let end = input_points.len(); + (start, end) + } + }; + + // Slice the various data arrays. 
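+ //
+ // (For example, with 3 input points and `last 2`, start = 1 and
+ // end = 3, so each array below keeps only its final two entries.)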
+ let start_times = input_points + .start_times + .as_ref() + .map(|s| s[start..end].to_vec()); + let timestamps = + input_points.timestamps[start..end].to_vec(); + let values = input_points + .values + .iter() + .map(|vals| { + let values = match &vals.values { + ValueArray::Integer(inner) => { + ValueArray::Integer( + inner[start..end].to_vec(), + ) + } + ValueArray::Double(inner) => { + ValueArray::Double( + inner[start..end].to_vec(), + ) + } + ValueArray::Boolean(inner) => { + ValueArray::Boolean( + inner[start..end].to_vec(), + ) + } + ValueArray::String(inner) => { + ValueArray::String( + inner[start..end].to_vec(), + ) + } + ValueArray::IntegerDistribution(inner) => { + ValueArray::IntegerDistribution( + inner[start..end].to_vec(), + ) + } + ValueArray::DoubleDistribution(inner) => { + ValueArray::DoubleDistribution( + inner[start..end].to_vec(), + ) + } + }; + Values { values, metric_type: vals.metric_type } + }) + .collect(); + let points = Points { start_times, timestamps, values }; + Timeseries { + fields: timeseries.fields.clone(), + points, + alignment: timeseries.alignment, + } + }); + Table::from_timeseries(table.name(), timeseries) + }) + .collect() + } +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::oxql::point::{DataType, MetricType}; + use chrono::Utc; + use oximeter::FieldValue; + use std::{collections::BTreeMap, time::Duration}; + + fn test_tables() -> Vec
{ + let mut fields = BTreeMap::new(); + fields.insert("foo".to_string(), FieldValue::from("bar")); + fields.insert("bar".to_string(), FieldValue::from(1u8)); + + let now = Utc::now(); + let timestamps = vec![ + now - Duration::from_secs(4), + now - Duration::from_secs(3), + now - Duration::from_secs(2), + ]; + + let mut timeseries = Timeseries::new( + fields.clone().into_iter(), + DataType::Integer, + MetricType::Gauge, + ) + .unwrap(); + timeseries.points.timestamps = timestamps.clone(); + timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ + Some(1), + Some(2), + Some(3), + ]); + let table1 = + Table::from_timeseries("first", std::iter::once(timeseries)) + .unwrap(); + + let mut timeseries = Timeseries::new( + fields.clone().into_iter(), + DataType::Integer, + MetricType::Gauge, + ) + .unwrap(); + timeseries.points.timestamps = timestamps.clone(); + timeseries.points.values[0].values.as_integer_mut().unwrap().extend([ + Some(4), + Some(5), + Some(6), + ]); + let table2 = + Table::from_timeseries("second", std::iter::once(timeseries)) + .unwrap(); + + vec![table1, table2] + } + + #[test] + fn test_first_k() { + test_limit_impl(LimitKind::First); + } + + #[test] + fn test_last_k() { + test_limit_impl(LimitKind::Last); + } + + fn test_limit_impl(kind: LimitKind) { + let (start, end) = match kind { + LimitKind::First => (0, 2), + LimitKind::Last => (1, 3), + }; + + // Create test data and apply limit operation. + let tables = test_tables(); + let limit = Limit { kind, count: 2.try_into().unwrap() }; + let limited = limit.apply(&tables).expect("This should be infallible"); + assert_eq!( + tables.len(), + limited.len(), + "Limiting should not change the number of tables" + ); + + // Should apply to all tables the same way. + for (table, limited_table) in tables.iter().zip(limited.iter()) { + assert_eq!( + table.name(), + limited_table.name(), + "Limited table whould have the same name" + ); + + // Compare all timeseries. + for (timeseries, limited_timeseries) in + table.iter().zip(limited_table.iter()) + { + // The fields and basic shape should not change. + assert_eq!( + timeseries.fields, limited_timeseries.fields, + "Limited table should have the same fields" + ); + assert_eq!( + timeseries.alignment, limited_timeseries.alignment, + "Limited timeseries should have the same alignment" + ); + assert_eq!( + timeseries.points.dimensionality(), + limited_timeseries.points.dimensionality(), + "Limited timeseries should have the same number of dimensions" + ); + + // Compare data points themselves. + // + // These depend on the limit operation. + let points = ×eries.points; + let limited_points = &limited_timeseries.points; + assert_eq!(points.start_times, limited_points.start_times); + assert_eq!( + points.timestamps[start..end], + limited_points.timestamps + ); + assert_eq!( + limited_points.values[0].values.as_integer().unwrap(), + &points.values[0].values.as_integer().unwrap()[start..end], + "Points should be limited to [{start}..{end}]", + ); + } + } + + // Check that limiting the table to more points than exist returns the + // whole table. 
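+ // (The test tables above hold only 3 points each, so a count of 100
+ // covers every point and the output should equal the input.)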
+ let limit = Limit { kind, count: 100.try_into().unwrap() }; + let limited = limit.apply(&tables).expect("This should be infallible"); + assert_eq!( + limited, + tables, + "Limiting tables to more than their length should return the same thing" + ); + } +} diff --git a/oximeter/db/src/oxql/ast/table_ops/mod.rs b/oximeter/db/src/oxql/ast/table_ops/mod.rs index d9930962f8..46f5106a08 100644 --- a/oximeter/db/src/oxql/ast/table_ops/mod.rs +++ b/oximeter/db/src/oxql/ast/table_ops/mod.rs @@ -11,11 +11,13 @@ pub mod filter; pub mod get; pub mod group_by; pub mod join; +pub mod limit; use self::align::Align; use self::filter::Filter; use self::group_by::GroupBy; use self::join::Join; +use self::limit::Limit; use crate::oxql::ast::Query; use crate::oxql::Error; use crate::oxql::Table; @@ -31,6 +33,7 @@ pub enum BasicTableOp { GroupBy(GroupBy), Join(Join), Align(Align), + Limit(Limit), } impl BasicTableOp { @@ -45,6 +48,7 @@ impl BasicTableOp { BasicTableOp::GroupBy(g) => g.apply(tables), BasicTableOp::Join(j) => j.apply(tables), BasicTableOp::Align(a) => a.apply(tables, query_end), + BasicTableOp::Limit(l) => l.apply(tables), } } } diff --git a/oximeter/db/src/oxql/query/mod.rs b/oximeter/db/src/oxql/query/mod.rs index bb1c0986fe..1c4383d68d 100644 --- a/oximeter/db/src/oxql/query/mod.rs +++ b/oximeter/db/src/oxql/query/mod.rs @@ -11,6 +11,7 @@ use super::ast::logical_op::LogicalOp; use super::ast::table_ops::filter::CompoundFilter; use super::ast::table_ops::filter::FilterExpr; use super::ast::table_ops::group_by::GroupBy; +use super::ast::table_ops::limit::Limit; use super::ast::table_ops::BasicTableOp; use super::ast::table_ops::TableOp; use super::ast::SplitQuery; @@ -96,7 +97,7 @@ impl Query { self.parsed.transformations() } - /// Return the set of all predicates in the query, coalesced. + /// Return predicates which can be pushed down into the database, if any. /// /// Query optimization is a large topic. There are few rules, and many /// heuristics. However, one of those is extremely useful for our case: @@ -132,6 +133,14 @@ impl Query { /// /// Note that this may return `None`, in the case where there are zero /// predicates of any kind. + /// + /// # Limit operations + /// + /// OxQL table operations which limit data, such as `first k` or `last k`, + /// can also be pushed down into the database in certain cases. Since they + /// change the number of points, but not the timeseries, they cannot be + /// pushed through an `align` operation. But they _can_ be pushed through + /// grouping or other filters. // // Pushing filters through a group by. Consider the following data: // @@ -256,10 +265,11 @@ impl Query { // So that also works fine. pub(crate) fn coalesced_predicates( &self, - mut outer: Option, + outer: Option, ) -> Option { - let maybe_filter = self.transformations().iter().rev().fold( - None, + self.transformations().iter().rev().fold( + // We'll start from the predicates passed from the outer query. + outer, |maybe_filter, next_tr| { // Transformations only return basic ops, since all the // subqueries must be at the prefix of the query. @@ -269,13 +279,6 @@ impl Query { match op { BasicTableOp::GroupBy(GroupBy { identifiers, .. }) => { - // We may have been passed predicates from an outer - // query. Those also need to be restricted, if we're - // trying to push them through a group_by operation. 
- outer = outer.as_ref().and_then(|outer| { - restrict_filter_idents(outer, identifiers) - }); - // Only push through columns referred to in the group by // itself, which replaces the current filter. maybe_filter.as_ref().and_then(|current| { @@ -290,20 +293,84 @@ impl Query { Some(filter.clone()) } } + BasicTableOp::Limit(limit) => { + // A filter can be pushed through a limiting table + // operation in a few cases, see `can_reorder_around` + // for details. + maybe_filter.and_then(|filter| { + if filter.can_reorder_around(limit) { + Some(filter) + } else { + None + } + }) + } _ => maybe_filter, } }, - ); + ) + } - // Merge in any predicates passed from an outer query, which may have - // been restricted as we moved through group_by operations. - match (outer, maybe_filter) { - (None, any) => any, - (Some(outer), None) => Some(outer), - (Some(outer), Some(inner)) => { - Some(outer.merge(&inner, LogicalOp::And)) - } - } + /// Coalesce any limiting table operations, if possible. + pub(crate) fn coalesced_limits( + &self, + maybe_limit: Option, + ) -> Option { + self.transformations().iter().rev().fold( + maybe_limit, + |maybe_limit, next_tr| { + // Transformations only return basic ops, since all the + // subqueries must be at the prefix of the query. + let TableOp::Basic(op) = next_tr else { + unreachable!(); + }; + + match op { + BasicTableOp::Filter(filter) => { + // A limit can be pushed through a filter operation, in + // only a few cases, see `can_reorder_around` for + // details. + maybe_limit.and_then(|limit| { + if filter.can_reorder_around(&limit) { + Some(limit) + } else { + None + } + }) + } + BasicTableOp::Limit(limit) => { + // It is possible to "merge" limits if they're of the + // same kind. To do so, we simply take the one with the + // smaller count. For example + // + // ... | first 10 | first 5 + // + // is equivalent to just + // + // ... | first 5 + let new_limit = if let Some(current_limit) = maybe_limit + { + if limit.kind == current_limit.kind { + Limit { + kind: limit.kind, + count: limit.count.min(current_limit.count), + } + } else { + // If the limits are of different kinds, we replace + // the current one, i.e., drop it and start passing + // through the inner one. + *limit + } + } else { + // No outer limit at all, simply take this one. 
+ *limit + }; + Some(new_limit) + } + _ => maybe_limit, + } + }, + ) } pub(crate) fn split(&self) -> SplitQuery { @@ -370,6 +437,8 @@ mod tests { use crate::oxql::ast::table_ops::filter::FilterExpr; use crate::oxql::ast::table_ops::filter::SimpleFilter; use crate::oxql::ast::table_ops::join::Join; + use crate::oxql::ast::table_ops::limit::Limit; + use crate::oxql::ast::table_ops::limit::LimitKind; use crate::oxql::ast::table_ops::BasicTableOp; use crate::oxql::ast::table_ops::TableOp; use crate::oxql::ast::SplitQuery; @@ -834,4 +903,131 @@ mod tests { ); } } + + #[test] + fn test_coalesce_limits() { + let query = Query::new("get a:b | last 5").unwrap(); + let lim = query.coalesced_limits(None).expect("Should have a limit"); + assert_eq!( + lim.kind, + LimitKind::Last, + "This limit op has the wrong kind" + ); + assert_eq!(lim.count.get(), 5, "Limit has the wrong count"); + } + + #[test] + fn test_coalesce_limits_merge_same_kind_within_query() { + let qs = ["get a:b | last 10 | last 5", "get a:b | last 5 | last 10"]; + for q in qs { + let query = Query::new(q).unwrap(); + let lim = + query.coalesced_limits(None).expect("Should have a limit"); + assert_eq!( + lim.kind, + LimitKind::Last, + "This limit op has the wrong kind" + ); + assert_eq!( + lim.count.get(), + 5, + "Should have merged two limits of the same kind, \ + taking the one with the smaller count" + ); + } + } + + #[test] + fn test_coalesce_limits_do_not_merge_different_kinds_within_query() { + let qs = + ["get a:b | first 10 | last 10", "get a:b | last 10 | first 10"]; + let kinds = [LimitKind::First, LimitKind::Last]; + for (q, kind) in qs.iter().zip(kinds) { + let query = Query::new(q).unwrap(); + let lim = + query.coalesced_limits(None).expect("Should have a limit"); + assert_eq!(lim.kind, kind, "This limit op has the wrong kind"); + assert_eq!(lim.count.get(), 10); + } + } + + #[test] + fn test_coalesce_limits_rearrange_around_timestamp_filters() { + let qs = [ + "get a:b | filter timestamp < @now() | first 10", + "get a:b | filter timestamp > @now() | last 10", + ]; + let kinds = [LimitKind::First, LimitKind::Last]; + for (q, kind) in qs.iter().zip(kinds) { + let query = Query::new(q).unwrap(); + let lim = query.coalesced_limits(None).expect( + "This limit op should have been re-arranged around \ + a compatible timestamp filter", + ); + assert_eq!(lim.kind, kind, "This limit op has the wrong kind"); + assert_eq!(lim.count.get(), 10); + } + } + + #[test] + fn test_coalesce_limits_do_not_rearrange_around_incompatible_timestamp_filters( + ) { + let qs = [ + "get a:b | filter timestamp < @now() | last 10", + "get a:b | filter timestamp > @now() | first 10", + ]; + for q in qs { + let query = Query::new(q).unwrap(); + assert!( + query.coalesced_limits(None).is_none(), + "This limit op should have be merged around an \ + incompatible timestamp filter" + ); + } + } + + #[test] + fn test_coalesce_limits_merge_from_outer_query() { + let query = Query::new("get a:b | last 10").unwrap(); + let outer = + Limit { kind: LimitKind::Last, count: 5.try_into().unwrap() }; + let lim = query + .coalesced_limits(Some(outer)) + .expect("Should have a limit here"); + assert_eq!(lim.kind, LimitKind::Last, "Limit has the wrong kind"); + assert_eq!( + lim.count.get(), + 5, + "Did not pass through outer limit correctly" + ); + } + + #[test] + fn test_coalesce_limits_do_not_merge_different_kind_from_outer_query() { + let query = Query::new("get a:b | last 10").unwrap(); + let outer = + Limit { kind: LimitKind::First, count: 5.try_into().unwrap() }; + 
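+ // The outer `first 5` has a different kind than the inner `last 10`,
+ // so coalescing should leave the inner limit untouched.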
let lim = query + .coalesced_limits(Some(outer)) + .expect("Should have a limit here"); + assert_eq!(lim.kind, LimitKind::Last, "Limit has the wrong kind"); + assert_eq!( + lim.count.get(), + 10, + "Inner limit of different kind should ignore the outer one" + ); + } + + #[test] + fn test_coalesce_limits_do_not_coalesce_incompatible_kind_from_outer_query() + { + let query = Query::new("get a:b | filter timestamp > @now()").unwrap(); + let outer = + Limit { kind: LimitKind::First, count: 5.try_into().unwrap() }; + assert!( + query.coalesced_limits(Some(outer)).is_none(), + "Should not coalesce a limit from the outer query, when the \ + inner query contains an incompatible timestamp filter" + ); + } } diff --git a/oximeter/db/src/oxql/table.rs b/oximeter/db/src/oxql/table.rs index 025935090b..2cd141d2fa 100644 --- a/oximeter/db/src/oxql/table.rs +++ b/oximeter/db/src/oxql/table.rs @@ -26,7 +26,7 @@ use std::hash::Hasher; /// /// This includes the typed key-value pairs that uniquely identify it, and the /// set of timestamps and data values from it. -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] pub struct Timeseries { pub fields: BTreeMap, pub points: Points, @@ -140,7 +140,7 @@ impl Timeseries { /// A table is the result of an OxQL query. It contains a name, usually the name /// of the timeseries schema from which the data is derived, and any number of /// timeseries, which contain the actual data. -#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +#[derive(Clone, Debug, Deserialize, JsonSchema, PartialEq, Serialize)] pub struct Table { // The name of the table. // From 3c4abcc1a3e44594b8189807983f166fff82d851 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 05:24:56 +0000 Subject: [PATCH 126/334] chore(deps): update taiki-e/install-action digest to f8a64c9 (#5515) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`0c6ec41` -> `f8a64c9`](https://togithub.com/taiki-e/install-action/compare/0c6ec41...f8a64c9) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index e04d3137fe..17beb56a48 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@0c6ec41fd50792c0be884b73e6da4b56616c1c04 # v2 + uses: taiki-e/install-action@f8a64c940979268d3ab5ac99c178e718ed90977d # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From c1e354ffd7ce6a7647d47c5861eb2cc8c1ae2e7c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 11 Apr 2024 23:09:10 -0700 Subject: [PATCH 127/334] chore(deps): update rust crate async-trait to 0.1.80 (#5516) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1e92f4b283..21def2998a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -308,9 +308,9 @@ dependencies = [ [[package]] name = "async-trait" -version = "0.1.79" +version = "0.1.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a507401cad91ec6a857ed5513a2073c82a9b9048762b885bb98655b306964681" +checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 12d9fe96e8..be0a738679 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,7 +170,7 @@ approx = "0.5.1" assert_matches = "1.5.0" assert_cmd = "2.0.14" async-bb8-diesel = { git = "https://github.com/oxidecomputer/async-bb8-diesel", rev = "ed7ab5ef0513ba303d33efd41d3e9e381169d59b" } -async-trait = "0.1.79" +async-trait = "0.1.80" atomicwrites = "0.4.3" authz-macros = { path = "nexus/authz-macros" } backoff = { version = "0.4.0", features = [ "tokio" ] } From e8c30c1269882398594087d32bb35030d3f47920 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 10:31:39 +0000 Subject: [PATCH 128/334] chore(deps): update rust crate ref-cast to v1.0.22 (#5469) --- Cargo.lock | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 21def2998a..67c436eda2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7494,18 +7494,18 @@ dependencies = [ [[package]] name = "ref-cast" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "acde58d073e9c79da00f2b5b84eed919c8326832648a5b109b3fce1bb1175280" +checksum = "c4846d4c50d1721b1a3bef8af76924eef20d5e723647333798c1b519b3a9473f" dependencies = [ "ref-cast-impl", ] [[package]] name = "ref-cast-impl" -version = "1.0.20" +version = "1.0.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925" +checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" dependencies = [ "proc-macro2", "quote", From a4029ab5ffe05825f7ded4af46bbfdd37a5b9cca Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 10:59:41 +0000 Subject: [PATCH 129/334] chore(deps): update rust crate curve25519-dalek to v4.1.2 (#5461) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git 
a/Cargo.lock b/Cargo.lock index 67c436eda2..7ae747d217 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1534,9 +1534,9 @@ dependencies = [ [[package]] name = "curve25519-dalek" -version = "4.1.1" +version = "4.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89b8c6a2e4b1f45971ad09761aafb85514a84744b67a95e32c3cc1352d1f65c" +checksum = "0a677b8922c94e01bdbb12126b0bc852f00447528dee1782229af9c720c3f348" dependencies = [ "cfg-if", "cpufeatures", From f7eee157b177c1b1e45da642c666747903b1bd3c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 14:20:19 +0000 Subject: [PATCH 130/334] chore(deps): update rust crate newtype-uuid to 1.1.0 (#5519) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ae747d217..78de113a51 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4458,9 +4458,9 @@ dependencies = [ [[package]] name = "newtype-uuid" -version = "1.0.1" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a5ff2b31594942586c1520da8f1e5c705729ec67b3c2ad0fe459f0b576e4d9a" +checksum = "3526cb7c660872e401beaf3297f95f548ce3b4b4bdd8121b7c0713771d7c4a6e" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index be0a738679..0249e19e01 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -456,7 +456,7 @@ zone = { version = "0.3", default-features = false, features = ["async", "sync"] # the kinds). However, uses of omicron-uuid-kinds _within omicron_ will have # std and the other features enabled because they'll refer to it via # omicron-uuid-kinds.workspace = true. -newtype-uuid = { version = "1.0.1", default-features = false } +newtype-uuid = { version = "1.1.0", default-features = false } omicron-uuid-kinds = { path = "uuid-kinds", features = ["serde", "schemars08", "uuid-v4"] } # NOTE: The test profile inherits from the dev profile, so settings under From e98af0f5579c6ed461e0cac078f632118f8fc4e5 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 12 Apr 2024 16:19:12 +0000 Subject: [PATCH 131/334] chore(deps): update taiki-e/install-action digest to dd9c3a3 (#5521) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`f8a64c9` -> `dd9c3a3`](https://togithub.com/taiki-e/install-action/compare/f8a64c9...dd9c3a3) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 17beb56a48..68e892f232 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@f8a64c940979268d3ab5ac99c178e718ed90977d # v2 + uses: taiki-e/install-action@dd9c3a30915700b12ff7aa4d9e2492417156fde1 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From fc4e1277d2500074c7e1775f1c3deb0c004e24be Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 12 Apr 2024 10:54:47 -0700 Subject: [PATCH 132/334] [nexus] move inventory collection IDs to TypedUuid (#5517) Quite mechanical in the end -- just updating types everywhere. --- Cargo.lock | 2 + clients/nexus-client/src/lib.rs | 1 + dev-tools/omdb/Cargo.toml | 1 + dev-tools/omdb/src/bin/omdb/db.rs | 9 +- dev-tools/omdb/src/bin/omdb/nexus.rs | 3 +- dev-tools/reconfigurator-cli/src/main.rs | 13 +- nexus/db-model/src/inventory.rs | 78 +++-- .../db-queries/src/db/datastore/inventory.rs | 285 ++++++++++-------- nexus/db-queries/src/db/datastore/mod.rs | 5 +- nexus/inventory/Cargo.toml | 1 + nexus/inventory/src/builder.rs | 7 +- nexus/src/app/deployment.rs | 3 +- nexus/src/internal_api/http_entrypoints.rs | 3 +- nexus/types/src/deployment.rs | 3 +- nexus/types/src/inventory.rs | 3 +- openapi/nexus-internal.json | 7 +- uuid-kinds/src/lib.rs | 1 + 17 files changed, 248 insertions(+), 177 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 78de113a51..fcb2a29c81 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4671,6 +4671,7 @@ dependencies = [ "nexus-types", "omicron-common", "omicron-sled-agent", + "omicron-uuid-kinds", "omicron-workspace-hack", "regex", "reqwest", @@ -5506,6 +5507,7 @@ dependencies = [ "omicron-nexus", "omicron-rpaths", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "oximeter-client", "pq-sys", diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 0a1a569f42..e083f5372e 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -35,6 +35,7 @@ progenitor::generate_api!( NewPasswordHash = omicron_passwords::NewPasswordHash, NetworkInterface = omicron_common::api::internal::shared::NetworkInterface, NetworkInterfaceKind = omicron_common::api::internal::shared::NetworkInterfaceKind, + TypedUuidForCollectionKind = omicron_uuid_kinds::CollectionUuid, TypedUuidForDownstairsKind = omicron_uuid_kinds::TypedUuid, TypedUuidForUpstairsKind = omicron_uuid_kinds::TypedUuid, TypedUuidForUpstairsRepairKind = omicron_uuid_kinds::TypedUuid, diff --git a/dev-tools/omdb/Cargo.toml b/dev-tools/omdb/Cargo.toml index 813a4b9552..a0c4e2fe56 100644 --- a/dev-tools/omdb/Cargo.toml +++ b/dev-tools/omdb/Cargo.toml @@ -32,6 +32,7 @@ nexus-db-queries.workspace = true nexus-reconfigurator-preparation.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true oximeter-client.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. 
pq-sys = "*" diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index ecdb651295..a08fa9519c 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -91,6 +91,7 @@ use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Generation; use omicron_common::api::external::InstanceState; use omicron_common::api::external::MacAddr; +use omicron_uuid_kinds::CollectionUuid; use sled_agent_client::types::VolumeConstructionRequest; use std::borrow::Cow; use std::cmp::Ordering; @@ -385,7 +386,7 @@ enum CollectionsCommands { #[derive(Debug, Args)] struct CollectionsShowArgs { /// id of the collection - id: Uuid, + id: CollectionUuid, /// show long strings in their entirety #[clap(long)] show_long_strings: bool, @@ -2918,7 +2919,7 @@ async fn cmd_db_inventory_collections_list( #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct CollectionRow { - id: Uuid, + id: CollectionUuid, started: String, took: String, nsps: i64, @@ -2967,7 +2968,7 @@ async fn cmd_db_inventory_collections_list( .num_milliseconds() ); rows.push(CollectionRow { - id: collection.id, + id: collection.id.into(), started: humantime::format_rfc3339_seconds( collection.time_started.into(), ) @@ -2991,7 +2992,7 @@ async fn cmd_db_inventory_collections_list( async fn cmd_db_inventory_collections_show( opctx: &OpContext, datastore: &DataStore, - id: Uuid, + id: CollectionUuid, long_string_formatter: LongStringFormatter, ) -> Result<(), anyhow::Error> { let collection = datastore diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 4bee664c71..67fe0854a0 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -26,6 +26,7 @@ use nexus_client::types::UninitializedSledId; use nexus_db_queries::db::lookup::LookupPath; use nexus_types::deployment::Blueprint; use nexus_types::inventory::BaseboardId; +use omicron_uuid_kinds::CollectionUuid; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; use reedline::Reedline; @@ -117,7 +118,7 @@ struct BlueprintIdsArgs { #[derive(Debug, Args)] struct CollectionIdArgs { /// id of an inventory collection - collection_id: Uuid, + collection_id: CollectionUuid, } #[derive(Debug, Args)] diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index e9a2009df7..ae451f3c56 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -32,6 +32,7 @@ use nexus_types::inventory::OmicronZonesConfig; use nexus_types::inventory::SledRole; use omicron_common::api::external::Generation; use omicron_common::api::external::Name; +use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; @@ -55,7 +56,7 @@ struct ReconfiguratorSim { system: SystemDescription, /// inventory collections created by the user - collections: IndexMap, + collections: IndexMap, /// blueprints created by the user blueprints: IndexMap, @@ -424,7 +425,7 @@ struct SiloAddRemoveArgs { #[derive(Debug, Args)] struct InventoryArgs { /// id of the inventory collection to use in planning - collection_id: Uuid, + collection_id: CollectionUuid, } #[derive(Debug, Args)] @@ -432,7 +433,7 @@ struct BlueprintPlanArgs { /// id of the blueprint on which this one will be based parent_blueprint_id: Uuid, /// id of the inventory collection to use in planning - collection_id: Uuid, + 
collection_id: CollectionUuid, } #[derive(Debug, Args)] @@ -483,7 +484,7 @@ enum CliDnsGroup { #[derive(Debug, Args)] struct BlueprintDiffInventoryArgs { /// id of the inventory collection - collection_id: Uuid, + collection_id: CollectionUuid, /// id of the blueprint blueprint_id: Uuid, } @@ -519,7 +520,7 @@ struct LoadArgs { /// id of inventory collection to use for sled details /// (may be omitted only if the file contains only one collection) - collection_id: Option, + collection_id: Option, } #[derive(Debug, Args)] @@ -642,7 +643,7 @@ fn cmd_inventory_list( #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct InventoryRow { - id: Uuid, + id: CollectionUuid, nerrors: usize, time_done: String, } diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 94306f0f97..3a6c966d90 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -12,6 +12,7 @@ use crate::schema::{ inv_sled_agent, inv_sled_omicron_zones, inv_zpool, sw_caboose, sw_root_of_trust_page, }; +use crate::typed_uuid::DbTypedUuid; use crate::PhysicalDiskKind; use crate::{ impl_enum_type, ipv6, ByteCount, Generation, MacAddr, Name, ServiceKind, @@ -31,8 +32,8 @@ use nexus_types::inventory::{ BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot, }; use omicron_common::api::internal::shared::NetworkInterface; -use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::ZpoolUuid; +use omicron_uuid_kinds::{CollectionKind, GenericUuid}; +use omicron_uuid_kinds::{CollectionUuid, ZpoolUuid}; use uuid::Uuid; // See [`nexus_types::inventory::PowerState`]. @@ -248,16 +249,33 @@ impl From for nexus_types::inventory::SpType { #[derive(Queryable, Insertable, Clone, Debug, Selectable)] #[diesel(table_name = inv_collection)] pub struct InvCollection { - pub id: Uuid, + pub id: DbTypedUuid, pub time_started: DateTime, pub time_done: DateTime, pub collector: String, } +impl InvCollection { + /// Creates a new `InvCollection`. + pub fn new( + id: CollectionUuid, + time_started: DateTime, + time_done: DateTime, + collector: String, + ) -> Self { + InvCollection { id: id.into(), time_started, time_done, collector } + } + + /// Returns the ID. + pub fn id(&self) -> CollectionUuid { + self.id.into() + } +} + impl<'a> From<&'a Collection> for InvCollection { fn from(c: &'a Collection) -> Self { InvCollection { - id: c.id, + id: c.id.into(), time_started: c.time_started, time_done: c.time_done, collector: c.collector.clone(), @@ -371,26 +389,34 @@ impl From for RotPage { #[derive(Queryable, Insertable, Clone, Debug, Selectable)] #[diesel(table_name = inv_collection_error)] pub struct InvCollectionError { - pub inv_collection_id: Uuid, + pub inv_collection_id: DbTypedUuid, pub idx: SqlU16, pub message: String, } impl InvCollectionError { - pub fn new(inv_collection_id: Uuid, idx: u16, message: String) -> Self { + pub fn new( + inv_collection_id: CollectionUuid, + idx: u16, + message: String, + ) -> Self { InvCollectionError { - inv_collection_id, + inv_collection_id: inv_collection_id.into(), idx: SqlU16::from(idx), message, } } + + pub fn inv_collection_id(&self) -> CollectionUuid { + self.inv_collection_id.into() + } } /// See [`nexus_types::inventory::ServiceProcessor`]. 
#[derive(Queryable, Clone, Debug, Selectable)] #[diesel(table_name = inv_service_processor)] pub struct InvServiceProcessor { - pub inv_collection_id: Uuid, + pub inv_collection_id: DbTypedUuid, pub hw_baseboard_id: Uuid, pub time_collected: DateTime, pub source: String, @@ -598,7 +624,7 @@ impl From for nexus_types::inventory::SledRole { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_sled_agent)] pub struct InvSledAgent { - pub inv_collection_id: Uuid, + pub inv_collection_id: DbTypedUuid, pub time_collected: DateTime, pub source: String, pub sled_id: Uuid, @@ -613,7 +639,7 @@ pub struct InvSledAgent { impl InvSledAgent { pub fn new_without_baseboard( - collection_id: Uuid, + collection_id: CollectionUuid, sled_agent: &nexus_types::inventory::SledAgent, ) -> Result { // It's irritating to have to check this case at runtime. The challenge @@ -635,7 +661,7 @@ impl InvSledAgent { )) } else { Ok(InvSledAgent { - inv_collection_id: collection_id, + inv_collection_id: collection_id.into(), time_collected: sled_agent.time_collected, source: sled_agent.source.clone(), sled_id: sled_agent.sled_id, @@ -661,7 +687,7 @@ impl InvSledAgent { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_physical_disk)] pub struct InvPhysicalDisk { - pub inv_collection_id: Uuid, + pub inv_collection_id: DbTypedUuid, pub sled_id: Uuid, pub slot: i64, pub vendor: String, @@ -672,12 +698,12 @@ pub struct InvPhysicalDisk { impl InvPhysicalDisk { pub fn new( - inv_collection_id: Uuid, + inv_collection_id: CollectionUuid, sled_id: Uuid, disk: nexus_types::inventory::PhysicalDisk, ) -> Self { Self { - inv_collection_id, + inv_collection_id: inv_collection_id.into(), sled_id, slot: disk.slot, vendor: disk.identity.vendor, @@ -706,7 +732,7 @@ impl From for nexus_types::inventory::PhysicalDisk { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_zpool)] pub struct InvZpool { - pub inv_collection_id: Uuid, + pub inv_collection_id: DbTypedUuid, pub time_collected: DateTime, pub id: Uuid, pub sled_id: Uuid, @@ -715,12 +741,12 @@ pub struct InvZpool { impl InvZpool { pub fn new( - inv_collection_id: Uuid, + inv_collection_id: CollectionUuid, sled_id: Uuid, zpool: &nexus_types::inventory::Zpool, ) -> Self { Self { - inv_collection_id, + inv_collection_id: inv_collection_id.into(), time_collected: zpool.time_collected, id: zpool.id.into_untyped_uuid(), sled_id, @@ -743,7 +769,7 @@ impl From for nexus_types::inventory::Zpool { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_sled_omicron_zones)] pub struct InvSledOmicronZones { - pub inv_collection_id: Uuid, + pub inv_collection_id: DbTypedUuid, pub time_collected: DateTime, pub source: String, pub sled_id: Uuid, @@ -752,11 +778,11 @@ pub struct InvSledOmicronZones { impl InvSledOmicronZones { pub fn new( - inv_collection_id: Uuid, + inv_collection_id: CollectionUuid, zones_found: &nexus_types::inventory::OmicronZonesFound, ) -> InvSledOmicronZones { InvSledOmicronZones { - inv_collection_id, + inv_collection_id: inv_collection_id.into(), time_collected: zones_found.time_collected, source: zones_found.source.clone(), sled_id: zones_found.sled_id, @@ -823,7 +849,7 @@ impl From for ServiceKind { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_zone)] pub struct InvOmicronZone { - pub inv_collection_id: Uuid, + pub inv_collection_id: DbTypedUuid, pub sled_id: Uuid, pub id: Uuid, pub underlay_address: 
ipv6::Ipv6Addr, @@ -848,13 +874,13 @@ pub struct InvOmicronZone { impl InvOmicronZone { pub fn new( - inv_collection_id: Uuid, + inv_collection_id: CollectionUuid, sled_id: Uuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result { let zone = OmicronZone::new(sled_id, zone)?; Ok(Self { - inv_collection_id, + inv_collection_id: inv_collection_id.into(), sled_id: zone.sled_id, id: zone.id, underlay_address: zone.underlay_address, @@ -911,7 +937,7 @@ impl InvOmicronZone { #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = inv_omicron_zone_nic)] pub struct InvOmicronZoneNic { - inv_collection_id: Uuid, + inv_collection_id: DbTypedUuid, pub id: Uuid, name: Name, ip: IpNetwork, @@ -939,12 +965,12 @@ impl From for OmicronZoneNic { impl InvOmicronZoneNic { pub fn new( - inv_collection_id: Uuid, + inv_collection_id: CollectionUuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result, anyhow::Error> { let zone_nic = OmicronZoneNic::new(zone)?; Ok(zone_nic.map(|nic| Self { - inv_collection_id, + inv_collection_id: inv_collection_id.into(), id: nic.id, name: nic.name, ip: nic.ip, diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 0b815b686c..769da52d2d 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -28,6 +28,7 @@ use diesel::QueryDsl; use diesel::Table; use futures::future::BoxFuture; use futures::FutureExt; +use nexus_db_model::to_db_typed_uuid; use nexus_db_model::CabooseWhichEnum; use nexus_db_model::HwBaseboardId; use nexus_db_model::HwPowerState; @@ -62,6 +63,8 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; +use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::GenericUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::num::NonZeroU32; @@ -92,7 +95,8 @@ impl DataStore { // It's helpful to assemble some values before entering the transaction // so that we can produce the `Error` type that we want here. 
let row_collection = InvCollection::from(collection); - let collection_id = row_collection.id; + let collection_id = row_collection.id(); + let db_collection_id = to_db_typed_uuid(collection_id); let baseboards = collection .baseboards .iter() @@ -289,7 +293,8 @@ impl DataStore { for (baseboard_id, sp) in &collection.sps { let selection = db::schema::hw_baseboard_id::table .select(( - collection_id.into_sql::(), + db_collection_id + .into_sql::(), baseboard_dsl::id, sp.time_collected .into_sql::(), @@ -372,7 +377,8 @@ impl DataStore { for (baseboard_id, rot) in &collection.rots { let selection = db::schema::hw_baseboard_id::table .select(( - collection_id.into_sql::(), + db_collection_id + .into_sql::(), baseboard_dsl::id, rot.time_collected .into_sql::(), @@ -546,7 +552,8 @@ impl DataStore { .select(( dsl_baseboard_id::id, dsl_sw_caboose::id, - collection_id.into_sql::(), + db_collection_id + .into_sql::(), found_caboose .time_collected .into_sql::(), @@ -623,7 +630,8 @@ impl DataStore { .select(( dsl_baseboard_id::id, dsl_sw_rot_page::id, - collection_id.into_sql::(), + db_collection_id + .into_sql::(), found_rot_page .time_collected .into_sql::(), @@ -718,7 +726,8 @@ impl DataStore { ); let selection = db::schema::hw_baseboard_id::table .select(( - collection_id.into_sql::(), + db_collection_id + .into_sql::(), sled_agent .time_collected .into_sql::(), @@ -954,7 +963,7 @@ impl DataStore { &self, opctx: &OpContext, nkeep: u32, - ) -> Result, Error> { + ) -> Result, Error> { let conn = self.pool_connection_authorized(opctx).await?; // Diesel requires us to use aliases in order to refer to the // `inv_collection` table twice in the same query. @@ -1067,7 +1076,7 @@ impl DataStore { "candidates" => ?candidates, ); } - Ok(candidate) + Ok(candidate.map(CollectionUuid::from_untyped_uuid)) } /// Removes an inventory collection from the database @@ -1076,7 +1085,7 @@ impl DataStore { async fn inventory_delete_collection( &self, opctx: &OpContext, - collection_id: Uuid, + collection_id: CollectionUuid, ) -> Result<(), Error> { // As with inserting a whole collection, we remove it in one big // transaction for simplicity. Similar considerations apply. We could @@ -1085,6 +1094,7 @@ impl DataStore { // start removing it and we'd also need to make sure we didn't leak a // collection if we crash while deleting it. let conn = self.pool_connection_authorized(opctx).await?; + let db_collection_id = to_db_typed_uuid(collection_id); let ( ncollections, nsps, @@ -1104,129 +1114,130 @@ impl DataStore { let ncollections = { use db::schema::inv_collection::dsl; diesel::delete( - dsl::inv_collection.filter(dsl::id.eq(collection_id)), + dsl::inv_collection + .filter(dsl::id.eq(db_collection_id)), ) .execute_async(&conn) .await? }; // Remove rows for service processors. - let nsps = { - use db::schema::inv_service_processor::dsl; - diesel::delete( - dsl::inv_service_processor - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let nsps = + { + use db::schema::inv_service_processor::dsl; + diesel::delete(dsl::inv_service_processor.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; // Remove rows for roots of trust. - let nrots = { - use db::schema::inv_root_of_trust::dsl; - diesel::delete( - dsl::inv_root_of_trust - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? 
- }; + let nrots = + { + use db::schema::inv_root_of_trust::dsl; + diesel::delete(dsl::inv_root_of_trust.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; // Remove rows for cabooses found. - let ncabooses = { - use db::schema::inv_caboose::dsl; - diesel::delete( - dsl::inv_caboose - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let ncabooses = + { + use db::schema::inv_caboose::dsl; + diesel::delete(dsl::inv_caboose.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; // Remove rows for root of trust pages found. - let nrot_pages = { - use db::schema::inv_root_of_trust_page::dsl; - diesel::delete( - dsl::inv_root_of_trust_page - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let nrot_pages = + { + use db::schema::inv_root_of_trust_page::dsl; + diesel::delete(dsl::inv_root_of_trust_page.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; // Remove rows for sled agents found. - let nsled_agents = { - use db::schema::inv_sled_agent::dsl; - diesel::delete( - dsl::inv_sled_agent - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let nsled_agents = + { + use db::schema::inv_sled_agent::dsl; + diesel::delete(dsl::inv_sled_agent.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; // Remove rows for physical disks found. - let nphysical_disks = { - use db::schema::inv_physical_disk::dsl; - diesel::delete( - dsl::inv_physical_disk - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let nphysical_disks = + { + use db::schema::inv_physical_disk::dsl; + diesel::delete(dsl::inv_physical_disk.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; // Remove rows associated with Omicron zones - let nsled_agent_zones = { - use db::schema::inv_sled_omicron_zones::dsl; - diesel::delete( - dsl::inv_sled_omicron_zones - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let nsled_agent_zones = + { + use db::schema::inv_sled_omicron_zones::dsl; + diesel::delete(dsl::inv_sled_omicron_zones.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; - let nzones = { - use db::schema::inv_omicron_zone::dsl; - diesel::delete( - dsl::inv_omicron_zone - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let nzones = + { + use db::schema::inv_omicron_zone::dsl; + diesel::delete(dsl::inv_omicron_zone.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; - let nnics = { - use db::schema::inv_omicron_zone_nic::dsl; - diesel::delete( - dsl::inv_omicron_zone_nic - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let nnics = + { + use db::schema::inv_omicron_zone_nic::dsl; + diesel::delete(dsl::inv_omicron_zone_nic.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; - let nzpools = { - use db::schema::inv_zpool::dsl; - diesel::delete( - dsl::inv_zpool - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? 
- }; + let nzpools = + { + use db::schema::inv_zpool::dsl; + diesel::delete(dsl::inv_zpool.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; // Remove rows for errors encountered. - let nerrors = { - use db::schema::inv_collection_error::dsl; - diesel::delete( - dsl::inv_collection_error - .filter(dsl::inv_collection_id.eq(collection_id)), - ) - .execute_async(&conn) - .await? - }; + let nerrors = + { + use db::schema::inv_collection_error::dsl; + diesel::delete(dsl::inv_collection_error.filter( + dsl::inv_collection_id.eq(db_collection_id), + )) + .execute_async(&conn) + .await? + }; Ok(( ncollections, @@ -1316,14 +1327,20 @@ impl DataStore { return Ok(None); }; - Ok(Some(self.inventory_collection_read(opctx, collection_id).await?)) + Ok(Some( + self.inventory_collection_read( + opctx, + CollectionUuid::from_untyped_uuid(collection_id), + ) + .await?, + )) } /// Attempt to read the current collection pub async fn inventory_collection_read( &self, opctx: &OpContext, - id: Uuid, + id: CollectionUuid, ) -> Result { self.inventory_collection_read_batched(opctx, id, SQL_BATCH_SIZE).await } @@ -1343,15 +1360,16 @@ impl DataStore { async fn inventory_collection_read_batched( &self, opctx: &OpContext, - id: Uuid, + id: CollectionUuid, batch_size: NonZeroU32, ) -> Result { let conn = self.pool_connection_authorized(opctx).await?; + let db_id = to_db_typed_uuid(id); let (time_started, time_done, collector) = { use db::schema::inv_collection::dsl; let collections = dsl::inv_collection - .filter(dsl::id.eq(id)) + .filter(dsl::id.eq(db_id)) .limit(2) .select(InvCollection::as_select()) .load_async(&*conn) @@ -1378,7 +1396,7 @@ impl DataStore { dsl::idx, &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .order_by(dsl::idx) .select(InvCollectionError::as_select()) .load_async(&*conn) @@ -1405,7 +1423,7 @@ impl DataStore { dsl::hw_baseboard_id, &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvServiceProcessor::as_select()) .load_async(&*conn) .await @@ -1436,7 +1454,7 @@ impl DataStore { dsl::hw_baseboard_id, &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvRootOfTrust::as_select()) .load_async(&*conn) .await @@ -1467,7 +1485,7 @@ impl DataStore { dsl::sled_id, &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvSledAgent::as_select()) .load_async(&*conn) .await @@ -1499,7 +1517,7 @@ impl DataStore { (dsl::sled_id, dsl::slot), &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvPhysicalDisk::as_select()) .load_async(&*conn) .await @@ -1528,7 +1546,7 @@ impl DataStore { (dsl::sled_id, dsl::id), &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvZpool::as_select()) .load_async(&*conn) .await @@ -1675,7 +1693,7 @@ impl DataStore { (dsl::hw_baseboard_id, dsl::which), &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvCaboose::as_select()) .load_async(&*conn) .await @@ -1777,7 +1795,7 @@ impl DataStore { (dsl::hw_baseboard_id, dsl::which), &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvRotPage::as_select()) 
.load_async(&*conn) .await @@ -1891,7 +1909,7 @@ impl DataStore { dsl::sled_id, &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvSledOmicronZones::as_select()) .load_async(&*conn) .await @@ -1927,7 +1945,7 @@ impl DataStore { dsl::id, &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) .select(InvOmicronZoneNic::as_select()) .load_async(&*conn) .await @@ -1956,7 +1974,7 @@ impl DataStore { dsl::id, &p.current_pagparams(), ) - .filter(dsl::inv_collection_id.eq(id)) + .filter(dsl::inv_collection_id.eq(db_id)) // It's not strictly necessary to order these by id. Doing so // ensures a consistent representation for `Collection`, which // makes testing easier. It's already indexed to do this, too. @@ -2043,11 +2061,15 @@ pub trait DataStoreInventoryTest: Send + Sync { /// List all collections /// /// This does not paginate. - fn inventory_collections(&self) -> BoxFuture>>; + fn inventory_collections( + &self, + ) -> BoxFuture>>; } impl DataStoreInventoryTest for DataStore { - fn inventory_collections(&self) -> BoxFuture>> { + fn inventory_collections( + &self, + ) -> BoxFuture>> { async { let conn = self .pool_connection_for_tests() @@ -2059,12 +2081,17 @@ impl DataStoreInventoryTest for DataStore { .context("failed to allow table scan")?; use db::schema::inv_collection::dsl; - dsl::inv_collection + let uuids = dsl::inv_collection .select(dsl::id) .order_by(dsl::time_started) - .load_async(&conn) + .load_async::(&conn) .await - .context("failed to list collections") + .context("failed to list collections")?; + + Ok(uuids + .into_iter() + .map(CollectionUuid::from_untyped_uuid) + .collect()) }) .await } diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index c753ac5436..49a156f81d 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -402,6 +402,7 @@ mod test { ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; use omicron_test_utils::dev; + use omicron_uuid_kinds::CollectionUuid; use std::collections::HashMap; use std::collections::HashSet; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; @@ -703,10 +704,10 @@ mod test { ) { use db::schema::inv_zpool::dsl; - let inv_collection_id = Uuid::new_v4(); + let inv_collection_id = CollectionUuid::new_v4(); let time_collected = Utc::now(); let inv_pool = nexus_db_model::InvZpool { - inv_collection_id, + inv_collection_id: inv_collection_id.into(), time_collected, id: zpool_id, sled_id, diff --git a/nexus/inventory/Cargo.toml b/nexus/inventory/Cargo.toml index 43041ab146..52c7b6975b 100644 --- a/nexus/inventory/Cargo.toml +++ b/nexus/inventory/Cargo.toml @@ -13,6 +13,7 @@ gateway-client.workspace = true gateway-messages.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true reqwest.workspace = true serde_json.workspace = true sled-agent-client.workspace = true diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 0506e8286a..3ca2f5a2f1 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -27,12 +27,13 @@ use nexus_types::inventory::RotState; use nexus_types::inventory::ServiceProcessor; use nexus_types::inventory::SledAgent; use nexus_types::inventory::Zpool; +use omicron_uuid_kinds::CollectionKind; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::hash::Hash; use 
std::sync::Arc; use thiserror::Error; -use typed_rng::UuidRng; +use typed_rng::TypedUuidRng; use uuid::Uuid; /// Describes an operational error encountered during the collection process @@ -89,7 +90,7 @@ pub struct CollectionBuilder { sleds: BTreeMap, omicron_zones: BTreeMap, // We just generate one UUID for each collection. - id_rng: UuidRng, + id_rng: TypedUuidRng, } impl CollectionBuilder { @@ -115,7 +116,7 @@ impl CollectionBuilder { rot_pages_found: BTreeMap::new(), sleds: BTreeMap::new(), omicron_zones: BTreeMap::new(), - id_rng: UuidRng::from_entropy(), + id_rng: TypedUuidRng::from_entropy(), } } diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 1a384a1fd9..0d8a6834ba 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -26,6 +26,7 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; +use omicron_uuid_kinds::CollectionUuid; use slog_error_chain::InlineErrorChain; use uuid::Uuid; @@ -207,7 +208,7 @@ impl super::Nexus { pub async fn blueprint_generate_from_collection( &self, opctx: &OpContext, - collection_id: Uuid, + collection_id: CollectionUuid, ) -> CreateResult { let collection = self .datastore() diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 0ce0a204f5..401220431a 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -50,6 +50,7 @@ use omicron_common::api::internal::nexus::RepairProgress; use omicron_common::api::internal::nexus::RepairStartInfo; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::update::ArtifactId; +use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::DownstairsKind; use omicron_uuid_kinds::TypedUuid; use omicron_uuid_kinds::UpstairsKind; @@ -961,7 +962,7 @@ async fn blueprint_target_set_enabled( #[derive(Debug, Deserialize, JsonSchema)] struct CollectionId { - collection_id: Uuid, + collection_id: CollectionUuid, } /// Generates a new blueprint matching the specified inventory collection diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 80765fe49e..bae605c2e3 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -22,6 +22,7 @@ pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; use newtype_uuid::GenericUuid; use omicron_common::api::external::Generation; +use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::SledUuid; use schemars::JsonSchema; use serde::Deserialize; @@ -801,7 +802,7 @@ impl fmt::Display for BlueprintDiffSingleError { #[derive(Clone, Debug)] pub enum DiffBeforeMetadata { /// The diff was made from a collection. - Collection { id: Uuid }, + Collection { id: CollectionUuid }, /// The diff was made from a blueprint. 
Blueprint(Box), } diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index d6b0383375..9511a20b75 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -21,6 +21,7 @@ use omicron_common::api::external::ByteCount; pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; pub use omicron_common::api::internal::shared::SourceNatConfig; +use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::ZpoolUuid; use serde::{Deserialize, Serialize}; use serde_with::serde_as; @@ -56,7 +57,7 @@ use uuid::Uuid; #[derive(Debug, Eq, PartialEq, Clone, Serialize, Deserialize)] pub struct Collection { /// unique identifier for this collection - pub id: Uuid, + pub id: CollectionUuid, /// errors encountered during collection pub errors: Vec, /// time the collection started diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index dd8238fbf6..7681c0d601 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2847,8 +2847,7 @@ "type": "object", "properties": { "collection_id": { - "type": "string", - "format": "uuid" + "$ref": "#/components/schemas/TypedUuidForCollectionKind" } }, "required": [ @@ -7453,6 +7452,10 @@ "SwitchPutResponse": { "type": "object" }, + "TypedUuidForCollectionKind": { + "type": "string", + "format": "uuid" + }, "TypedUuidForDownstairsRegionKind": { "type": "string", "format": "uuid" diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 29842cd53f..41073f8638 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -49,6 +49,7 @@ macro_rules! impl_typed_uuid_kind { // Please keep this list in alphabetical order. impl_typed_uuid_kind! { + Collection => "collection", Downstairs => "downstairs", DownstairsRegion => "downstairs_region", LoopbackAddress => "loopback_address", From 7cd78ffd2094adf65ffda9b5e249b9ea716cf367 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 12 Apr 2024 16:08:05 -0400 Subject: [PATCH 133/334] SP v1.0.13 (#5510) --- tools/hubris_checksums | 16 ++++++++-------- tools/hubris_version | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index fcb9151be5..913cc460c4 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,8 +1,8 @@ -213f5e6fea9fea6356afc05e7769872c8d20b2c4c4ccd5841e161b785a3a858e build-gimlet-c-image-default-v1.0.12.zip -56185bc02a5b104106469da925edf7d9c80cfef6a75e1194231ba383fe68d765 build-gimlet-d-image-default-v1.0.12.zip -36f8d2670422be87b55cab701655ce868ce0c6a0dae9293adcbb17f47bd021c6 build-gimlet-e-image-default-v1.0.12.zip -98c9651da3317050c0c016c0936865c8aa409745db60057f39e49b702a8de432 build-gimlet-f-image-default-v1.0.12.zip -16898114126d46ae5723823abb6b31da465f72d884e995b3b3360269fff1dde0 build-psc-b-image-default-v1.0.12.zip -f4c0d46f38f0747b781efa1a306d223558cde89f60ed10edc582550b6f2c1ef1 build-psc-c-image-default-v1.0.12.zip -dede524c6bd38d27b69011b28950a5e335591265ef75306fb87ce5b402e5ae6b build-sidecar-b-image-default-v1.0.12.zip -b114eb25ff7f48f1eceaafbbdaeba071d2f99b19335e793ab03f4f01f6d22c97 build-sidecar-c-image-default-v1.0.12.zip +4d38415a186fb1058c991d0e5ed44711457526e32687ff48ab6d6feadd8b4aa4 build-gimlet-c-image-default-v1.0.13.zip +ead1988cfebb4f79c364a2207f0bda741b8dd0e4f02fb34b4d341c648ecaa733 build-gimlet-d-image-default-v1.0.13.zip +85f5fc9c206c5fc61b4c2380b94a337220e944d67c0cb6bb2cb2486f8d5bc193 build-gimlet-e-image-default-v1.0.13.zip 
+ac7d898369e94e33b3556a405352b24a1ee107ce877d416811d9e9fae1f1a1ec build-gimlet-f-image-default-v1.0.13.zip +8cf812dc4aacc013335eb932d2bfaf8a542dec7bc29ea671d9a4235c12d61564 build-psc-b-image-default-v1.0.13.zip +85622677eef52c6d210f44e82b2b6cdc5a8357e509744abe1693883b7635b38c build-psc-c-image-default-v1.0.13.zip +87d6cd4add1aabe53756ba8f66a461cd3aa08f1a0093f94ea81a35a6a175ed21 build-sidecar-b-image-default-v1.0.13.zip +d50d6f77da6fc736843b5418359532f18b7ffa090c2a3d68b5dc1d35281385f5 build-sidecar-c-image-default-v1.0.13.zip diff --git a/tools/hubris_version b/tools/hubris_version index e6be0843de..717d36cec2 100644 --- a/tools/hubris_version +++ b/tools/hubris_version @@ -1 +1 @@ -TAGS=(gimlet-v1.0.12 psc-v1.0.12 sidecar-v1.0.12) +TAGS=(gimlet-v1.0.13 psc-v1.0.13 sidecar-v1.0.13) From 00971809e1326a35f41206413ae87bd0db0d338c Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 12 Apr 2024 16:46:47 -0400 Subject: [PATCH 134/334] Use typed `SledUuid` in inventory collections (#5522) Related to #5517 (and might have some small conflicts, but happy to settle those once it lands). --- dev-tools/omdb/src/bin/omdb/nexus.rs | 8 +- dev-tools/reconfigurator-cli/src/main.rs | 10 +- nexus/db-model/src/deployment.rs | 17 ++- nexus/db-model/src/inventory.rs | 38 ++--- nexus/db-model/src/omicron_zone_config.rs | 5 +- .../db-queries/src/db/datastore/deployment.rs | 25 ++-- .../db-queries/src/db/datastore/inventory.rs | 132 +++++++++--------- nexus/db-queries/src/db/datastore/mod.rs | 39 +++--- nexus/db-queries/src/db/datastore/sled.rs | 24 ++-- .../db-queries/src/db/datastore/test_utils.rs | 22 +-- nexus/db-queries/src/db/datastore/vpc.rs | 14 +- nexus/inventory/src/builder.rs | 27 ++-- nexus/inventory/src/collector.rs | 4 +- nexus/inventory/src/examples.rs | 21 ++- .../reconfigurator/execution/src/datasets.rs | 7 +- nexus/reconfigurator/execution/src/dns.rs | 17 +-- nexus/reconfigurator/execution/src/lib.rs | 15 +- .../execution/src/omicron_zones.rs | 24 ++-- .../execution/src/overridables.rs | 26 ++-- .../planning/src/blueprint_builder.rs | 9 +- nexus/reconfigurator/planning/src/example.rs | 8 +- nexus/reconfigurator/planning/src/planner.rs | 19 +-- .../app/background/sync_service_zone_nat.rs | 3 +- nexus/types/src/deployment.rs | 2 +- nexus/types/src/inventory.rs | 12 +- 25 files changed, 285 insertions(+), 243 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 67fe0854a0..31a450f935 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -27,6 +27,8 @@ use nexus_db_queries::db::lookup::LookupPath; use nexus_types::deployment::Blueprint; use nexus_types::inventory::BaseboardId; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use reedline::DefaultPrompt; use reedline::DefaultPromptSegment; use reedline::Reedline; @@ -200,7 +202,7 @@ struct SledExpungeArgs { db_url_opts: DbUrlOptions, /// sled ID - sled_id: Uuid, + sled_id: SledUuid, } impl NexusArgs { @@ -1208,7 +1210,7 @@ async fn cmd_nexus_sled_expunge( // First, we need to look up the sled so we know its serial number. 
let (_authz_sled, sled) = LookupPath::new(opctx, &datastore) - .sled_id(args.sled_id) + .sled_id(args.sled_id.into_untyped_uuid()) .fetch() .await .with_context(|| format!("failed to find sled {}", args.sled_id))?; @@ -1286,7 +1288,7 @@ async fn cmd_nexus_sled_expunge( } let old_policy = client - .sled_expunge(&SledSelector { sled: args.sled_id }) + .sled_expunge(&SledSelector { sled: args.sled_id.into_untyped_uuid() }) .await .context("expunging sled")? .into_inner(); diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index ae451f3c56..0bd2d21b74 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -679,7 +679,7 @@ fn cmd_inventory_generate( builder .found_sled_omicron_zones( "fake sled agent", - *sled_id.as_untyped_uuid(), + sled_id, OmicronZonesConfig { generation: Generation::new(), zones: vec![], @@ -878,8 +878,10 @@ fn cmd_blueprint_diff( fn make_sleds_by_id( sim: &ReconfiguratorSim, -) -> Result, anyhow::Error> -{ +) -> Result< + BTreeMap, + anyhow::Error, +> { let collection = sim .system .to_collection_builder() @@ -1153,7 +1155,7 @@ fn cmd_load( } let Some(inventory_sled_agent) = - primary_collection.sled_agents.get(sled_id.as_untyped_uuid()) + primary_collection.sled_agents.get(&sled_id) else { swriteln!( s, diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index d425f0ac34..90ab517244 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -11,6 +11,7 @@ use crate::schema::{ blueprint, bp_omicron_zone, bp_omicron_zone_nic, bp_sled_omicron_zones, bp_target, }; +use crate::typed_uuid::DbTypedUuid; use crate::{ impl_enum_type, ipv6, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8, }; @@ -21,6 +22,8 @@ use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZonesConfig; use omicron_common::api::internal::shared::NetworkInterface; +use omicron_uuid_kinds::SledKind; +use omicron_uuid_kinds::SledUuid; use uuid::Uuid; /// See [`nexus_types::deployment::Blueprint`]. 
@@ -100,19 +103,19 @@ impl From for nexus_types::deployment::BlueprintTarget { #[diesel(table_name = bp_sled_omicron_zones)] pub struct BpSledOmicronZones { pub blueprint_id: Uuid, - pub sled_id: Uuid, + pub sled_id: DbTypedUuid, pub generation: Generation, } impl BpSledOmicronZones { pub fn new( blueprint_id: Uuid, - sled_id: Uuid, + sled_id: SledUuid, zones_config: &BlueprintZonesConfig, ) -> Self { Self { blueprint_id, - sled_id, + sled_id: sled_id.into(), generation: Generation(zones_config.generation), } } @@ -123,7 +126,7 @@ impl BpSledOmicronZones { #[diesel(table_name = bp_omicron_zone)] pub struct BpOmicronZone { pub blueprint_id: Uuid, - pub sled_id: Uuid, + pub sled_id: DbTypedUuid, pub id: Uuid, pub underlay_address: ipv6::Ipv6Addr, pub zone_type: ZoneType, @@ -150,13 +153,13 @@ pub struct BpOmicronZone { impl BpOmicronZone { pub fn new( blueprint_id: Uuid, - sled_id: Uuid, + sled_id: SledUuid, blueprint_zone: &BlueprintZoneConfig, ) -> Result { let zone = OmicronZone::new(sled_id, &blueprint_zone.config)?; Ok(Self { blueprint_id, - sled_id: zone.sled_id, + sled_id: zone.sled_id.into(), id: zone.id, underlay_address: zone.underlay_address, zone_type: zone.zone_type, @@ -185,7 +188,7 @@ impl BpOmicronZone { nic_row: Option, ) -> Result { let zone = OmicronZone { - sled_id: self.sled_id, + sled_id: self.sled_id.into(), id: self.id, underlay_address: self.underlay_address, zone_type: self.zone_type, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 3a6c966d90..1a993df49f 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -32,8 +32,12 @@ use nexus_types::inventory::{ BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot, }; use omicron_common::api::internal::shared::NetworkInterface; -use omicron_uuid_kinds::{CollectionKind, GenericUuid}; -use omicron_uuid_kinds::{CollectionUuid, ZpoolUuid}; +use omicron_uuid_kinds::CollectionKind; +use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledKind; +use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; use uuid::Uuid; // See [`nexus_types::inventory::PowerState`]. 
@@ -627,7 +631,7 @@ pub struct InvSledAgent { pub inv_collection_id: DbTypedUuid, pub time_collected: DateTime, pub source: String, - pub sled_id: Uuid, + pub sled_id: DbTypedUuid, pub hw_baseboard_id: Option, pub sled_agent_ip: ipv6::Ipv6Addr, pub sled_agent_port: SqlU16, @@ -664,7 +668,7 @@ impl InvSledAgent { inv_collection_id: collection_id.into(), time_collected: sled_agent.time_collected, source: sled_agent.source.clone(), - sled_id: sled_agent.sled_id, + sled_id: sled_agent.sled_id.into(), hw_baseboard_id: None, sled_agent_ip: ipv6::Ipv6Addr::from( *sled_agent.sled_agent_address.ip(), @@ -688,7 +692,7 @@ impl InvSledAgent { #[diesel(table_name = inv_physical_disk)] pub struct InvPhysicalDisk { pub inv_collection_id: DbTypedUuid, - pub sled_id: Uuid, + pub sled_id: DbTypedUuid, pub slot: i64, pub vendor: String, pub model: String, @@ -699,12 +703,12 @@ pub struct InvPhysicalDisk { impl InvPhysicalDisk { pub fn new( inv_collection_id: CollectionUuid, - sled_id: Uuid, + sled_id: SledUuid, disk: nexus_types::inventory::PhysicalDisk, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), - sled_id, + sled_id: sled_id.into(), slot: disk.slot, vendor: disk.identity.vendor, model: disk.identity.model, @@ -735,21 +739,21 @@ pub struct InvZpool { pub inv_collection_id: DbTypedUuid, pub time_collected: DateTime, pub id: Uuid, - pub sled_id: Uuid, + pub sled_id: DbTypedUuid, pub total_size: ByteCount, } impl InvZpool { pub fn new( inv_collection_id: CollectionUuid, - sled_id: Uuid, + sled_id: SledUuid, zpool: &nexus_types::inventory::Zpool, ) -> Self { Self { inv_collection_id: inv_collection_id.into(), time_collected: zpool.time_collected, id: zpool.id.into_untyped_uuid(), - sled_id, + sled_id: sled_id.into(), total_size: zpool.total_size.into(), } } @@ -772,7 +776,7 @@ pub struct InvSledOmicronZones { pub inv_collection_id: DbTypedUuid, pub time_collected: DateTime, pub source: String, - pub sled_id: Uuid, + pub sled_id: DbTypedUuid, pub generation: Generation, } @@ -785,7 +789,7 @@ impl InvSledOmicronZones { inv_collection_id: inv_collection_id.into(), time_collected: zones_found.time_collected, source: zones_found.source.clone(), - sled_id: zones_found.sled_id, + sled_id: zones_found.sled_id.into(), generation: Generation(zones_found.zones.generation), } } @@ -796,7 +800,7 @@ impl InvSledOmicronZones { nexus_types::inventory::OmicronZonesFound { time_collected: self.time_collected, source: self.source, - sled_id: self.sled_id, + sled_id: self.sled_id.into(), zones: nexus_types::inventory::OmicronZonesConfig { generation: *self.generation, zones: Vec::new(), @@ -850,7 +854,7 @@ impl From for ServiceKind { #[diesel(table_name = inv_omicron_zone)] pub struct InvOmicronZone { pub inv_collection_id: DbTypedUuid, - pub sled_id: Uuid, + pub sled_id: DbTypedUuid, pub id: Uuid, pub underlay_address: ipv6::Ipv6Addr, pub zone_type: ZoneType, @@ -875,13 +879,13 @@ pub struct InvOmicronZone { impl InvOmicronZone { pub fn new( inv_collection_id: CollectionUuid, - sled_id: Uuid, + sled_id: SledUuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result { let zone = OmicronZone::new(sled_id, zone)?; Ok(Self { inv_collection_id: inv_collection_id.into(), - sled_id: zone.sled_id, + sled_id: zone.sled_id.into(), id: zone.id, underlay_address: zone.underlay_address, zone_type: zone.zone_type, @@ -909,7 +913,7 @@ impl InvOmicronZone { nic_row: Option, ) -> Result { let zone = OmicronZone { - sled_id: self.sled_id, + sled_id: self.sled_id.into(), id: self.id, underlay_address: 
self.underlay_address, zone_type: self.zone_type, diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs index ce3127a9b3..b0fd3356fe 100644 --- a/nexus/db-model/src/omicron_zone_config.rs +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -21,11 +21,12 @@ use nexus_types::inventory::OmicronZoneType; use omicron_common::api::internal::shared::{ NetworkInterface, NetworkInterfaceKind, }; +use omicron_uuid_kinds::SledUuid; use uuid::Uuid; #[derive(Debug)] pub(crate) struct OmicronZone { - pub(crate) sled_id: Uuid, + pub(crate) sled_id: SledUuid, pub(crate) id: Uuid, pub(crate) underlay_address: ipv6::Ipv6Addr, pub(crate) zone_type: ZoneType, @@ -49,7 +50,7 @@ pub(crate) struct OmicronZone { impl OmicronZone { pub(crate) fn new( - sled_id: Uuid, + sled_id: SledUuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> anyhow::Result { let id = zone.id; diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index d0cdc0fc63..ed1131550f 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -49,6 +49,8 @@ use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use uuid::Uuid; @@ -108,7 +110,11 @@ impl DataStore { .blueprint_zones .iter() .map(|(sled_id, zones_config)| { - BpSledOmicronZones::new(blueprint_id, *sled_id, zones_config) + BpSledOmicronZones::new( + blueprint_id, + SledUuid::from_untyped_uuid(*sled_id), + zones_config, + ) }) .collect::>(); let omicron_zones = blueprint @@ -116,8 +122,12 @@ impl DataStore { .iter() .flat_map(|(sled_id, zones_config)| { zones_config.zones.iter().map(move |zone| { - BpOmicronZone::new(blueprint_id, *sled_id, zone) - .map_err(|e| Error::internal_error(&format!("{:#}", e))) + BpOmicronZone::new( + blueprint_id, + SledUuid::from_untyped_uuid(*sled_id), + zone, + ) + .map_err(|e| Error::internal_error(&format!("{:#}", e))) }) }) .collect::, Error>>()?; @@ -270,7 +280,7 @@ impl DataStore { for s in batch { let old = blueprint_zones.insert( - s.sled_id, + s.sled_id.into_untyped_uuid(), BlueprintZonesConfig { generation: *s.generation, zones: Vec::new(), @@ -369,7 +379,7 @@ impl DataStore { }) .transpose()?; let sled_zones = blueprint_zones - .get_mut(&z.sled_id) + .get_mut(z.sled_id.as_untyped_uuid()) .ok_or_else(|| { // This error means that we found a row in // bp_omicron_zone with no associated record in @@ -1091,7 +1101,6 @@ mod tests { use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::Generation; use omicron_test_utils::dev; - use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use pretty_assertions::assert_eq; @@ -1201,11 +1210,9 @@ mod tests { Generation::new(), ); for (sled_id, agent) in &collection.sled_agents { - // TODO-cleanup use `TypedUuid` everywhere - let sled_id = SledUuid::from_untyped_uuid(*sled_id); builder .add_sled( - sled_id, + *sled_id, fake_sled_details(Some(*agent.sled_agent_address.ip())), ) .expect("failed to add sled to representative"); diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 769da52d2d..02832c5528 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ 
b/nexus/db-queries/src/db/datastore/inventory.rs @@ -65,6 +65,7 @@ use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::num::NonZeroU32; @@ -735,8 +736,7 @@ impl DataStore { .source .clone() .into_sql::(), - sled_agent - .sled_id + (sled_agent.sled_id.into_untyped_uuid()) .into_sql::(), baseboard_dsl::id.nullable(), nexus_db_model::ipv6::Ipv6Addr::from( @@ -1527,7 +1527,10 @@ impl DataStore { paginator = p.found_batch(&batch, &|row| (row.sled_id, row.slot)); for disk in batch { - disks.entry(disk.sled_id).or_default().push(disk.into()); + disks + .entry(disk.sled_id.into_untyped_uuid()) + .or_default() + .push(disk.into()); } } disks @@ -1555,7 +1558,10 @@ impl DataStore { })?; paginator = p.found_batch(&batch, &|row| (row.sled_id, row.id)); for zpool in batch { - zpools.entry(zpool.sled_id).or_default().push(zpool.into()); + zpools + .entry(zpool.sled_id.into_untyped_uuid()) + .or_default() + .push(zpool.into()); } } zpools @@ -1628,57 +1634,54 @@ impl DataStore { }) }) .collect::, _>>()?; - let sled_agents: BTreeMap<_, _> = - sled_agent_rows - .into_iter() - .map(|s: InvSledAgent| { - let sled_id = s.sled_id; - let baseboard_id = s - .hw_baseboard_id - .map(|id| { - baseboards_by_id.get(&id).cloned().ok_or_else( - || { - Error::internal_error( + let sled_agents: BTreeMap<_, _> = sled_agent_rows + .into_iter() + .map(|s: InvSledAgent| { + let sled_id = SledUuid::from(s.sled_id); + let baseboard_id = s + .hw_baseboard_id + .map(|id| { + baseboards_by_id.get(&id).cloned().ok_or_else(|| { + Error::internal_error( "missing baseboard that we should have fetched", ) - }, - ) }) - .transpose()?; - let sled_agent = nexus_types::inventory::SledAgent { - time_collected: s.time_collected, - source: s.source, - sled_id, - baseboard_id, - sled_agent_address: std::net::SocketAddrV6::new( - std::net::Ipv6Addr::from(s.sled_agent_ip), - u16::from(s.sled_agent_port), - 0, - 0, - ), - sled_role: nexus_types::inventory::SledRole::from( - s.sled_role, - ), - usable_hardware_threads: u32::from( - s.usable_hardware_threads, - ), - usable_physical_ram: s.usable_physical_ram.into(), - reservoir_size: s.reservoir_size.into(), - disks: physical_disks - .get(&sled_id) - .map(|disks| disks.to_vec()) - .unwrap_or_default(), - zpools: zpools - .get(&sled_id) - .map(|zpools| zpools.to_vec()) - .unwrap_or_default(), - }; - Ok((sled_id, sled_agent)) - }) - .collect::, - Error, - >>()?; + }) + .transpose()?; + let sled_agent = nexus_types::inventory::SledAgent { + time_collected: s.time_collected, + source: s.source, + sled_id, + baseboard_id, + sled_agent_address: std::net::SocketAddrV6::new( + std::net::Ipv6Addr::from(s.sled_agent_ip), + u16::from(s.sled_agent_port), + 0, + 0, + ), + sled_role: nexus_types::inventory::SledRole::from( + s.sled_role, + ), + usable_hardware_threads: u32::from( + s.usable_hardware_threads, + ), + usable_physical_ram: s.usable_physical_ram.into(), + reservoir_size: s.reservoir_size.into(), + disks: physical_disks + .get(sled_id.as_untyped_uuid()) + .map(|disks| disks.to_vec()) + .unwrap_or_default(), + zpools: zpools + .get(sled_id.as_untyped_uuid()) + .map(|zpools| zpools.to_vec()) + .unwrap_or_default(), + }; + Ok((sled_id, sled_agent)) + }) + .collect::, + Error, + >>()?; // Fetch records of cabooses found. 
let inv_caboose_rows = { @@ -1897,7 +1900,7 @@ impl DataStore { // number. We'll assemble these directly into the data structure we're // trying to build, which maps sled ids to objects describing the zones // found on each sled. - let mut omicron_zones: BTreeMap<_, _> = { + let mut omicron_zones: BTreeMap = { use db::schema::inv_sled_omicron_zones::dsl; let mut zones = BTreeMap::new(); @@ -1919,7 +1922,7 @@ impl DataStore { paginator = p.found_batch(&batch, &|row| row.sled_id); zones.extend(batch.into_iter().map(|sled_zones_config| { ( - sled_zones_config.sled_id, + sled_zones_config.sled_id.into(), sled_zones_config.into_uninit_zones_found(), ) })) @@ -2008,16 +2011,17 @@ impl DataStore { }) }) .transpose()?; - let map = omicron_zones.get_mut(&z.sled_id).ok_or_else(|| { - // This error means that we found a row in inv_omicron_zone with - // no associated record in inv_sled_omicron_zones. This should - // be impossible and reflects either a bug or database - // corruption. - Error::internal_error(&format!( - "zone {:?}: unknown sled: {:?}", - z.id, z.sled_id - )) - })?; + let map = + omicron_zones.get_mut(&z.sled_id.into()).ok_or_else(|| { + // This error means that we found a row in inv_omicron_zone + // with no associated record in inv_sled_omicron_zones. + // This should be impossible and reflects either a bug or + // database corruption. + Error::internal_error(&format!( + "zone {:?}: unknown sled: {:?}", + z.id, z.sled_id + )) + })?; let zone_id = z.id; let zone = z .into_omicron_zone_config(nic_row) diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 49a156f81d..a8512d2362 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -393,8 +393,8 @@ mod test { use futures::stream; use futures::StreamExt; use nexus_config::RegionAllocationStrategy; - use nexus_db_model::Generation; use nexus_db_model::IpAttachState; + use nexus_db_model::{to_db_typed_uuid, Generation}; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params; use omicron_common::api::external::DataPageParams; @@ -403,6 +403,8 @@ mod test { }; use omicron_test_utils::dev; use omicron_uuid_kinds::CollectionUuid; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::SledUuid; use std::collections::HashMap; use std::collections::HashSet; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; @@ -605,7 +607,7 @@ mod test { } // Creates a test sled, returns its UUID. 
- async fn create_test_sled(datastore: &DataStore) -> Uuid { + async fn create_test_sled(datastore: &DataStore) -> SledUuid { let bogus_addr = SocketAddrV6::new( Ipv6Addr::new(0xfd00, 0, 0, 0, 0, 0, 0, 1), 8080, @@ -613,10 +615,10 @@ mod test { 0, ); let rack_id = Uuid::new_v4(); - let sled_id = Uuid::new_v4(); + let sled_id = SledUuid::new_v4(); let sled_update = SledUpdate::new( - sled_id, + sled_id.into_untyped_uuid(), bogus_addr, sled_baseboard_for_test(), sled_system_hardware_for_test(), @@ -641,7 +643,7 @@ mod test { async fn create_test_physical_disk( datastore: &DataStore, opctx: &OpContext, - sled_id: Uuid, + sled_id: SledUuid, kind: PhysicalDiskKind, serial: String, ) -> Uuid { @@ -651,7 +653,7 @@ mod test { serial, TEST_MODEL.into(), kind, - sled_id, + sled_id.into_untyped_uuid(), ); datastore .physical_disk_upsert(opctx, physical_disk.clone()) @@ -664,7 +666,7 @@ mod test { async fn create_test_zpool( datastore: &DataStore, opctx: &OpContext, - sled_id: Uuid, + sled_id: SledUuid, physical_disk_id: Uuid, ) -> Uuid { let zpool_id = create_test_zpool_not_in_inventory( @@ -686,11 +688,12 @@ mod test { async fn create_test_zpool_not_in_inventory( datastore: &DataStore, opctx: &OpContext, - sled_id: Uuid, + sled_id: SledUuid, physical_disk_id: Uuid, ) -> Uuid { let zpool_id = Uuid::new_v4(); - let zpool = Zpool::new(zpool_id, sled_id, physical_disk_id); + let zpool = + Zpool::new(zpool_id, sled_id.into_untyped_uuid(), physical_disk_id); datastore.zpool_upsert(opctx, zpool).await.unwrap(); zpool_id } @@ -700,7 +703,7 @@ mod test { async fn add_test_zpool_to_inventory( datastore: &DataStore, zpool_id: Uuid, - sled_id: Uuid, + sled_id: SledUuid, ) { use db::schema::inv_zpool::dsl; @@ -710,7 +713,7 @@ mod test { inv_collection_id: inv_collection_id.into(), time_collected, id: zpool_id, - sled_id, + sled_id: to_db_typed_uuid(sled_id), total_size: test_zpool_size().into(), }; diesel::insert_into(dsl::inv_zpool) @@ -748,12 +751,12 @@ mod test { ineligible: SledToDatasetMap, // A map from eligible dataset IDs to their corresponding sled IDs. - eligible_dataset_ids: HashMap, + eligible_dataset_ids: HashMap, ineligible_dataset_ids: HashMap, } // Map of sled IDs to dataset IDs. - type SledToDatasetMap = HashMap>; + type SledToDatasetMap = HashMap>; impl TestDatasets { async fn create( @@ -823,20 +826,20 @@ mod test { number_of_sleds: usize, ) -> SledToDatasetMap { // Create sleds... - let sled_ids: Vec = stream::iter(0..number_of_sleds) + let sled_ids: Vec = stream::iter(0..number_of_sleds) .then(|_| create_test_sled(&datastore)) .collect() .await; struct PhysicalDisk { - sled_id: Uuid, + sled_id: SledUuid, disk_id: Uuid, } // create 9 disks on each sled let physical_disks: Vec = stream::iter(sled_ids) .map(|sled_id| { - let sled_id_iter: Vec = + let sled_id_iter: Vec = (0..9).map(|_| sled_id).collect(); stream::iter(sled_id_iter).enumerate().then( |(i, sled_id)| { @@ -860,7 +863,7 @@ mod test { #[derive(Copy, Clone)] struct Zpool { - sled_id: Uuid, + sled_id: SledUuid, pool_id: Uuid, } @@ -1750,7 +1753,7 @@ mod test { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create a sled on which the service should exist. - let sled_id = create_test_sled(&datastore).await; + let sled_id = create_test_sled(&datastore).await.into_untyped_uuid(); // Create a few new service to exist on this sled. 
let service1_id = diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 667516fe23..7fb0ada639 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -737,6 +737,8 @@ mod test { use nexus_types::identity::Asset; use omicron_common::api::external; use omicron_test_utils::dev; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::SledUuid; use predicates::{prelude::*, BoxPredicate}; use std::net::{Ipv6Addr, SocketAddrV6}; @@ -891,7 +893,7 @@ mod test { sled_set_state( &opctx, &datastore, - observed_sled.id(), + SledUuid::from_untyped_uuid(observed_sled.id()), SledState::Decommissioned, ValidateTransition::No, Expected::Ok(SledState::Active), @@ -963,10 +965,16 @@ mod test { datastore.sled_upsert(test_new_sled_update()).await.unwrap(); let ineligible_sleds = IneligibleSleds { - non_provisionable: non_provisionable_sled.id(), - expunged: expunged_sled.id(), - decommissioned: decommissioned_sled.id(), - illegal_decommissioned: illegal_decommissioned_sled.id(), + non_provisionable: SledUuid::from_untyped_uuid( + non_provisionable_sled.id(), + ), + expunged: SledUuid::from_untyped_uuid(expunged_sled.id()), + decommissioned: SledUuid::from_untyped_uuid( + decommissioned_sled.id(), + ), + illegal_decommissioned: SledUuid::from_untyped_uuid( + illegal_decommissioned_sled.id(), + ), }; ineligible_sleds.setup(&opctx, &datastore).await.unwrap(); @@ -1104,7 +1112,7 @@ mod test { sled_set_policy( &opctx, &datastore, - sled_id, + SledUuid::from_untyped_uuid(sled_id), SledPolicy::Expunged, ValidateTransition::Yes, Expected::Ok(SledPolicy::provisionable()), @@ -1229,7 +1237,7 @@ mod test { test_sled_state_transitions_once( &opctx, &datastore, - sled_id, + SledUuid::from_untyped_uuid(sled_id), policy, state, after, @@ -1252,7 +1260,7 @@ mod test { async fn test_sled_state_transitions_once( opctx: &OpContext, datastore: &DataStore, - sled_id: Uuid, + sled_id: SledUuid, before_policy: SledPolicy, before_state: SledState, after: SledTransition, diff --git a/nexus/db-queries/src/db/datastore/test_utils.rs b/nexus/db-queries/src/db/datastore/test_utils.rs index 13b0a017e7..4678e07f47 100644 --- a/nexus/db-queries/src/db/datastore/test_utils.rs +++ b/nexus/db-queries/src/db/datastore/test_utils.rs @@ -19,6 +19,8 @@ use nexus_db_model::SledState; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use omicron_test_utils::dev::db::CockroachInstance; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use std::sync::Arc; use strum::EnumCount; use uuid::Uuid; @@ -49,16 +51,16 @@ pub(super) enum IneligibleSledKind { /// This is less error-prone than several places duplicating this logic. 
#[derive(Debug)] pub(super) struct IneligibleSleds { - pub(super) non_provisionable: Uuid, - pub(super) expunged: Uuid, - pub(super) decommissioned: Uuid, - pub(super) illegal_decommissioned: Uuid, + pub(super) non_provisionable: SledUuid, + pub(super) expunged: SledUuid, + pub(super) decommissioned: SledUuid, + pub(super) illegal_decommissioned: SledUuid, } impl IneligibleSleds { pub(super) fn iter( &self, - ) -> impl Iterator { + ) -> impl Iterator { [ (IneligibleSledKind::NonProvisionable, self.non_provisionable), (IneligibleSledKind::Expunged, self.expunged), @@ -207,7 +209,7 @@ impl IneligibleSleds { async fn undo_single( opctx: &OpContext, datastore: &DataStore, - sled_id: Uuid, + sled_id: SledUuid, kind: IneligibleSledKind, ) -> Result<()> { sled_set_policy( @@ -257,13 +259,13 @@ impl IneligibleSleds { pub(super) async fn sled_set_policy( opctx: &OpContext, datastore: &DataStore, - sled_id: Uuid, + sled_id: SledUuid, new_policy: SledPolicy, check: ValidateTransition, expected_old_policy: Expected, ) -> Result<()> { let (authz_sled, _) = LookupPath::new(&opctx, &datastore) - .sled_id(sled_id) + .sled_id(sled_id.into_untyped_uuid()) .fetch_for(authz::Action::Modify) .await .unwrap(); @@ -305,13 +307,13 @@ pub(super) async fn sled_set_policy( pub(super) async fn sled_set_state( opctx: &OpContext, datastore: &DataStore, - sled_id: Uuid, + sled_id: SledUuid, new_state: SledState, check: ValidateTransition, expected_old_state: Expected, ) -> Result<()> { let (authz_sled, _) = LookupPath::new(&opctx, &datastore) - .sled_id(sled_id) + .sled_id(sled_id.into_untyped_uuid()) .fetch_for(authz::Action::Modify) .await .unwrap(); diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index c290439d76..99f066ee42 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -1278,6 +1278,8 @@ mod tests { use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_test_utils::dev; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::SledUuid; use slog::info; use std::collections::BTreeMap; use std::net::IpAddr; @@ -1503,7 +1505,7 @@ mod tests { #[derive(Debug)] struct Harness { rack_id: Uuid, - sled_ids: Vec, + sled_ids: Vec, nexuses: Vec, } @@ -1518,7 +1520,7 @@ mod tests { impl Harness { fn new(num_sleds: usize) -> Self { let mut sled_ids = - (0..num_sleds).map(|_| Uuid::new_v4()).collect::>(); + (0..num_sleds).map(|_| SledUuid::new_v4()).collect::>(); sled_ids.sort(); let mut nexus_ips = NEXUS_OPTE_IPV4_SUBNET @@ -1540,7 +1542,7 @@ mod tests { fn db_sleds(&self) -> impl Iterator + '_ { self.sled_ids.iter().copied().map(|sled_id| { SledUpdate::new( - sled_id, + sled_id.into_untyped_uuid(), "[::1]:0".parse().unwrap(), sled_baseboard_for_test(), sled_system_hardware_for_test(), @@ -1558,7 +1560,7 @@ mod tests { self.sled_ids.iter().zip(&self.nexuses).map(|(sled_id, nexus)| { let service = db::model::Service::new( nexus.id, - *sled_id, + sled_id.into_untyped_uuid(), Some(nexus.id), "[::1]:0".parse().unwrap(), db::model::ServiceKind::Nexus, @@ -1637,7 +1639,7 @@ mod tests { .await .expect("failed to resolve to sleds") .into_iter() - .map(|sled| sled.id()) + .map(|sled| SledUuid::from_untyped_uuid(sled.id())) .collect::>(); service_sled_ids.sort(); service_sled_ids @@ -1825,7 +1827,7 @@ mod tests { .expect("failed to set blueprint target"); assert_eq!( &[harness.sled_ids[0], harness.sled_ids[1], harness.sled_ids[3]] - as &[Uuid], + 
as &[SledUuid], fetch_service_sled_ids().await ); diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 3ca2f5a2f1..bfa330669f 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -28,13 +28,14 @@ use nexus_types::inventory::ServiceProcessor; use nexus_types::inventory::SledAgent; use nexus_types::inventory::Zpool; use omicron_uuid_kinds::CollectionKind; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::hash::Hash; use std::sync::Arc; use thiserror::Error; use typed_rng::TypedUuidRng; -use uuid::Uuid; /// Describes an operational error encountered during the collection process /// @@ -87,8 +88,8 @@ pub struct CollectionBuilder { BTreeMap, CabooseFound>>, rot_pages_found: BTreeMap, RotPageFound>>, - sleds: BTreeMap, - omicron_zones: BTreeMap, + sleds: BTreeMap, + omicron_zones: BTreeMap, // We just generate one UUID for each collection. id_rng: TypedUuidRng, } @@ -430,7 +431,7 @@ impl CollectionBuilder { source: &str, inventory: sled_agent_client::types::Inventory, ) -> Result<(), anyhow::Error> { - let sled_id = inventory.sled_id; + let sled_id = SledUuid::from_untyped_uuid(inventory.sled_id); // Normalize the baseboard id, if any. use sled_agent_client::types::Baseboard; @@ -447,8 +448,7 @@ impl CollectionBuilder { } Baseboard::Unknown => { self.found_error(InventoryError::from(anyhow!( - "sled {:?}: reported unknown baseboard", - sled_id + "sled {sled_id}: reported unknown baseboard", ))); None } @@ -462,8 +462,7 @@ impl CollectionBuilder { Ok(addr) => addr, Err(error) => { self.found_error(InventoryError::from(anyhow!( - "sled {:?}: bad sled agent address: {:?}: {:#}", - sled_id, + "sled {sled_id}: bad sled agent address: {:?}: {:#}", inventory.sled_agent_address, error, ))); @@ -491,11 +490,8 @@ impl CollectionBuilder { if let Some(previous) = self.sleds.get(&sled_id) { Err(anyhow!( - "sled {:?}: reported sled multiple times \ - (previously {:?}, now {:?})", - sled_id, - previous, - sled, + "sled {sled_id}: reported sled multiple times \ + (previously {previous:?}, now {sled:?})", )) } else { self.sleds.insert(sled_id, sled); @@ -507,13 +503,12 @@ impl CollectionBuilder { pub fn found_sled_omicron_zones( &mut self, source: &str, - sled_id: Uuid, + sled_id: SledUuid, zones: sled_agent_client::types::OmicronZonesConfig, ) -> Result<(), anyhow::Error> { if let Some(previous) = self.omicron_zones.get(&sled_id) { Err(anyhow!( - "sled {:?} omicron zones: reported previously: {:?}", - sled_id, + "sled {sled_id} omicron zones: reported previously: {:?}", previous )) } else { diff --git a/nexus/inventory/src/collector.rs b/nexus/inventory/src/collector.rs index 7dbffc396c..48761479b0 100644 --- a/nexus/inventory/src/collector.rs +++ b/nexus/inventory/src/collector.rs @@ -15,6 +15,8 @@ use nexus_types::inventory::CabooseWhich; use nexus_types::inventory::Collection; use nexus_types::inventory::RotPage; use nexus_types::inventory::RotPageWhich; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use slog::o; use slog::{debug, error}; use std::sync::Arc; @@ -347,7 +349,7 @@ impl<'a> Collector<'a> { } }; - let sled_id = inventory.sled_id; + let sled_id = SledUuid::from_untyped_uuid(inventory.sled_id); self.in_progress.found_sled_inventory(&sled_agent_url, inventory)?; let maybe_config = diff --git a/nexus/inventory/src/examples.rs b/nexus/inventory/src/examples.rs index 8af81d957d..1a0c70f456 100644 --- 
a/nexus/inventory/src/examples.rs +++ b/nexus/inventory/src/examples.rs @@ -17,9 +17,10 @@ use nexus_types::inventory::OmicronZonesConfig; use nexus_types::inventory::RotPage; use nexus_types::inventory::RotPageWhich; use omicron_common::api::external::ByteCount; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use std::sync::Arc; use strum::IntoEnumIterator; -use uuid::Uuid; /// Returns an example Collection used for testing /// @@ -447,7 +448,19 @@ pub struct Representative { pub sleds: [Arc; 4], pub switch: Arc, pub psc: Arc, - pub sled_agents: [Uuid; 4], + pub sled_agents: [SledUuid; 4], +} + +impl Representative { + pub fn new( + builder: CollectionBuilder, + sleds: [Arc; 4], + switch: Arc, + psc: Arc, + sled_agents: [SledUuid; 4], + ) -> Self { + Self { builder, sleds, switch, psc, sled_agents } + } } /// Returns an SP state that can be used to populate a collection for testing @@ -487,7 +500,7 @@ pub fn rot_page(unique: &str) -> RotPage { } pub fn sled_agent( - sled_id: Uuid, + sled_id: SledUuid, baseboard: sled_agent_client::types::Baseboard, sled_role: sled_agent_client::types::SledRole, disks: Vec, @@ -498,7 +511,7 @@ pub fn sled_agent( reservoir_size: ByteCount::from(1024), sled_role, sled_agent_address: "[::1]:56792".parse().unwrap(), - sled_id, + sled_id: sled_id.into_untyped_uuid(), usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), disks, diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index d83ebfc4d6..f660e1d845 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -150,6 +150,7 @@ mod tests { use nexus_db_model::SledUpdate; use nexus_db_model::Zpool; use nexus_test_utils_macros::nexus_test; + use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::ZpoolUuid; use sled_agent_client::types::OmicronZoneDataset; use uuid::Uuid; @@ -178,7 +179,7 @@ mod tests { let rack_id = Uuid::new_v4(); for (&sled_id, config) in &collection.omicron_zones { let sled = SledUpdate::new( - sled_id, + sled_id.into_untyped_uuid(), "[::1]:0".parse().unwrap(), SledBaseboard { serial_number: format!("test-{sled_id}"), @@ -205,7 +206,7 @@ mod tests { dataset.pool_name.parse().expect("invalid zpool name"); let zpool = Zpool::new( zpool_name.id().into_untyped_uuid(), - sled_id, + sled_id.into_untyped_uuid(), Uuid::new_v4(), // physical_disk_id ); datastore @@ -274,7 +275,7 @@ mod tests { for &sled_id in collection.omicron_zones.keys().take(1) { let zpool = Zpool::new( new_zpool_id.into_untyped_uuid(), - sled_id, + sled_id.into_untyped_uuid(), Uuid::new_v4(), // physical_disk_id ); datastore diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index ebe32ff10b..5a2321781d 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -28,19 +28,20 @@ use omicron_common::api::external::Generation; use omicron_common::api::external::InternalContext; use omicron_common::api::external::Name; use omicron_common::bail_unless; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use slog::{debug, info, o}; use std::collections::BTreeMap; use std::collections::HashMap; use std::net::IpAddr; use std::net::SocketAddrV6; -use uuid::Uuid; pub(crate) async fn deploy_dns( opctx: &OpContext, datastore: &DataStore, creator: String, blueprint: &Blueprint, - sleds_by_id: &BTreeMap, + sleds_by_id: &BTreeMap, overrides: &Overridables, ) -> 
Result<(), Error> { // First, fetch the current DNS configs. @@ -255,7 +256,7 @@ pub(crate) async fn deploy_dns_one( /// Returns the expected contents of internal DNS based on the given blueprint pub fn blueprint_internal_dns_config( blueprint: &Blueprint, - sleds_by_id: &BTreeMap, + sleds_by_id: &BTreeMap, overrides: &Overridables, ) -> Result { // The DNS names configured here should match what RSS configures for the @@ -353,7 +354,7 @@ pub fn blueprint_internal_dns_config( // unwrap(): see above. dns_builder .host_zone_switch( - scrimlet.id, + scrimlet.id.into_untyped_uuid(), switch_zone_ip, overrides.dendrite_port(scrimlet.id), overrides.mgs_port(scrimlet.id), @@ -529,9 +530,6 @@ mod test { use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev::test_setup_log; - use omicron_uuid_kinds::GenericUuid; - use omicron_uuid_kinds::SledUuid; - use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; use std::net::IpAddr; @@ -625,10 +623,7 @@ mod test { &collection, initial_dns_generation, Generation::new(), - policy_sleds.keys().map(|sled_id| { - // TODO-cleanup use `TypedUuid` everywhere - SledUuid::from_untyped_uuid(*sled_id) - }), + policy_sleds.keys().copied(), "test-suite", ) .expect("failed to build initial blueprint"); diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs index f08de22c22..dc7c4593b3 100644 --- a/nexus/reconfigurator/execution/src/lib.rs +++ b/nexus/reconfigurator/execution/src/lib.rs @@ -14,12 +14,13 @@ use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::identity::Asset; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use overridables::Overridables; use slog::info; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::net::SocketAddrV6; -use uuid::Uuid; mod datasets; mod dns; @@ -33,14 +34,14 @@ pub use dns::blueprint_nexus_external_ips; pub use dns::silo_dns_name; pub struct Sled { - id: Uuid, + id: SledUuid, sled_agent_address: SocketAddrV6, is_scrimlet: bool, } impl Sled { pub fn new( - id: Uuid, + id: SledUuid, sled_agent_address: SocketAddrV6, is_scrimlet: bool, ) -> Sled { @@ -55,7 +56,7 @@ impl Sled { impl From for Sled { fn from(value: nexus_db_model::Sled) -> Self { Sled { - id: value.id(), + id: SledUuid::from_untyped_uuid(value.id()), sled_agent_address: value.address(), is_scrimlet: value.is_scrimlet(), } @@ -117,13 +118,15 @@ where .await .map_err(|err| vec![err])?; - let sleds_by_id: BTreeMap = datastore + let sleds_by_id: BTreeMap = datastore .sled_list_all_batched(&opctx) .await .context("listing all sleds") .map_err(|e| vec![e])? 
.into_iter() - .map(|db_sled| (db_sled.id(), Sled::from(db_sled))) + .map(|db_sled| { + (SledUuid::from_untyped_uuid(db_sled.id()), Sled::from(db_sled)) + }) .collect(); omicron_zones::deploy_zones( &opctx, diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs index a93c9391ca..bcbf876e81 100644 --- a/nexus/reconfigurator/execution/src/omicron_zones.rs +++ b/nexus/reconfigurator/execution/src/omicron_zones.rs @@ -12,6 +12,8 @@ use futures::StreamExt; use nexus_db_queries::context::OpContext; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZonesConfig; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use slog::info; use slog::warn; use std::collections::BTreeMap; @@ -21,12 +23,14 @@ use uuid::Uuid; /// corresponding sleds pub(crate) async fn deploy_zones( opctx: &OpContext, - sleds_by_id: &BTreeMap, + sleds_by_id: &BTreeMap, zones: &BTreeMap, ) -> Result<(), Vec> { let errors: Vec<_> = stream::iter(zones) .filter_map(|(sled_id, config)| async move { - let db_sled = match sleds_by_id.get(sled_id) { + let db_sled = match sleds_by_id + .get(&SledUuid::from_untyped_uuid(*sled_id)) + { Some(sled) => sled, None => { let err = anyhow!("sled not found in db list: {}", sled_id); @@ -36,7 +40,7 @@ pub(crate) async fn deploy_zones( }; let client = nexus_networking::sled_client_from_address( - *sled_id, + sled_id.into_untyped_uuid(), db_sled.sled_agent_address, &opctx.log, ); @@ -94,6 +98,7 @@ mod test { OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, }; use omicron_common::api::external::Generation; + use omicron_uuid_kinds::{GenericUuid, SledUuid}; use std::collections::BTreeMap; use std::net::SocketAddr; use uuid::Uuid; @@ -102,7 +107,7 @@ mod test { nexus_test_utils::ControlPlaneTestContext; fn create_blueprint( - blueprint_zones: BTreeMap, + blueprint_zones: BTreeMap, ) -> (BlueprintTarget, Blueprint) { let id = Uuid::new_v4(); ( @@ -113,7 +118,10 @@ mod test { }, Blueprint { id, - blueprint_zones, + blueprint_zones: blueprint_zones + .into_iter() + .map(|(typed_id, z)| (typed_id.into_untyped_uuid(), z)) + .collect(), parent_blueprint_id: None, internal_dns_version: Generation::new(), external_dns_version: Generation::new(), @@ -137,9 +145,9 @@ mod test { // sleds to CRDB. 
let mut s1 = httptest::Server::run(); let mut s2 = httptest::Server::run(); - let sled_id1 = Uuid::new_v4(); - let sled_id2 = Uuid::new_v4(); - let sleds_by_id: BTreeMap = + let sled_id1 = SledUuid::new_v4(); + let sled_id2 = SledUuid::new_v4(); + let sleds_by_id: BTreeMap = [(sled_id1, &s1), (sled_id2, &s2)] .into_iter() .map(|(sled_id, server)| { diff --git a/nexus/reconfigurator/execution/src/overridables.rs b/nexus/reconfigurator/execution/src/overridables.rs index 5c4ce7dc6f..f59e3228f4 100644 --- a/nexus/reconfigurator/execution/src/overridables.rs +++ b/nexus/reconfigurator/execution/src/overridables.rs @@ -8,9 +8,9 @@ use omicron_common::address::DENDRITE_PORT; use omicron_common::address::MGD_PORT; use omicron_common::address::MGS_PORT; use omicron_common::address::SLED_PREFIX; +use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use std::net::Ipv6Addr; -use uuid::Uuid; /// Override values used during blueprint execution /// @@ -23,59 +23,59 @@ use uuid::Uuid; #[derive(Debug, Default)] pub struct Overridables { /// map: sled id -> TCP port on which that sled's Dendrite is listening - pub dendrite_ports: BTreeMap, + pub dendrite_ports: BTreeMap, /// map: sled id -> TCP port on which that sled's MGS is listening - pub mgs_ports: BTreeMap, + pub mgs_ports: BTreeMap, /// map: sled id -> TCP port on which that sled's MGD is listening - pub mgd_ports: BTreeMap, + pub mgd_ports: BTreeMap, /// map: sled id -> IP address of the sled's switch zone - pub switch_zone_ips: BTreeMap, + pub switch_zone_ips: BTreeMap, } impl Overridables { /// Specify the TCP port on which this sled's Dendrite is listening #[cfg(test)] - fn override_dendrite_port(&mut self, sled_id: Uuid, port: u16) { + fn override_dendrite_port(&mut self, sled_id: SledUuid, port: u16) { self.dendrite_ports.insert(sled_id, port); } /// Returns the TCP port on which this sled's Dendrite is listening - pub fn dendrite_port(&self, sled_id: Uuid) -> u16 { + pub fn dendrite_port(&self, sled_id: SledUuid) -> u16 { self.dendrite_ports.get(&sled_id).copied().unwrap_or(DENDRITE_PORT) } /// Specify the TCP port on which this sled's MGS is listening #[cfg(test)] - fn override_mgs_port(&mut self, sled_id: Uuid, port: u16) { + fn override_mgs_port(&mut self, sled_id: SledUuid, port: u16) { self.mgs_ports.insert(sled_id, port); } /// Returns the TCP port on which this sled's MGS is listening - pub fn mgs_port(&self, sled_id: Uuid) -> u16 { + pub fn mgs_port(&self, sled_id: SledUuid) -> u16 { self.mgs_ports.get(&sled_id).copied().unwrap_or(MGS_PORT) } /// Specify the TCP port on which this sled's MGD is listening #[cfg(test)] - fn override_mgd_port(&mut self, sled_id: Uuid, port: u16) { + fn override_mgd_port(&mut self, sled_id: SledUuid, port: u16) { self.mgd_ports.insert(sled_id, port); } /// Returns the TCP port on which this sled's MGD is listening - pub fn mgd_port(&self, sled_id: Uuid) -> u16 { + pub fn mgd_port(&self, sled_id: SledUuid) -> u16 { self.mgd_ports.get(&sled_id).copied().unwrap_or(MGD_PORT) } /// Specify the IP address of this switch zone #[cfg(test)] - fn override_switch_zone_ip(&mut self, sled_id: Uuid, addr: Ipv6Addr) { + fn override_switch_zone_ip(&mut self, sled_id: SledUuid, addr: Ipv6Addr) { self.switch_zone_ips.insert(sled_id, addr); } /// Returns the IP address of this sled's switch zone pub fn switch_zone_ip( &self, - sled_id: Uuid, + sled_id: SledUuid, sled_subnet: Ipv6Subnet, ) -> Ipv6Addr { self.switch_zone_ips diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs 
b/nexus/reconfigurator/planning/src/blueprint_builder.rs index 3fcc54fd04..2d5ef8bee5 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -194,7 +194,7 @@ impl<'a> BlueprintBuilder<'a> { .map(|sled_id| { let zones = collection .omicron_zones - .get(sled_id.as_untyped_uuid()) + .get(&sled_id) .map(|z| &z.zones) .ok_or_else(|| { // We should not find a sled that's supposed to be @@ -1125,7 +1125,7 @@ pub mod test { .omicron_zones .keys() .next() - .map(|sled_id| SledUuid::from_untyped_uuid(*sled_id)) + .copied() .expect("no sleds present"), 1, ) @@ -1165,10 +1165,7 @@ pub mod test { break; } } - let sled_id = - selected_sled_id.expect("found no sleds with Nexus zone"); - // TODO-cleanup use `TypedUuid` everywhere - SledUuid::from_untyped_uuid(sled_id) + selected_sled_id.expect("found no sleds with Nexus zone") }; let parent = BlueprintBuilder::build_initial_from_collection_seeded( diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index cf1b7e79b5..2878ed16f6 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -60,12 +60,11 @@ impl ExampleSystem { // For each sled, have it report 0 zones in the initial inventory. // This will enable us to build a blueprint from the initial // inventory, which we can then use to build new blueprints. - for sled_id in &sled_ids { + for &sled_id in &sled_ids { inventory_builder .found_sled_omicron_zones( "fake sled agent", - // TODO-cleanup use `TypedUuid` everywhere - sled_id.into_untyped_uuid(), + sled_id, OmicronZonesConfig { generation: Generation::new(), zones: vec![], @@ -154,8 +153,7 @@ impl ExampleSystem { builder .found_sled_omicron_zones( "fake sled agent", - // TODO-cleanup use `TypedUuid` everywhere - sled_id.into_untyped_uuid(), + sled_id, zones.to_omicron_zones_config( BlueprintZoneFilter::ShouldBeRunning, ), diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 9430d0d10d..25d49f4802 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -14,7 +14,6 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; -use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledUuid; use slog::{info, warn, Logger}; use std::collections::BTreeMap; @@ -136,7 +135,7 @@ impl<'a> Planner<'a> { let has_ntp_inventory = self .inventory .omicron_zones - .get(sled_id.as_untyped_uuid()) + .get(&sled_id) .map(|sled_zones| { sled_zones.zones.zones.iter().any(|z| z.zone_type.is_ntp()) }) @@ -459,16 +458,13 @@ mod test { assert!(collection .omicron_zones .insert( - // TODO-cleanup use `TypedUuid` everywhere - new_sled_id.into_untyped_uuid(), + new_sled_id, OmicronZonesFound { time_collected: now_db_precision(), source: String::from("test suite"), - // TODO-cleanup use `TypedUuid` everywhere - sled_id: new_sled_id.into_untyped_uuid(), + sled_id: new_sled_id, zones: blueprint4 .blueprint_zones - // TODO-cleanup use `TypedUuid` everywhere .get(new_sled_id.as_untyped_uuid()) .expect("blueprint should contain zones for new sled") .to_omicron_zones_config( @@ -563,13 +559,8 @@ mod test { let keep_sled_id = builder.sleds().keys().next().copied().expect("no sleds"); builder.sleds_mut().retain(|&k, _v| keep_sled_id == k); - // TODO-cleanup use `TypedUuid` everywhere 
- collection - .sled_agents - .retain(|&k, _v| *keep_sled_id.as_untyped_uuid() == k); - collection - .omicron_zones - .retain(|&k, _v| *keep_sled_id.as_untyped_uuid() == k); + collection.sled_agents.retain(|&k, _v| keep_sled_id == k); + collection.omicron_zones.retain(|&k, _v| keep_sled_id == k); assert_eq!(collection.sled_agents.len(), 1); assert_eq!(collection.omicron_zones.len(), 1); diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/sync_service_zone_nat.rs index e23621ed23..59be7db5f2 100644 --- a/nexus/src/app/background/sync_service_zone_nat.rs +++ b/nexus/src/app/background/sync_service_zone_nat.rs @@ -20,6 +20,7 @@ use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::DataStore; use omicron_common::address::{MAX_PORT, MIN_PORT}; use omicron_common::api::external; +use omicron_uuid_kinds::GenericUuid; use serde_json::json; use sled_agent_client::types::OmicronZoneType; use std::net::{IpAddr, SocketAddr}; @@ -107,7 +108,7 @@ impl BackgroundTask for ServiceZoneNatTracker { for (sled_id, zones_found) in collection.omicron_zones { let (_, sled) = match LookupPath::new(opctx, &self.datastore) - .sled_id(sled_id) + .sled_id(sled_id.into_untyped_uuid()) .fetch() .await .context("failed to look up sled") diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index bae605c2e3..e74ad7f873 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -228,7 +228,7 @@ impl Blueprint { generation: zones_found.zones.generation, zones, }; - (SledUuid::from_untyped_uuid(*sled_id), zones) + (*sled_id, zones) }) .collect(); diff --git a/nexus/types/src/inventory.rs b/nexus/types/src/inventory.rs index 9511a20b75..1b1b42a8f5 100644 --- a/nexus/types/src/inventory.rs +++ b/nexus/types/src/inventory.rs @@ -22,6 +22,7 @@ pub use omicron_common::api::internal::shared::NetworkInterface; pub use omicron_common::api::internal::shared::NetworkInterfaceKind; pub use omicron_common::api::internal::shared::SourceNatConfig; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use serde::{Deserialize, Serialize}; use serde_with::serde_as; @@ -36,7 +37,6 @@ use std::collections::BTreeSet; use std::net::SocketAddrV6; use std::sync::Arc; use strum::EnumIter; -use uuid::Uuid; /// Results of collecting hardware/software inventory from various Omicron /// components @@ -111,10 +111,10 @@ pub struct Collection { BTreeMap, RotPageFound>>, /// Sled Agent information, by *sled* id - pub sled_agents: BTreeMap, + pub sled_agents: BTreeMap, /// Omicron zones found, by *sled* id - pub omicron_zones: BTreeMap, + pub omicron_zones: BTreeMap, } impl Collection { @@ -146,7 +146,7 @@ impl Collection { } /// Iterate over the sled ids of sleds identified as Scrimlets - pub fn scrimlets(&self) -> impl Iterator + '_ { + pub fn scrimlets(&self) -> impl Iterator + '_ { self.sled_agents .iter() .filter(|(_, inventory)| inventory.sled_role == SledRole::Scrimlet) @@ -396,7 +396,7 @@ impl Zpool { pub struct SledAgent { pub time_collected: DateTime, pub source: String, - pub sled_id: Uuid, + pub sled_id: SledUuid, pub baseboard_id: Option>, pub sled_agent_address: SocketAddrV6, pub sled_role: SledRole, @@ -411,6 +411,6 @@ pub struct SledAgent { pub struct OmicronZonesFound { pub time_collected: DateTime, pub source: String, - pub sled_id: Uuid, + pub sled_id: SledUuid, pub zones: OmicronZonesConfig, } From 94a15560b65d300f584df3dcd6859df8c5d94fc3 Mon Sep 17 00:00:00 2001 From: 
"oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 13 Apr 2024 21:05:11 +0000 Subject: [PATCH 135/334] chore(deps): update taiki-e/install-action digest to 99774fe (#5527) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`dd9c3a3` -> `99774fe`](https://togithub.com/taiki-e/install-action/compare/dd9c3a3...99774fe) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 68e892f232..3d76541d07 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@dd9c3a30915700b12ff7aa4d9e2492417156fde1 # v2 + uses: taiki-e/install-action@99774fec7fd4f75144bd0134a24a992297768308 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 11021ab09a6ca67c5fe796ba6b50ad5a81106334 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Sat, 13 Apr 2024 18:56:41 -0700 Subject: [PATCH 136/334] Nexus/Reconfigurator: Automatically adopt disks, deploy them to sleds via blueprints (#5506) Automatically adopt disks and deploy them to sleds using blueprints - Queries for physical disk info during reconfigurator planning phase - Adds "physical disks" to blueprint, in-memory as well as the database schema - Blueprint planning now ensures that in-service physical disks appear in the blueprint - Blueprint execution sends a request to sled agents via `omicron-physical-disks PUT` - A background task has been added to automatically adopt new physical disks as control plane objects, and to insert them into the database - "Physical disk upsert" has largely been changed to "Physical disk insert", to avoid potential overwriting issues. "Zpool upsert" has also been updated to "Zpool insert". - The physical disk "vendor/serial/model" uniqueness constraint has been removed for decommissioned disks. This will provide a pathway to eventually re-provisioning deleted disks, if an operator asks for it. 
Fixes https://github.com/oxidecomputer/omicron/issues/5503 , https://github.com/oxidecomputer/omicron/issues/5502 --- Cargo.lock | 1 + clients/sled-agent-client/src/lib.rs | 2 + dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 11 + dev-tools/reconfigurator-cli/src/main.rs | 5 +- .../tests/output/cmd-stdout | 30 +- nexus-config/src/nexus_config.rs | 22 + nexus/db-model/src/deployment.rs | 80 ++- nexus/db-model/src/schema.rs | 25 + nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-queries/src/db/datastore/dataset.rs | 2 +- .../db-queries/src/db/datastore/deployment.rs | 232 +++++++- nexus/db-queries/src/db/datastore/mod.rs | 4 +- .../src/db/datastore/physical_disk.rs | 549 +++++++++++++----- nexus/db-queries/src/db/datastore/rack.rs | 22 +- nexus/db-queries/src/db/datastore/sled.rs | 4 +- nexus/db-queries/src/db/datastore/vpc.rs | 5 + nexus/db-queries/src/db/datastore/zpool.rs | 16 +- nexus/examples/config.toml | 1 + .../reconfigurator/execution/src/datasets.rs | 4 +- nexus/reconfigurator/execution/src/dns.rs | 27 +- nexus/reconfigurator/execution/src/lib.rs | 9 + .../execution/src/omicron_physical_disks.rs | 349 +++++++++++ .../execution/src/omicron_zones.rs | 1 + nexus/reconfigurator/planning/Cargo.toml | 1 + .../planning/src/blueprint_builder.rs | 268 ++++++++- nexus/reconfigurator/planning/src/example.rs | 4 +- nexus/reconfigurator/planning/src/planner.rs | 26 +- nexus/reconfigurator/planning/src/system.rs | 46 +- .../output/planner_nonprovisionable_bp2.txt | 10 +- nexus/reconfigurator/preparation/src/lib.rs | 43 +- .../src/app/background/blueprint_execution.rs | 18 +- nexus/src/app/background/blueprint_load.rs | 1 + nexus/src/app/background/init.rs | 26 +- .../app/background/inventory_collection.rs | 15 +- nexus/src/app/background/mod.rs | 1 + .../app/background/physical_disk_adoption.rs | 131 +++++ nexus/src/app/sled.rs | 4 +- nexus/test-utils/src/lib.rs | 5 + nexus/tests/config.test.toml | 3 + nexus/tests/integration_tests/sleds.rs | 8 +- nexus/types/src/deployment.rs | 14 + nexus/types/src/deployment/planning_input.rs | 61 +- openapi/nexus-internal.json | 77 +++ schema/crdb/blueprint-physical-disk/up1.sql | 10 + schema/crdb/blueprint-physical-disk/up2.sql | 20 + schema/crdb/blueprint-physical-disk/up3.sql | 1 + schema/crdb/blueprint-physical-disk/up4.sql | 3 + schema/crdb/blueprint-physical-disk/up5.sql | 1 + schema/crdb/blueprint-physical-disk/up6.sql | 4 + schema/crdb/dbinit.sql | 48 +- sled-agent/src/rack_setup/service.rs | 34 +- smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + uuid-kinds/src/lib.rs | 1 + 55 files changed, 2009 insertions(+), 293 deletions(-) create mode 100644 nexus/reconfigurator/execution/src/omicron_physical_disks.rs create mode 100644 nexus/src/app/background/physical_disk_adoption.rs create mode 100644 schema/crdb/blueprint-physical-disk/up1.sql create mode 100644 schema/crdb/blueprint-physical-disk/up2.sql create mode 100644 schema/crdb/blueprint-physical-disk/up3.sql create mode 100644 schema/crdb/blueprint-physical-disk/up4.sql create mode 100644 schema/crdb/blueprint-physical-disk/up5.sql create mode 100644 schema/crdb/blueprint-physical-disk/up6.sql diff --git a/Cargo.lock b/Cargo.lock index fcb2a29c81..40c3906898 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4782,6 +4782,7 @@ dependencies = [ "chrono", "expectorate", "gateway-client", + "illumos-utils", "indexmap 2.2.6", "internal-dns", "ipnet", diff --git a/clients/sled-agent-client/src/lib.rs 
b/clients/sled-agent-client/src/lib.rs index ccb669af4c..1986280125 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -54,6 +54,8 @@ progenitor::generate_api!( // We cannot easily configure progenitor to derive `Eq` on all the client- // generated types because some have floats and other types that can't impl // `Eq`. We impl it explicitly for a few types on which we need it. +impl Eq for types::OmicronPhysicalDiskConfig {} +impl Eq for types::OmicronPhysicalDisksConfig {} impl Eq for types::OmicronZonesConfig {} impl Eq for types::OmicronZoneConfig {} impl Eq for types::OmicronZoneType {} diff --git a/dev-tools/omdb/tests/env.out b/dev-tools/omdb/tests/env.out index 0e0a198f34..66da2e053a 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -88,6 +88,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "physical_disk_adoption" + ensure new physical disks are automatically marked in-service + + task: "region_replacement" detects if a region requires replacing and begins the process @@ -186,6 +190,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "physical_disk_adoption" + ensure new physical disks are automatically marked in-service + + task: "region_replacement" detects if a region requires replacing and begins the process @@ -271,6 +279,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "physical_disk_adoption" + ensure new physical disks are automatically marked in-service + + task: "region_replacement" detects if a region requires replacing and begins the process diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index ff19bbb9a7..481df1c6e6 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -293,6 +293,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "physical_disk_adoption" + ensure new physical disks are automatically marked in-service + + task: "region_replacement" detects if a region requires replacing and begins the process @@ -438,6 +442,13 @@ task: "phantom_disks" number of phantom disks deleted: 0 number of phantom disk delete errors: 0 +task: "physical_disk_adoption" + configured period: every 30s + currently executing: no + last completed activation: iter 3, triggered by a dependent task completing + started at (s ago) and ran for ms + last completion reported error: task disabled + task: "region_replacement" configured period: every 30s currently executing: no diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 0bd2d21b74..28e757af93 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -631,8 +631,9 @@ fn cmd_sled_show( swriteln!(s, "sled {}", sled_id); swriteln!(s, "subnet {}", sled_resources.subnet.net()); swriteln!(s, "zpools ({}):", sled_resources.zpools.len()); - for z in &sled_resources.zpools { - swriteln!(s, " {:?}", z); + for (zpool, disk) in &sled_resources.zpools { + swriteln!(s, " {:?}", zpool); + swriteln!(s, " ↳ {:?}", disk); } Ok(Some(s)) } diff --git a/dev-tools/reconfigurator-cli/tests/output/cmd-stdout b/dev-tools/reconfigurator-cli/tests/output/cmd-stdout index 7bedb54bf9..273e847a86 100644 --- a/dev-tools/reconfigurator-cli/tests/output/cmd-stdout +++ b/dev-tools/reconfigurator-cli/tests/output/cmd-stdout @@ -23,16 +23,26 @@ ID NZPOOLS SUBNET sled ..................... 
subnet fd00:1122:3344:101::/64 zpools (10): - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") - ZpoolName("oxp_.....................") + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } + ..................... (zpool) + ↳ SledDisk { disk_identity: DiskIdentity { vendor: "fake-vendor", serial: "serial-.....................", model: "fake-model" }, disk_id: ..................... (physical_disk), policy: InService, state: Active } > sled-add ..................... 
diff --git a/nexus-config/src/nexus_config.rs b/nexus-config/src/nexus_config.rs index 93f7bcccdb..540a347150 100644 --- a/nexus-config/src/nexus_config.rs +++ b/nexus-config/src/nexus_config.rs @@ -361,6 +361,8 @@ pub struct BackgroundTaskConfig { pub nat_cleanup: NatCleanupConfig, /// configuration for inventory tasks pub inventory: InventoryConfig, + /// configuration for physical disk adoption tasks + pub physical_disk_adoption: PhysicalDiskAdoptionConfig, /// configuration for phantom disks task pub phantom_disks: PhantomDiskConfig, /// configuration for blueprint related tasks @@ -416,6 +418,20 @@ pub struct ExternalEndpointsConfig { // allow/disallow wildcard certs, don't serve expired certs, etc.) } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct PhysicalDiskAdoptionConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, + + /// A toggle to disable automated disk adoption. + /// + /// Default: Off + #[serde(default)] + pub disable: bool, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct NatCleanupConfig { @@ -732,6 +748,7 @@ mod test { inventory.period_secs = 10 inventory.nkeep = 11 inventory.disable = false + physical_disk_adoption.period_secs = 30 phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 @@ -845,6 +862,10 @@ mod test { nkeep: 11, disable: false, }, + physical_disk_adoption: PhysicalDiskAdoptionConfig { + period_secs: Duration::from_secs(30), + disable: false, + }, phantom_disks: PhantomDiskConfig { period_secs: Duration::from_secs(30), }, @@ -921,6 +942,7 @@ mod test { inventory.period_secs = 10 inventory.nkeep = 3 inventory.disable = false + physical_disk_adoption.period_secs = 30 phantom_disks.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index 90ab517244..1046da18f6 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -8,8 +8,8 @@ use crate::inventory::ZoneType; use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic}; use crate::schema::{ - blueprint, bp_omicron_zone, bp_omicron_zone_nic, bp_sled_omicron_zones, - bp_target, + blueprint, bp_omicron_physical_disk, bp_omicron_zone, bp_omicron_zone_nic, + bp_sled_omicron_physical_disks, bp_sled_omicron_zones, bp_target, }; use crate::typed_uuid::DbTypedUuid; use crate::{ @@ -17,13 +17,18 @@ use crate::{ }; use chrono::{DateTime, Utc}; use ipnetwork::IpNetwork; +use nexus_types::deployment::BlueprintPhysicalDiskConfig; +use nexus_types::deployment::BlueprintPhysicalDisksConfig; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZonesConfig; use omicron_common::api::internal::shared::NetworkInterface; +use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; use uuid::Uuid; /// See [`nexus_types::deployment::Blueprint`]. @@ -98,6 +103,76 @@ impl From for nexus_types::deployment::BlueprintTarget { } } +/// See [`nexus_types::deployment::BlueprintPhysicalDisksConfig`]. 
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = bp_sled_omicron_physical_disks)] +pub struct BpSledOmicronPhysicalDisks { + pub blueprint_id: Uuid, + pub sled_id: Uuid, + pub generation: Generation, +} + +impl BpSledOmicronPhysicalDisks { + pub fn new( + blueprint_id: Uuid, + sled_id: Uuid, + disks_config: &BlueprintPhysicalDisksConfig, + ) -> Self { + Self { + blueprint_id, + sled_id, + generation: Generation(disks_config.generation), + } + } +} + +/// See [`nexus_types::deployment::BlueprintPhysicalDiskConfig`]. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = bp_omicron_physical_disk)] +pub struct BpOmicronPhysicalDisk { + pub blueprint_id: Uuid, + pub sled_id: Uuid, + + pub vendor: String, + pub serial: String, + pub model: String, + + pub id: Uuid, + pub pool_id: Uuid, +} + +impl BpOmicronPhysicalDisk { + pub fn new( + blueprint_id: Uuid, + sled_id: Uuid, + disk_config: &BlueprintPhysicalDiskConfig, + ) -> Self { + Self { + blueprint_id, + sled_id, + vendor: disk_config.identity.vendor.clone(), + serial: disk_config.identity.serial.clone(), + model: disk_config.identity.model.clone(), + id: disk_config.id, + pool_id: disk_config.pool_id.into_untyped_uuid(), + } + } +} + +impl From for BlueprintPhysicalDiskConfig { + fn from(disk: BpOmicronPhysicalDisk) -> Self { + Self { + identity: DiskIdentity { + vendor: disk.vendor, + serial: disk.serial, + model: disk.model, + }, + id: disk.id, + pool_id: ZpoolUuid::from_untyped_uuid(disk.pool_id), + } + } +} + /// See [`nexus_types::deployment::OmicronZonesConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = bp_sled_omicron_zones)] @@ -120,7 +195,6 @@ impl BpSledOmicronZones { } } } - /// See [`nexus_types::deployment::OmicronZoneConfig`]. #[derive(Queryable, Clone, Debug, Selectable, Insertable)] #[diesel(table_name = bp_omicron_zone)] diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 64ddca2c34..b02a8677d4 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -1473,6 +1473,29 @@ table! { } } +table! { + bp_sled_omicron_physical_disks (blueprint_id, sled_id) { + blueprint_id -> Uuid, + sled_id -> Uuid, + + generation -> Int8, + } +} + +table! { + bp_omicron_physical_disk (blueprint_id, id) { + blueprint_id -> Uuid, + sled_id -> Uuid, + + vendor -> Text, + serial -> Text, + model -> Text, + + id -> Uuid, + pool_id -> Uuid, + } +} + table! { bp_sled_omicron_zones (blueprint_id, sled_id) { blueprint_id -> Uuid, @@ -1655,8 +1678,10 @@ allow_tables_to_appear_in_same_query!( metric_producer, network_interface, instance_network_interface, + inv_physical_disk, service_network_interface, oximeter, + physical_disk, project, rack, region, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 362333c442..ad43cf77c5 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(51, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(52, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(52, "blueprint-physical-disk"), KnownVersion::new(51, "blueprint-disposition-column"), KnownVersion::new(50, "add-lookup-disk-by-volume-id-index"), KnownVersion::new(49, "physical-disk-state-and-policy"), diff --git a/nexus/db-queries/src/db/datastore/dataset.rs b/nexus/db-queries/src/db/datastore/dataset.rs index bfc4d61926..3617f6d7fc 100644 --- a/nexus/db-queries/src/db/datastore/dataset.rs +++ b/nexus/db-queries/src/db/datastore/dataset.rs @@ -231,7 +231,7 @@ mod test { let zpool_id = Uuid::new_v4(); let zpool = Zpool::new(zpool_id, sled_id, Uuid::new_v4()); datastore - .zpool_upsert(opctx, zpool) + .zpool_insert(opctx, zpool) .await .expect("failed to upsert zpool"); diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index ed1131550f..fa6673842a 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -35,12 +35,15 @@ use diesel::OptionalExtension; use diesel::QueryDsl; use diesel::RunQueryDsl; use nexus_db_model::Blueprint as DbBlueprint; +use nexus_db_model::BpOmicronPhysicalDisk; use nexus_db_model::BpOmicronZone; use nexus_db_model::BpOmicronZoneNic; +use nexus_db_model::BpSledOmicronPhysicalDisks; use nexus_db_model::BpSledOmicronZones; use nexus_db_model::BpTarget; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; +use nexus_types::deployment::BlueprintPhysicalDisksConfig; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZonesConfig; use omicron_common::api::external::DataPageParams; @@ -106,6 +109,30 @@ impl DataStore { let row_blueprint = DbBlueprint::from(blueprint); let blueprint_id = row_blueprint.id; + let sled_omicron_physical_disks = blueprint + .blueprint_disks + .iter() + .map(|(sled_id, disks_config)| { + BpSledOmicronPhysicalDisks::new( + blueprint_id, + sled_id.into_untyped_uuid(), + disks_config, + ) + }) + .collect::>(); + let omicron_physical_disks = blueprint + .blueprint_disks + .iter() + .flat_map(|(sled_id, disks_config)| { + disks_config.disks.iter().map(move |disk| { + BpOmicronPhysicalDisk::new( + blueprint_id, + sled_id.into_untyped_uuid(), + disk, + ) + }) + }) + .collect::>(); let sled_omicron_zones = blueprint .blueprint_zones .iter() @@ -168,6 +195,24 @@ impl DataStore { .await?; } + // Insert all physical disks for this blueprint. + + { + use db::schema::bp_sled_omicron_physical_disks::dsl as sled_disks; + let _ = diesel::insert_into(sled_disks::bp_sled_omicron_physical_disks) + .values(sled_omicron_physical_disks) + .execute_async(&conn) + .await?; + } + + { + use db::schema::bp_omicron_physical_disk::dsl as omicron_disk; + let _ = diesel::insert_into(omicron_disk::bp_omicron_physical_disk) + .values(omicron_physical_disks) + .execute_async(&conn) + .await?; + } + // Insert all the Omicron zones for this blueprint. { use db::schema::bp_sled_omicron_zones::dsl as sled_zones; @@ -297,6 +342,50 @@ impl DataStore { blueprint_zones }; + // Do the same thing we just did for zones, but for physical disks too. 
+ let mut blueprint_disks: BTreeMap< + SledUuid, + BlueprintPhysicalDisksConfig, + > = { + use db::schema::bp_sled_omicron_physical_disks::dsl; + + let mut blueprint_physical_disks = BTreeMap::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_sled_omicron_physical_disks, + dsl::sled_id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpSledOmicronPhysicalDisks::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|s| s.sled_id); + + for s in batch { + let old = blueprint_physical_disks.insert( + SledUuid::from_untyped_uuid(s.sled_id), + BlueprintPhysicalDisksConfig { + generation: *s.generation, + disks: Vec::new(), + }, + ); + bail_unless!( + old.is_none(), + "found duplicate sled ID in bp_sled_omicron_physical_disks: {}", + s.sled_id + ); + } + } + + blueprint_physical_disks + }; + // Assemble a mutable map of all the NICs found, by NIC id. As we // match these up with the corresponding zone below, we'll remove items // from this set. That way we can tell if the same NIC was used twice @@ -387,7 +476,7 @@ impl DataStore { // impossible and reflects either a bug or database // corruption. Error::internal_error(&format!( - "zone {:?}: unknown sled: {:?}", + "zone {}: unknown sled: {}", z.id, z.sled_id )) })?; @@ -419,9 +508,57 @@ impl DataStore { omicron_zone_nics.keys() ); + // Load all the physical disks for each sled. + { + use db::schema::bp_omicron_physical_disk::dsl; + + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + // `paginated` implicitly orders by our `id`, which is also + // handy for testing: the physical disks are always consistently ordered + let batch = paginated( + dsl::bp_omicron_physical_disk, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronPhysicalDisk::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|d| d.id); + + for d in batch { + let sled_disks = blueprint_disks + .get_mut(&SledUuid::from_untyped_uuid(d.sled_id)) + .ok_or_else(|| { + // This error means that we found a row in + // bp_omicron_physical_disk with no associated record in + // bp_sled_omicron_physical_disks. This should be + // impossible and reflects either a bug or database + // corruption. + Error::internal_error(&format!( + "disk {}: unknown sled: {}", + d.id, d.sled_id + )) + })?; + sled_disks.disks.push(d.into()); + } + } + } + + // Sort all disks to match what blueprint builders do. + for (_, disks_config) in blueprint_disks.iter_mut() { + disks_config.disks.sort_unstable_by_key(|d| d.id); + } + Ok(Blueprint { id: blueprint_id, blueprint_zones, + blueprint_disks, parent_blueprint_id, internal_dns_version, external_dns_version, @@ -448,7 +585,14 @@ impl DataStore { // collection if we crash while deleting it. let conn = self.pool_connection_authorized(opctx).await?; - let (nblueprints, nsled_agent_zones, nzones, nnics) = conn + let ( + nblueprints, + nsled_physical_disks, + nphysical_disks, + nsled_agent_zones, + nzones, + nnics, + ) = conn .transaction_async(|conn| async move { // Ensure that blueprint we're about to delete is not the // current target. 
@@ -484,6 +628,26 @@ impl DataStore { )); } + // Remove rows associated with Omicron physical disks + let nsled_physical_disks = { + use db::schema::bp_sled_omicron_physical_disks::dsl; + diesel::delete( + dsl::bp_sled_omicron_physical_disks + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + let nphysical_disks = { + use db::schema::bp_omicron_physical_disk::dsl; + diesel::delete( + dsl::bp_omicron_physical_disk + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + // Remove rows associated with Omicron zones let nsled_agent_zones = { use db::schema::bp_sled_omicron_zones::dsl; @@ -515,7 +679,14 @@ impl DataStore { .await? }; - Ok((nblueprints, nsled_agent_zones, nzones, nnics)) + Ok(( + nblueprints, + nsled_physical_disks, + nphysical_disks, + nsled_agent_zones, + nzones, + nnics, + )) }) .await .map_err(|error| match error { @@ -528,6 +699,8 @@ impl DataStore { info!(&opctx.log, "removed blueprint"; "blueprint_id" => blueprint_id.to_string(), "nblueprints" => nblueprints, + "nsled_physical_disks" => nsled_physical_disks, + "nphysical_disks" => nphysical_disks, "nsled_agent_zones" => nsled_agent_zones, "nzones" => nzones, "nnics" => nnics, @@ -1093,14 +1266,19 @@ mod tests { use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; use nexus_types::deployment::SledDetails; + use nexus_types::deployment::SledDisk; use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; + use nexus_types::external_api::views::PhysicalDiskPolicy; + use nexus_types::external_api::views::PhysicalDiskState; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_common::address::Ipv6Subnet; use omicron_common::api::external::Generation; + use omicron_common::disk::DiskIdentity; use omicron_test_utils::dev; + use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use pretty_assertions::assert_eq; @@ -1151,12 +1329,21 @@ mod tests { // Create a fake set of `SledDetails`, either with a subnet matching // `ip` or with an arbitrary one. fn fake_sled_details(ip: Option) -> SledDetails { - use illumos_utils::zpool::ZpoolName; let zpools = (0..4) - .map(|_| { - let name = - ZpoolName::new_external(ZpoolUuid::new_v4()).to_string(); - name.parse().unwrap() + .map(|i| { + ( + ZpoolUuid::new_v4(), + SledDisk { + disk_identity: DiskIdentity { + vendor: String::from("v"), + serial: format!("s-{i}"), + model: String::from("m"), + }, + disk_id: PhysicalDiskUuid::new_v4(), + policy: PhysicalDiskPolicy::InService, + state: PhysicalDiskState::Active, + }, + ) }) .collect(); let ip = ip.unwrap_or_else(|| thread_rng().gen::().into()); @@ -1404,25 +1591,46 @@ mod tests { ) .expect("failed to create builder"); + // Ensure disks on our sled + assert_eq!( + builder + .sled_ensure_disks( + new_sled_id, + &planning_input + .sled_resources(&new_sled_id) + .unwrap() + .clone(), + ) + .unwrap(), + Ensure::Added + ); + // Add zones to our new sled. 
assert_eq!( builder.sled_ensure_zone_ntp(new_sled_id).unwrap(), Ensure::Added ); - for zpool_name in new_sled_zpools { + for zpool_id in new_sled_zpools.keys() { assert_eq!( builder - .sled_ensure_zone_crucible(new_sled_id, zpool_name.clone()) + .sled_ensure_zone_crucible(new_sled_id, *zpool_id) .unwrap(), Ensure::Added ); } - let num_new_sled_zones = 1 + new_sled_zpools.len(); + + let num_new_ntp_zones = 1; + let num_new_crucible_zones = new_sled_zpools.len(); + let num_new_sled_zones = num_new_ntp_zones + num_new_crucible_zones; let blueprint2 = builder.build(); let authz_blueprint2 = authz_blueprint_from_id(blueprint2.id); - // Check that we added the new sled and its zones. + // Check that we added the new sled, as well as its disks and zones. + assert_eq!( + blueprint1.blueprint_disks.len() + new_sled_zpools.len(), + blueprint2.blueprint_disks.len(), + ); assert_eq!( blueprint1.blueprint_zones.len() + 1, blueprint2.blueprint_zones.len() diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index a8512d2362..6e8eecb8ed 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -656,7 +656,7 @@ mod test { sled_id.into_untyped_uuid(), ); datastore - .physical_disk_upsert(opctx, physical_disk.clone()) + .physical_disk_insert(opctx, physical_disk.clone()) .await .expect("Failed to upsert physical disk"); physical_disk.id() @@ -694,7 +694,7 @@ mod test { let zpool_id = Uuid::new_v4(); let zpool = Zpool::new(zpool_id, sled_id.into_untyped_uuid(), physical_disk_id); - datastore.zpool_upsert(opctx, zpool).await.unwrap(); + datastore.zpool_insert(opctx, zpool).await.unwrap(); zpool_id } diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index b97853dd06..f26ac782b3 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -12,15 +12,20 @@ use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; +use crate::db::model::ApplySledFilterExt; +use crate::db::model::InvPhysicalDisk; use crate::db::model::PhysicalDisk; +use crate::db::model::PhysicalDiskKind; use crate::db::model::PhysicalDiskPolicy; use crate::db::model::PhysicalDiskState; use crate::db::model::Sled; +use crate::db::model::Zpool; use crate::db::pagination::paginated; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; -use chrono::{DateTime, Utc}; +use chrono::Utc; use diesel::prelude::*; -use diesel::upsert::{excluded, on_constraint}; +use nexus_types::deployment::SledFilter; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -28,28 +33,67 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; +use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::GenericUuid; use uuid::Uuid; impl DataStore { + /// Inserts a physical disk and zpool together in a transaction + pub async fn physical_disk_and_zpool_insert( + &self, + opctx: &OpContext, + disk: PhysicalDisk, + zpool: Zpool, + ) -> Result<(), Error> { + let conn = &*self.pool_connection_authorized(&opctx).await?; + let err = OptionalError::new(); + + 
self.transaction_retry_wrapper("physical_disk_adoption") + .transaction(&conn, |conn| { + let err = err.clone(); + let disk = disk.clone(); + let zpool = zpool.clone(); + async move { + // TODO: These functions need to retry diesel errors + // in order to actually propagate retry errors + + Self::physical_disk_insert_on_connection( + &conn, opctx, disk, + ) + .await + .map_err(|e| err.bail(e))?; + Self::zpool_insert_on_connection(&conn, opctx, zpool) + .await + .map_err(|e| err.bail(e))?; + Ok(()) + } + }) + .await + .map_err(|e| { + if let Some(err) = err.take() { + return err; + } + public_error_from_diesel(e, ErrorHandler::Server) + })?; + Ok(()) + } + /// Stores a new physical disk in the database. /// /// - If the Vendor, Serial, and Model fields are the same as an existing - /// row in the table, the following fields may be updated: - /// - Sled ID - /// - Time Deleted - /// - Time Modified + /// row in the table, an error is thrown. /// - If the primary key (ID) is the same as an existing row in the table, /// an error is thrown. - pub async fn physical_disk_upsert( + pub async fn physical_disk_insert( &self, opctx: &OpContext, disk: PhysicalDisk, ) -> CreateResult { let conn = &*self.pool_connection_authorized(&opctx).await?; - Self::physical_disk_upsert_on_connection(&conn, opctx, disk).await + Self::physical_disk_insert_on_connection(&conn, opctx, disk).await } - pub async fn physical_disk_upsert_on_connection( + pub async fn physical_disk_insert_on_connection( conn: &async_bb8_diesel::Connection, opctx: &OpContext, disk: PhysicalDisk, @@ -57,19 +101,10 @@ impl DataStore { opctx.authorize(authz::Action::Read, &authz::FLEET).await?; use db::schema::physical_disk::dsl; - let now = Utc::now(); let sled_id = disk.sled_id; let disk_in_db = Sled::insert_resource( sled_id, - diesel::insert_into(dsl::physical_disk) - .values(disk.clone()) - .on_conflict(on_constraint("vendor_serial_model_unique")) - .do_update() - .set(( - dsl::sled_id.eq(excluded(dsl::sled_id)), - dsl::time_deleted.eq(Option::>::None), - dsl::time_modified.eq(now), - )), + diesel::insert_into(dsl::physical_disk).values(disk.clone()), ) .insert_and_get_result_async(conn) .await @@ -78,9 +113,13 @@ impl DataStore { type_name: ResourceType::Sled, lookup_type: LookupType::ById(sled_id), }, - AsyncInsertError::DatabaseError(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } + AsyncInsertError::DatabaseError(e) => public_error_from_diesel( + e, + ErrorHandler::Conflict( + ResourceType::PhysicalDisk, + &disk.id().to_string(), + ), + ), })?; Ok(disk_in_db) @@ -126,6 +165,67 @@ impl DataStore { Ok(()) } + /// Returns all physical disks which: + /// + /// - Appear on in-service sleds + /// - Appear in inventory + /// - Do not have any records of expungement + /// + /// If "inventory_collection_id" is not associated with a collection, this + /// function returns an empty list, rather than failing. + pub async fn physical_disk_uninitialized_list( + &self, + opctx: &OpContext, + inventory_collection_id: CollectionUuid, + ) -> ListResultVec { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; + + use db::schema::inv_physical_disk::dsl as inv_physical_disk_dsl; + use db::schema::physical_disk::dsl as physical_disk_dsl; + use db::schema::sled::dsl as sled_dsl; + + sled_dsl::sled + // If the sled is not in-service, drop the list immediately. 
+ .filter(sled_dsl::time_deleted.is_null()) + .sled_filter(SledFilter::InService) + // Look up all inventory physical disks that could match this sled + .inner_join( + inv_physical_disk_dsl::inv_physical_disk.on( + inv_physical_disk_dsl::inv_collection_id + .eq(inventory_collection_id.into_untyped_uuid()) + .and(inv_physical_disk_dsl::sled_id.eq(sled_dsl::id)) + .and( + inv_physical_disk_dsl::variant + .eq(PhysicalDiskKind::U2), + ), + ), + ) + // Filter out any disks in the inventory for which we have ever had + // a control plane disk. + .filter(diesel::dsl::not(diesel::dsl::exists( + physical_disk_dsl::physical_disk + .select(0.into_sql::()) + .filter(physical_disk_dsl::sled_id.eq(sled_dsl::id)) + .filter(physical_disk_dsl::variant.eq(PhysicalDiskKind::U2)) + .filter( + physical_disk_dsl::vendor + .eq(inv_physical_disk_dsl::vendor), + ) + .filter( + physical_disk_dsl::model + .eq(inv_physical_disk_dsl::model), + ) + .filter( + physical_disk_dsl::serial + .eq(inv_physical_disk_dsl::serial), + ), + ))) + .select(InvPhysicalDisk::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + pub async fn physical_disk_list( &self, opctx: &OpContext, @@ -175,6 +275,7 @@ impl DataStore { .filter(dsl::serial.eq(serial)) .filter(dsl::model.eq(model)) .filter(dsl::sled_id.eq(sled_id)) + .filter(dsl::time_deleted.is_null()) .set(dsl::time_deleted.eq(now)) .execute_async(&*self.pool_connection_authorized(opctx).await?) .await @@ -195,7 +296,11 @@ mod test { use nexus_db_model::Generation; use nexus_test_utils::db::test_setup_database; use nexus_types::identity::Asset; + use omicron_common::api::external::ByteCount; + use omicron_common::disk::DiskIdentity; use omicron_test_utils::dev; + use sled_agent_client::types::DiskVariant; + use sled_agent_client::types::InventoryDisk; use std::net::{Ipv6Addr, SocketAddrV6}; use std::num::NonZeroU32; @@ -224,101 +329,10 @@ mod test { } } - // Only checking some fields: - // - The UUID of the disk may actually not be the same as the upserted one; - // the "vendor/serial/model" value is the more critical unique identifier. - // NOTE: Could we derive a UUID from the VSM values? - // - The 'time' field precision can be modified slightly when inserted into - // the DB. 
- fn assert_disks_equal_ignore_uuid(lhs: &PhysicalDisk, rhs: &PhysicalDisk) { - assert_eq!(lhs.time_deleted().is_some(), rhs.time_deleted().is_some()); - assert_eq!(lhs.vendor, rhs.vendor); - assert_eq!(lhs.serial, rhs.serial); - assert_eq!(lhs.model, rhs.model); - assert_eq!(lhs.variant, rhs.variant); - assert_eq!(lhs.sled_id, rhs.sled_id); - } - - #[tokio::test] - async fn physical_disk_upsert_different_uuid_idempotent() { - let logctx = dev::test_setup_log( - "physical_disk_upsert_different_uuid_idempotent", - ); - let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = datastore_test(&logctx, &db).await; - - let sled = create_test_sled(&datastore).await; - let sled_id = sled.id(); - - // Insert a disk - let disk = PhysicalDisk::new( - Uuid::new_v4(), - String::from("Oxide"), - String::from("123"), - String::from("FakeDisk"), - PhysicalDiskKind::U2, - sled_id, - ); - let first_observed_disk = datastore - .physical_disk_upsert(&opctx, disk.clone()) - .await - .expect("Failed first attempt at upserting disk"); - assert_eq!(disk.id(), first_observed_disk.id()); - assert_disks_equal_ignore_uuid(&disk, &first_observed_disk); - - // Observe the inserted disk - let pagparams = list_disk_params(); - let disks = datastore - .sled_list_physical_disks(&opctx, sled_id, &pagparams) - .await - .expect("Failed to list physical disks"); - assert_eq!(disks.len(), 1); - assert_eq!(disk.id(), disks[0].id()); - assert_disks_equal_ignore_uuid(&disk, &disks[0]); - - // Insert the same disk, with a different UUID primary key - let disk_again = PhysicalDisk::new( - Uuid::new_v4(), - String::from("Oxide"), - String::from("123"), - String::from("FakeDisk"), - PhysicalDiskKind::U2, - sled_id, - ); - let second_observed_disk = datastore - .physical_disk_upsert(&opctx, disk_again.clone()) - .await - .expect("Failed second upsert of physical disk"); - // This check is pretty important - note that we return the original - // UUID, not the new one. 
- assert_eq!(disk.id(), second_observed_disk.id()); - assert_ne!(disk_again.id(), second_observed_disk.id()); - assert_disks_equal_ignore_uuid(&disk_again, &second_observed_disk); - assert!( - first_observed_disk.time_modified() - <= second_observed_disk.time_modified() - ); - - let disks = datastore - .sled_list_physical_disks(&opctx, sled_id, &pagparams) - .await - .expect("Failed to re-list physical disks"); - - // We'll use the old primary key - assert_eq!(disks.len(), 1); - assert_eq!(disk.id(), disks[0].id()); - assert_ne!(disk_again.id(), disks[0].id()); - assert_disks_equal_ignore_uuid(&disk, &disks[0]); - assert_disks_equal_ignore_uuid(&disk_again, &disks[0]); - - db.cleanup().await.unwrap(); - logctx.cleanup_successful(); - } - #[tokio::test] - async fn physical_disk_upsert_same_uuid_idempotent() { + async fn physical_disk_insert_same_uuid_collides() { let logctx = - dev::test_setup_log("physical_disk_upsert_same_uuid_idempotent"); + dev::test_setup_log("physical_disk_insert_same_uuid_collides"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; @@ -335,41 +349,31 @@ mod test { sled_id, ); let first_observed_disk = datastore - .physical_disk_upsert(&opctx, disk.clone()) + .physical_disk_insert(&opctx, disk.clone()) .await .expect("Failed first attempt at upserting disk"); assert_eq!(disk.id(), first_observed_disk.id()); // Insert a disk with an identical UUID - let second_observed_disk = datastore - .physical_disk_upsert(&opctx, disk.clone()) + let err = datastore + .physical_disk_insert(&opctx, disk.clone()) .await - .expect("Should have succeeded upserting disk"); - assert_eq!(disk.id(), second_observed_disk.id()); + .expect_err("Should have failed upserting disk"); + assert!( - first_observed_disk.time_modified() - <= second_observed_disk.time_modified() - ); - assert_disks_equal_ignore_uuid( - &first_observed_disk, - &second_observed_disk, + err.to_string() + .contains("Object (of type PhysicalDisk) already exists"), + "{err}" ); - let pagparams = list_disk_params(); - let disks = datastore - .sled_list_physical_disks(&opctx, sled_id, &pagparams) - .await - .expect("Failed to list physical disks"); - assert_eq!(disks.len(), 1); - db.cleanup().await.unwrap(); logctx.cleanup_successful(); } #[tokio::test] - async fn physical_disk_upsert_different_disks() { + async fn physical_disk_insert_different_disks() { let logctx = - dev::test_setup_log("physical_disk_upsert_different_disks"); + dev::test_setup_log("physical_disk_insert_different_disks"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; @@ -386,7 +390,7 @@ mod test { sled_id, ); datastore - .physical_disk_upsert(&opctx, disk.clone()) + .physical_disk_insert(&opctx, disk.clone()) .await .expect("Failed first attempt at upserting disk"); @@ -400,7 +404,7 @@ mod test { sled_id, ); datastore - .physical_disk_upsert(&opctx, disk.clone()) + .physical_disk_insert(&opctx, disk.clone()) .await .expect("Failed first attempt at upserting disk"); @@ -433,7 +437,7 @@ mod test { sled.id(), ); datastore - .physical_disk_upsert(&opctx, disk.clone()) + .physical_disk_insert(&opctx, disk.clone()) .await .expect("Failed first attempt at upserting disk"); let pagparams = list_disk_params(); @@ -482,9 +486,9 @@ mod test { // - Disk is detached from Sled A (and the detach is reported to Nexus) // - Disk is attached into Sled B #[tokio::test] - async fn physical_disk_upsert_delete_reupsert_new_sled() { + async fn 
physical_disk_insert_delete_reupsert_new_sled() { let logctx = dev::test_setup_log( - "physical_disk_upsert_delete_reupsert_new_sled", + "physical_disk_insert_delete_reupsert_new_sled", ); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; @@ -502,7 +506,7 @@ mod test { sled_a.id(), ); datastore - .physical_disk_upsert(&opctx, disk.clone()) + .physical_disk_insert(&opctx, disk.clone()) .await .expect("Failed first attempt at upserting disk"); let pagparams = list_disk_params(); @@ -539,7 +543,7 @@ mod test { .expect("Failed to list physical disks"); assert!(disks.is_empty()); - // "Report the disk" from the second sled + // Attach the disk to the second sled let disk = PhysicalDisk::new( Uuid::new_v4(), String::from("Oxide"), @@ -549,7 +553,7 @@ mod test { sled_b.id(), ); datastore - .physical_disk_upsert(&opctx, disk.clone()) + .physical_disk_insert(&opctx, disk.clone()) .await .expect("Failed second attempt at upserting disk"); @@ -576,9 +580,9 @@ mod test { // notification to Nexus). // - Disk is attached into Sled B #[tokio::test] - async fn physical_disk_upsert_reupsert_new_sled() { + async fn physical_disk_insert_reupsert_new_sled() { let logctx = - dev::test_setup_log("physical_disk_upsert_reupsert_new_sled"); + dev::test_setup_log("physical_disk_insert_reupsert_new_sled"); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; @@ -595,7 +599,7 @@ mod test { sled_a.id(), ); datastore - .physical_disk_upsert(&opctx, disk.clone()) + .physical_disk_insert(&opctx, disk.clone()) .await .expect("Failed first attempt at upserting disk"); let pagparams = list_disk_params(); @@ -610,6 +614,18 @@ mod test { .expect("Failed to list physical disks"); assert!(disks.is_empty()); + // Remove the disk from the first sled + datastore + .physical_disk_delete( + &opctx, + disk.vendor.clone(), + disk.serial.clone(), + disk.model.clone(), + disk.sled_id, + ) + .await + .expect("Failed to delete disk"); + // "Report the disk" from the second sled let disk = PhysicalDisk::new( Uuid::new_v4(), @@ -620,7 +636,7 @@ mod test { sled_b.id(), ); datastore - .physical_disk_upsert(&opctx, disk.clone()) + .physical_disk_insert(&opctx, disk.clone()) .await .expect("Failed second attempt at upserting disk"); @@ -638,4 +654,243 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + // Most of this data doesn't matter, but adds a sled + // to an inventory with a supplied set of disks. 
+ fn add_sled_to_inventory( + builder: &mut nexus_inventory::CollectionBuilder, + sled: &Sled, + disks: Vec, + ) { + builder + .found_sled_inventory( + "fake sled agent", + sled_agent_client::types::Inventory { + baseboard: sled_agent_client::types::Baseboard::Gimlet { + identifier: sled.serial_number().to_string(), + model: sled.part_number().to_string(), + revision: 0, + }, + reservoir_size: ByteCount::from(1024), + sled_role: sled_agent_client::types::SledRole::Gimlet, + sled_agent_address: "[::1]:56792".parse().unwrap(), + sled_id: sled.id(), + usable_hardware_threads: 10, + usable_physical_ram: ByteCount::from(1024 * 1024), + disks, + zpools: vec![], + }, + ) + .unwrap(); + } + + fn create_inv_disk(serial: String, slot: i64) -> InventoryDisk { + InventoryDisk { + identity: DiskIdentity { + serial, + vendor: "vendor".to_string(), + model: "model".to_string(), + }, + variant: DiskVariant::U2, + slot, + } + } + + fn create_disk_zpool_combo( + sled_id: Uuid, + inv_disk: &InventoryDisk, + ) -> (PhysicalDisk, Zpool) { + let disk = PhysicalDisk::new( + Uuid::new_v4(), + inv_disk.identity.vendor.clone(), + inv_disk.identity.serial.clone(), + inv_disk.identity.model.clone(), + PhysicalDiskKind::U2, + sled_id, + ); + + let zpool = Zpool::new(Uuid::new_v4(), sled_id, disk.id()); + (disk, zpool) + } + + #[tokio::test] + async fn test_physical_disk_uninitialized_list() { + let logctx = + dev::test_setup_log("test_physical_disk_uninitialized_list"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + let sled_a = create_test_sled(&datastore).await; + let sled_b = create_test_sled(&datastore).await; + + // No inventory -> No uninitialized disks + let uninitialized_disks = datastore + .physical_disk_uninitialized_list( + &opctx, + CollectionUuid::new_v4(), // Collection that does not exist + ) + .await + .expect("Failed to look up uninitialized disks"); + assert!(uninitialized_disks.is_empty()); + + // Create inventory disks for both sleds + let mut builder = nexus_inventory::CollectionBuilder::new("test"); + let disks_a = vec![ + create_inv_disk("serial-001".to_string(), 1), + create_inv_disk("serial-002".to_string(), 2), + create_inv_disk("serial-003".to_string(), 3), + ]; + let disks_b = vec![ + create_inv_disk("serial-101".to_string(), 1), + create_inv_disk("serial-102".to_string(), 2), + create_inv_disk("serial-103".to_string(), 3), + ]; + add_sled_to_inventory(&mut builder, &sled_a, disks_a.clone()); + add_sled_to_inventory(&mut builder, &sled_b, disks_b.clone()); + let collection = builder.build(); + let collection_id = collection.id; + datastore + .inventory_insert_collection(&opctx, &collection) + .await + .expect("failed to insert collection"); + + // Now when we list the uninitialized disks, we should see everything in + // the inventory. + let uninitialized_disks = datastore + .physical_disk_uninitialized_list(&opctx, collection_id) + .await + .expect("Failed to list uninitialized disks"); + assert_eq!(uninitialized_disks.len(), 6); + + // Normalize the data a bit -- convert to nexus types, and sort vecs for + // stability in the comparison. 
+        let mut uninitialized_disks: Vec =
+            uninitialized_disks.into_iter().map(|d| d.into()).collect();
+        uninitialized_disks
+            .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap());
+        let mut expected_disks: Vec =
+            disks_a
+                .iter()
+                .map(|d| d.clone().into())
+                .chain(disks_b.iter().map(|d| d.clone().into()))
+                .collect();
+        expected_disks
+            .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap());
+        assert_eq!(uninitialized_disks, expected_disks);
+
+        // Let's create control plane objects for some of these disks.
+        //
+        // They should no longer show up when we list uninitialized devices.
+        //
+        // This creates disks for: 001, 002, and 101.
+        // It leaves the following uninitialized: 003, 102, 103
+        let (disk_001, zpool) =
+            create_disk_zpool_combo(sled_a.id(), &disks_a[0]);
+        datastore
+            .physical_disk_and_zpool_insert(&opctx, disk_001, zpool)
+            .await
+            .unwrap();
+        let (disk_002, zpool) =
+            create_disk_zpool_combo(sled_a.id(), &disks_a[1]);
+        datastore
+            .physical_disk_and_zpool_insert(&opctx, disk_002, zpool)
+            .await
+            .unwrap();
+        let (disk_101, zpool) =
+            create_disk_zpool_combo(sled_b.id(), &disks_b[0]);
+        datastore
+            .physical_disk_and_zpool_insert(&opctx, disk_101, zpool)
+            .await
+            .unwrap();
+
+        let uninitialized_disks = datastore
+            .physical_disk_uninitialized_list(&opctx, collection_id)
+            .await
+            .expect("Failed to list uninitialized disks");
+        assert_eq!(uninitialized_disks.len(), 3);
+
+        // Pay careful attention to our indexing below.
+        //
+        // We're grabbing the last disk of "disks_a" (which is still
+        // uninitialized) and the last two disks of "disks_b" (of which both are
+        // still uninitialized).
+        let mut uninitialized_disks: Vec =
+            uninitialized_disks.into_iter().map(|d| d.into()).collect();
+        uninitialized_disks
+            .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap());
+        let mut expected_disks: Vec =
+            disks_a[2..3]
+                .iter()
+                .map(|d| d.clone().into())
+                .chain(disks_b[1..3].iter().map(|d| d.clone().into()))
+                .collect();
+        expected_disks
+            .sort_by(|a, b| a.identity.partial_cmp(&b.identity).unwrap());
+        assert_eq!(uninitialized_disks, expected_disks);
+
+        // Create physical disks for all remaining devices.
+        //
+        // Observe no remaining uninitialized disks.
+        let (disk_003, zpool) =
+            create_disk_zpool_combo(sled_a.id(), &disks_a[2]);
+        datastore
+            .physical_disk_and_zpool_insert(&opctx, disk_003.clone(), zpool)
+            .await
+            .unwrap();
+        let (disk_102, zpool) =
+            create_disk_zpool_combo(sled_b.id(), &disks_b[1]);
+        datastore
+            .physical_disk_and_zpool_insert(&opctx, disk_102.clone(), zpool)
+            .await
+            .unwrap();
+        let (disk_103, zpool) =
+            create_disk_zpool_combo(sled_b.id(), &disks_b[2]);
+        datastore
+            .physical_disk_and_zpool_insert(&opctx, disk_103.clone(), zpool)
+            .await
+            .unwrap();
+
+        let uninitialized_disks = datastore
+            .physical_disk_uninitialized_list(&opctx, collection_id)
+            .await
+            .expect("Failed to list uninitialized disks");
+        assert_eq!(uninitialized_disks.len(), 0);
+
+        // Expunge some disks, observe that they do not re-appear as
+        // uninitialized.
+        use db::schema::physical_disk::dsl;
+
+        // Set a disk to "deleted".
+ let now = Utc::now(); + diesel::update(dsl::physical_disk) + .filter(dsl::id.eq(disk_003.id())) + .filter(dsl::time_deleted.is_null()) + .set(dsl::time_deleted.eq(now)) + .execute_async( + &*datastore.pool_connection_authorized(&opctx).await.unwrap(), + ) + .await + .unwrap(); + + // Set another disk to "expunged" + diesel::update(dsl::physical_disk) + .filter(dsl::id.eq(disk_102.id())) + .filter(dsl::time_deleted.is_null()) + .set(dsl::disk_policy.eq(PhysicalDiskPolicy::Expunged)) + .execute_async( + &*datastore.pool_connection_authorized(&opctx).await.unwrap(), + ) + .await + .unwrap(); + + // The set of uninitialized disks should remain at zero + let uninitialized_disks = datastore + .physical_disk_uninitialized_list(&opctx, collection_id) + .await + .expect("Failed to list uninitialized disks"); + assert_eq!(uninitialized_disks.len(), 0); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 09f635e0f3..ece9112745 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -737,26 +737,27 @@ impl DataStore { info!(log, "Inserted services"); for physical_disk in physical_disks { - Self::physical_disk_upsert_on_connection(&conn, &opctx, physical_disk) - .await - .map_err(|e| { + if let Err(e) = Self::physical_disk_insert_on_connection(&conn, &opctx, physical_disk) + .await { + if !matches!(e, Error::ObjectAlreadyExists { .. }) { error!(log, "Failed to upsert physical disk"; "err" => #%e); err.set(RackInitError::PhysicalDiskInsert(e)) .unwrap(); - DieselError::RollbackTransaction - })?; + return Err(DieselError::RollbackTransaction); + } + } } info!(log, "Inserted physical disks"); for zpool in zpools { - Self::zpool_upsert_on_connection(&conn, &opctx, zpool).await.map_err( - |e| { + if let Err(e) = Self::zpool_insert_on_connection(&conn, &opctx, zpool).await { + if !matches!(e, Error::ObjectAlreadyExists { .. 
}) { error!(log, "Failed to upsert zpool"; "err" => #%e); err.set(RackInitError::ZpoolInsert(e)).unwrap(); - DieselError::RollbackTransaction - }, - )?; + return Err(DieselError::RollbackTransaction); + } + } } info!(log, "Inserted zpools"); @@ -988,6 +989,7 @@ mod test { blueprint: Blueprint { id: Uuid::new_v4(), blueprint_zones: BTreeMap::new(), + blueprint_disks: BTreeMap::new(), parent_blueprint_id: None, internal_dns_version: *Generation::new(), external_dns_version: *Generation::new(), diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 7fb0ada639..fa83436e9e 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -1087,11 +1087,11 @@ mod test { ); datastore - .physical_disk_upsert(&opctx, disk1.clone()) + .physical_disk_insert(&opctx, disk1.clone()) .await .expect("Failed to upsert physical disk"); datastore - .physical_disk_upsert(&opctx, disk2.clone()) + .physical_disk_insert(&opctx, disk2.clone()) .await .expect("Failed to upsert physical disk"); diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 99f066ee42..f08854734d 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -1688,6 +1688,7 @@ mod tests { let bp1 = Blueprint { id: bp1_id, blueprint_zones: bp1_zones, + blueprint_disks: BTreeMap::new(), parent_blueprint_id: None, internal_dns_version: Generation::new(), external_dns_version: Generation::new(), @@ -1739,6 +1740,7 @@ mod tests { let bp2 = Blueprint { id: bp2_id, blueprint_zones: BTreeMap::new(), + blueprint_disks: BTreeMap::new(), parent_blueprint_id: Some(bp1_id), internal_dns_version: Generation::new(), external_dns_version: Generation::new(), @@ -1799,6 +1801,7 @@ mod tests { let bp3 = Blueprint { id: bp3_id, blueprint_zones: bp3_zones, + blueprint_disks: BTreeMap::new(), parent_blueprint_id: Some(bp2_id), internal_dns_version: Generation::new(), external_dns_version: Generation::new(), @@ -1862,6 +1865,7 @@ mod tests { let bp4 = Blueprint { id: bp4_id, blueprint_zones: bp4_zones, + blueprint_disks: BTreeMap::new(), parent_blueprint_id: Some(bp3_id), internal_dns_version: Generation::new(), external_dns_version: Generation::new(), @@ -1990,6 +1994,7 @@ mod tests { let bp5 = Blueprint { id: bp5_id, blueprint_zones: bp5_zones, + blueprint_disks: BTreeMap::new(), parent_blueprint_id: Some(bp4_id), internal_dns_version: Generation::new(), external_dns_version: Generation::new(), diff --git a/nexus/db-queries/src/db/datastore/zpool.rs b/nexus/db-queries/src/db/datastore/zpool.rs index 0ab6bcf3af..fb7a76f48d 100644 --- a/nexus/db-queries/src/db/datastore/zpool.rs +++ b/nexus/db-queries/src/db/datastore/zpool.rs @@ -14,6 +14,7 @@ use crate::db::datastore::OpContext; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; use crate::db::identity::Asset; +use crate::db::model::PhysicalDisk; use crate::db::model::Sled; use crate::db::model::Zpool; use crate::db::pagination::paginated; @@ -32,17 +33,17 @@ use omicron_common::api::external::ResourceType; use uuid::Uuid; impl DataStore { - pub async fn zpool_upsert( + pub async fn zpool_insert( &self, opctx: &OpContext, zpool: Zpool, ) -> CreateResult { let conn = &*self.pool_connection_authorized(&opctx).await?; - Self::zpool_upsert_on_connection(&conn, opctx, zpool).await + Self::zpool_insert_on_connection(&conn, opctx, zpool).await } /// Stores a new zpool in the database. 
- pub async fn zpool_upsert_on_connection( + pub async fn zpool_insert_on_connection( conn: &async_bb8_diesel::Connection, opctx: &OpContext, zpool: Zpool, @@ -85,7 +86,7 @@ impl DataStore { &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { + ) -> ListResultVec<(Zpool, PhysicalDisk)> { opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; use db::schema::physical_disk::dsl as dsl_physical_disk; @@ -99,7 +100,7 @@ impl DataStore { ), ), ) - .select(Zpool::as_select()) + .select((Zpool::as_select(), PhysicalDisk::as_select())) .load_async(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) @@ -114,7 +115,7 @@ impl DataStore { pub async fn zpool_list_all_external_batched( &self, opctx: &OpContext, - ) -> ListResultVec { + ) -> ListResultVec<(Zpool, PhysicalDisk)> { opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; opctx.check_complex_operations_allowed()?; let mut zpools = Vec::new(); @@ -123,8 +124,7 @@ impl DataStore { let batch = self .zpool_list_all_external(opctx, &p.current_pagparams()) .await?; - paginator = - p.found_batch(&batch, &|z: &nexus_db_model::Zpool| z.id()); + paginator = p.found_batch(&batch, &|(z, _)| z.id()); zpools.extend(batch); } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 6ba7fd2089..f7c5e44cf0 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -107,6 +107,7 @@ inventory.nkeep = 5 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +physical_disk_adoption.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index f660e1d845..c22a56b1b4 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -210,7 +210,7 @@ mod tests { Uuid::new_v4(), // physical_disk_id ); datastore - .zpool_upsert(opctx, zpool) + .zpool_insert(opctx, zpool) .await .expect("failed to upsert zpool"); } @@ -279,7 +279,7 @@ mod tests { Uuid::new_v4(), // physical_disk_id ); datastore - .zpool_upsert(opctx, zpool) + .zpool_insert(opctx, zpool) .await .expect("failed to upsert zpool"); } diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 5a2321781d..f0d0af074b 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -510,11 +510,13 @@ mod test { use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneType; + use nexus_types::deployment::SledDisk; use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; - use nexus_types::deployment::ZpoolName; use nexus_types::external_api::params; use nexus_types::external_api::shared; + use nexus_types::external_api::views::PhysicalDiskPolicy; + use nexus_types::external_api::views::PhysicalDiskState; use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::internal_api::params::DnsConfigZone; @@ -529,14 +531,17 @@ mod test { use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; + use 
omicron_common::disk::DiskIdentity;
     use omicron_test_utils::dev::test_setup_log;
+    use omicron_uuid_kinds::PhysicalDiskUuid;
+    use omicron_uuid_kinds::ZpoolUuid;
+    use std::collections::BTreeMap;
     use std::collections::BTreeSet;
     use std::collections::HashMap;
     use std::net::IpAddr;
     use std::net::Ipv4Addr;
     use std::net::Ipv6Addr;
     use std::net::SocketAddrV6;
-    use std::str::FromStr;
     use std::sync::Arc;
     use uuid::Uuid;
@@ -605,11 +610,19 @@ mod test {
             .zip(possible_sled_subnets)
             .map(|(sled_id, subnet)| {
                 let sled_resources = SledResources {
-                    zpools: BTreeSet::from([ZpoolName::from_str(&format!(
-                        "oxp_{}",
-                        Uuid::new_v4()
-                    ))
-                    .unwrap()]),
+                    zpools: BTreeMap::from([(
+                        ZpoolUuid::new_v4(),
+                        SledDisk {
+                            disk_identity: DiskIdentity {
+                                vendor: String::from("v"),
+                                serial: format!("s-{sled_id}"),
+                                model: String::from("m"),
+                            },
+                            disk_id: PhysicalDiskUuid::new_v4(),
+                            policy: PhysicalDiskPolicy::InService,
+                            state: PhysicalDiskState::Active,
+                        },
+                    )]),
                     subnet: Ipv6Subnet::new(subnet.network()),
                 };
                 (*sled_id, sled_resources)
diff --git a/nexus/reconfigurator/execution/src/lib.rs b/nexus/reconfigurator/execution/src/lib.rs
index dc7c4593b3..367b38726e 100644
--- a/nexus/reconfigurator/execution/src/lib.rs
+++ b/nexus/reconfigurator/execution/src/lib.rs
@@ -24,6 +24,7 @@ use std::net::SocketAddrV6;
 mod datasets;
 mod dns;
+mod omicron_physical_disks;
 mod omicron_zones;
 mod overridables;
 mod resource_allocation;
@@ -128,6 +129,14 @@ where
             (SledUuid::from_untyped_uuid(db_sled.id()), Sled::from(db_sled))
         })
         .collect();
+
+    omicron_physical_disks::deploy_disks(
+        &opctx,
+        &sleds_by_id,
+        &blueprint.blueprint_disks,
+    )
+    .await?;
+
     omicron_zones::deploy_zones(
         &opctx,
         &sleds_by_id,
diff --git a/nexus/reconfigurator/execution/src/omicron_physical_disks.rs b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs
new file mode 100644
index 0000000000..a90b3c8e59
--- /dev/null
+++ b/nexus/reconfigurator/execution/src/omicron_physical_disks.rs
@@ -0,0 +1,349 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! Manages deployment of Omicron physical disks to Sled Agents.
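+//!
+//! In rough terms, and as sketched by the code below in this file:
+//! `deploy_disks` takes the per-sled `BlueprintPhysicalDisksConfig` map from a
+//! blueprint and PUTs each sled's config to that sled agent's
+//! `/omicron-physical-disks` endpoint, collecting per-sled errors rather than
+//! failing fast on the first one.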
+ +use crate::Sled; +use anyhow::anyhow; +use anyhow::Context; +use futures::stream; +use futures::StreamExt; +use nexus_db_queries::context::OpContext; +use nexus_types::deployment::BlueprintPhysicalDisksConfig; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; +use slog::info; +use slog::o; +use slog::warn; +use std::collections::BTreeMap; + +/// Idempotently ensure that the specified Omicron disks are deployed to the +/// corresponding sleds +pub(crate) async fn deploy_disks( + opctx: &OpContext, + sleds_by_id: &BTreeMap, + sled_configs: &BTreeMap, +) -> Result<(), Vec> { + let errors: Vec<_> = stream::iter(sled_configs) + .filter_map(|(sled_id, config)| async move { + let log = opctx.log.new(o!( + "sled_id" => sled_id.to_string(), + "generation" => config.generation.to_string(), + )); + + let db_sled = match sleds_by_id.get(&sled_id) { + Some(sled) => sled, + None => { + let err = anyhow!("sled not found in db list: {}", sled_id); + warn!(log, "{err:#}"); + return Some(err); + } + }; + + let client = nexus_networking::sled_client_from_address( + sled_id.into_untyped_uuid(), + db_sled.sled_agent_address, + &log, + ); + let result = + client.omicron_physical_disks_put(&config).await.with_context( + || format!("Failed to put {config:#?} to sled {sled_id}"), + ); + match result { + Err(error) => { + warn!(log, "{error:#}"); + Some(error) + } + Ok(result) => { + let (errs, successes): (Vec<_>, Vec<_>) = result + .into_inner() + .status + .into_iter() + .partition(|status| status.err.is_some()); + + if !errs.is_empty() { + warn!( + log, + "Failed to deploy storage for sled agent"; + "successfully configured disks" => successes.len(), + "failed disk configurations" => errs.len(), + ); + for err in &errs { + warn!(log, "{err:?}"); + } + return Some(anyhow!( + "failure deploying disks: {:?}", + errs + )); + } + + info!( + log, + "Successfully deployed storage for sled agent"; + "successfully configured disks" => successes.len(), + ); + None + } + } + }) + .collect() + .await; + + if errors.is_empty() { + Ok(()) + } else { + Err(errors) + } +} + +#[cfg(test)] +mod test { + use super::deploy_disks; + use crate::Sled; + use httptest::matchers::{all_of, json_decoded, request}; + use httptest::responders::json_encoded; + use httptest::responders::status_code; + use httptest::Expectation; + use nexus_db_queries::context::OpContext; + use nexus_test_utils_macros::nexus_test; + use nexus_types::deployment::{ + Blueprint, BlueprintPhysicalDiskConfig, BlueprintPhysicalDisksConfig, + BlueprintTarget, + }; + use omicron_common::api::external::Generation; + use omicron_common::disk::DiskIdentity; + use omicron_uuid_kinds::SledUuid; + use omicron_uuid_kinds::ZpoolUuid; + use std::collections::BTreeMap; + use std::net::SocketAddr; + use uuid::Uuid; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + fn create_blueprint( + blueprint_disks: BTreeMap, + ) -> (BlueprintTarget, Blueprint) { + let id = Uuid::new_v4(); + ( + BlueprintTarget { + target_id: id, + enabled: true, + time_made_target: chrono::Utc::now(), + }, + Blueprint { + id, + blueprint_zones: BTreeMap::new(), + blueprint_disks, + parent_blueprint_id: None, + internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), + time_created: chrono::Utc::now(), + creator: "test".to_string(), + comment: "test blueprint".to_string(), + }, + ) + } + + #[nexus_test] + async fn test_deploy_omicron_disks(cptestctx: &ControlPlaneTestContext) { + let nexus = 
&cptestctx.server.apictx().nexus; + let datastore = nexus.datastore(); + let opctx = OpContext::for_tests( + cptestctx.logctx.log.clone(), + datastore.clone(), + ); + + // Create some fake sled-agent servers to respond to disk puts and add + // sleds to CRDB. + let mut s1 = httptest::Server::run(); + let mut s2 = httptest::Server::run(); + let sled_id1 = SledUuid::new_v4(); + let sled_id2 = SledUuid::new_v4(); + let sleds_by_id: BTreeMap = + [(sled_id1, &s1), (sled_id2, &s2)] + .into_iter() + .map(|(sled_id, server)| { + let SocketAddr::V6(addr) = server.addr() else { + panic!("Expected Ipv6 address. Got {}", server.addr()); + }; + let sled = Sled { + id: sled_id, + sled_agent_address: addr, + is_scrimlet: false, + }; + (sled_id, sled) + }) + .collect(); + + // Get a success result back when the blueprint has an empty set of + // disks. + let (_, blueprint) = create_blueprint(BTreeMap::new()); + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy no disks"); + + // Disks are updated in a particular order, but each request contains + // the full set of disks that must be running. + // See `rack_setup::service::ServiceInner::run` for more details. + fn make_disks() -> BlueprintPhysicalDisksConfig { + BlueprintPhysicalDisksConfig { + generation: Generation::new(), + disks: vec![BlueprintPhysicalDiskConfig { + identity: DiskIdentity { + vendor: "test-vendor".to_string(), + serial: "test-serial".to_string(), + model: "test-model".to_string(), + }, + id: Uuid::new_v4(), + pool_id: ZpoolUuid::new_v4(), + }], + } + } + + // Create a blueprint with only one disk for both servers + // We reuse the same `OmicronDisksConfig` because the details don't + // matter for this test. + let disks1 = make_disks(); + let disks2 = make_disks(); + let (_, blueprint) = create_blueprint(BTreeMap::from([ + (sled_id1, disks1.clone()), + (sled_id2, disks2.clone()), + ])); + + // Set expectations for the initial requests sent to the fake + // sled-agents. + for s in [&mut s1, &mut s2] { + s.expect( + Expectation::matching(all_of![ + request::method_path("PUT", "/omicron-physical-disks",), + // Our generation number should be 1 and there should + // be only a single disk. + request::body(json_decoded( + |c: &BlueprintPhysicalDisksConfig| { + c.generation == 1u32.into() && c.disks.len() == 1 + } + )) + ]) + .respond_with(json_encoded( + sled_agent_client::types::DisksManagementResult { + status: vec![], + }, + )), + ); + } + + // Execute it. + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy initial disks"); + + s1.verify_and_clear(); + s2.verify_and_clear(); + + // Do it again. This should trigger the same request. + for s in [&mut s1, &mut s2] { + s.expect( + Expectation::matching(request::method_path( + "PUT", + "/omicron-physical-disks", + )) + .respond_with(json_encoded( + sled_agent_client::types::DisksManagementResult { + status: vec![], + }, + )), + ); + } + deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks) + .await + .expect("failed to deploy same disks"); + s1.verify_and_clear(); + s2.verify_and_clear(); + + // Take another lap, but this time, have one server fail the request and + // try again. 
+        s1.expect(
+            Expectation::matching(request::method_path(
+                "PUT",
+                "/omicron-physical-disks",
+            ))
+            .respond_with(json_encoded(
+                sled_agent_client::types::DisksManagementResult {
+                    status: vec![],
+                },
+            )),
+        );
+        s2.expect(
+            Expectation::matching(request::method_path(
+                "PUT",
+                "/omicron-physical-disks",
+            ))
+            .respond_with(status_code(500)),
+        );
+
+        let errors =
+            deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks)
+                .await
+                .expect_err("unexpectedly succeeded in deploying disks");
+
+        println!("{:?}", errors);
+        assert_eq!(errors.len(), 1);
+        assert!(errors[0]
+            .to_string()
+            .starts_with("Failed to put OmicronPhysicalDisksConfig"));
+        s1.verify_and_clear();
+        s2.verify_and_clear();
+
+        // We can also observe "partial failures", where the HTTP-level response
+        // is successful, but it indicates that the disk provisioning ran into
+        // problems.
+        s1.expect(
+            Expectation::matching(request::method_path(
+                "PUT",
+                "/omicron-physical-disks",
+            ))
+            .respond_with(json_encoded(
+                sled_agent_client::types::DisksManagementResult {
+                    status: vec![],
+                },
+            )),
+        );
+        s2.expect(
+            Expectation::matching(request::method_path(
+                "PUT",
+                "/omicron-physical-disks",
+            ))
+            .respond_with(json_encoded(sled_agent_client::types::DisksManagementResult {
+                status: vec![
+                    sled_agent_client::types::DiskManagementStatus {
+                        identity: omicron_common::disk::DiskIdentity {
+                            vendor: "v".to_string(),
+                            serial: "s".to_string(),
+                            model: "m".to_string(),
+                        },
+
+                        // This error could occur if a disk is removed
+                        err: Some(sled_agent_client::types::DiskManagementError::NotFound),
+                    }
+                ]
+            })),
+        );
+
+        let errors =
+            deploy_disks(&opctx, &sleds_by_id, &blueprint.blueprint_disks)
+                .await
+                .expect_err("unexpectedly succeeded in deploying disks");
+
+        println!("{:?}", errors);
+        assert_eq!(errors.len(), 1);
+        assert!(
+            errors[0].to_string().starts_with("failure deploying disks"),
+            "{}",
+            errors[0].to_string()
+        );
+        s1.verify_and_clear();
+        s2.verify_and_clear();
+    }
+}
diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs
index bcbf876e81..38b04d0e13 100644
--- a/nexus/reconfigurator/execution/src/omicron_zones.rs
+++ b/nexus/reconfigurator/execution/src/omicron_zones.rs
@@ -122,6 +122,7 @@ mod test {
                 .into_iter()
                 .map(|(typed_id, z)| (typed_id.into_untyped_uuid(), z))
                 .collect(),
+            blueprint_disks: BTreeMap::new(),
             parent_blueprint_id: None,
             internal_dns_version: Generation::new(),
             external_dns_version: Generation::new(),
diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml
index f990a92157..06d1c460ca 100644
--- a/nexus/reconfigurator/planning/Cargo.toml
+++ b/nexus/reconfigurator/planning/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 anyhow.workspace = true
 chrono.workspace = true
 gateway-client.workspace = true
+illumos-utils.workspace = true
 indexmap.workspace = true
 internal-dns.workspace = true
 ipnet.workspace = true
diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs
index 2d5ef8bee5..068e4a9875 100644
--- a/nexus/reconfigurator/planning/src/blueprint_builder.rs
+++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs
@@ -13,10 +13,13 @@ use ipnet::IpAdd;
 use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES;
 use nexus_inventory::now_db_precision;
 use nexus_types::deployment::Blueprint;
+use nexus_types::deployment::BlueprintPhysicalDiskConfig;
+use nexus_types::deployment::BlueprintPhysicalDisksConfig;
use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZonesConfig; +use nexus_types::deployment::DiskFilter; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; use nexus_types::deployment::OmicronZoneType; @@ -40,7 +43,9 @@ use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; use rand::rngs::StdRng; use rand::SeedableRng; use slog::o; @@ -53,6 +58,7 @@ use std::net::IpAddr; use std::net::Ipv4Addr; use std::net::Ipv6Addr; use std::net::SocketAddrV6; +use std::str::FromStr; use thiserror::Error; use typed_rng::UuidRng; use uuid::Uuid; @@ -94,6 +100,15 @@ pub enum EnsureMultiple { NotNeeded, } +fn zpool_id_to_external_name(zpool_id: ZpoolUuid) -> anyhow::Result { + let pool_name_generated = + illumos_utils::zpool::ZpoolName::new_external(zpool_id).to_string(); + let pool_name = ZpoolName::from_str(&pool_name_generated).map_err(|e| { + anyhow!("Failed to create zpool name from {zpool_id}: {e}") + })?; + Ok(pool_name) +} + /// Helper for assembling a blueprint /// /// There are two basic ways to assemble a new blueprint: @@ -123,6 +138,8 @@ pub struct BlueprintBuilder<'a> { // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. zones: BlueprintZonesBuilder<'a>, + disks: BlueprintDisksBuilder<'a>, + creator: String, comments: Vec, @@ -227,6 +244,7 @@ impl<'a> BlueprintBuilder<'a> { Ok(Blueprint { id: rng.blueprint_rng.next(), blueprint_zones, + blueprint_disks: BTreeMap::new(), parent_blueprint_id: None, internal_dns_version, external_dns_version, @@ -359,6 +377,7 @@ impl<'a> BlueprintBuilder<'a> { input, sled_ip_allocators: BTreeMap::new(), zones: BlueprintZonesBuilder::new(parent_blueprint), + disks: BlueprintDisksBuilder::new(parent_blueprint), creator: creator.to_owned(), comments: Vec::new(), nexus_v4_ips, @@ -375,9 +394,12 @@ impl<'a> BlueprintBuilder<'a> { // are no longer in service and need expungement work. let blueprint_zones = self.zones.into_zones_map(self.input.all_sled_ids(SledFilter::All)); + let blueprint_disks = + self.disks.into_disks_map(self.input.all_sled_ids(SledFilter::All)); Blueprint { id: self.rng.blueprint_rng.next(), blueprint_zones, + blueprint_disks, parent_blueprint_id: Some(self.parent_blueprint.id), internal_dns_version: self.input.internal_dns_version(), external_dns_version: self.input.external_dns_version(), @@ -407,6 +429,81 @@ impl<'a> BlueprintBuilder<'a> { self.comments.push(String::from(comment)); } + /// Ensures that the blueprint contains disks for a sled which already + /// exists in the database. + /// + /// This operation must perform the following: + /// - Ensure that any disks / zpools that exist in the database + /// are propagated into the blueprint. + /// - Ensure that any disks that are expunged from the database are + /// removed from the blueprint. + pub fn sled_ensure_disks( + &mut self, + sled_id: SledUuid, + resources: &SledResources, + ) -> Result { + let (mut additions, removals) = { + // These are the disks known to our (last?) 
blueprint
+            let blueprint_disks: BTreeMap<_, _> = self
+                .disks
+                .current_sled_disks(sled_id)
+                .map(|disk| {
+                    (PhysicalDiskUuid::from_untyped_uuid(disk.id), disk)
+                })
+                .collect();
+
+            // These are the in-service disks as we observed them in the database,
+            // during the planning phase
+            let database_disks: BTreeMap<_, _> = resources
+                .all_disks(DiskFilter::InService)
+                .map(|(zpool, disk)| (disk.disk_id, (zpool, disk)))
+                .collect();
+
+            // Add any disks that appear in the database, but not the blueprint
+            let additions = database_disks
+                .iter()
+                .filter_map(|(disk_id, (zpool, disk))| {
+                    if !blueprint_disks.contains_key(disk_id) {
+                        Some(BlueprintPhysicalDiskConfig {
+                            identity: disk.disk_identity.clone(),
+                            id: disk_id.into_untyped_uuid(),
+                            pool_id: **zpool,
+                        })
+                    } else {
+                        None
+                    }
+                })
+                .collect::>();
+
+            // Remove any disks that appear in the blueprint, but not the database
+            let removals: HashSet = blueprint_disks
+                .keys()
+                .filter_map(|disk_id| {
+                    if !database_disks.contains_key(disk_id) {
+                        Some(*disk_id)
+                    } else {
+                        None
+                    }
+                })
+                .collect();
+
+            (additions, removals)
+        };
+
+        if additions.is_empty() && removals.is_empty() {
+            return Ok(Ensure::NotNeeded);
+        }
+
+        let disks = &mut self.disks.change_sled_disks(sled_id).disks;
+
+        disks.append(&mut additions);
+        disks.retain(|config| {
+            !removals.contains(&PhysicalDiskUuid::from_untyped_uuid(config.id))
+        });
+
+        Ok(Ensure::Added)
+    }
+
     pub fn sled_ensure_zone_ntp(
         &mut self,
         sled_id: SledUuid,
@@ -474,8 +571,10 @@ impl<'a> BlueprintBuilder<'a> {
     pub fn sled_ensure_zone_crucible(
         &mut self,
         sled_id: SledUuid,
-        pool_name: ZpoolName,
+        zpool_id: ZpoolUuid,
     ) -> Result {
+        let pool_name = zpool_id_to_external_name(zpool_id)?;
+
         // If this sled already has a Crucible zone on this pool, do nothing.
         let has_crucible_on_this_pool =
             self.zones.current_sled_zones(sled_id).any(|z| {
@@ -490,7 +589,7 @@ impl<'a> BlueprintBuilder<'a> {
         }
 
         let sled_info = self.sled_resources(sled_id)?;
-        if !sled_info.zpools.contains(&pool_name) {
+        if !sled_info.zpools.contains_key(&zpool_id) {
             return Err(Error::Planner(anyhow!(
                 "adding crucible zone for sled {:?}: \
                 attempted to use unknown zpool {:?}",
@@ -862,6 +961,99 @@ impl<'a> BlueprintZonesBuilder<'a> {
     }
 }
 
+/// Helper for working with sets of disks on each sled
+///
+/// Tracking the set of disks is slightly non-trivial because we need to bump
+/// the per-sled generation number iff the disks are changed. So we need to
+/// keep track of whether we've changed the disks relative to the parent
+/// blueprint. We do this by keeping a copy of any [`BlueprintPhysicalDisksConfig`]
+/// that we've changed and a _reference_ to the parent blueprint's disks. This
+/// struct makes it easy for callers to iterate over the right set of disks.
+struct BlueprintDisksBuilder<'a> {
+    changed_disks: BTreeMap,
+    parent_disks: &'a BTreeMap,
+}
+
+impl<'a> BlueprintDisksBuilder<'a> {
+    pub fn new(parent_blueprint: &'a Blueprint) -> BlueprintDisksBuilder {
+        BlueprintDisksBuilder {
+            changed_disks: BTreeMap::new(),
+            parent_disks: &parent_blueprint.blueprint_disks,
+        }
+    }
+
+    /// Returns a mutable reference to a sled's Omicron disks *because* we're
+    /// going to change them. It's essential that the caller _does_ change them
+    /// because we will have bumped the generation number and we don't want to
+    /// do that if no changes are being made.
+ pub fn change_sled_disks( + &mut self, + sled_id: SledUuid, + ) -> &mut BlueprintPhysicalDisksConfig { + self.changed_disks.entry(sled_id).or_insert_with(|| { + if let Some(old_sled_disks) = self.parent_disks.get(&sled_id) { + BlueprintPhysicalDisksConfig { + generation: old_sled_disks.generation.next(), + disks: old_sled_disks.disks.clone(), + } + } else { + // No requests have been sent to the disk previously, + // we should be able to use the first generation. + BlueprintPhysicalDisksConfig { + generation: Generation::new(), + disks: vec![], + } + } + }) + } + + /// Iterates over the list of Omicron disks currently configured for this + /// sled in the blueprint that's being built + pub fn current_sled_disks( + &self, + sled_id: SledUuid, + ) -> Box + '_> { + if let Some(sled_disks) = self + .changed_disks + .get(&sled_id) + .or_else(|| self.parent_disks.get(&sled_id)) + { + Box::new(sled_disks.disks.iter()) + } else { + Box::new(std::iter::empty()) + } + } + + /// Produces an owned map of disks for the requested sleds + pub fn into_disks_map( + mut self, + sled_ids: impl Iterator, + ) -> BTreeMap { + sled_ids + .map(|sled_id| { + // Start with self.changed_disks, which contains entries for any + // sled whose disks config is changing in this blueprint. + let mut disks = self + .changed_disks + .remove(&sled_id) + // If it's not there, use the config from the parent + // blueprint. + .or_else(|| self.parent_disks.get(&sled_id).cloned()) + // If it's not there either, then this must be a new sled + // and we haven't added any disks to it yet. Use the + // standard initial config. + .unwrap_or_else(|| BlueprintPhysicalDisksConfig { + generation: Generation::new(), + disks: vec![], + }); + disks.disks.sort_unstable_by_key(|d| d.id); + + (sled_id, disks) + }) + .collect() + } +} + #[cfg(test)] pub mod test { use super::*; @@ -978,10 +1170,8 @@ pub mod test { example.input.all_sled_resources(SledFilter::All) { builder.sled_ensure_zone_ntp(sled_id).unwrap(); - for pool_name in &sled_resources.zpools { - builder - .sled_ensure_zone_crucible(sled_id, pool_name.clone()) - .unwrap(); + for pool_id in sled_resources.zpools.keys() { + builder.sled_ensure_zone_crucible(sled_id, *pool_id).unwrap(); } } @@ -1011,10 +1201,8 @@ pub mod test { builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); // TODO-cleanup use `TypedUuid` everywhere let new_sled_resources = input.sled_resources(&new_sled_id).unwrap(); - for pool_name in &new_sled_resources.zpools { - builder - .sled_ensure_zone_crucible(new_sled_id, pool_name.clone()) - .unwrap(); + for pool_id in new_sled_resources.zpools.keys() { + builder.sled_ensure_zone_crucible(new_sled_id, *pool_id).unwrap(); } let blueprint3 = builder.build(); @@ -1074,7 +1262,65 @@ pub mod test { } }) .collect::>(); - assert_eq!(crucible_pool_names, new_sled_resources.zpools); + assert_eq!( + crucible_pool_names, + new_sled_resources + .zpools + .keys() + .map(|id| { zpool_id_to_external_name(*id).unwrap() }) + .collect() + ); + + logctx.cleanup_successful(); + } + + #[test] + fn test_add_physical_disks() { + static TEST_NAME: &str = "blueprint_builder_test_add_physical_disks"; + let logctx = test_setup_log(TEST_NAME); + let (collection, input) = + example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + + // We don't care about the DNS versions here. 
+ let internal_dns_version = Generation::new(); + let external_dns_version = Generation::new(); + let parent = BlueprintBuilder::build_initial_from_collection_seeded( + &collection, + internal_dns_version, + external_dns_version, + input.all_sled_ids(SledFilter::All), + "test", + TEST_NAME, + ) + .expect("failed to create initial blueprint"); + + { + // We start empty, and can add a disk + let mut builder = BlueprintBuilder::new_based_on( + &logctx.log, + &parent, + &input, + "test", + ) + .expect("failed to create builder"); + + assert!(builder.disks.changed_disks.is_empty()); + assert!(builder.disks.parent_disks.is_empty()); + + for (sled_id, sled_resources) in + input.all_sled_resources(SledFilter::InService) + { + assert_eq!( + builder + .sled_ensure_disks(sled_id, &sled_resources) + .unwrap(), + Ensure::Added, + ); + } + + assert!(!builder.disks.changed_disks.is_empty()); + assert!(builder.disks.parent_disks.is_empty()); + } logctx.cleanup_successful(); } diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 2878ed16f6..908afea535 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -106,9 +106,9 @@ impl ExampleSystem { vec![], ) .unwrap(); - for pool_name in &sled_resources.zpools { + for pool_name in sled_resources.zpools.keys() { let _ = builder - .sled_ensure_zone_crucible(sled_id, pool_name.clone()) + .sled_ensure_zone_crucible(sled_id, *pool_name) .unwrap(); } } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 25d49f4802..0b853a943d 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -93,9 +93,27 @@ impl<'a> Planner<'a> { // is fine. let mut sleds_waiting_for_ntp_zones = BTreeSet::new(); - for (sled_id, sled_info) in + for (sled_id, sled_resources) in self.input.all_sled_resources(SledFilter::InService) { + // First, we need to ensure that sleds are using their expected + // disks. This is necessary before we can allocate any zones. + if self.blueprint.sled_ensure_disks(sled_id, &sled_resources)? + == Ensure::Added + { + info!( + &self.log, + "altered physical disks"; + "sled_id" => %sled_id + ); + self.blueprint + .comment(&format!("sled {}: altered disks", sled_id)); + + // Note that this doesn't actually need to short-circuit the + // rest of the blueprint planning, as long as during execution + // we send this request first. + } + // Check for an NTP zone. Every sled should have one. If it's not // there, all we can do is provision that one zone. We have to wait // for that to succeed and synchronize the clock before we can @@ -152,17 +170,17 @@ impl<'a> Planner<'a> { // Every zpool on the sled should have a Crucible zone on it. let mut ncrucibles_added = 0; - for zpool_name in &sled_info.zpools { + for zpool_id in sled_resources.zpools.keys() { if self .blueprint - .sled_ensure_zone_crucible(sled_id, zpool_name.clone())? + .sled_ensure_zone_crucible(sled_id, *zpool_id)? 
== Ensure::Added { info!( &self.log, "found sled zpool missing Crucible zone (will add one)"; "sled_id" => ?sled_id, - "zpool_name" => ?zpool_name, + "zpool_id" => ?zpool_id, ); ncrucibles_added += 1; } diff --git a/nexus/reconfigurator/planning/src/system.rs b/nexus/reconfigurator/planning/src/system.rs index 3a03249936..15aefb7344 100644 --- a/nexus/reconfigurator/planning/src/system.rs +++ b/nexus/reconfigurator/planning/src/system.rs @@ -13,7 +13,10 @@ use nexus_inventory::CollectionBuilder; use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; use nexus_types::deployment::SledDetails; +use nexus_types::deployment::SledDisk; use nexus_types::deployment::SledResources; +use nexus_types::external_api::views::PhysicalDiskPolicy; +use nexus_types::external_api::views::PhysicalDiskState; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; @@ -22,7 +25,6 @@ use nexus_types::inventory::PowerState; use nexus_types::inventory::RotSlot; use nexus_types::inventory::SledRole; use nexus_types::inventory::SpType; -use nexus_types::inventory::ZpoolName; use omicron_common::address::get_sled_address; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; @@ -31,14 +33,16 @@ use omicron_common::address::RACK_PREFIX; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::ByteCount; use omicron_common::api::external::Generation; +use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; -use omicron_uuid_kinds::TypedUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::BTreeMap; use std::collections::BTreeSet; use std::fmt::Debug; use std::net::Ipv4Addr; use std::net::Ipv6Addr; -use uuid::Uuid; trait SubnetIterator: Iterator> + Debug {} impl SubnetIterator for T where @@ -181,7 +185,7 @@ impl SystemDescription { /// Add a sled to the system, as described by a SledBuilder pub fn sled(&mut self, sled: SledBuilder) -> anyhow::Result<&mut Self> { - let sled_id = sled.id.unwrap_or_else(TypedUuid::new_v4); + let sled_id = sled.id.unwrap_or_else(SledUuid::new_v4); ensure!( !self.sleds.contains_key(&sled_id), "attempted to add sled with the same id as an existing one: {}", @@ -309,7 +313,7 @@ impl SystemDescription { policy: sled.policy, state: SledState::Active, resources: SledResources { - zpools: sled.zpools.iter().cloned().collect(), + zpools: sled.zpools.clone(), subnet: sled.sled_subnet, }, }; @@ -421,7 +425,7 @@ struct Sled { sled_subnet: Ipv6Subnet, inventory_sp: Option<(u16, SpState)>, inventory_sled_agent: sled_agent_client::types::Inventory, - zpools: Vec, + zpools: BTreeMap, policy: SledPolicy, } @@ -440,8 +444,21 @@ impl Sled { let model = format!("model{}", unique); let serial = format!("serial{}", unique); let revision = 0; - let zpools = (0..nzpools) - .map(|_| format!("oxp_{}", Uuid::new_v4()).parse().unwrap()) + let zpools: BTreeMap<_, _> = (0..nzpools) + .map(|_| { + let zpool = ZpoolUuid::new_v4(); + let disk = SledDisk { + disk_identity: DiskIdentity { + vendor: String::from("fake-vendor"), + serial: format!("serial-{zpool}"), + model: String::from("fake-model"), + }, + disk_id: PhysicalDiskUuid::new_v4(), + policy: PhysicalDiskPolicy::InService, + state: PhysicalDiskState::Active, + }; + (zpool, disk) + }) .collect(); let inventory_sp = match hardware { SledHardware::Empty => None, @@ -498,7 +515,18 @@ 
impl Sled { sled_id: sled_id.into_untyped_uuid(), usable_hardware_threads: 10, usable_physical_ram: ByteCount::from(1024 * 1024), - disks: vec![], + // Populate disks, appearing like a real device. + disks: zpools + .values() + .enumerate() + .map(|(i, d)| sled_agent_client::types::InventoryDisk { + identity: d.disk_identity.clone(), + variant: sled_agent_client::types::DiskVariant::U2, + slot: i64::try_from(i).unwrap(), + }) + .collect(), + // Zpools won't necessarily show up until our first request + // to provision storage, so we omit them. zpools: vec![], } }; diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt index 3417089d99..92cfd1f651 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt @@ -82,8 +82,8 @@ parent: 55502b1b-e255-438b-a16a-2680a4b5f962 nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f METADATA: - created by: test_blueprint2 - created at: 1970-01-01T00:00:00.000Z - comment: (none) - internal DNS version: 1 - external DNS version: 1 + created by: test_blueprint2 + created at: 1970-01-01T00:00:00.000Z + comment: sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: altered disks, sled 75bc286f-2b4b-482c-9431-59272af529da: altered disks, sled affab35f-600a-4109-8ea0-34a067a4e0bc: altered disks + internal DNS version: 1 + external DNS version: 1 diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index f981a88235..52f3d3fecb 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -20,9 +20,9 @@ use nexus_types::deployment::PlanningInput; use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; use nexus_types::deployment::SledDetails; +use nexus_types::deployment::SledDisk; use nexus_types::deployment::SledResources; use nexus_types::deployment::UnstableReconfiguratorState; -use nexus_types::deployment::ZpoolName; use nexus_types::identity::Asset; use nexus_types::identity::Resource; use nexus_types::inventory::Collection; @@ -32,22 +32,24 @@ use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; +use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use slog::error; use slog::Logger; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::str::FromStr; /// Given various pieces of database state that go into the blueprint planning /// process, produce a `PlanningInput` object encapsulating what the planner /// needs to generate a blueprint pub struct PlanningInputFromDb<'a> { pub sled_rows: &'a [nexus_db_model::Sled], - pub zpool_rows: &'a [nexus_db_model::Zpool], + pub zpool_rows: + &'a [(nexus_db_model::Zpool, nexus_db_model::PhysicalDisk)], pub ip_pool_range_rows: &'a [nexus_db_model::IpPoolRange], pub external_ip_rows: &'a [nexus_db_model::ExternalIp], pub service_nic_rows: &'a [nexus_db_model::ServiceNetworkInterface], @@ -73,25 +75,22 @@ impl PlanningInputFromDb<'_> { let mut zpools_by_sled_id = { let mut zpools = BTreeMap::new(); - for z in self.zpool_rows { + for (zpool, disk) 
in self.zpool_rows { let sled_zpool_names = - zpools.entry(z.sled_id).or_insert_with(BTreeSet::new); - // It's unfortunate that Nexus knows how Sled Agent - // constructs zpool names, but there's not currently an - // alternative. - let id = ZpoolUuid::from_untyped_uuid(z.id()); - let zpool_name_generated = - illumos_utils::zpool::ZpoolName::new_external(id) - .to_string(); - let zpool_name = ZpoolName::from_str(&zpool_name_generated) - .map_err(|e| { - Error::internal_error(&format!( - "unexpectedly failed to parse generated \ - zpool name: {}: {}", - zpool_name_generated, e - )) - })?; - sled_zpool_names.insert(zpool_name); + zpools.entry(zpool.sled_id).or_insert_with(BTreeMap::new); + let zpool_id = ZpoolUuid::from_untyped_uuid(zpool.id()); + let disk = SledDisk { + disk_identity: DiskIdentity { + vendor: disk.vendor.clone(), + serial: disk.serial.clone(), + model: disk.model.clone(), + }, + disk_id: PhysicalDiskUuid::from_untyped_uuid(disk.id()), + policy: disk.disk_policy.into(), + state: disk.disk_state.into(), + }; + + sled_zpool_names.insert(zpool_id, disk); } zpools }; @@ -101,7 +100,7 @@ impl PlanningInputFromDb<'_> { let subnet = Ipv6Subnet::::new(sled_row.ip()); let zpools = zpools_by_sled_id .remove(&sled_id) - .unwrap_or_else(BTreeSet::new); + .unwrap_or_else(BTreeMap::new); let sled_details = SledDetails { policy: sled_row.policy(), state: sled_row.state().into(), diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index 7db59bc966..b46e918bb8 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -121,13 +121,15 @@ mod test { use nexus_db_queries::context::OpContext; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{ - Blueprint, BlueprintTarget, BlueprintZoneConfig, - BlueprintZoneDisposition, BlueprintZonesConfig, + Blueprint, BlueprintPhysicalDisksConfig, BlueprintTarget, + BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZonesConfig, }; use nexus_types::inventory::{ OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, }; use omicron_common::api::external::Generation; + use omicron_uuid_kinds::SledKind; + use omicron_uuid_kinds::TypedUuid; use serde::Deserialize; use serde_json::json; use std::collections::BTreeMap; @@ -141,6 +143,10 @@ mod test { fn create_blueprint( blueprint_zones: BTreeMap, + blueprint_disks: BTreeMap< + TypedUuid, + BlueprintPhysicalDisksConfig, + >, dns_version: Generation, ) -> (BlueprintTarget, Blueprint) { let id = Uuid::new_v4(); @@ -153,6 +159,7 @@ mod test { Blueprint { id, blueprint_zones, + blueprint_disks, parent_blueprint_id: None, internal_dns_version: dns_version, external_dns_version: dns_version, @@ -228,7 +235,11 @@ mod test { // With a target blueprint having no zones, the task should trivially // complete and report a successful (empty) summary. 
         let generation = Generation::new();
-        let blueprint = Arc::new(create_blueprint(BTreeMap::new(), generation));
+        let blueprint = Arc::new(create_blueprint(
+            BTreeMap::new(),
+            BTreeMap::new(),
+            generation,
+        ));
         blueprint_tx.send(Some(blueprint)).unwrap();
         let value = task.activate(&opctx).await;
         println!("activating with no zones: {:?}", value);
@@ -273,6 +284,7 @@ mod test {
             (sled_id1, make_zones(BlueprintZoneDisposition::InService)),
             (sled_id2, make_zones(BlueprintZoneDisposition::Quiesced)),
         ]),
+        BTreeMap::new(),
         generation,
     );
diff --git a/nexus/src/app/background/blueprint_load.rs b/nexus/src/app/background/blueprint_load.rs
index 2afe2d2f97..d9f6411721 100644
--- a/nexus/src/app/background/blueprint_load.rs
+++ b/nexus/src/app/background/blueprint_load.rs
@@ -229,6 +229,7 @@ mod test {
         Blueprint {
             id,
             blueprint_zones: BTreeMap::new(),
+            blueprint_disks: BTreeMap::new(),
             parent_blueprint_id: Some(parent_blueprint_id),
             internal_dns_version: Generation::new(),
             external_dns_version: Generation::new(),
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs
index e260e9a87b..77ef3318f6 100644
--- a/nexus/src/app/background/init.rs
+++ b/nexus/src/app/background/init.rs
@@ -16,6 +16,7 @@ use super::inventory_collection;
 use super::metrics_producer_gc;
 use super::nat_cleanup;
 use super::phantom_disks;
+use super::physical_disk_adoption;
 use super::region_replacement;
 use super::sync_service_zone_nat::ServiceZoneNatTracker;
 use super::sync_switch_configuration::SwitchPortSettingsManager;
@@ -68,6 +69,9 @@ pub struct BackgroundTasks {
     /// task handle for the task that collects inventory
     pub task_inventory_collection: common::TaskHandle,
 
+    /// task handle for the task that automatically adopts physical disks
+    pub task_physical_disk_adoption: common::TaskHandle,
+
     /// task handle for the task that detects phantom disks
     pub task_phantom_disks: common::TaskHandle,
 
@@ -245,7 +249,7 @@ impl BackgroundTasks {
         // because the blueprint executor might also depend indirectly on the
         // inventory collector. In that case, we may need to do something more
         // complicated. But for now, this works.
- let task_inventory_collection = { + let (task_inventory_collection, inventory_watcher) = { let collector = inventory_collection::InventoryCollector::new( datastore.clone(), resolver.clone(), @@ -253,6 +257,7 @@ impl BackgroundTasks { config.inventory.nkeep, config.inventory.disable, ); + let inventory_watcher = collector.watcher(); let task = driver.register( String::from("inventory_collection"), String::from( @@ -265,7 +270,23 @@ impl BackgroundTasks { vec![Box::new(rx_blueprint_exec)], ); - task + (task, inventory_watcher) + }; + + let task_physical_disk_adoption = { + driver.register( + "physical_disk_adoption".to_string(), + "ensure new physical disks are automatically marked in-service" + .to_string(), + config.physical_disk_adoption.period_secs, + Box::new(physical_disk_adoption::PhysicalDiskAdoption::new( + datastore.clone(), + inventory_watcher.clone(), + config.physical_disk_adoption.disable, + )), + opctx.child(BTreeMap::new()), + vec![Box::new(inventory_watcher)], + ) }; let task_service_zone_nat_tracker = { @@ -330,6 +351,7 @@ impl BackgroundTasks { nat_cleanup, bfd_manager, task_inventory_collection, + task_physical_disk_adoption, task_phantom_disks, task_blueprint_loader, task_blueprint_executor, diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index 0666c136fc..236ba9b197 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/inventory_collection.rs @@ -16,9 +16,11 @@ use nexus_db_queries::db::DataStore; use nexus_inventory::InventoryError; use nexus_types::identity::Asset; use nexus_types::inventory::Collection; +use omicron_uuid_kinds::CollectionUuid; use serde_json::json; use std::num::NonZeroU32; use std::sync::Arc; +use tokio::sync::watch; /// How many rows to request in each paginated database query const DB_PAGE_SIZE: u32 = 1024; @@ -30,6 +32,7 @@ pub struct InventoryCollector { creator: String, nkeep: u32, disable: bool, + tx: watch::Sender>, } impl InventoryCollector { @@ -40,14 +43,20 @@ impl InventoryCollector { nkeep: u32, disable: bool, ) -> InventoryCollector { + let (tx, _) = watch::channel(None); InventoryCollector { datastore, resolver, creator: creator.to_owned(), nkeep, disable, + tx, } } + + pub fn watcher(&self) -> watch::Receiver> { + self.tx.subscribe() + } } impl BackgroundTask for InventoryCollector { @@ -78,11 +87,13 @@ impl BackgroundTask for InventoryCollector { "collection_id" => collection.id.to_string(), "time_started" => collection.time_started.to_string(), ); - json!({ + let json = json!({ "collection_id": collection.id.to_string(), "time_started": collection.time_started.to_string(), "time_done": collection.time_done.to_string() - }) + }); + self.tx.send_replace(Some(collection.id)); + json } } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 2b8db422b4..0e3b162404 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -18,6 +18,7 @@ mod metrics_producer_gc; mod nat_cleanup; mod networking; mod phantom_disks; +mod physical_disk_adoption; mod region_replacement; mod status; mod sync_service_zone_nat; diff --git a/nexus/src/app/background/physical_disk_adoption.rs b/nexus/src/app/background/physical_disk_adoption.rs new file mode 100644 index 0000000000..e5e1e89b64 --- /dev/null +++ b/nexus/src/app/background/physical_disk_adoption.rs @@ -0,0 +1,131 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for automatically adopting physical disks. +//! +//! Removable disks may be arbitrarily attached and detached from +//! Oxide racks. When this happens, if they had not previously +//! been part of a cluster, they need to be explicitly added +//! to become usable. +//! +//! In the future, this may become more explicitly operator-controlled. + +use super::common::BackgroundTask; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::PhysicalDisk; +use nexus_db_model::Zpool; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::DataStore; +use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::GenericUuid; +use serde_json::json; +use std::sync::Arc; +use tokio::sync::watch; +use uuid::Uuid; + +pub struct PhysicalDiskAdoption { + datastore: Arc<DataStore>, + disable: bool, + rx_inventory_collection: watch::Receiver<Option<CollectionUuid>>, +} + +impl PhysicalDiskAdoption { + pub fn new( + datastore: Arc<DataStore>, + rx_inventory_collection: watch::Receiver<Option<CollectionUuid>>, + disable: bool, + ) -> Self { + PhysicalDiskAdoption { datastore, disable, rx_inventory_collection } + } +} + +impl BackgroundTask for PhysicalDiskAdoption { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + if self.disable { + return json!({ "error": "task disabled" }); + } + + let mut disks_added = 0; + let log = &opctx.log; + warn!(&log, "physical disk adoption task started"); + + let collection_id = *self.rx_inventory_collection.borrow(); + let Some(collection_id) = collection_id else { + warn!( + &opctx.log, + "Physical Disk Adoption: skipped"; + "reason" => "no inventory" + ); + return json!({ "error": "no inventory" }); + }; + + let result = self.datastore.physical_disk_uninitialized_list( + opctx, + collection_id, + ).await; + + let uninitialized = match result { + Ok(uninitialized) => uninitialized, + Err(err) => { + warn!( + &opctx.log, + "Physical Disk Adoption: failed to query for insertable disks"; + "err" => %err, + ); + return json!({ "error": format!("failed to query database: {:#}", err) }); + }, + }; + + for inv_disk in uninitialized { + let disk = PhysicalDisk::new( + Uuid::new_v4(), + inv_disk.vendor, + inv_disk.serial, + inv_disk.model, + inv_disk.variant, + inv_disk.sled_id.into_untyped_uuid(), + ); + + let zpool = Zpool::new( + Uuid::new_v4(), + inv_disk.sled_id.into_untyped_uuid(), + disk.id() + ); + + let result = self.datastore.physical_disk_and_zpool_insert( + opctx, + disk, + zpool + ).await; + + if let Err(err) = result { + warn!( + &opctx.log, + "Physical Disk Adoption: failed to insert new disk and zpool"; + "err" => %err + ); + return json!({ "error": format!("failed to insert disk/zpool: {:#}", err) }); + } + + disks_added += 1; + + info!( + &opctx.log, + "Physical Disk Adoption: Successfully added a new disk and zpool" + ); + } + + warn!(&log, "physical disk adoption task done"); + json!({ + "physical_disks_added": disks_added, + }) + } + .boxed() + } +} diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index de6eb84334..55a8c18910 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -222,7 +222,7 @@ impl super::Nexus { request.variant.into(), request.sled_id, ); - self.db_datastore.physical_disk_upsert(&opctx, disk).await?; + self.db_datastore.physical_disk_insert(&opctx, disk).await?; Ok(()) } @@ -246,7 +246,7 @@ impl super::Nexus { request.sled_id, request.physical_disk_id, ); -
self.db_datastore.zpool_upsert(&opctx, zpool).await?; + self.db_datastore.zpool_insert(&opctx, zpool).await?; Ok(()) } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index a033fb10c7..81235efc9a 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -829,6 +829,11 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { Blueprint { id: Uuid::new_v4(), blueprint_zones, + // NOTE: We'll probably need to actually add disks here + // when the Blueprint contains "which disks back zones". + // + // However, for now, this isn't necessary. + blueprint_disks: BTreeMap::new(), parent_blueprint_id: None, internal_dns_version: dns_config .generation diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index ba3f145bb6..94cf34ee41 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -101,6 +101,9 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +physical_disk_adoption.period_secs = 30 +# Disable automatic disk adoption to avoid interfering with tests. +physical_disk_adoption.disable = true blueprints.period_secs_load = 100 blueprints.period_secs_execute = 600 sync_service_zone_nat.period_secs = 30 diff --git a/nexus/tests/integration_tests/sleds.rs b/nexus/tests/integration_tests/sleds.rs index b6ed9183a3..bf1e2e4b99 100644 --- a/nexus/tests/integration_tests/sleds.rs +++ b/nexus/tests/integration_tests/sleds.rs @@ -121,7 +121,7 @@ async fn test_physical_disk_create_list_delete( let opctx = OpContext::for_tests(cptestctx.logctx.log.new(o!()), datastore.clone()); let _disk_id = datastore - .physical_disk_upsert(&opctx, physical_disk.clone()) + .physical_disk_insert(&opctx, physical_disk.clone()) .await .expect("Failed to upsert physical disk"); @@ -148,10 +148,8 @@ async fn test_physical_disk_create_list_delete( .await .expect("Failed to upsert physical disk"); - assert_eq!( - physical_disks_list(&external_client, &disks_url).await, - disks_initial - ); + let list = physical_disks_list(&external_client, &disks_url).await; + assert_eq!(list, disks_initial, "{:#?}", list,); } #[nexus_test] diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index e74ad7f873..ead7e025e3 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -38,6 +38,7 @@ use uuid::Uuid; mod planning_input; +pub use planning_input::DiskFilter; pub use planning_input::ExternalIp; pub use planning_input::PlanningInput; pub use planning_input::PlanningInputBuildError; @@ -45,6 +46,7 @@ pub use planning_input::PlanningInputBuilder; pub use planning_input::Policy; pub use planning_input::ServiceNetworkInterface; pub use planning_input::SledDetails; +pub use planning_input::SledDisk; pub use planning_input::SledFilter; pub use planning_input::SledResources; @@ -98,6 +100,9 @@ pub struct Blueprint { /// entry in this map. pub blueprint_zones: BTreeMap, + /// A map of sled id -> disks in use on each sled. + pub blueprint_disks: BTreeMap, + /// which blueprint this blueprint is based on pub parent_blueprint_id: Option, @@ -513,6 +518,15 @@ pub enum BlueprintZoneFilter { ShouldDeployVpcFirewallRules, } +/// Information about an Omicron physical disk as recorded in a blueprint. +/// +/// Part of [`Blueprint`]. 
+pub type BlueprintPhysicalDisksConfig = + sled_agent_client::types::OmicronPhysicalDisksConfig; + +pub type BlueprintPhysicalDiskConfig = + sled_agent_client::types::OmicronPhysicalDiskConfig; + /// Describe high-level metadata about a blueprint // These fields are a subset of [`Blueprint`], and include only the data we can // quickly fetch from the main blueprint table (e.g., when listing all diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 8244c6b616..5b4bf2538e 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -5,34 +5,74 @@ //! Types describing inputs the Reconfigurator needs to plan and produce new //! blueprints. +use crate::external_api::views::PhysicalDiskPolicy; +use crate::external_api::views::PhysicalDiskState; use crate::external_api::views::SledPolicy; use crate::external_api::views::SledProvisionPolicy; use crate::external_api::views::SledState; -use crate::inventory::ZpoolName; use ipnetwork::IpNetwork; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::MacAddr; +use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; +use omicron_uuid_kinds::ZpoolUuid; use serde::Deserialize; use serde::Serialize; use std::collections::btree_map::Entry; use std::collections::BTreeMap; -use std::collections::BTreeSet; use strum::IntoEnumIterator; use uuid::Uuid; +/// Describes a single disk already managed by the sled. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct SledDisk { + pub disk_identity: DiskIdentity, + pub disk_id: PhysicalDiskUuid, + pub policy: PhysicalDiskPolicy, + pub state: PhysicalDiskState, +} + +/// Filters that apply to disks. +#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)] +pub enum DiskFilter { + /// All disks + All, + + /// All disks which are in-service. 
+ InService, +} + +impl DiskFilter { + fn matches_policy_and_state( + self, + policy: PhysicalDiskPolicy, + state: PhysicalDiskState, + ) -> bool { + match self { + DiskFilter::All => true, + DiskFilter::InService => match (policy, state) { + (PhysicalDiskPolicy::InService, PhysicalDiskState::Active) => { + true + } + _ => false, + }, + } + } +} + /// Describes the resources available on each sled for the planner #[derive(Debug, Clone, Serialize, Deserialize)] pub struct SledResources { - /// zpools on this sled + /// zpools (and their backing disks) on this sled /// /// (used to allocate storage for control plane zones with persistent /// storage) - pub zpools: BTreeSet, + pub zpools: BTreeMap, /// the IPv6 subnet of this sled on the underlay network /// @@ -41,6 +81,19 @@ pub struct SledResources { pub subnet: Ipv6Subnet, } +impl SledResources { + pub fn all_disks( + &self, + filter: DiskFilter, + ) -> impl Iterator + '_ { + self.zpools.iter().filter_map(move |(zpool, disk)| { + filter + .matches_policy_and_state(disk.policy, disk.state) + .then_some((zpool, disk)) + }) + } +} + /// External IP allocated to a service /// /// This is a slimmer `nexus_db_model::ExternalIp` that only stores the fields diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 7681c0d601..c4eb89f0f9 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2570,6 +2570,13 @@ "description": "Describes a complete set of software and configuration for the system", "type": "object", "properties": { + "blueprint_disks": { + "description": "A map of sled id -> disks in use on each sled.", + "type": "object", + "additionalProperties": { + "$ref": "#/components/schemas/OmicronPhysicalDisksConfig" + } + }, "blueprint_zones": { "description": "A map of sled id -> zones deployed on each sled, along with the [`BlueprintZoneDisposition`] for each zone.\n\nA sled is considered part of the control plane cluster iff it has an entry in this map.", "type": "object", @@ -2619,6 +2626,7 @@ } }, "required": [ + "blueprint_disks", "blueprint_zones", "comment", "creator", @@ -3611,6 +3619,26 @@ "histogram_f64" ] }, + "DiskIdentity": { + "description": "Uniquely identifies a disk.", + "type": "object", + "properties": { + "model": { + "type": "string" + }, + "serial": { + "type": "string" + }, + "vendor": { + "type": "string" + } + }, + "required": [ + "model", + "serial", + "vendor" + ] + }, "DiskRuntimeState": { "description": "Runtime state of the Disk, which includes its attach state and some minimal metadata", "type": "object", @@ -5503,6 +5531,51 @@ "description": "Unique name for a saga [`Node`]\n\nEach node requires a string name that's unique within its DAG. The name is used to identify its output. Nodes that depend on a given node (either directly or indirectly) can access the node's output using its name.", "type": "string" }, + "OmicronPhysicalDiskConfig": { + "description": "OmicronPhysicalDiskConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"id\", \"identity\", \"pool_id\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"identity\": { \"$ref\": \"#/components/schemas/DiskIdentity\" }, \"pool_id\": { \"$ref\": \"#/components/schemas/TypedUuidForZpoolKind\" } } } ```
", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "identity": { + "$ref": "#/components/schemas/DiskIdentity" + }, + "pool_id": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + }, + "required": [ + "id", + "identity", + "pool_id" + ] + }, + "OmicronPhysicalDisksConfig": { + "description": "OmicronPhysicalDisksConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"disks\", \"generation\" ], \"properties\": { \"disks\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/OmicronPhysicalDiskConfig\" } }, \"generation\": { \"description\": \"generation number of this configuration\\n\\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\\n\\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Generation\" } ] } } } ```
", + "type": "object", + "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OmicronPhysicalDiskConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + } + }, + "required": [ + "disks", + "generation" + ] + }, "OmicronZoneConfig": { "description": "Describes one Omicron-managed zone running on a sled\n\n
JSON schema\n\n```json { \"description\": \"Describes one Omicron-managed zone running on a sled\", \"type\": \"object\", \"required\": [ \"id\", \"underlay_address\", \"zone_type\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"underlay_address\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"zone_type\": { \"$ref\": \"#/components/schemas/OmicronZoneType\" } } } ```
", "type": "object", @@ -7468,6 +7541,10 @@ "type": "string", "format": "uuid" }, + "TypedUuidForZpoolKind": { + "type": "string", + "format": "uuid" + }, "UninitializedSled": { "description": "A sled that has not been added to an initialized rack yet", "type": "object", diff --git a/schema/crdb/blueprint-physical-disk/up1.sql b/schema/crdb/blueprint-physical-disk/up1.sql new file mode 100644 index 0000000000..faf3b49e86 --- /dev/null +++ b/schema/crdb/blueprint-physical-disk/up1.sql @@ -0,0 +1,10 @@ +-- description of a collection of omicron physical disks stored in a blueprint. +CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_physical_disks ( + -- foreign key into `blueprint` table + blueprint_id UUID NOT NULL, + + sled_id UUID NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (blueprint_id, sled_id) +); + diff --git a/schema/crdb/blueprint-physical-disk/up2.sql b/schema/crdb/blueprint-physical-disk/up2.sql new file mode 100644 index 0000000000..734bbd0ccb --- /dev/null +++ b/schema/crdb/blueprint-physical-disk/up2.sql @@ -0,0 +1,20 @@ +-- description of omicron physical disks specified in a blueprint. +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_physical_disk ( + -- foreign key into the `blueprint` table + blueprint_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a blueprint could refer to a sled that no longer exists, + -- particularly if the blueprint is older than the current target) + sled_id UUID NOT NULL, + + vendor TEXT NOT NULL, + serial TEXT NOT NULL, + model TEXT NOT NULL, + + id UUID NOT NULL, + pool_id UUID NOT NULL, + + PRIMARY KEY (blueprint_id, id) +); + diff --git a/schema/crdb/blueprint-physical-disk/up3.sql b/schema/crdb/blueprint-physical-disk/up3.sql new file mode 100644 index 0000000000..48a5c182ac --- /dev/null +++ b/schema/crdb/blueprint-physical-disk/up3.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS vendor_serial_model_unique CASCADE; diff --git a/schema/crdb/blueprint-physical-disk/up4.sql b/schema/crdb/blueprint-physical-disk/up4.sql new file mode 100644 index 0000000000..0224215789 --- /dev/null +++ b/schema/crdb/blueprint-physical-disk/up4.sql @@ -0,0 +1,3 @@ +CREATE UNIQUE INDEX IF NOT EXISTS vendor_serial_model_unique on omicron.public.physical_disk ( + vendor, serial, model +) WHERE time_deleted IS NULL AND disk_state != 'decommissioned'; diff --git a/schema/crdb/blueprint-physical-disk/up5.sql b/schema/crdb/blueprint-physical-disk/up5.sql new file mode 100644 index 0000000000..eed24a7806 --- /dev/null +++ b/schema/crdb/blueprint-physical-disk/up5.sql @@ -0,0 +1 @@ +DROP INDEX IF EXISTS lookup_physical_disk_by_sled; diff --git a/schema/crdb/blueprint-physical-disk/up6.sql b/schema/crdb/blueprint-physical-disk/up6.sql new file mode 100644 index 0000000000..ec69b95a67 --- /dev/null +++ b/schema/crdb/blueprint-physical-disk/up6.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS lookup_physical_disk_by_sled ON omicron.public.physical_disk ( + sled_id, + id +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 1fb1c6f3f3..ca9eea49b3 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -383,15 +383,15 @@ CREATE TABLE IF NOT EXISTS omicron.public.physical_disk ( sled_id UUID NOT NULL, disk_policy omicron.public.physical_disk_policy NOT NULL, - disk_state omicron.public.physical_disk_state NOT NULL, - - -- This constraint should be upheld, even for deleted disks - -- in the fleet. 
- CONSTRAINT vendor_serial_model_unique UNIQUE ( - vendor, serial, model - ) + disk_state omicron.public.physical_disk_state NOT NULL ); +-- This constraint only needs to be upheld for disks that are not deleted +-- nor decommissioned. +CREATE UNIQUE INDEX IF NOT EXISTS vendor_serial_model_unique on omicron.public.physical_disk ( + vendor, serial, model +) WHERE time_deleted IS NULL AND disk_state != 'decommissioned'; + CREATE UNIQUE INDEX IF NOT EXISTS lookup_physical_disk_by_variant ON omicron.public.physical_disk ( variant, id @@ -401,7 +401,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_physical_disk_by_variant ON omicron.pub CREATE UNIQUE INDEX IF NOT EXISTS lookup_physical_disk_by_sled ON omicron.public.physical_disk ( sled_id, id -) WHERE time_deleted IS NULL; +); -- x509 certificates which may be used by services CREATE TABLE IF NOT EXISTS omicron.public.certificate ( @@ -3252,6 +3252,36 @@ CREATE TABLE IF NOT EXISTS omicron.public.bp_target ( time_made_target TIMESTAMPTZ NOT NULL ); +-- description of a collection of omicron physical disks stored in a blueprint. +CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_physical_disks ( + -- foreign key into `blueprint` table + blueprint_id UUID NOT NULL, + + sled_id UUID NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (blueprint_id, sled_id) +); + +-- description of omicron physical disks specified in a blueprint. +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_physical_disk ( + -- foreign key into the `blueprint` table + blueprint_id UUID NOT NULL, + + -- unique id for this sled (should be foreign keys into `sled` table, though + -- it's conceivable a blueprint could refer to a sled that no longer exists, + -- particularly if the blueprint is older than the current target) + sled_id UUID NOT NULL, + + vendor TEXT NOT NULL, + serial TEXT NOT NULL, + model TEXT NOT NULL, + + id UUID NOT NULL, + pool_id UUID NOT NULL, + + PRIMARY KEY (blueprint_id, id) +); + -- see inv_sled_omicron_zones, which is identical except it references a -- collection whereas this table references a blueprint CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_zones ( @@ -3760,7 +3790,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '51.0.0', NULL) + ( TRUE, NOW(), NOW(), '52.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 6d7b8cd7c7..c8a2e14ff5 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -92,8 +92,8 @@ use nexus_client::{ types as NexusTypes, Client as NexusClient, Error as NexusError, }; use nexus_types::deployment::{ - Blueprint, BlueprintZoneConfig, BlueprintZoneDisposition, - BlueprintZonesConfig, + Blueprint, BlueprintPhysicalDisksConfig, BlueprintZoneConfig, + BlueprintZoneDisposition, BlueprintZonesConfig, }; use omicron_common::address::get_sled_address; use omicron_common::api::external::Generation; @@ -104,6 +104,7 @@ use omicron_common::backoff::{ use omicron_common::ledger::{self, Ledger, Ledgerable}; use omicron_ddm_admin_client::{Client as DdmAdminClient, DdmError}; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use serde::{Deserialize, Serialize}; use sled_agent_client::{ types as SledAgentTypes, Client as SledAgentClient, Error as SledAgentError, @@ -1330,7 +1331,7 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( internal_dns_version: Generation, ) -> Blueprint { let mut blueprint_zones = 
BTreeMap::new(); - for (sled_id, sled_config) in sled_configs { + for (sled_id, sled_config) in &sled_configs { let zones_config = BlueprintZonesConfig { // This is a bit of a hack. We only construct a blueprint after // completing RSS, so we need to know the final generation value @@ -1345,21 +1346,42 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( generation: DeployStepVersion::V5_EVERYTHING, zones: sled_config .zones - .into_iter() + .iter() .map(|z| BlueprintZoneConfig { - config: z.into(), + config: z.clone().into(), // All initial zones are in-service. disposition: BlueprintZoneDisposition::InService, }) .collect(), }; - blueprint_zones.insert(sled_id, zones_config); + blueprint_zones.insert(*sled_id, zones_config); + } + + let mut blueprint_disks = BTreeMap::new(); + for (sled_id, sled_config) in &sled_configs { + blueprint_disks.insert( + SledUuid::from_untyped_uuid(*sled_id), + BlueprintPhysicalDisksConfig { + generation: sled_config.disks.generation, + disks: sled_config + .disks + .disks + .iter() + .map(|d| SledAgentTypes::OmicronPhysicalDiskConfig { + identity: d.identity.clone(), + id: d.id, + pool_id: d.pool_id, + }) + .collect(), + }, + ); } Blueprint { id: Uuid::new_v4(), blueprint_zones, + blueprint_disks, parent_blueprint_id: None, internal_dns_version, // We don't configure external DNS during RSS, so set it to an initial diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index dcb8ea041c..400a987786 100644 --- a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -49,6 +49,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +physical_disk_adoption.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index 2cd520653f..524d521c89 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -49,6 +49,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +physical_disk_adoption.period_secs = 30 blueprints.period_secs_load = 10 blueprints.period_secs_execute = 60 sync_service_zone_nat.period_secs = 30 diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 41073f8638..9d33d433d2 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -54,6 +54,7 @@ impl_typed_uuid_kind! 
{ DownstairsRegion => "downstairs_region", LoopbackAddress => "loopback_address", OmicronZone => "service", + PhysicalDisk => "physical_disk", Sled => "sled", TufRepo => "tuf_repo", Upstairs => "upstairs", From 14a0d8999cd323724b7a9165ba0a1e55da20be1d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sun, 14 Apr 2024 06:10:38 +0000 Subject: [PATCH 137/334] chore(deps): update rust crate num to 0.4.2 (#5528) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 40c3906898..e15447080f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4984,9 +4984,9 @@ dependencies = [ [[package]] name = "num" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05180d69e3da0e530ba2a1dae5110317e49e3b7f3d41be227dc5f92e49ee7af" +checksum = "3135b08af27d103b0a51f2ae0f8632117b7b185ccf931445affa8df530576a41" dependencies = [ "num-complex", "num-integer", @@ -5027,9 +5027,9 @@ dependencies = [ [[package]] name = "num-complex" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ba157ca0885411de85d6ca030ba7e2a83a28636056c7c699b07c8b6f7383214" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" dependencies = [ "num-traits", ] diff --git a/Cargo.toml b/Cargo.toml index 0249e19e01..068ff52d50 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -294,7 +294,7 @@ nexus-test-utils-macros = { path = "nexus/test-utils-macros" } nexus-test-utils = { path = "nexus/test-utils" } nexus-types = { path = "nexus/types" } num-integer = "0.1.46" -num = { version = "0.4.1", default-features = false, features = [ "libm" ] } +num = { version = "0.4.2", default-features = false, features = [ "libm" ] } omicron-common = { path = "common" } omicron-gateway = { path = "gateway" } omicron-nexus = { path = "nexus" } From 191351f2fb4ba7622076686f94e7d8afccd4310b Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sun, 14 Apr 2024 00:18:16 -0700 Subject: [PATCH 138/334] chore(deps): update rust crate either to 1.11.0 (#5529) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e15447080f..b664f3ca1d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2200,9 +2200,9 @@ dependencies = [ [[package]] name = "either" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" +checksum = "a47c1c47d2f5964e29c61246e81db715514cd532db6b5116a25ea3c03d6780a2" [[package]] name = "elliptic-curve" diff --git a/Cargo.toml b/Cargo.toml index 068ff52d50..b4bf2bef58 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -216,7 +216,7 @@ dns-service-client = { path = "clients/dns-service-client" } dpd-client = { path = "clients/dpd-client" } dropshot = { git = "https://github.com/oxidecomputer/dropshot", branch = "main", features = [ "usdt-probes" ] } dyn-clone = "1.0.17" -either = "1.10.0" +either = "1.11.0" expectorate = "1.1.0" fatfs = "0.3.6" filetime = "0.2.23" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 3086078ca7..e4a70c276b 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -40,7 +40,7 @@ crypto-common = { version = "0.1.6", 
default-features = false, features = ["getr der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } diesel = { version = "2.1.5", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } -either = { version = "1.10.0" } +either = { version = "1.11.0" } elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } ff = { version = "0.13.0", default-features = false, features = ["alloc"] } flate2 = { version = "1.0.28" } @@ -147,7 +147,7 @@ crypto-common = { version = "0.1.6", default-features = false, features = ["getr der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } diesel = { version = "2.1.5", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } -either = { version = "1.10.0" } +either = { version = "1.11.0" } elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } ff = { version = "0.13.0", default-features = false, features = ["alloc"] } flate2 = { version = "1.0.28" } From 33332e3f14773f62f6fc38eb054f14a32c984dfc Mon Sep 17 00:00:00 2001 From: Eliza Weisman Date: Sun, 14 Apr 2024 10:39:56 -0700 Subject: [PATCH 139/334] [nix flake] Updates and maintenance (#5524) This commit updates the Nix flake so that it builds with the current dependencies on `mgd` and Clickhouse. The Clickhouse version we download has changed, so the SHA256 hash in the Nix flake must be updated (this can't be depended on automatically from the fs, currently, because we store its hash as MD5 in the `clickhouse_version` file, rather than SHA256). `mgd` now depends on OpenSSL, so I've changed the build inputs for its derivation to include OpenSSL headers for `autoPatchelfHook`. I think I might be the only user of the Nix flake, so I don't know if it really makes sense to ask anyone else to review this. Rest assured that it does, in fact, fix it.
:) --- flake.lock | 12 ++++++------ flake.nix | 10 ++-------- 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/flake.lock b/flake.lock index f2dfc1b532..7c6acc0815 100644 --- a/flake.lock +++ b/flake.lock @@ -20,11 +20,11 @@ }, "nixpkgs": { "locked": { - "lastModified": 1706371002, - "narHash": "sha256-dwuorKimqSYgyu8Cw6ncKhyQjUDOyuXoxDTVmAXq88s=", + "lastModified": 1712791164, + "narHash": "sha256-3sbWO1mbpWsLepZGbWaMovSO7ndZeFqDSdX0hZ9nVyw=", "owner": "NixOS", "repo": "nixpkgs", - "rev": "c002c6aa977ad22c60398daaa9be52f2203d0006", + "rev": "1042fd8b148a9105f3c0aca3a6177fd1d9360ba5", "type": "github" }, "original": { @@ -48,11 +48,11 @@ ] }, "locked": { - "lastModified": 1706634984, - "narHash": "sha256-xn7lGPE8gRGBe3Lt8ESoN/uUHm7IrbiV7siupwjHX1o=", + "lastModified": 1712888034, + "narHash": "sha256-SmBeT3oxdwOzheSfxZmk+3xmv98Z3zlzjlnl9nBdOIE=", "owner": "oxalica", "repo": "rust-overlay", - "rev": "883b84c426107a8ec020e7124f263d7c35a5bb9f", + "rev": "96fbdc73dec8eaa5a9d4a9b307b75c9a856e5dec", "type": "github" }, "original": { diff --git a/flake.nix b/flake.nix index 8897d9428d..6828577403 100644 --- a/flake.nix +++ b/flake.nix @@ -224,6 +224,7 @@ buildInputs = [ glibc gcc-unwrapped + openssl.dev ]; installPhase = @@ -259,7 +260,7 @@ # can't give Nix those hashes and must instead determine it ourselves. # this means that we will have to update this SHA if the clickhouse # version changes. - sha256 = "1lgxwh67apgl386ilpg0iy5xkyz12q4lgnz08zswjbxv88ra0qxj"; + sha256 = "0wx8w9sdms5hsc9f835ivsissf15wjzdb9cvxr65xdi384i9pkzx"; src = builtins.fetchurl { inherit sha256; @@ -428,10 +429,3 @@ }; }; } - - - - - - - From 117808ec4b2facb6d7d940166a9c05641e10a9c2 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Sun, 14 Apr 2024 16:11:58 -0500 Subject: [PATCH 140/334] Bump web console (floating IP improvements, quotas are GiB) (#5531) https://github.com/oxidecomputer/console/compare/7e34c118...2ba444ca * [2ba444ca](https://github.com/oxidecomputer/console/commit/2ba444ca) smarter warning suppression for unhandled routes in msw * [0de42104](https://github.com/oxidecomputer/console/commit/0de42104) oxidecomputer/console#2146 * [5cae2111](https://github.com/oxidecomputer/console/commit/5cae2111) turn off links test for now. 
it freaks me out * [37470900](https://github.com/oxidecomputer/console/commit/37470900) bump most deps * [ff67b406](https://github.com/oxidecomputer/console/commit/ff67b406) oxidecomputer/console#2145 * [f1e8f2ee](https://github.com/oxidecomputer/console/commit/f1e8f2ee) oxidecomputer/console#2139 * [31508df8](https://github.com/oxidecomputer/console/commit/31508df8) bring instance networking floating ip detach modal copy in line with floating IPs page * [a433d71a](https://github.com/oxidecomputer/console/commit/a433d71a) oxidecomputer/console#2130 * [83ace42b](https://github.com/oxidecomputer/console/commit/83ace42b) oxidecomputer/console#2141 * [9270a930](https://github.com/oxidecomputer/console/commit/9270a930) oxidecomputer/console#2134 * [00a03637](https://github.com/oxidecomputer/console/commit/00a03637) update api-diff for better way of calling oxide.ts --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 97dd5f60c2..e3da6505e4 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="7e34c118e6e3687c7d2a3931328083a397a06d35" -SHA2="219ee83e8b71bc844203df1ac2cfb3369320c8ad74b393c4229ab8e0d18be8b5" +COMMIT="2ba444caf8d19830c41448847f0acf61d4a5d2b2" +SHA2="752ea88d5d3e2f92d32a8d91d3ad7f3a154086d177511a18401cb14a55eb1ea4" From e84954571a6b11da7418c98c0e9db808b7a9a8ad Mon Sep 17 00:00:00 2001 From: Rain Date: Sun, 14 Apr 2024 19:11:48 -0700 Subject: [PATCH 141/334] [internal-dns] move sled and zone IDs to TypedUuid (#5518) I wanted to understand how internal DNS works wrt the reconfigurator, so I tried to port it over to using TypedUuid to basically get an overview via compile errors. Here's the result. I realized that we actually were overloading the `Host::Zone`'s ID to mean either the sled ID (for dendrite) or the zone ID (for the `Other` variant). I've addressed that in the PR by making the variant also carry the ID. (This was always an expected outcome of the transition to typed UUIDs -- if we really are storing more than one kind of UUID within a field, we should make that clear by specifying our own enum variants.) Probably depends on #5517 due to shared imports. 
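To make the shape of that change concrete, here is a minimal, self-contained sketch. It is not the crate's actual code: the stand-in newtypes below take the place of `SledUuid` and `OmicronZoneUuid` from `omicron-uuid-kinds`, and the DNS-name formatting is reduced to one free function.

```rust
// Sketch only: stand-in ID newtypes where omicron uses typed UUIDs.
use uuid::Uuid;

#[derive(Clone, Copy, Debug)]
struct SledUuid(Uuid);
#[derive(Clone, Copy, Debug)]
struct OmicronZoneUuid(Uuid);

// Before: one `Zone` variant whose `id` meant "sled id" when `variant` was
// `Dendrite` and "zone id" otherwise -- nothing stopped callers mixing them up.
#[allow(dead_code)]
enum HostBefore {
    Sled(Uuid),
    Zone { id: Uuid, variant: ZoneVariantBefore },
}
#[allow(dead_code)]
enum ZoneVariantBefore {
    Dendrite,
    Other,
}

// After: each variant carries an ID of the right kind, so the switch zone
// (dendrite) case can only name a sled and other zones can only name a zone.
enum Zone {
    Dendrite(SledUuid),
    Other(OmicronZoneUuid),
}
enum Host {
    Sled(SledUuid),
    Zone(Zone),
}

// The DNS name format itself is unchanged; only the types get stricter.
fn dns_name(host: &Host) -> String {
    match host {
        Host::Sled(SledUuid(id)) => format!("{id}.sled"),
        Host::Zone(Zone::Dendrite(SledUuid(id))) => format!("dendrite-{id}.host"),
        Host::Zone(Zone::Other(OmicronZoneUuid(id))) => format!("{id}.host"),
    }
}

fn main() {
    let sled = Host::Sled(SledUuid(Uuid::new_v4()));
    let switch_zone = Host::Zone(Zone::Dendrite(SledUuid(Uuid::new_v4())));
    let nexus_zone = Host::Zone(Zone::Other(OmicronZoneUuid(Uuid::new_v4())));
    for host in [sled, switch_zone, nexus_zone] {
        println!("{}", dns_name(&host));
    }
}
```

The point of the refactor is that recording a zone ID where a sled ID belongs (or vice versa) becomes a type error instead of a silent mix-up.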
--- Cargo.lock | 1 + internal-dns/Cargo.toml | 1 + internal-dns/src/config.rs | 140 +++++++++--------- internal-dns/src/names.rs | 6 +- internal-dns/src/resolver.rs | 32 ++-- nexus/reconfigurator/execution/src/dns.rs | 19 ++- .../planning/src/blueprint_builder.rs | 11 +- nexus/test-utils/src/lib.rs | 13 +- sled-agent/src/fakes/nexus.rs | 3 +- sled-agent/src/rack_setup/plan/service.rs | 91 +++++++----- sled-agent/src/rack_setup/service.rs | 10 +- sled-agent/src/sim/storage.rs | 5 +- 12 files changed, 198 insertions(+), 134 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b664f3ca1d..d2fc2b0ff6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3732,6 +3732,7 @@ dependencies = [ "hyper 0.14.28", "omicron-common", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "progenitor", "reqwest", diff --git a/internal-dns/Cargo.toml b/internal-dns/Cargo.toml index 96993ce6a2..4f504c5b49 100644 --- a/internal-dns/Cargo.toml +++ b/internal-dns/Cargo.toml @@ -11,6 +11,7 @@ dns-service-client.workspace = true futures.workspace = true hyper.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true reqwest = { workspace = true, features = ["rustls-tls", "stream"] } slog.workspace = true thiserror.workspace = true diff --git a/internal-dns/src/config.rs b/internal-dns/src/config.rs index 192a390afd..43d6c96d2d 100644 --- a/internal-dns/src/config.rs +++ b/internal-dns/src/config.rs @@ -62,38 +62,26 @@ use crate::names::{ServiceName, DNS_ZONE}; use anyhow::{anyhow, ensure}; +use core::fmt; use dns_service_client::types::{DnsConfigParams, DnsConfigZone, DnsRecord}; use omicron_common::api::external::Generation; +use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid}; use std::collections::BTreeMap; use std::net::Ipv6Addr; -use uuid::Uuid; - -/// Zones that can be referenced within the internal DNS system. -#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub enum ZoneVariant { - /// This non-global zone runs an instance of Dendrite. - /// - /// This implies that the Sled is a scrimlet. - // When this variant is used, the UUID in the record should match the sled - // itself. - Dendrite, - /// All other non-global zones. - Other, -} /// Used to construct the DNS name for a control plane host #[derive(Clone, Debug, PartialEq, PartialOrd)] pub enum Host { /// Used to construct an AAAA record for a sled. - Sled(Uuid), + Sled(SledUuid), /// Used to construct an AAAA record for a zone on a sled. 
- Zone { id: Uuid, variant: ZoneVariant }, + Zone(Zone), } impl Host { - pub fn for_zone(id: Uuid, variant: ZoneVariant) -> Host { - Host::Zone { id, variant } + pub fn for_zone(zone: Zone) -> Host { + Host::Zone(zone) } /// Returns the DNS name for this host, ignoring the zone part of the DNS @@ -101,10 +89,10 @@ impl Host { pub(crate) fn dns_name(&self) -> String { match &self { Host::Sled(id) => format!("{}.sled", id), - Host::Zone { id, variant: ZoneVariant::Dendrite } => { + Host::Zone(Zone::Dendrite(id)) => { format!("dendrite-{}.host", id) } - Host::Zone { id, variant: ZoneVariant::Other } => { + Host::Zone(Zone::Other(id)) => { format!("{}.host", id) } } @@ -165,19 +153,25 @@ pub struct DnsConfigBuilder { /// Describes a host of type "sled" in the control plane DNS zone #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct Sled(Uuid); +pub struct Sled(SledUuid); /// Describes a host of type "zone" (an illumos zone) in the control plane DNS /// zone #[derive(Clone, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] -pub struct Zone { - id: Uuid, - variant: ZoneVariant, +pub enum Zone { + /// This non-global zone runs an instance of Dendrite. + /// + /// This implies that the Sled is a scrimlet. + // When this variant is used, the UUID in the record should match the sled + // itself. + Dendrite(SledUuid), + /// All other non-global zones. + Other(OmicronZoneUuid), } impl Zone { pub(crate) fn to_host(&self) -> Host { - Host::Zone { id: self.id, variant: self.variant } + Host::Zone(self.clone()) } pub(crate) fn dns_name(&self) -> String { @@ -185,6 +179,17 @@ impl Zone { } } +impl fmt::Display for Zone { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Zone::Dendrite(sled_uuid) => { + write!(f, "{} (dendrite)", sled_uuid) + } + Zone::Other(zone_uuid) => write!(f, "{} (other)", zone_uuid), + } + } +} + impl DnsConfigBuilder { pub fn new() -> Self { DnsConfigBuilder { @@ -207,7 +212,7 @@ impl DnsConfigBuilder { /// configuration. pub fn host_sled( &mut self, - sled_id: Uuid, + sled_id: SledUuid, addr: Ipv6Addr, ) -> anyhow::Result { match self.sleds.insert(Sled(sled_id), addr) { @@ -233,10 +238,10 @@ impl DnsConfigBuilder { /// configuration. pub fn host_dendrite( &mut self, - sled_id: Uuid, + sled_id: SledUuid, addr: Ipv6Addr, ) -> anyhow::Result { - self.host_zone_internal(sled_id, ZoneVariant::Dendrite, addr) + self.host_zone_internal(Zone::Dendrite(sled_id), addr) } /// Add a new host of type "zone" to the configuration @@ -251,24 +256,22 @@ impl DnsConfigBuilder { /// configuration. pub fn host_zone( &mut self, - zone_id: Uuid, + zone_id: OmicronZoneUuid, addr: Ipv6Addr, ) -> anyhow::Result { - self.host_zone_internal(zone_id, ZoneVariant::Other, addr) + self.host_zone_internal(Zone::Other(zone_id), addr) } fn host_zone_internal( &mut self, - id: Uuid, - variant: ZoneVariant, + zone: Zone, addr: Ipv6Addr, ) -> anyhow::Result { - let zone = Zone { id, variant }; match self.zones.insert(zone.clone(), addr) { None => Ok(zone), Some(existing) => Err(anyhow!( "multiple definitions for zone {} (previously {}, now {})", - id, + zone, existing, addr )), @@ -293,8 +296,7 @@ impl DnsConfigBuilder { // DnsBuilder. 
ensure!( self.zones.contains_key(&zone), - "zone {} has not been defined", - zone.id + "zone {zone} has not been defined", ); let set = self @@ -307,7 +309,7 @@ impl DnsConfigBuilder { "service {}: zone {}: registered twice \ (previously port {}, now {})", service.dns_name(), - zone.id, + zone, existing, port )), @@ -332,7 +334,7 @@ impl DnsConfigBuilder { // DnsBuilder. ensure!( self.sleds.contains_key(&sled), - "sled {:?} has not been defined", + "sled {} has not been defined", sled.0 ); @@ -362,7 +364,7 @@ impl DnsConfigBuilder { /// configuration. pub fn host_zone_with_one_backend( &mut self, - zone_id: Uuid, + zone_id: OmicronZoneUuid, addr: Ipv6Addr, service: ServiceName, port: u16, @@ -380,7 +382,7 @@ impl DnsConfigBuilder { /// configuration. pub fn host_zone_switch( &mut self, - sled_id: Uuid, + sled_id: SledUuid, switch_zone_ip: Ipv6Addr, dendrite_port: u16, mgs_port: u16, @@ -474,10 +476,10 @@ impl DnsConfigBuilder { #[cfg(test)] mod test { - use super::{DnsConfigBuilder, Host, ServiceName, ZoneVariant}; - use crate::DNS_ZONE; + use super::{DnsConfigBuilder, Host, ServiceName}; + use crate::{config::Zone, DNS_ZONE}; + use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid}; use std::{collections::BTreeMap, io::Write, net::Ipv6Addr}; - use uuid::Uuid; #[test] fn display_srv_service() { @@ -495,30 +497,33 @@ mod test { ServiceName::CruciblePantry.dns_name(), "_crucible-pantry._tcp", ); - let uuid = Uuid::nil(); + + let sled_uuid = SledUuid::nil(); + let zone_uuid = OmicronZoneUuid::nil(); assert_eq!( - ServiceName::Crucible(uuid).dns_name(), - "_crucible._tcp.00000000-0000-0000-0000-000000000000", + ServiceName::SledAgent(sled_uuid).dns_name(), + "_sledagent._tcp.00000000-0000-0000-0000-000000000000", ); assert_eq!( - ServiceName::SledAgent(uuid).dns_name(), - "_sledagent._tcp.00000000-0000-0000-0000-000000000000", + ServiceName::Crucible(zone_uuid).dns_name(), + "_crucible._tcp.00000000-0000-0000-0000-000000000000", ); } #[test] fn display_hosts() { - let uuid = Uuid::nil(); + let sled_uuid = SledUuid::nil(); + let zone_uuid = OmicronZoneUuid::nil(); assert_eq!( - Host::Sled(uuid).dns_name(), + Host::Sled(sled_uuid).dns_name(), "00000000-0000-0000-0000-000000000000.sled", ); assert_eq!( - Host::Zone { id: uuid, variant: ZoneVariant::Other }.dns_name(), + Host::Zone(Zone::Other(zone_uuid)).dns_name(), "00000000-0000-0000-0000-000000000000.host", ); assert_eq!( - Host::Zone { id: uuid, variant: ZoneVariant::Dendrite }.dns_name(), + Host::Zone(Zone::Dendrite(sled_uuid)).dns_name(), "dendrite-00000000-0000-0000-0000-000000000000.host", ); } @@ -542,12 +547,12 @@ mod test { fn test_builder_output() { let mut output = std::io::Cursor::new(Vec::new()); - let sled1_uuid: Uuid = SLED1_UUID.parse().unwrap(); - let sled2_uuid: Uuid = SLED2_UUID.parse().unwrap(); - let zone1_uuid: Uuid = ZONE1_UUID.parse().unwrap(); - let zone2_uuid: Uuid = ZONE2_UUID.parse().unwrap(); - let zone3_uuid: Uuid = ZONE3_UUID.parse().unwrap(); - let zone4_uuid: Uuid = ZONE4_UUID.parse().unwrap(); + let sled1_uuid: SledUuid = SLED1_UUID.parse().unwrap(); + let sled2_uuid: SledUuid = SLED2_UUID.parse().unwrap(); + let zone1_uuid: OmicronZoneUuid = ZONE1_UUID.parse().unwrap(); + let zone2_uuid: OmicronZoneUuid = ZONE2_UUID.parse().unwrap(); + let zone3_uuid: OmicronZoneUuid = ZONE3_UUID.parse().unwrap(); + let zone4_uuid: OmicronZoneUuid = ZONE4_UUID.parse().unwrap(); let builder_empty = DnsConfigBuilder::new(); @@ -625,8 +630,8 @@ mod test { #[test] fn test_builder_errors() { - let sled1_uuid: Uuid = 
SLED1_UUID.parse().unwrap(); - let zone1_uuid: Uuid = ZONE1_UUID.parse().unwrap(); + let sled1_uuid: SledUuid = SLED1_UUID.parse().unwrap(); + let zone1_uuid: OmicronZoneUuid = ZONE1_UUID.parse().unwrap(); // Duplicate sled, with both the same IP and a different one let mut builder = DnsConfigBuilder::new(); @@ -652,15 +657,15 @@ mod test { assert_eq!( error.to_string(), "multiple definitions for zone \ - 001de000-c04e-4000-8000-000000000001 (previously ::1:1, \ - now ::1:1)" + 001de000-c04e-4000-8000-000000000001 (other) \ + (previously ::1:1, now ::1:1)" ); let error = builder.host_zone(zone1_uuid, ZONE2_IP).unwrap_err(); assert_eq!( error.to_string(), "multiple definitions for zone \ - 001de000-c04e-4000-8000-000000000001 (previously ::1:1, \ - now ::1:2)" + 001de000-c04e-4000-8000-000000000001 (other) \ + (previously ::1:1, now ::1:2)" ); // Specify an undefined zone or sled. (This requires a second builder.) @@ -673,7 +678,8 @@ mod test { .unwrap_err(); assert_eq!( error.to_string(), - "zone 001de000-c04e-4000-8000-000000000001 has not been defined" + "zone 001de000-c04e-4000-8000-000000000001 (other) \ + has not been defined" ); let error = builder2 .service_backend_sled(ServiceName::Oximeter, &sled, 123) @@ -696,7 +702,7 @@ mod test { assert_eq!( error.to_string(), "service _oximeter._tcp: zone \ - 001de000-c04e-4000-8000-000000000001: registered twice \ + 001de000-c04e-4000-8000-000000000001 (other): registered twice \ (previously port 123, now 123)" ); let error = builder @@ -705,7 +711,7 @@ mod test { assert_eq!( error.to_string(), "service _oximeter._tcp: zone \ - 001de000-c04e-4000-8000-000000000001: registered twice \ + 001de000-c04e-4000-8000-000000000001 (other): registered twice \ (previously port 123, now 456)" ); } diff --git a/internal-dns/src/names.rs b/internal-dns/src/names.rs index 8cafe4ac97..3017d3b3fc 100644 --- a/internal-dns/src/names.rs +++ b/internal-dns/src/names.rs @@ -4,7 +4,7 @@ //! Well-known DNS names and related types for internal DNS (see RFD 248) -use uuid::Uuid; +use omicron_uuid_kinds::{OmicronZoneUuid, SledUuid}; /// Name for the control plane DNS zone pub const DNS_ZONE: &str = "control-plane.oxide.internal"; @@ -28,8 +28,8 @@ pub enum ServiceName { Dendrite, Tfport, CruciblePantry, - SledAgent(Uuid), - Crucible(Uuid), + SledAgent(SledUuid), + Crucible(OmicronZoneUuid), BoundaryNtp, InternalNtp, Maghemite, //TODO change to Dpd - maghemite has several services. 
diff --git a/internal-dns/src/resolver.rs b/internal-dns/src/resolver.rs index f5987df7c6..a7796f559a 100644 --- a/internal-dns/src/resolver.rs +++ b/internal-dns/src/resolver.rs @@ -382,6 +382,7 @@ mod test { RequestContext, }; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::OmicronZoneUuid; use slog::{o, Logger}; use std::collections::HashMap; use std::net::Ipv6Addr; @@ -389,7 +390,6 @@ mod test { use std::net::SocketAddrV6; use std::str::FromStr; use tempfile::TempDir; - use uuid::Uuid; struct DnsServer { // We hang onto the storage_path even though it's never used because @@ -526,7 +526,7 @@ mod test { let mut dns_config = DnsConfigBuilder::new(); let ip = Ipv6Addr::from_str("ff::01").unwrap(); - let zone = dns_config.host_zone(Uuid::new_v4(), ip).unwrap(); + let zone = dns_config.host_zone(OmicronZoneUuid::new_v4(), ip).unwrap(); dns_config .service_backend_zone(ServiceName::Cockroach, &zone, 12345) .unwrap(); @@ -584,26 +584,28 @@ mod test { let srv_crdb = ServiceName::Cockroach; let srv_clickhouse = ServiceName::Clickhouse; - let srv_backend = ServiceName::Crucible(Uuid::new_v4()); + let srv_backend = ServiceName::Crucible(OmicronZoneUuid::new_v4()); let mut dns_builder = DnsConfigBuilder::new(); for db_ip in &cockroach_addrs { - let zone = - dns_builder.host_zone(Uuid::new_v4(), *db_ip.ip()).unwrap(); + let zone = dns_builder + .host_zone(OmicronZoneUuid::new_v4(), *db_ip.ip()) + .unwrap(); dns_builder .service_backend_zone(srv_crdb, &zone, db_ip.port()) .unwrap(); } let zone = dns_builder - .host_zone(Uuid::new_v4(), *clickhouse_addr.ip()) + .host_zone(OmicronZoneUuid::new_v4(), *clickhouse_addr.ip()) .unwrap(); dns_builder .service_backend_zone(srv_clickhouse, &zone, clickhouse_addr.port()) .unwrap(); - let zone = - dns_builder.host_zone(Uuid::new_v4(), *crucible_addr.ip()).unwrap(); + let zone = dns_builder + .host_zone(OmicronZoneUuid::new_v4(), *crucible_addr.ip()) + .unwrap(); dns_builder .service_backend_zone(srv_backend, &zone, crucible_addr.port()) .unwrap(); @@ -685,7 +687,8 @@ mod test { // Insert a record, observe that it exists. let mut dns_builder = DnsConfigBuilder::new(); let ip1 = Ipv6Addr::from_str("ff::01").unwrap(); - let zone = dns_builder.host_zone(Uuid::new_v4(), ip1).unwrap(); + let zone = + dns_builder.host_zone(OmicronZoneUuid::new_v4(), ip1).unwrap(); let srv_crdb = ServiceName::Cockroach; dns_builder.service_backend_zone(srv_crdb, &zone, 12345).unwrap(); let dns_config = dns_builder.build_full_config_for_initial_generation(); @@ -700,7 +703,8 @@ mod test { // updated. let mut dns_builder = DnsConfigBuilder::new(); let ip2 = Ipv6Addr::from_str("ee::02").unwrap(); - let zone = dns_builder.host_zone(Uuid::new_v4(), ip2).unwrap(); + let zone = + dns_builder.host_zone(OmicronZoneUuid::new_v4(), ip2).unwrap(); let srv_crdb = ServiceName::Cockroach; dns_builder.service_backend_zone(srv_crdb, &zone, 54321).unwrap(); let mut dns_config = @@ -834,7 +838,7 @@ mod test { // Add a record for the new service. let mut dns_config = DnsConfigBuilder::new(); - let zone = dns_config.host_zone(Uuid::new_v4(), ip).unwrap(); + let zone = dns_config.host_zone(OmicronZoneUuid::new_v4(), ip).unwrap(); dns_config .service_backend_zone(ServiceName::Nexus, &zone, port) .unwrap(); @@ -916,7 +920,7 @@ mod test { // Since both servers are authoritative, we also shut down the first // server. 
let mut dns_config = DnsConfigBuilder::new(); - let zone = dns_config.host_zone(Uuid::new_v4(), ip).unwrap(); + let zone = dns_config.host_zone(OmicronZoneUuid::new_v4(), ip).unwrap(); dns_config .service_backend_zone(ServiceName::Nexus, &zone, port) .unwrap(); @@ -952,7 +956,7 @@ mod test { // Create DNS config with a single service and multiple backends. let mut dns_config = DnsConfigBuilder::new(); - let id1 = Uuid::new_v4(); + let id1 = OmicronZoneUuid::new_v4(); let ip1 = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x1); let addr1 = SocketAddrV6::new(ip1, 15001, 0, 0); let zone1 = dns_config.host_zone(id1, ip1).unwrap(); @@ -960,7 +964,7 @@ mod test { .service_backend_zone(ServiceName::Cockroach, &zone1, addr1.port()) .unwrap(); - let id2 = Uuid::new_v4(); + let id2 = OmicronZoneUuid::new_v4(); let ip2 = Ipv6Addr::new(0xfd, 0, 0, 0, 0, 0, 0, 0x2); let addr2 = SocketAddrV6::new(ip2, 15002, 0, 0); let zone2 = dns_config.host_zone(id2, ip2).unwrap(); diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index f0d0af074b..fe44d5c25a 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -29,6 +29,7 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::Name; use omicron_common::bail_unless; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use slog::{debug, info, o}; use std::collections::BTreeMap; @@ -315,7 +316,12 @@ pub fn blueprint_internal_dns_config( } OmicronZoneType::Crucible { address, .. } => { let port = parse_port(address).with_context(context)?; - (ServiceName::Crucible(zone.config.id), port) + ( + ServiceName::Crucible(OmicronZoneUuid::from_untyped_uuid( + zone.config.id, + )), + port, + ) } OmicronZoneType::CruciblePantry { address } => { let port = parse_port(address).with_context(context)?; @@ -339,7 +345,8 @@ pub fn blueprint_internal_dns_config( // the same zone id twice, which should not be possible here. dns_builder .host_zone_with_one_backend( - zone.config.id, + // TODO-cleanup use `TypedUuid` everywhere + OmicronZoneUuid::from_untyped_uuid(zone.config.id), zone.config.underlay_address, service_name, port, @@ -354,7 +361,7 @@ pub fn blueprint_internal_dns_config( // unwrap(): see above. 
dns_builder .host_zone_switch( - scrimlet.id.into_untyped_uuid(), + scrimlet.id, switch_zone_ip, overrides.dendrite_port(scrimlet.id), overrides.mgs_port(scrimlet.id), @@ -1354,8 +1361,10 @@ mod test { panic!("did not find expected AAAA record for new Nexus zone"); }; let new_zone_host = internal_dns::config::Host::for_zone( - new_zone_id, - internal_dns::config::ZoneVariant::Other, + // TODO-cleanup use `TypedUuid` everywhere + internal_dns::config::Zone::Other( + OmicronZoneUuid::from_untyped_uuid(new_zone_id), + ), ); assert!(new_zone_host.fqdn().starts_with(new_name)); diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index 068e4a9875..6e5b893180 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -8,7 +8,7 @@ use crate::ip_allocator::IpAllocator; use anyhow::anyhow; use anyhow::bail; use internal_dns::config::Host; -use internal_dns::config::ZoneVariant; +use internal_dns::config::Zone; use ipnet::IpAdd; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_inventory::now_db_precision; @@ -43,6 +43,7 @@ use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; @@ -542,7 +543,13 @@ impl<'a> BlueprintBuilder<'a> { .all_omicron_zones(BlueprintZoneFilter::All) .filter_map(|(_, z)| { if matches!(z.zone_type, OmicronZoneType::BoundaryNtp { .. }) { - Some(Host::for_zone(z.id, ZoneVariant::Other).fqdn()) + Some( + Host::for_zone(Zone::Other( + // TODO-cleanup use `TypedUuid` everywhere + OmicronZoneUuid::from_untyped_uuid(z.id), + )) + .fqdn(), + ) } else { None } diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 81235efc9a..6392c729ce 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -56,6 +56,7 @@ use omicron_common::api::internal::shared::SwitchLocation; use omicron_sled_agent::sim; use omicron_test_utils::dev; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::ZpoolUuid; use oximeter_collector::Oximeter; use oximeter_producer::LogConfig; @@ -220,7 +221,11 @@ impl RackInitRequestBuilder { }); let zone = self .internal_dns_config - .host_zone(zone_id, *address.ip()) + .host_zone( + // TODO-cleanup use TypedUuid everywhere + OmicronZoneUuid::from_untyped_uuid(zone_id), + *address.ip(), + ) .expect("Failed to set up DNS for {kind}"); self.internal_dns_config .service_backend_zone(service_name, &zone, address.port()) @@ -261,7 +266,11 @@ impl RackInitRequestBuilder { }); let zone = self .internal_dns_config - .host_zone(dataset_id, *address.ip()) + .host_zone( + // TODO-cleanup use TypedUuid everywhere + OmicronZoneUuid::from_untyped_uuid(dataset_id), + *address.ip(), + ) .expect("Failed to set up DNS for {kind}"); self.internal_dns_config .service_backend_zone(service_name, &zone, address.port()) diff --git a/sled-agent/src/fakes/nexus.rs b/sled-agent/src/fakes/nexus.rs index 719f08888a..246ef07b60 100644 --- a/sled-agent/src/fakes/nexus.rs +++ b/sled-agent/src/fakes/nexus.rs @@ -18,6 +18,7 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::{ SledInstanceState, UpdateArtifactId, }; +use 
omicron_uuid_kinds::OmicronZoneUuid; use schemars::JsonSchema; use serde::Deserialize; use uuid::Uuid; @@ -178,7 +179,7 @@ pub async fn start_dns_server( }; let nexus_zone = dns_config_builder - .host_zone(uuid::Uuid::new_v4(), *nexus_addr.ip()) + .host_zone(OmicronZoneUuid::new_v4(), *nexus_addr.ip()) .expect("failed to set up DNS"); dns_config_builder .service_backend_zone( diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index 6bc5083717..d868448bed 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -13,7 +13,7 @@ use crate::rack_setup::config::SetupServiceConfig as Config; use camino::Utf8PathBuf; use dns_service_client::types::DnsConfigParams; use illumos_utils::zpool::ZpoolName; -use internal_dns::config::{Host, ZoneVariant}; +use internal_dns::config::{Host, Zone}; use internal_dns::ServiceName; use omicron_common::address::{ get_sled_address, get_switch_zone_address, Ipv6Subnet, ReservedRackSubnet, @@ -29,7 +29,7 @@ use omicron_common::backoff::{ retry_notify_ext, retry_policy_internal_service_aggressive, BackoffError, }; use omicron_common::ledger::{self, Ledger, Ledgerable}; -use omicron_uuid_kinds::ZpoolUuid; +use omicron_uuid_kinds::{GenericUuid, OmicronZoneUuid, SledUuid, ZpoolUuid}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_agent_client::{ @@ -396,7 +396,7 @@ impl Plan { let http_address = SocketAddrV6::new(ip, DNS_HTTP_PORT, 0, 0); let dns_address = SocketAddrV6::new(ip, DNS_PORT, 0, 0); - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); dns_builder .host_zone_with_one_backend( id, @@ -409,7 +409,8 @@ impl Plan { sled.alloc_from_u2_zpool(DatasetKind::InternalDns)?; sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: ip, zone_type: OmicronZoneType::InternalDns { dataset: OmicronZoneDataset { @@ -430,7 +431,7 @@ impl Plan { sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; &mut sled_info[which_sled] }; - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::COCKROACH_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); @@ -445,7 +446,8 @@ impl Plan { let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::CockroachDb)?; sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: ip, zone_type: OmicronZoneType::CockroachDb { dataset: OmicronZoneDataset { @@ -461,7 +463,7 @@ impl Plan { // server IP addresses given to us at RSS-time. 
// TODO(https://github.com/oxidecomputer/omicron/issues/732): Remove loop { - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let Some((nic, external_ip)) = svc_port_builder.next_dns(id) else { break; }; @@ -488,7 +490,8 @@ impl Plan { let dataset_name = sled.alloc_from_u2_zpool(dataset_kind)?; sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: *http_address.ip(), zone_type: OmicronZoneType::ExternalDns { dataset: OmicronZoneDataset { @@ -508,7 +511,7 @@ impl Plan { sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; &mut sled_info[which_sled] }; - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let address = sled.addr_alloc.next().expect("Not enough addrs"); dns_builder .host_zone_with_one_backend( @@ -520,7 +523,8 @@ impl Plan { .unwrap(); let (nic, external_ip) = svc_port_builder.next_nexus(id)?; sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: address, zone_type: OmicronZoneType::Nexus { internal_address: SocketAddrV6::new( @@ -551,7 +555,7 @@ impl Plan { sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; &mut sled_info[which_sled] }; - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let address = sled.addr_alloc.next().expect("Not enough addrs"); dns_builder .host_zone_with_one_backend( @@ -562,7 +566,8 @@ impl Plan { ) .unwrap(); sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: address, zone_type: OmicronZoneType::Oximeter { address: SocketAddrV6::new( @@ -583,7 +588,7 @@ impl Plan { sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; &mut sled_info[which_sled] }; - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CLICKHOUSE_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); @@ -598,7 +603,8 @@ impl Plan { let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::Clickhouse)?; sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: ip, zone_type: OmicronZoneType::Clickhouse { address, @@ -619,7 +625,7 @@ impl Plan { sled_allocator.next().ok_or(PlanError::NotEnoughSleds)?; &mut sled_info[which_sled] }; - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let ip = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CLICKHOUSE_KEEPER_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); @@ -634,7 +640,8 @@ impl Plan { let dataset_name = sled.alloc_from_u2_zpool(DatasetKind::ClickhouseKeeper)?; sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: ip, zone_type: OmicronZoneType::ClickhouseKeeper { address, @@ -655,7 +662,7 @@ impl Plan { }; let address = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CRUCIBLE_PANTRY_PORT; - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); dns_builder .host_zone_with_one_backend( id, @@ -665,7 +672,8 @@ impl Plan { ) .unwrap(); sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: address, zone_type: OmicronZoneType::CruciblePantry { address: 
SocketAddrV6::new(address, port, 0, 0), @@ -680,7 +688,7 @@ impl Plan { let ip = sled.addr_alloc.next().expect("Not enough addrs"); let port = omicron_common::address::CRUCIBLE_PORT; let address = SocketAddrV6::new(ip, port, 0, 0); - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); dns_builder .host_zone_with_one_backend( id, @@ -691,7 +699,8 @@ impl Plan { .unwrap(); sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: ip, zone_type: OmicronZoneType::Crucible { address, @@ -706,13 +715,13 @@ impl Plan { // network. let mut boundary_ntp_servers = vec![]; for (idx, sled) in sled_info.iter_mut().enumerate() { - let id = Uuid::new_v4(); + let id = OmicronZoneUuid::new_v4(); let address = sled.addr_alloc.next().expect("Not enough addrs"); let ntp_address = SocketAddrV6::new(address, NTP_PORT, 0, 0); let (zone_type, svcname) = if idx < BOUNDARY_NTP_COUNT { boundary_ntp_servers - .push(Host::for_zone(id, ZoneVariant::Other).fqdn()); + .push(Host::for_zone(Zone::Other(id)).fqdn()); let (nic, snat_cfg) = svc_port_builder.next_snat(id)?; ( OmicronZoneType::BoundaryNtp { @@ -742,7 +751,8 @@ impl Plan { .unwrap(); sled.request.zones.push(OmicronZoneConfig { - id, + // TODO-cleanup use TypedUuid everywhere + id: id.into_untyped_uuid(), underlay_address: address, zone_type, }); @@ -776,7 +786,8 @@ impl Plan { let is_scrimlet = Self::is_sled_scrimlet(log, sled_address).await?; Ok(SledInfo::new( - sled_request.body.id, + // TODO-cleanup use TypedUuid everywhere + SledUuid::from_untyped_uuid(sled_request.body.id), subnet, sled_address, inventory, @@ -828,7 +839,7 @@ impl AddressBumpAllocator { /// Wraps up the information used to allocate components to a Sled pub struct SledInfo { /// unique id for the sled agent - pub sled_id: Uuid, + pub sled_id: SledUuid, /// the sled's unique IPv6 subnet subnet: Ipv6Subnet, /// the address of the Sled Agent on the sled's subnet @@ -850,7 +861,7 @@ pub struct SledInfo { impl SledInfo { pub fn new( - sled_id: Uuid, + sled_id: SledUuid, subnet: Ipv6Subnet, sled_address: SocketAddrV6, inventory: SledAgentTypes::Inventory, @@ -1010,7 +1021,10 @@ impl ServicePortBuilder { mac } - fn next_dns(&mut self, svc_id: Uuid) -> Option<(NetworkInterface, IpAddr)> { + fn next_dns( + &mut self, + svc_id: OmicronZoneUuid, + ) -> Option<(NetworkInterface, IpAddr)> { use omicron_common::address::{ DNS_OPTE_IPV4_SUBNET, DNS_OPTE_IPV6_SUBNET, }; @@ -1029,7 +1043,10 @@ impl ServicePortBuilder { let nic = NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id: svc_id }, + kind: NetworkInterfaceKind::Service { + // TODO-cleanup use TypedUuid everywhere + id: svc_id.into_untyped_uuid(), + }, name: format!("external-dns-{svc_id}").parse().unwrap(), ip, mac: self.random_mac(), @@ -1044,7 +1061,7 @@ impl ServicePortBuilder { fn next_nexus( &mut self, - svc_id: Uuid, + svc_id: OmicronZoneUuid, ) -> Result<(NetworkInterface, IpAddr), PlanError> { use omicron_common::address::{ NEXUS_OPTE_IPV4_SUBNET, NEXUS_OPTE_IPV6_SUBNET, @@ -1066,7 +1083,10 @@ impl ServicePortBuilder { let nic = NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id: svc_id }, + kind: NetworkInterfaceKind::Service { + // TODO-cleanup use TypedUuid everywhere + id: svc_id.into_untyped_uuid(), + }, name: format!("nexus-{svc_id}").parse().unwrap(), ip, mac: self.random_mac(), @@ -1081,7 +1101,7 @@ impl ServicePortBuilder { fn next_snat( &mut self, - svc_id: Uuid, + svc_id: 
OmicronZoneUuid, ) -> Result<(NetworkInterface, SourceNatConfig), PlanError> { use omicron_common::address::{ NTP_OPTE_IPV4_SUBNET, NTP_OPTE_IPV6_SUBNET, @@ -1113,7 +1133,10 @@ impl ServicePortBuilder { let nic = NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id: svc_id }, + kind: NetworkInterfaceKind::Service { + // TODO-cleanup use TypedUuid everywhere + id: svc_id.into_untyped_uuid(), + }, name: format!("ntp-{svc_id}").parse().unwrap(), ip, mac: self.random_mac(), @@ -1242,7 +1265,9 @@ mod tests { // We should only get back the 5 DNS IPs we specified. let mut svp_dns_ips = Vec::new(); - while let Some((_interface, ip)) = svp.next_dns(Uuid::new_v4()) { + while let Some((_interface, ip)) = + svp.next_dns(OmicronZoneUuid::new_v4()) + { svp_dns_ips.push(ip.to_string()); } assert_eq!(svp_dns_ips, dns_ips); diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index c8a2e14ff5..ce5cb3fa2d 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -1496,11 +1496,11 @@ mod test { api::external::{ByteCount, Generation}, disk::DiskIdentity, }; + use omicron_uuid_kinds::{GenericUuid, SledUuid}; use sled_agent_client::types as SledAgentTypes; - use uuid::Uuid; fn make_sled_info( - sled_id: Uuid, + sled_id: SledUuid, subnet: Ipv6Subnet, u2_count: usize, ) -> SledInfo { @@ -1510,7 +1510,7 @@ mod test { subnet, sled_agent_address, SledAgentTypes::Inventory { - sled_id, + sled_id: sled_id.into_untyped_uuid(), sled_agent_address: sled_agent_address.to_string(), sled_role: SledAgentTypes::SledRole::Scrimlet, baseboard: SledAgentTypes::Baseboard::Unknown, @@ -1538,14 +1538,14 @@ mod test { let rss_config = crate::bootstrap::params::test_config(); let fake_sleds = vec![ make_sled_info( - Uuid::new_v4(), + SledUuid::new_v4(), Ipv6Subnet::::new( "fd00:1122:3344:101::1".parse().unwrap(), ), 5, ), make_sled_info( - Uuid::new_v4(), + SledUuid::new_v4(), Ipv6Subnet::::new( "fd00:1122:3344:102::1".parse().unwrap(), ), diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 3c2c4057c1..56cf771b2a 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -21,6 +21,7 @@ use dropshot::HttpError; use futures::lock::Mutex; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::ZpoolUuid; use propolis_client::types::VolumeConstructionRequest; use sled_hardware::DiskVariant; @@ -758,7 +759,7 @@ impl Storage { /// Simulated crucible pantry pub struct Pantry { - pub id: Uuid, + pub id: OmicronZoneUuid, vcrs: Mutex>, // Please rewind! 
sled_agent: Arc, jobs: Mutex>, @@ -767,7 +768,7 @@ pub struct Pantry { impl Pantry { pub fn new(sled_agent: Arc) -> Self { Self { - id: Uuid::new_v4(), + id: OmicronZoneUuid::new_v4(), vcrs: Mutex::new(HashMap::default()), sled_agent, jobs: Mutex::new(HashSet::default()), From 934017d9eb61b36f5a5f3c62632adfc97d8f6cc6 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 00:32:08 -0700 Subject: [PATCH 142/334] chore(deps): update rust crate prettyplease to 0.2.19 (#5533) --- Cargo.lock | 144 +++++++++++++++++++------------------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 8 +-- 3 files changed, 77 insertions(+), 77 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d2fc2b0ff6..df5981f618 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -174,7 +174,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -281,7 +281,7 @@ checksum = "30c5ef0ede93efbf733c1a727f3b6b5a1060bbedd5600183e66f6e4be4af0ec5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -303,7 +303,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -314,7 +314,7 @@ checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -367,7 +367,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -526,7 +526,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.58", + "syn 2.0.59", "which", ] @@ -1072,7 +1072,7 @@ dependencies = [ "heck 0.5.0", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1558,7 +1558,7 @@ checksum = "83fdaf97f4804dcebfa5862639bc9ce4121e82140bec2a987ac5140294865b5b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1582,7 +1582,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1593,7 +1593,7 @@ checksum = "a668eda54683121533a393014d8692171709ff57a7d61f187b6e782719f8933f" dependencies = [ "darling_core", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1627,7 +1627,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1670,7 +1670,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1703,7 +1703,7 @@ checksum = "5fe87ce4529967e0ba1dcf8450bab64d97dfd5010a6256187ffe2e43e6f0e049" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1724,7 +1724,7 @@ checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1745,7 +1745,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1755,7 +1755,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b" dependencies = [ "derive_builder_core", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1779,7 +1779,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1843,7 +1843,7 @@ dependencies = [ 
"diesel_table_macro_syntax", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -1852,7 +1852,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5" dependencies = [ - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -2120,7 +2120,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -2522,7 +2522,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -2633,7 +2633,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -3901,7 +3901,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/opte?rev=7ee353a470ea59529ee1b34729681da887aa88ce#7ee353a470ea59529ee1b34729681da887aa88ce" dependencies = [ "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -4379,7 +4379,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -4693,7 +4693,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -4883,7 +4883,7 @@ version = "0.1.0" dependencies = [ "omicron-workspace-hack", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -5049,7 +5049,7 @@ checksum = "9e6a0fd4f737c707bd9086cc16c925f294943eb62eb71499e9fd4cf71f8b9f4e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -5825,7 +5825,7 @@ dependencies = [ "string_cache", "subtle", "syn 1.0.109", - "syn 2.0.58", + "syn 2.0.59", "time", "time-macros", "tokio", @@ -5945,7 +5945,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -6237,7 +6237,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -6403,7 +6403,7 @@ dependencies = [ "regex", "regex-syntax 0.8.2", "structmeta 0.3.0", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -6577,7 +6577,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -6647,7 +6647,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -6912,12 +6912,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.17" +version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d3928fb5db768cb86f891ff014f0144589297e3c6a1aba6ed7cecfdace270c7" +checksum = "5ac2cf0f2e4f42b49f5ffd07dae8d746508ef7526c13940e5f524012ae6c6550" dependencies = [ "proc-macro2", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -6965,9 +6965,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.79" +version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e835ff2298f5721608eb1a980ecaee1aef2c132bf95ecc026a11b7bf3c01c02e" +checksum = "a56dea16b0a29e94408b9aa5e2940a4eedbd128a1ba20e8f7ae60fd3d465af0e" dependencies = [ "unicode-ident", ] @@ -7013,7 +7013,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "syn 2.0.58", + "syn 2.0.59", "thiserror", "typify", "unicode-ident", @@ -7033,7 +7033,7 @@ 
dependencies = [ "serde_json", "serde_tokenstream 0.2.0", "serde_yaml", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -7513,7 +7513,7 @@ checksum = "5fddb4f8d99b0a2ebafc65a87a69a7b9875e4b1ae1f00db265d300ef7f28bccc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -7769,7 +7769,7 @@ dependencies = [ "regex", "relative-path", "rustc_version 0.4.0", - "syn 2.0.58", + "syn 2.0.59", "unicode-ident", ] @@ -8199,7 +8199,7 @@ checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -8328,7 +8328,7 @@ checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -8389,7 +8389,7 @@ checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -8421,7 +8421,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -8463,7 +8463,7 @@ dependencies = [ "darling", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -8810,7 +8810,7 @@ source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f6 dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -8937,7 +8937,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9064,7 +9064,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9171,7 +9171,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive 0.2.0", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9183,7 +9183,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive 0.3.0", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9194,7 +9194,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9205,7 +9205,7 @@ checksum = "152a0b65a590ff6c3da95cabe2353ee04e6167c896b28e3b14478c2636c922fc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9264,7 +9264,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9277,7 +9277,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9324,9 +9324,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.58" +version = "2.0.59" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44cfb93f38070beee36b3fef7d4f5a16f27751d94b187b666a5cc5e9b0d30687" +checksum = "4a6531ffc7b071655e4ce2e04bd464c4830bb585a61cabb96cf808f05172615a" dependencies = [ "proc-macro2", "quote", @@ -9512,7 +9512,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta 0.2.0", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9552,7 +9552,7 @@ checksum = "c61f3ba182994efc43764a46c018c347bc492c79f024e705f46567b418f6d4f7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -9742,7 +9742,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -10020,7 +10020,7 @@ checksum = 
"34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -10296,7 +10296,7 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 2.0.58", + "syn 2.0.59", "thiserror", "unicode-ident", ] @@ -10312,7 +10312,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream 0.2.0", - "syn 2.0.58", + "syn 2.0.59", "typify-impl", ] @@ -10546,7 +10546,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream 0.2.0", - "syn 2.0.58", + "syn 2.0.59", "usdt-impl 0.5.0", ] @@ -10584,7 +10584,7 @@ dependencies = [ "quote", "serde", "serde_json", - "syn 2.0.58", + "syn 2.0.59", "thiserror", "thread-id", "version_check", @@ -10614,7 +10614,7 @@ dependencies = [ "proc-macro2", "quote", "serde_tokenstream 0.2.0", - "syn 2.0.58", + "syn 2.0.59", "usdt-impl 0.5.0", ] @@ -10795,7 +10795,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", "wasm-bindgen-shared", ] @@ -10829,7 +10829,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -11386,7 +11386,7 @@ checksum = "125139de3f6b9d625c39e2efdd73d41bdac468ccd556556440e322be0e1bbd91" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -11397,7 +11397,7 @@ checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] @@ -11417,7 +11417,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.58", + "syn 2.0.59", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b4bf2bef58..b0526a259b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -335,7 +335,7 @@ postgres-protocol = "0.6.6" predicates = "3.1.0" pretty_assertions = "1.4.0" pretty-hex = "0.4.1" -prettyplease = { version = "0.2.17", features = ["verbatim"] } +prettyplease = { version = "0.2.19", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index e4a70c276b..e864a8949b 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -81,7 +81,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.79" } +proc-macro2 = { version = "1.0.80" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.4" } @@ -100,7 +100,7 @@ spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.58", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.59", features = ["extra-traits", "fold", "full", "visit", 
"visit-mut"] } time = { version = "0.3.34", features = ["formatting", "local-offset", "macros", "parsing"] } tokio = { version = "1.37.0", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } @@ -188,7 +188,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.79" } +proc-macro2 = { version = "1.0.80" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.4" } @@ -207,7 +207,7 @@ spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.58", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.59", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.34", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.17", default-features = false, features = ["formatting", "parsing"] } tokio = { version = "1.37.0", features = ["full", "test-util"] } From 6032f5d4b6e4ceb92d501b786bcda220f7c0f392 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 00:32:23 -0700 Subject: [PATCH 143/334] chore(deps): update rust crate proc-macro2 to v1.0.80 (#5534) From 6ed661734385d90f388e66c51400165caadce4cb Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 00:32:36 -0700 Subject: [PATCH 144/334] chore(deps): update rust crate syn to v2.0.59 (#5535) From 331a4ac490204db8230e00006d9d24b7a5cc8995 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 15 Apr 2024 13:11:08 -0700 Subject: [PATCH 145/334] Added the ability for omdb to talk to crucible-agent (#5514) Made omdb a client of the crucible-agent This allows for omdb to query a crucible agent for regions and snapshots. There is more to do here, but I wanted to get the first part out then iterate. --------- Co-authored-by: Alan Hanson --- dev-tools/omdb/src/bin/omdb/crucible_agent.rs | 178 ++++++++++++++++++ dev-tools/omdb/src/bin/omdb/main.rs | 4 + dev-tools/omdb/tests/usage_errors.out | 26 +-- 3 files changed, 196 insertions(+), 12 deletions(-) create mode 100644 dev-tools/omdb/src/bin/omdb/crucible_agent.rs diff --git a/dev-tools/omdb/src/bin/omdb/crucible_agent.rs b/dev-tools/omdb/src/bin/omdb/crucible_agent.rs new file mode 100644 index 0000000000..e2fd316335 --- /dev/null +++ b/dev-tools/omdb/src/bin/omdb/crucible_agent.rs @@ -0,0 +1,178 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
omdb commands that query a crucible-agent + +use anyhow::bail; +use anyhow::Context; +use clap::Args; +use clap::Subcommand; +use crucible_agent_client::types::RegionId; +use crucible_agent_client::Client; +use tabled::Tabled; + +use crate::Omdb; + +/// Arguments to the "omdb crucible-agent" subcommand +#[derive(Debug, Args)] +pub struct CrucibleAgentArgs { + /// URL of the crucible agent internal API + #[clap(long, env("OMDB_CRUCIBLE_AGENT_URL"))] + crucible_agent_url: Option, + + #[command(subcommand)] + command: CrucibleAgentCommands, +} + +/// Subcommands for the "omdb crucible-agent" subcommand +#[derive(Debug, Subcommand)] +enum CrucibleAgentCommands { + /// print information about regions + #[clap(subcommand)] + Regions(RegionCommands), + /// print information about snapshots + #[clap(subcommand)] + Snapshots(SnapshotCommands), +} + +#[derive(Debug, Subcommand)] +enum RegionCommands { + /// Print list of all running control plane regions + List, +} + +#[derive(Debug, Subcommand)] +enum SnapshotCommands { + /// Print list of all running control plane snapshots + List, +} + +impl CrucibleAgentArgs { + /// Run a `omdb crucible-agent` subcommand. + pub(crate) async fn run_cmd( + &self, + _omdb: &Omdb, + ) -> Result<(), anyhow::Error> { + // The crucible agent URL is required, but can come + // from the environment, in which case it won't be on the command line. + let Some(crucible_agent_url) = &self.crucible_agent_url else { + bail!( + "crucible agent URL must be specified with \ + --crucible-agent-url or by setting the environment variable \ + OMDB_CRUCIBLE_AGENT_URL" + ); + }; + let client = Client::new(crucible_agent_url); + + match &self.command { + CrucibleAgentCommands::Regions(RegionCommands::List) => { + cmd_region_list(&client).await + } + CrucibleAgentCommands::Snapshots(SnapshotCommands::List) => { + cmd_snapshot_list(&client).await + } + } + } +} + +#[derive(Tabled)] +#[tabled(rename_all = "SCREAMING_SNAKE_CASE")] +struct Region { + region_id: String, + state: String, + block_size: String, + extent_size: String, + extent_count: String, + port: String, +} + +/// Runs `omdb crucible-agent regions list` +async fn cmd_region_list( + client: &crucible_agent_client::Client, +) -> Result<(), anyhow::Error> { + let regions = client.region_list().await.context("listing regions")?; + + let mut rows = Vec::new(); + for region in regions.iter() { + rows.push(Region { + region_id: region.id.clone().to_string(), + state: region.state.to_string(), + block_size: region.block_size.to_string(), + extent_size: region.extent_size.to_string(), + extent_count: region.extent_count.to_string(), + port: region.port_number.to_string(), + }); + } + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + Ok(()) +} + +#[derive(Tabled)] +#[tabled(rename_all = "SCREAMING_SNAKE_CASE")] +struct Snapshot { + region_id: String, + snapshot_id: String, + state: String, + port: String, +} +/// Runs `omdb crucible-agent snapshot list` +async fn cmd_snapshot_list( + client: &crucible_agent_client::Client, +) -> Result<(), anyhow::Error> { + let regions = client.region_list().await.context("listing regions")?; + + let mut rows = Vec::new(); + for region in regions.iter() { + let snapshots = match client + .region_get_snapshots(&RegionId(region.id.to_string())) + .await + { + Ok(snapshots) => snapshots, + Err(e) => { + println!( + "Error {} looking at region {} for snapshots", + e, + 
region.id.to_string() + ); + continue; + } + }; + if snapshots.snapshots.is_empty() { + continue; + } + for snap in snapshots.snapshots.iter() { + match snapshots.running_snapshots.get(&snap.name) { + Some(rs) => { + rows.push(Snapshot { + region_id: region.id.clone().to_string(), + snapshot_id: snap.name.to_string(), + state: rs.state.to_string(), + port: rs.port_number.to_string(), + }); + } + None => { + rows.push(Snapshot { + region_id: region.id.clone().to_string(), + snapshot_id: snap.name.to_string(), + state: "---".to_string(), + port: "---".to_string(), + }); + } + } + } + } + let table = tabled::Table::new(rows) + .with(tabled::settings::Style::empty()) + .with(tabled::settings::Padding::new(0, 1, 0, 0)) + .to_string(); + + println!("{}", table); + + Ok(()) +} diff --git a/dev-tools/omdb/src/bin/omdb/main.rs b/dev-tools/omdb/src/bin/omdb/main.rs index 17de22c2fa..00fd4beccd 100644 --- a/dev-tools/omdb/src/bin/omdb/main.rs +++ b/dev-tools/omdb/src/bin/omdb/main.rs @@ -40,6 +40,7 @@ use omicron_common::address::Ipv6Subnet; use std::net::SocketAddr; use std::net::SocketAddrV6; +mod crucible_agent; mod db; mod mgs; mod nexus; @@ -62,6 +63,7 @@ async fn main() -> Result<(), anyhow::Error> { OmdbCommands::Nexus(nexus) => nexus.run_cmd(&args, &log).await, OmdbCommands::Oximeter(oximeter) => oximeter.run_cmd(&log).await, OmdbCommands::SledAgent(sled) => sled.run_cmd(&args, &log).await, + OmdbCommands::CrucibleAgent(crucible) => crucible.run_cmd(&args).await, } } @@ -181,6 +183,8 @@ impl Omdb { #[derive(Debug, Subcommand)] #[allow(clippy::large_enum_variant)] enum OmdbCommands { + /// Debug a specific crucible-agent + CrucibleAgent(crucible_agent::CrucibleAgentArgs), /// Query the control plane database (CockroachDB) Db(db::DbArgs), /// Debug a specific Management Gateway Service instance diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index b704982266..7cd37fe148 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -9,12 +9,13 @@ Omicron debugger (unstable) Usage: omdb [OPTIONS] Commands: - db Query the control plane database (CockroachDB) - mgs Debug a specific Management Gateway Service instance - nexus Debug a specific Nexus instance - oximeter Query oximeter collector state - sled-agent Debug a specific Sled - help Print this message or the help of the given subcommand(s) + crucible-agent Debug a specific crucible-agent + db Query the control plane database (CockroachDB) + mgs Debug a specific Management Gateway Service instance + nexus Debug a specific Nexus instance + oximeter Query oximeter collector state + sled-agent Debug a specific Sled + help Print this message or the help of the given subcommand(s) Options: --log-level log level filter [env: LOG_LEVEL=] [default: warn] @@ -34,12 +35,13 @@ using internal APIs. This is a prototype. 
The commands and output are unstable Usage: omdb [OPTIONS] Commands: - db Query the control plane database (CockroachDB) - mgs Debug a specific Management Gateway Service instance - nexus Debug a specific Nexus instance - oximeter Query oximeter collector state - sled-agent Debug a specific Sled - help Print this message or the help of the given subcommand(s) + crucible-agent Debug a specific crucible-agent + db Query the control plane database (CockroachDB) + mgs Debug a specific Management Gateway Service instance + nexus Debug a specific Nexus instance + oximeter Query oximeter collector state + sled-agent Debug a specific Sled + help Print this message or the help of the given subcommand(s) Options: --log-level From 4c8fdc8668bdc906f53db1d2a4a92cffe6420184 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 14:57:51 -0700 Subject: [PATCH 146/334] chore(deps): update rust crate ratatui to 0.26.2 (#5537) --- Cargo.lock | 10 +++++----- Cargo.toml | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index df5981f618..8825030177 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7347,9 +7347,9 @@ dependencies = [ [[package]] name = "ratatui" -version = "0.26.1" +version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcb12f8fbf6c62614b0d56eb352af54f6a22410c3b079eb53ee93c7b97dd31d8" +checksum = "a564a852040e82671dc50a37d88f3aa83bbc690dfc6844cfe7a2591620206a80" dependencies = [ "bitflags 2.4.2", "cassowary", @@ -9069,12 +9069,12 @@ dependencies = [ [[package]] name = "stability" -version = "0.1.1" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebd1b177894da2a2d9120208c3386066af06a488255caabc5de8ddca22dbc3ce" +checksum = "2ff9eaf853dec4c8802325d8b6d3dffa86cc707fd7a1a4cdbf416e13b061787a" dependencies = [ "quote", - "syn 1.0.109", + "syn 2.0.59", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b0526a259b..3c70cb5feb 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -347,7 +347,7 @@ quote = "1.0" rand = "0.8.5" rand_core = "0.6.4" rand_seeder = "0.2.3" -ratatui = "0.26.1" +ratatui = "0.26.2" rayon = "1.10" rcgen = "0.12.1" reedline = "0.31.0" From 027aad4c16ea38ca8695b06a600d5a8a94faab78 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 15 Apr 2024 14:58:13 -0700 Subject: [PATCH 147/334] chore(deps): update rust crate chrono to v0.4.38 (#5536) --- Cargo.lock | 4 ++-- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8825030177..96d0346746 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -963,9 +963,9 @@ dependencies = [ [[package]] name = "chrono" -version = "0.4.37" +version = "0.4.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8a0d04d43504c61aa6c7531f1871dd0d418d91130162063b789da00fd7057a5e" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" dependencies = [ "android-tzdata", "iana-time-zone", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index e864a8949b..5d00a50c66 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -27,7 +27,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.6.0", features = ["serde"] } 
-chrono = { version = "0.4.37", features = ["serde"] } +chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.4", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.2", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } @@ -134,7 +134,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.9.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.6.0", features = ["serde"] } -chrono = { version = "0.4.37", features = ["serde"] } +chrono = { version = "0.4.38", features = ["serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.5.4", features = ["cargo", "derive", "env", "wrap_help"] } clap_builder = { version = "4.5.2", default-features = false, features = ["cargo", "color", "env", "std", "suggestions", "usage", "wrap_help"] } From 6537ea94e08a3f3ea2668da76822b188079c0262 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 01:22:59 -0700 Subject: [PATCH 148/334] chore(deps): update rust crate sqlparser to 0.45.0 (#5520) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 96d0346746..1fcb5f000e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9048,9 +9048,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.44.0" +version = "0.45.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aaf9c7ff146298ffda83a200f8d5084f08dcee1edfc135fcc1d646a45d50ffd6" +checksum = "f7bbffee862a796d67959a89859d6b1046bb5016d63e23835ad0da182777bbe0" dependencies = [ "log", "sqlparser_derive", diff --git a/Cargo.toml b/Cargo.toml index 3c70cb5feb..c2a8d68c4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -399,7 +399,7 @@ sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "7 sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sqlformat = "0.2.3" -sqlparser = { version = "0.44.0", features = [ "visitor" ] } +sqlparser = { version = "0.45.0", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. 
It makes it # harder than expected to make breaking changes (even if you specify a specific From 22f7acadf3ffc0c8ffd14db0a0a5f2c69046c8d9 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 01:23:11 -0700 Subject: [PATCH 149/334] chore(deps): update rust crate serde_json to 1.0.116 (#5539) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1fcb5f000e..e9ff658f80 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8353,9 +8353,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.115" +version = "1.0.116" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "12dc5c46daa8e9fdf4f5e71b6cf9a53f2487da0e86e55808e2d35539666497dd" +checksum = "3e17db7126d17feb94eb3fad46bf1a96b034e8aacbc2e775fe81505f8b0b2813" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index c2a8d68c4d..a22d0a0827 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -368,7 +368,7 @@ secrecy = "0.8.0" semver = { version = "1.0.22", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive", "rc" ] } serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.115" +serde_json = "1.0.116" serde_path_to_error = "0.1.16" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 5d00a50c66..3a7d19d00f 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -92,7 +92,7 @@ ring = { version = "0.17.8", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid", "uuid1"] } semver = { version = "1.0.22", features = ["serde"] } serde = { version = "1.0.197", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.115", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.116", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -199,7 +199,7 @@ ring = { version = "0.17.8", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid", "uuid1"] } semver = { version = "1.0.22", features = ["serde"] } serde = { version = "1.0.197", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.115", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.116", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 7a6e72443930d56f1e73d3b75b7fb9f00968a0c7 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 16 Apr 2024 13:51:00 +0000 Subject: [PATCH 150/334] chore(deps): update taiki-e/install-action digest to 5c256d5 (#5542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | 
[taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`99774fe` -> `5c256d5`](https://togithub.com/taiki-e/install-action/compare/99774fe...5c256d5) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 3d76541d07..05e25dc9dc 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@99774fec7fd4f75144bd0134a24a992297768308 # v2 + uses: taiki-e/install-action@5c256d5a578917d032b8adcd9802cfa432265631 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 0c10e066affc15e7105e270575b42a0badebf6f7 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 16 Apr 2024 13:21:22 -0400 Subject: [PATCH 151/334] Drop the `service` table (#5287) I still need to test this on madrid and I do _not_ want to merge it before we cut the next release, but I believe this is ready for review. Related changes / fallout also included in this PR: * `omdb db services ...` subcommands are all gone. I believe this functionality is covered by `omdb`'s inspection of blueprints instead. * I removed the `SledResourceKind::{Dataset,Service,Reserved}` variants that were unused. This isn't required, strictly speaking, but `SledResourceKind::Service` was intended to reference the `service` table, so I thought it was clearer to drop these for now (we can add them back when we get to the point of using them). There are two major pieces of functionality that now _require_ systems to have a current target blueprint set: * `DataStore::nexus_external_addresses()` now takes an explicit `Blueprint` instead of an `Option`. Its callers (silo creation and reconfigurator DNS updates) fail if they cannot read the current target blueprint. * `DataStore::vpc_resolve_to_sleds()` now _implicitly_ requires a target blueprint to be set, if and only if the VPC being queried involves control plane services. (In practice today, that means the VPC ID is exactly `SERVICES_VPC_ID`, although in the future we could have multiple service-related VPCs.) I didn't want to make this method take an explicit blueprint, because I think its most frequent callers are specifying instance VPCs, which do not need to access the blueprint. These two together mean that deploying this change to a system without a target blueprint will result in (a) the inability to create silos or update external DNS via reconfigurator and (b) services (including Nexus) will not get the OPTE firewall rules they need to allow incoming traffic. All newly-deployed systems have a (disabled) blueprint set as of #5244, but we'll need to perform the necessary support operation to bring already-deployed systems in line. Closes #4947. 
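For reference, a minimal sketch of the new lookup pattern (adapted from the `omdb` changes below; `opctx`, `service_id`, and the `ServiceInfo` field names are assumptions drawn from the surrounding code rather than a definitive interface):

```rust
// Sketch: with the `service` table gone, service metadata is resolved from
// the current target blueprint's zone configs instead of a dedicated table.
// Assumes an authorized `opctx` and the `lookup_service_info` helper shown
// in the omdb diff below; error handling is abbreviated.
let (_, blueprint) = datastore
    .blueprint_target_get_current_full(opctx)
    .await
    .context("loading current target blueprint")?;
match lookup_service_info(service_id, &blueprint).await? {
    // Field names assumed from the omdb helper's `ServiceInfo` struct.
    Some(info) => println!("{:?} ({:?})", info.service_kind, info.disposition),
    None => println!("not a service zone in the current target blueprint"),
}
```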
--- dev-tools/omdb/src/bin/omdb/db.rs | 190 +--- dev-tools/omdb/tests/successes.out | 41 +- dev-tools/omdb/tests/test_all_output.rs | 3 - dev-tools/omdb/tests/usage_errors.out | 20 - nexus/db-model/src/lib.rs | 2 - nexus/db-model/src/schema.rs | 16 - nexus/db-model/src/schema_versions.rs | 3 +- nexus/db-model/src/service.rs | 44 - nexus/db-model/src/sled.rs | 9 +- nexus/db-model/src/sled_resource_kind.rs | 3 - .../db-queries/src/db/datastore/deployment.rs | 94 +- nexus/db-queries/src/db/datastore/mod.rs | 131 +-- nexus/db-queries/src/db/datastore/rack.rs | 990 +++++++++++------- nexus/db-queries/src/db/datastore/service.rs | 115 -- nexus/db-queries/src/db/datastore/vpc.rs | 193 ++-- nexus/reconfigurator/execution/src/dns.rs | 3 +- nexus/src/app/background/blueprint_load.rs | 207 ++-- nexus/src/app/deployment.rs | 9 +- nexus/src/app/rack.rs | 18 +- nexus/src/app/silo.rs | 12 +- nexus/src/internal_api/http_entrypoints.rs | 6 +- nexus/src/lib.rs | 25 +- nexus/test-interface/src/lib.rs | 1 - nexus/test-utils/src/lib.rs | 116 +- nexus/types/src/internal_api/params.rs | 16 - openapi/nexus-internal.json | 328 ------ schema/crdb/dbinit.sql | 60 +- schema/crdb/drop-service-table/up1.sql | 17 + schema/crdb/drop-service-table/up2.sql | 1 + schema/crdb/drop-service-table/up3.sql | 3 + schema/crdb/drop-service-table/up4.sql | 1 + schema/crdb/drop-service-table/up5.sql | 3 + schema/crdb/drop-service-table/up6.sql | 5 + schema/crdb/drop-service-table/up7.sql | 1 + sled-agent/src/params.rs | 146 --- sled-agent/src/rack_setup/service.rs | 99 +- sled-agent/src/sim/server.rs | 5 +- 37 files changed, 976 insertions(+), 1960 deletions(-) delete mode 100644 nexus/db-model/src/service.rs delete mode 100644 nexus/db-queries/src/db/datastore/service.rs create mode 100644 schema/crdb/drop-service-table/up1.sql create mode 100644 schema/crdb/drop-service-table/up2.sql create mode 100644 schema/crdb/drop-service-table/up3.sql create mode 100644 schema/crdb/drop-service-table/up4.sql create mode 100644 schema/crdb/drop-service-table/up5.sql create mode 100644 schema/crdb/drop-service-table/up6.sql create mode 100644 schema/crdb/drop-service-table/up7.sql diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index a08fa9519c..ba2c8aea09 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -250,8 +250,6 @@ enum DbCommands { Inventory(InventoryArgs), /// Save the current Reconfigurator inputs to a file ReconfiguratorSave(ReconfiguratorSaveArgs), - /// Print information about control plane services - Services(ServicesArgs), /// Print information about sleds Sleds, /// Print information about customer instances @@ -398,20 +396,6 @@ struct ReconfiguratorSaveArgs { output_file: Utf8PathBuf, } -#[derive(Debug, Args)] -struct ServicesArgs { - #[command(subcommand)] - command: ServicesCommands, -} - -#[derive(Debug, Subcommand)] -enum ServicesCommands { - /// List service instances - ListInstances, - /// List service instances, grouped by sled - ListBySled, -} - #[derive(Debug, Args)] struct NetworkArgs { #[command(subcommand)] @@ -520,26 +504,6 @@ impl DbArgs { ) .await } - DbCommands::Services(ServicesArgs { - command: ServicesCommands::ListInstances, - }) => { - cmd_db_services_list_instances( - &opctx, - &datastore, - &self.fetch_opts, - ) - .await - } - DbCommands::Services(ServicesArgs { - command: ServicesCommands::ListBySled, - }) => { - cmd_db_services_list_by_sled( - &opctx, - &datastore, - &self.fetch_opts, - ) - .await - } DbCommands::Sleds => { 
cmd_db_sleds(&opctx, &datastore, &self.fetch_opts).await } @@ -697,41 +661,11 @@ struct ServiceInfo { /// Helper function to look up the service with the given ID. /// -/// Requires the caller to first have fetched the current target blueprint, so -/// we can find services that have been added by Reconfigurator. +/// Requires the caller to first have fetched the current target blueprint. async fn lookup_service_info( - datastore: &DataStore, service_id: Uuid, - current_target_blueprint: Option<&Blueprint>, + blueprint: &Blueprint, ) -> anyhow::Result> { - let conn = datastore.pool_connection_for_tests().await?; - - // We need to check the `service` table (populated during rack setup)... - { - use db::schema::service::dsl; - if let Some(kind) = dsl::service - .filter(dsl::id.eq(service_id)) - .limit(1) - .select(dsl::kind) - .get_result_async(&*conn) - .await - .optional() - .with_context(|| format!("loading service {service_id}"))? - { - // XXX: the services table is going to go away soon! - return Ok(Some(ServiceInfo { - service_kind: kind, - disposition: BlueprintZoneDisposition::InService, - })); - } - } - - // ...and if we don't find the service, check the latest blueprint, because - // the service might have been added by Reconfigurator after RSS ran. - let Some(blueprint) = current_target_blueprint else { - return Ok(None); - }; - let Some(zone_config) = blueprint .all_blueprint_zones(BlueprintZoneFilter::All) .find_map(|(_sled_id, zone_config)| { @@ -1466,61 +1400,6 @@ async fn cmd_db_snapshot_info( Ok(()) } -/// Run `omdb db services list-instances`. -async fn cmd_db_services_list_instances( - opctx: &OpContext, - datastore: &DataStore, - fetch_opts: &DbFetchOptions, -) -> Result<(), anyhow::Error> { - let limit = fetch_opts.fetch_limit; - let sled_list = datastore - .sled_list(&opctx, &first_page(limit)) - .await - .context("listing sleds")?; - check_limit(&sled_list, limit, || String::from("listing sleds")); - - let sleds: BTreeMap = - sled_list.into_iter().map(|s| (s.id(), s)).collect(); - - let mut rows = vec![]; - - for service_kind in ServiceKind::iter() { - let context = - || format!("listing instances of kind {:?}", service_kind); - let instances = datastore - .services_list_kind(&opctx, service_kind, &first_page(limit)) - .await - .with_context(&context)?; - check_limit(&instances, limit, &context); - - rows.extend(instances.into_iter().map(|instance| { - let addr = - std::net::SocketAddrV6::new(*instance.ip, *instance.port, 0, 0) - .to_string(); - - ServiceInstanceRow { - kind: format!("{:?}", service_kind), - instance_id: instance.id(), - addr, - sled_serial: sleds - .get(&instance.sled_id) - .map(|s| s.serial_number()) - .unwrap_or("unknown") - .to_string(), - } - })); - } - - let table = tabled::Table::new(rows) - .with(tabled::settings::Style::empty()) - .with(tabled::settings::Padding::new(0, 1, 0, 0)) - .to_string(); - - println!("{}", table); - - Ok(()) -} - // SLEDS #[derive(Tabled)] @@ -1532,63 +1411,6 @@ struct ServiceInstanceSledRow { addr: String, } -/// Run `omdb db services list-by-sled`. 
-async fn cmd_db_services_list_by_sled( - opctx: &OpContext, - datastore: &DataStore, - fetch_opts: &DbFetchOptions, -) -> Result<(), anyhow::Error> { - let limit = fetch_opts.fetch_limit; - let sled_list = datastore - .sled_list(&opctx, &first_page(limit)) - .await - .context("listing sleds")?; - check_limit(&sled_list, limit, || String::from("listing sleds")); - - let sleds: BTreeMap = - sled_list.into_iter().map(|s| (s.id(), s)).collect(); - let mut services_by_sled: BTreeMap> = - BTreeMap::new(); - - for service_kind in ServiceKind::iter() { - let context = - || format!("listing instances of kind {:?}", service_kind); - let instances = datastore - .services_list_kind(&opctx, service_kind, &first_page(limit)) - .await - .with_context(&context)?; - check_limit(&instances, limit, &context); - - for i in instances { - let addr = - std::net::SocketAddrV6::new(*i.ip, *i.port, 0, 0).to_string(); - let sled_instances = - services_by_sled.entry(i.sled_id).or_insert_with(Vec::new); - sled_instances.push(ServiceInstanceSledRow { - kind: format!("{:?}", service_kind), - instance_id: i.id(), - addr, - }) - } - } - - for (sled_id, instances) in services_by_sled { - println!( - "sled: {} (id {})\n", - sleds.get(&sled_id).map(|s| s.serial_number()).unwrap_or("unknown"), - sled_id, - ); - let table = tabled::Table::new(instances) - .with(tabled::settings::Style::empty()) - .with(tabled::settings::Padding::new(0, 1, 0, 0)) - .to_string(); - println!("{}", textwrap::indent(&table.to_string(), " ")); - println!(""); - } - - Ok(()) -} - #[derive(Tabled)] #[tabled(rename_all = "SCREAMING_SNAKE_CASE")] struct SledRow { @@ -2052,19 +1874,17 @@ async fn cmd_db_eips( let mut rows = Vec::new(); - let current_target_blueprint = datastore + let (_, current_target_blueprint) = datastore .blueprint_target_get_current_full(opctx) .await - .context("loading current target blueprint")? - .map(|(_, blueprint)| blueprint); + .context("loading current target blueprint")?; for ip in &ips { let owner = if let Some(owner_id) = ip.parent_id { if ip.is_service { let (kind, disposition) = match lookup_service_info( - datastore, owner_id, - current_target_blueprint.as_ref(), + ¤t_target_blueprint, ) .await? { diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 481df1c6e6..942a5338fb 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -69,45 +69,6 @@ note: database schema version matches expected () assembling reconfigurator state ... done wrote ============================================= -EXECUTING COMMAND: omdb ["db", "services", "list-instances"] -termination: Exited(0) ---------------------------------------------- -stdout: -SERVICE INSTANCE_ID ADDR SLED_SERIAL -CruciblePantry ..................... [::1]:REDACTED_PORT sim-b6d65341 -ExternalDns ..................... [::1]:REDACTED_PORT sim-b6d65341 -InternalDns ..................... [::1]:REDACTED_PORT sim-b6d65341 -Nexus ..................... [::ffff:127.0.0.1]:REDACTED_PORT sim-b6d65341 -Mgd ..................... [::1]:REDACTED_PORT sim-039be560 -Mgd ..................... 
[::1]:REDACTED_PORT sim-b6d65341 ---------------------------------------------- -stderr: -note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected () -============================================= -EXECUTING COMMAND: omdb ["db", "services", "list-by-sled"] -termination: Exited(0) ---------------------------------------------- -stdout: -sled: sim-039be560 (id .....................) - - SERVICE INSTANCE_ID ADDR - Mgd ..................... [::1]:REDACTED_PORT - -sled: sim-b6d65341 (id .....................) - - SERVICE INSTANCE_ID ADDR - CruciblePantry ..................... [::1]:REDACTED_PORT - ExternalDns ..................... [::1]:REDACTED_PORT - InternalDns ..................... [::1]:REDACTED_PORT - Nexus ..................... [::ffff:127.0.0.1]:REDACTED_PORT - Mgd ..................... [::1]:REDACTED_PORT - ---------------------------------------------- -stderr: -note: using database URL postgresql://root@[::1]:REDACTED_PORT/omicron?sslmode=disable -note: database schema version matches expected () -============================================= EXECUTING COMMAND: omdb ["db", "sleds"] termination: Exited(0) --------------------------------------------- @@ -385,7 +346,7 @@ task: "blueprint_loader" currently executing: no last completed activation: iter 2, triggered by an explicit signal started at (s ago) and ran for ms -warning: unknown background task: "blueprint_loader" (don't know how to interpret details: Object {"status": String("no target blueprint")}) + last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set task: "blueprint_executor" configured period: every 10m diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index ca24637040..5f64f9c567 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -46,7 +46,6 @@ async fn test_omdb_usage_errors() { &["db", "dns"], &["db", "dns", "diff"], &["db", "dns", "names"], - &["db", "services"], &["db", "snapshots"], &["db", "network"], &["mgs"], @@ -90,8 +89,6 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["db", "dns", "names", "external", "2"], &["db", "instances"], &["db", "reconfigurator-save", tmppath.as_str()], - &["db", "services", "list-instances"], - &["db", "services", "list-by-sled"], &["db", "sleds"], &["mgs", "inventory"], &["nexus", "background-tasks", "doc"], diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 7cd37fe148..e8967c859a 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -101,7 +101,6 @@ Commands: dns Print information about internal and external DNS inventory Print information about collected hardware/software inventory reconfigurator-save Save the current Reconfigurator inputs to a file - services Print information about control plane services sleds Print information about sleds instances Print information about customer instances network Print information about the network @@ -131,7 +130,6 @@ Commands: dns Print information about internal and external DNS inventory Print information about collected hardware/software inventory reconfigurator-save Save the current Reconfigurator inputs to a file - services Print information about control plane services sleds Print information about sleds instances Print information about customer instances network Print information about the network 
@@ -215,24 +213,6 @@ Usage: omdb db dns names For more information, try '--help'. ============================================= -EXECUTING COMMAND: omdb ["db", "services"] -termination: Exited(2) ---------------------------------------------- -stdout: ---------------------------------------------- -stderr: -Print information about control plane services - -Usage: omdb db services - -Commands: - list-instances List service instances - list-by-sled List service instances, grouped by sled - help Print this message or the help of the given subcommand(s) - -Options: - -h, --help Print help -============================================= EXECUTING COMMAND: omdb ["db", "snapshots"] termination: Exited(2) --------------------------------------------- diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index a2e9565d46..6495a0c960 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -70,7 +70,6 @@ mod role_builtin; pub mod saga_types; pub mod schema; mod schema_versions; -mod service; mod service_kind; mod silo; mod silo_group; @@ -165,7 +164,6 @@ pub use role_assignment::*; pub use role_builtin::*; pub use schema_versions::*; pub use semver_version::*; -pub use service::*; pub use service_kind::*; pub use silo::*; pub use silo_group::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index b02a8677d4..07b8ce851e 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -875,20 +875,6 @@ table! { } } -table! { - service (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - sled_id -> Uuid, - zone_id -> Nullable, - ip -> Inet, - port -> Int4, - kind -> crate::ServiceKindEnum, - } -} - table! { physical_disk (id) { id -> Uuid, @@ -1691,7 +1677,6 @@ allow_tables_to_appear_in_same_query!( silo, identity_provider, console_session, - service, sled, sled_resource, router_route, @@ -1707,7 +1692,6 @@ allow_tables_to_appear_in_same_query!( ); allow_tables_to_appear_in_same_query!(dns_zone, dns_version, dns_name); -allow_tables_to_appear_in_same_query!(external_ip, service); // used for query to check whether an IP pool association has any allocated IPs before deleting allow_tables_to_appear_in_same_query!(external_ip, instance); diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index ad43cf77c5..86c7d618aa 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(52, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(53, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(53, "drop-service-table"), KnownVersion::new(52, "blueprint-physical-disk"), KnownVersion::new(51, "blueprint-disposition-column"), KnownVersion::new(50, "add-lookup-disk-by-volume-id-index"), diff --git a/nexus/db-model/src/service.rs b/nexus/db-model/src/service.rs deleted file mode 100644 index 45d3ca5a16..0000000000 --- a/nexus/db-model/src/service.rs +++ /dev/null @@ -1,44 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use super::ServiceKind; -use crate::ipv6; -use crate::schema::service; -use crate::SqlU16; -use db_macros::Asset; -use std::net::SocketAddrV6; -use uuid::Uuid; - -/// Representation of services which may run on Sleds. -#[derive(Queryable, Insertable, Debug, Clone, Selectable, Asset)] -#[diesel(table_name = service)] -pub struct Service { - #[diesel(embed)] - identity: ServiceIdentity, - - pub sled_id: Uuid, - pub zone_id: Option, - pub ip: ipv6::Ipv6Addr, - pub port: SqlU16, - pub kind: ServiceKind, -} - -impl Service { - pub fn new( - id: Uuid, - sled_id: Uuid, - zone_id: Option, - addr: SocketAddrV6, - kind: ServiceKind, - ) -> Self { - Self { - identity: ServiceIdentity::new(id), - sled_id, - zone_id, - ip: addr.ip().into(), - port: addr.port().into(), - kind, - } - } -} diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index 1fa436c992..e94da5fbbe 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -5,7 +5,7 @@ use super::{ByteCount, Generation, SledState, SqlU16, SqlU32}; use crate::collection::DatastoreCollectionConfig; use crate::ipv6; -use crate::schema::{physical_disk, service, sled, zpool}; +use crate::schema::{physical_disk, sled, zpool}; use crate::sled::shared::Baseboard; use crate::sled_policy::DbSledPolicy; use chrono::{DateTime, Utc}; @@ -177,13 +177,6 @@ impl DatastoreCollectionConfig for Sled { type CollectionIdColumn = zpool::dsl::sled_id; } -impl DatastoreCollectionConfig for Sled { - type CollectionId = Uuid; - type GenerationNumberColumn = sled::dsl::rcgen; - type CollectionTimeDeletedColumn = sled::dsl::time_deleted; - type CollectionIdColumn = service::dsl::sled_id; -} - /// Form of `Sled` used for updates from sled-agent. This is missing some /// columns that are present in `Sled` because sled-agent doesn't control them. #[derive(Debug, Clone)] diff --git a/nexus/db-model/src/sled_resource_kind.rs b/nexus/db-model/src/sled_resource_kind.rs index c17eb2e106..b9a59bdc30 100644 --- a/nexus/db-model/src/sled_resource_kind.rs +++ b/nexus/db-model/src/sled_resource_kind.rs @@ -15,8 +15,5 @@ impl_enum_type!( pub enum SledResourceKind; // Enum values - Dataset => b"dataset" - Service => b"service" Instance => b"instance" - Reserved => b"reserved" ); diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index fa6673842a..d31428e319 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -598,15 +598,13 @@ impl DataStore { // current target. 
let current_target = self.blueprint_current_target_only(&conn).await?; - if let Some(current_target) = current_target { - if current_target.target_id == blueprint_id { - return Err(TransactionError::CustomError( - Error::conflict(format!( - "blueprint {blueprint_id} is the \ - current target and cannot be deleted", - )), - )); - } + if current_target.target_id == blueprint_id { + return Err(TransactionError::CustomError( + Error::conflict(format!( + "blueprint {blueprint_id} is the \ + current target and cannot be deleted", + )), + )); } // Remove the record describing the blueprint itself. @@ -848,14 +846,11 @@ impl DataStore { pub async fn blueprint_target_get_current_full( &self, opctx: &OpContext, - ) -> Result, Error> { + ) -> Result<(BlueprintTarget, Blueprint), Error> { opctx.authorize(authz::Action::Read, &authz::BLUEPRINT_CONFIG).await?; let conn = self.pool_connection_authorized(opctx).await?; - let Some(target) = self.blueprint_current_target_only(&conn).await? - else { - return Ok(None); - }; + let target = self.blueprint_current_target_only(&conn).await?; // The blueprint for the current target cannot be deleted while it is // the current target, but it's possible someone else (a) made a new @@ -866,14 +861,14 @@ impl DataStore { let authz_blueprint = authz_blueprint_from_id(target.target_id); let blueprint = self.blueprint_read(opctx, &authz_blueprint).await?; - Ok(Some((target, blueprint))) + Ok((target, blueprint)) } /// Get the current target blueprint, if one exists pub async fn blueprint_target_get_current( &self, opctx: &OpContext, - ) -> Result, Error> { + ) -> Result { opctx.authorize(authz::Action::Read, &authz::BLUEPRINT_CONFIG).await?; let conn = self.pool_connection_authorized(opctx).await?; self.blueprint_current_target_only(&conn).await @@ -886,7 +881,7 @@ impl DataStore { async fn blueprint_current_target_only( &self, conn: &async_bb8_diesel::Connection, - ) -> Result, Error> { + ) -> Result { use db::schema::bp_target::dsl; let current_target = dsl::bp_target @@ -896,7 +891,16 @@ impl DataStore { .optional() .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - Ok(current_target.map(BlueprintTarget::from)) + // We expect a target blueprint to be set on all systems. RSS sets an + // initial blueprint, but we shipped systems before it did so. We added + // target blueprints to those systems via support operations, but let's + // be careful here and return a specific error for this case. + let current_target = + current_target.ok_or_else(|| Error::InternalError { + internal_message: "no target blueprint set".to_string(), + })?; + + Ok(current_target.into()) } } @@ -1483,10 +1487,12 @@ mod tests { datastore.blueprint_insert(&opctx, &blueprint1).await.unwrap_err(); assert!(err.to_string().contains("duplicate key")); - // Delete the blueprint and ensure it's really gone. - datastore.blueprint_delete(&opctx, &authz_blueprint).await.unwrap(); - ensure_blueprint_fully_deleted(&datastore, blueprint1.id).await; - assert_eq!(blueprint_list_all_ids(&opctx, &datastore).await, []); + // We could try to test deleting this blueprint, but deletion checks + // that the blueprint being deleted isn't the current target, and we + // haven't set a current target at all as part of this test. Instead of + // going through the motions of creating another blueprint and making it + // the target just to test deletion, we'll end this test here, and rely + // on other tests to check blueprint deletion. // Clean up. 
db.cleanup().await.unwrap(); @@ -1549,7 +1555,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), - Some((bp1_target, blueprint1.clone())) + (bp1_target, blueprint1.clone()) ); let err = datastore .blueprint_delete(&opctx, &authz_blueprint1) @@ -1683,7 +1689,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), - Some((bp2_target, blueprint2.clone())) + (bp2_target, blueprint2.clone()) ); let err = datastore .blueprint_delete(&opctx, &authz_blueprint2) @@ -1739,11 +1745,14 @@ mod tests { )) ); - // There should be no current target still. - assert_eq!( - datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), - None - ); + // There should be no current target; this is never expected in a real + // system, since RSS sets an initial target blueprint, so we should get + // an error. + let err = datastore + .blueprint_target_get_current_full(&opctx) + .await + .unwrap_err(); + assert!(err.to_string().contains("no target blueprint set")); // Create three blueprints: // * `blueprint1` has no parent @@ -1812,11 +1821,14 @@ mod tests { Error::from(InsertTargetError::ParentNotTarget(blueprint2.id)) ); - // There should be no current target still. - assert_eq!( - datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), - None - ); + // There should be no current target; this is never expected in a real + // system, since RSS sets an initial target blueprint, so we should get + // an error. + let err = datastore + .blueprint_target_get_current_full(&opctx) + .await + .unwrap_err(); + assert!(err.to_string().contains("no target blueprint set")); // We should be able to insert blueprint1, which has no parent (matching // the currently-empty `bp_target` table's lack of a target). @@ -1826,7 +1838,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), - Some((bp1_target, blueprint1.clone())) + (bp1_target, blueprint1.clone()) ); // Now that blueprint1 is the current target, we should be able to @@ -1837,7 +1849,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), - Some((bp3_target, blueprint3.clone())) + (bp3_target, blueprint3.clone()) ); // Now that blueprint3 is the target, trying to insert blueprint1 or @@ -1883,7 +1895,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), - Some((bp4_target, blueprint4)) + (bp4_target, blueprint4) ); // Clean up. @@ -1942,7 +1954,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current(&opctx).await.unwrap(), - Some(bp1_target), + bp1_target, ); // We should be able to toggle its enabled status an arbitrary number of @@ -1955,7 +1967,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current(&opctx).await.unwrap(), - Some(bp1_target), + bp1_target, ); } @@ -1976,7 +1988,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current(&opctx).await.unwrap(), - Some(bp2_target), + bp2_target, ); // We can no longer toggle the enabled bit of bp1_target. 
@@ -1997,7 +2009,7 @@ mod tests { .unwrap(); assert_eq!( datastore.blueprint_target_get_current(&opctx).await.unwrap(), - Some(bp2_target), + bp2_target, ); } diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 6e8eecb8ed..4d6b16483d 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -79,7 +79,6 @@ mod region; mod region_snapshot; mod role; mod saga; -mod service; mod silo; mod silo_group; mod silo_user; @@ -385,8 +384,8 @@ mod test { use crate::db::model::{ BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind, PhysicalDiskPolicy, PhysicalDiskState, - Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard, - SledSystemHardware, SledUpdate, SshKey, VpcSubnet, Zpool, + Project, Rack, Region, SiloUser, SledBaseboard, SledSystemHardware, + SledUpdate, SshKey, VpcSubnet, Zpool, }; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; use chrono::{Duration, Utc}; @@ -397,7 +396,6 @@ mod test { use nexus_db_model::{to_db_typed_uuid, Generation}; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params; - use omicron_common::api::external::DataPageParams; use omicron_common::api::external::{ ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; @@ -408,7 +406,6 @@ mod test { use std::collections::HashMap; use std::collections::HashSet; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; - use std::num::NonZeroU32; use std::sync::Arc; use strum::EnumCount; use uuid::Uuid; @@ -1746,130 +1743,6 @@ mod test { logctx.cleanup_successful(); } - #[tokio::test] - async fn test_service_upsert_and_list() { - let logctx = dev::test_setup_log("test_service_upsert_and_list"); - let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = datastore_test(&logctx, &db).await; - - // Create a sled on which the service should exist. - let sled_id = create_test_sled(&datastore).await.into_untyped_uuid(); - - // Create a few new service to exist on this sled. - let service1_id = - "ab7bd7fd-7c37-48ab-a84a-9c09a90c4c7f".parse().unwrap(); - let addr = SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0); - let kind = ServiceKind::Nexus; - - let service1 = - Service::new(service1_id, sled_id, Some(service1_id), addr, kind); - let result = - datastore.service_upsert(&opctx, service1.clone()).await.unwrap(); - assert_eq!(service1.id(), result.id()); - assert_eq!(service1.ip, result.ip); - assert_eq!(service1.kind, result.kind); - - let service2_id = - "fe5b6e3d-dfee-47b4-8719-c54f78912c0b".parse().unwrap(); - let service2 = Service::new(service2_id, sled_id, None, addr, kind); - let result = - datastore.service_upsert(&opctx, service2.clone()).await.unwrap(); - assert_eq!(service2.id(), result.id()); - assert_eq!(service2.ip, result.ip); - assert_eq!(service2.kind, result.kind); - - let service3_id = Uuid::new_v4(); - let kind = ServiceKind::Oximeter; - let service3 = Service::new( - service3_id, - sled_id, - Some(Uuid::new_v4()), - addr, - kind, - ); - let result = - datastore.service_upsert(&opctx, service3.clone()).await.unwrap(); - assert_eq!(service3.id(), result.id()); - assert_eq!(service3.ip, result.ip); - assert_eq!(service3.kind, result.kind); - - // Try listing services of one kind. 
- let services = datastore - .services_list_kind( - &opctx, - ServiceKind::Nexus, - &DataPageParams { - marker: None, - direction: dropshot::PaginationOrder::Ascending, - limit: NonZeroU32::new(3).unwrap(), - }, - ) - .await - .unwrap(); - assert_eq!(services[0].id(), service1.id()); - assert_eq!(services[0].sled_id, service1.sled_id); - assert_eq!(services[0].zone_id, service1.zone_id); - assert_eq!(services[0].kind, service1.kind); - assert_eq!(services[1].id(), service2.id()); - assert_eq!(services[1].sled_id, service2.sled_id); - assert_eq!(services[1].zone_id, service2.zone_id); - assert_eq!(services[1].kind, service2.kind); - assert_eq!(services.len(), 2); - - // Try listing services of a different kind. - let services = datastore - .services_list_kind( - &opctx, - ServiceKind::Oximeter, - &DataPageParams { - marker: None, - direction: dropshot::PaginationOrder::Ascending, - limit: NonZeroU32::new(3).unwrap(), - }, - ) - .await - .unwrap(); - assert_eq!(services[0].id(), service3.id()); - assert_eq!(services[0].sled_id, service3.sled_id); - assert_eq!(services[0].zone_id, service3.zone_id); - assert_eq!(services[0].kind, service3.kind); - assert_eq!(services.len(), 1); - - // Try listing services of a kind for which there are no services. - let services = datastore - .services_list_kind( - &opctx, - ServiceKind::Dendrite, - &DataPageParams { - marker: None, - direction: dropshot::PaginationOrder::Ascending, - limit: NonZeroU32::new(3).unwrap(), - }, - ) - .await - .unwrap(); - assert!(services.is_empty()); - - // As a quick check, try supplying a marker. - let services = datastore - .services_list_kind( - &opctx, - ServiceKind::Nexus, - &DataPageParams { - marker: Some(&service1_id), - direction: dropshot::PaginationOrder::Ascending, - limit: NonZeroU32::new(3).unwrap(), - }, - ) - .await - .unwrap(); - assert_eq!(services.len(), 1); - assert_eq!(services[0].id(), service2.id()); - - db.cleanup().await.unwrap(); - logctx.cleanup_successful(); - } - #[tokio::test] async fn test_rack_initialize_is_idempotent() { let logctx = dev::test_setup_log("test_rack_initialize_is_idempotent"); diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index ece9112745..3dff04cc11 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -35,7 +35,6 @@ use diesel::prelude::*; use diesel::result::Error as DieselError; use diesel::upsert::excluded; use ipnetwork::IpNetwork; -use nexus_db_model::ExternalIp; use nexus_db_model::IncompleteNetworkInterface; use nexus_db_model::InitialDnsGroup; use nexus_db_model::PasswordHashString; @@ -44,13 +43,15 @@ use nexus_db_model::SiloUserPasswordHash; use nexus_db_model::SledUnderlaySubnetAllocation; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintTarget; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::OmicronZoneConfig; +use nexus_types::deployment::OmicronZoneType; use nexus_types::external_api::params as external_params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::IdentityType; use nexus_types::external_api::shared::IpRange; use nexus_types::external_api::shared::SiloRole; use nexus_types::identity::Resource; -use nexus_types::internal_api::params as internal_params; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -60,7 +61,6 @@ use 
omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; use omicron_common::bail_unless; use slog_error_chain::InlineErrorChain; -use std::net::IpAddr; use std::sync::{Arc, OnceLock}; use uuid::Uuid; @@ -70,7 +70,6 @@ pub struct RackInit { pub rack_id: Uuid, pub rack_subnet: IpNetwork, pub blueprint: Blueprint, - pub services: Vec, pub physical_disks: Vec, pub zpools: Vec, pub datasets: Vec, @@ -91,7 +90,6 @@ enum RackInitError { AddingNic(Error), BlueprintInsert(Error), BlueprintTargetSet(Error), - ServiceInsert(Error), DatasetInsert { err: AsyncInsertError, zpool_id: Uuid }, PhysicalDiskInsert(Error), ZpoolInsert(Error), @@ -133,9 +131,6 @@ impl From for Error { }, RackInitError::PhysicalDiskInsert(err) => err, RackInitError::ZpoolInsert(err) => err, - RackInitError::ServiceInsert(err) => Error::internal_error( - &format!("failed to insert Service record: {:#}", err), - ), RackInitError::BlueprintInsert(err) => Error::internal_error( &format!("failed to insert Blueprint: {:#}", err), ), @@ -464,54 +459,64 @@ impl DataStore { Ok(()) } - async fn rack_populate_service_records( + async fn rack_populate_service_networking_records( &self, conn: &async_bb8_diesel::Connection, log: &slog::Logger, service_pool: &db::model::IpPool, - service: internal_params::ServicePutRequest, + zone_config: &OmicronZoneConfig, ) -> Result<(), RackInitError> { - use internal_params::ServiceKind; - - let service_db = db::model::Service::new( - service.service_id, - service.sled_id, - service.zone_id, - service.address, - service.kind.clone().into(), - ); - self.service_upsert_conn(conn, service_db).await.map_err( - |e| match e.retryable() { - Retryable(e) => RackInitError::Retryable(e), - NotRetryable(e) => RackInitError::ServiceInsert(e.into()), - }, - )?; - // For services with external connectivity, we record their // explicit IP allocation and create a service NIC as well. - let service_ip_nic = match service.kind { - ServiceKind::ExternalDns { external_address, ref nic } - | ServiceKind::Nexus { external_address, ref nic } => { + let zone_type = &zone_config.zone_type; + let service_ip_nic = match zone_type { + OmicronZoneType::ExternalDns { nic, .. } + | OmicronZoneType::Nexus { nic, .. } => { + let service_kind = format!("{}", zone_type.kind()); + let external_ip = match zone_type.external_ip() { + Ok(Some(ip)) => ip, + Ok(None) => { + let message = format!( + "missing external IP in blueprint for {} zone {}", + service_kind, zone_config.id + ); + return Err(RackInitError::AddingNic( + Error::internal_error(&message), + )); + } + Err(err) => { + let message = format!( + "error parsing external IP in blueprint for \ + {} zone {}: {err:#}", + service_kind, zone_config.id + ); + return Err(RackInitError::AddingNic( + Error::internal_error(&message), + )); + } + }; let db_ip = IncompleteExternalIp::for_service_explicit( Uuid::new_v4(), &db::model::Name(nic.name.clone()), - &format!("{}", service.kind), - service.service_id, + &service_kind, + zone_config.id, service_pool.id(), - external_address, + external_ip, ); - let vpc_subnet = match service.kind { - ServiceKind::ExternalDns { .. } => DNS_VPC_SUBNET.clone(), - ServiceKind::Nexus { .. } => NEXUS_VPC_SUBNET.clone(), + let vpc_subnet = match zone_type { + OmicronZoneType::ExternalDns { .. } => { + DNS_VPC_SUBNET.clone() + } + OmicronZoneType::Nexus { .. 
} => NEXUS_VPC_SUBNET.clone(), _ => unreachable!(), }; let db_nic = IncompleteNetworkInterface::new_service( nic.id, - service.service_id, + zone_config.id, vpc_subnet, IdentityMetadataCreateParams { name: nic.name.clone(), - description: format!("{} service vNIC", service.kind), + description: format!("{service_kind} service vNIC"), }, nic.ip, nic.mac, @@ -520,21 +525,24 @@ impl DataStore { .map_err(|e| RackInitError::AddingNic(e))?; Some((db_ip, db_nic)) } - ServiceKind::BoundaryNtp { snat, ref nic } => { + OmicronZoneType::BoundaryNtp { snat_cfg, ref nic, .. } => { let db_ip = IncompleteExternalIp::for_service_explicit_snat( Uuid::new_v4(), - service.service_id, + zone_config.id, service_pool.id(), - snat.ip, - (snat.first_port, snat.last_port), + snat_cfg.ip, + (snat_cfg.first_port, snat_cfg.last_port), ); let db_nic = IncompleteNetworkInterface::new_service( nic.id, - service.service_id, + zone_config.id, NTP_VPC_SUBNET.clone(), IdentityMetadataCreateParams { name: nic.name.clone(), - description: format!("{} service vNIC", service.kind), + description: format!( + "{} service vNIC", + zone_type.kind() + ), }, nic.ip, nic.mac, @@ -543,44 +551,61 @@ impl DataStore { .map_err(|e| RackInitError::AddingNic(e))?; Some((db_ip, db_nic)) } - _ => None, + OmicronZoneType::InternalNtp { .. } + | OmicronZoneType::Clickhouse { .. } + | OmicronZoneType::ClickhouseKeeper { .. } + | OmicronZoneType::CockroachDb { .. } + | OmicronZoneType::Crucible { .. } + | OmicronZoneType::CruciblePantry { .. } + | OmicronZoneType::InternalDns { .. } + | OmicronZoneType::Oximeter { .. } => None, }; - if let Some((db_ip, db_nic)) = service_ip_nic { - Self::allocate_external_ip_on_connection(conn, db_ip) - .await - .map_err(|err| { - error!( - log, - "Initializing Rack: Failed to allocate \ - IP address for {}", - service.kind; - "err" => %err, - ); - match err.retryable() { - Retryable(e) => RackInitError::Retryable(e), - NotRetryable(e) => RackInitError::AddingIp(e.into()), - } - })?; + let Some((db_ip, db_nic)) = service_ip_nic else { + info!( + log, + "No networking records needed for {} service", + zone_type.kind(), + ); + return Ok(()); + }; + Self::allocate_external_ip_on_connection(conn, db_ip).await.map_err( + |err| { + error!( + log, + "Initializing Rack: Failed to allocate \ + IP address for {}", + zone_type.kind(); + "err" => %err, + ); + match err.retryable() { + Retryable(e) => RackInitError::Retryable(e), + NotRetryable(e) => RackInitError::AddingIp(e.into()), + } + }, + )?; - self.create_network_interface_raw_conn(conn, db_nic) - .await - .map(|_| ()) - .or_else(|e| { - use db::queries::network_interface::InsertError; - match e { - InsertError::InterfaceAlreadyExists( - _, - db::model::NetworkInterfaceKind::Service, - ) => Ok(()), - InsertError::Retryable(err) => { - Err(RackInitError::Retryable(err)) - } - _ => Err(RackInitError::AddingNic(e.into_external())), + self.create_network_interface_raw_conn(conn, db_nic) + .await + .map(|_| ()) + .or_else(|e| { + use db::queries::network_interface::InsertError; + match e { + InsertError::InterfaceAlreadyExists( + _, + db::model::NetworkInterfaceKind::Service, + ) => Ok(()), + InsertError::Retryable(err) => { + Err(RackInitError::Retryable(err)) } - })?; - } + _ => Err(RackInitError::AddingNic(e.into_external())), + } + })?; + info!( + log, + "Inserted networking records for {} service", + zone_type.kind(), + ); - info!(log, "Inserted records for {} service", service.kind); Ok(()) } @@ -616,7 +641,6 @@ impl DataStore { async move { let rack_id = 
rack_init.rack_id; let blueprint = rack_init.blueprint; - let services = rack_init.services; let physical_disks = rack_init.physical_disks; let zpools = rack_init.zpools; let datasets = rack_init.datasets; @@ -719,13 +743,13 @@ impl DataStore { DieselError::RollbackTransaction })?; - // Allocate records for all services. - for service in services { - self.rack_populate_service_records( + // Allocate networking records for all services. + for (_, zone_config) in blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeRunning) { + self.rack_populate_service_networking_records( &conn, &log, &service_pool, - service, + zone_config, ) .await .map_err(|e| { @@ -734,7 +758,7 @@ impl DataStore { DieselError::RollbackTransaction })?; } - info!(log, "Inserted services"); + info!(log, "Inserted service networking records"); for physical_disk in physical_disks { if let Err(e) = Self::physical_disk_insert_on_connection(&conn, &opctx, physical_disk) @@ -909,38 +933,6 @@ impl DataStore { Ok(()) } - - // TODO once we eliminate the service table, we can eliminate this function - // and the branch in the sole caller - pub async fn nexus_external_addresses_from_service_table( - &self, - opctx: &OpContext, - ) -> Result, Error> { - opctx.authorize(authz::Action::Read, &authz::DNS_CONFIG).await?; - - use crate::db::schema::external_ip::dsl as extip_dsl; - use crate::db::schema::service::dsl as service_dsl; - - let conn = self.pool_connection_authorized(opctx).await?; - - Ok(extip_dsl::external_ip - .inner_join( - service_dsl::service - .on(service_dsl::id - .eq(extip_dsl::parent_id.assume_not_null())), - ) - .filter(extip_dsl::parent_id.is_not_null()) - .filter(extip_dsl::time_deleted.is_null()) - .filter(extip_dsl::is_service) - .filter(service_dsl::kind.eq(db::model::ServiceKind::Nexus)) - .select(ExternalIp::as_select()) - .get_results_async(&*conn) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))? 
- .into_iter() - .map(|external_ip| external_ip.ip.ip()) - .collect()) - } } #[cfg(test)] @@ -955,28 +947,37 @@ mod test { use crate::db::model::ExternalIp; use crate::db::model::IpKind; use crate::db::model::IpPoolRange; - use crate::db::model::Service; - use crate::db::model::ServiceKind; use crate::db::model::Sled; use async_bb8_diesel::AsyncSimpleConnection; - use internal_params::DnsRecord; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_db_model::{DnsGroup, Generation, InitialDnsGroup, SledUpdate}; + use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; + use nexus_reconfigurator_planning::system::{ + SledBuilder, SystemDescription, + }; use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::OmicronZoneConfig; + use nexus_types::deployment::OmicronZonesConfig; + use nexus_types::deployment::SledFilter; use nexus_types::external_api::shared::SiloIdentityMode; use nexus_types::identity::Asset; - use nexus_types::internal_api::params::ServiceNic; + use nexus_types::internal_api::params::DnsRecord; + use nexus_types::inventory::NetworkInterface; + use nexus_types::inventory::NetworkInterfaceKind; use omicron_common::address::{ DNS_OPTE_IPV4_SUBNET, NEXUS_OPTE_IPV4_SUBNET, NTP_OPTE_IPV4_SUBNET, }; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::{ - IdentityMetadataCreateParams, MacAddr, + IdentityMetadataCreateParams, MacAddr, Vni, }; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_test_utils::dev; + use omicron_uuid_kinds::TypedUuid; + use omicron_uuid_kinds::{GenericUuid, SledUuid, ZpoolUuid}; + use sled_agent_client::types::OmicronZoneDataset; use std::collections::{BTreeMap, HashMap}; - use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; + use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6}; use std::num::NonZeroU32; // Default impl is for tests only, and really just so that tests can more @@ -997,7 +998,6 @@ mod test { creator: "test suite".to_string(), comment: "test suite".to_string(), }, - services: vec![], physical_disks: vec![], zpools: vec![], datasets: vec![], @@ -1212,14 +1212,25 @@ mod test { }; } - fn_to_get_all!(service, Service); fn_to_get_all!(external_ip, ExternalIp); fn_to_get_all!(ip_pool_range, IpPoolRange); fn_to_get_all!(dataset, Dataset); + fn random_dataset() -> OmicronZoneDataset { + OmicronZoneDataset { + pool_name: illumos_utils::zpool::ZpoolName::new_external( + ZpoolUuid::new_v4(), + ) + .to_string() + .parse() + .unwrap(), + } + } + #[tokio::test] async fn rack_set_initialized_with_services() { - let logctx = dev::test_setup_log("rack_set_initialized_with_services"); + let test_name = "rack_set_initialized_with_services"; + let logctx = dev::test_setup_log(test_name); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; @@ -1233,6 +1244,29 @@ mod test { )) .unwrap()]; + let mut system = SystemDescription::new(); + system + .service_ip_pool_ranges(service_ip_pool_ranges.clone()) + .sled( + SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled1.id())), + ) + .expect("failed to add sled1") + .sled( + SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled2.id())), + ) + .expect("failed to add sled2") + .sled( + SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled3.id())), + ) + .expect("failed to add sled3"); + let planning_input = system + .to_planning_input_builder() + .expect("failed to make planning input") + .build(); + let mut 
inventory_builder = system + .to_collection_builder() + .expect("failed to make collection builder"); + let external_dns_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)); let external_dns_pip = DNS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) @@ -1255,93 +1289,182 @@ mod test { let ntp2_id = Uuid::new_v4(); let ntp3_id = Uuid::new_v4(); let mut macs = MacAddr::iter_system(); - let services = vec![ - internal_params::ServicePutRequest { - service_id: external_dns_id, - sled_id: sled1.id(), - zone_id: Some(external_dns_id), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0), - kind: internal_params::ServiceKind::ExternalDns { - external_address: external_dns_ip, - nic: ServiceNic { - id: Uuid::new_v4(), - name: "external-dns".parse().unwrap(), - ip: external_dns_pip.into(), - mac: macs.next().unwrap(), - slot: 0, - }, - }, - }, - internal_params::ServicePutRequest { - service_id: ntp1_id, - sled_id: sled1.id(), - zone_id: Some(ntp1_id), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 9090, 0, 0), - kind: internal_params::ServiceKind::BoundaryNtp { - snat: SourceNatConfig { - ip: ntp1_ip, - first_port: 16384, - last_port: 32767, - }, - nic: ServiceNic { - id: Uuid::new_v4(), - name: "ntp1".parse().unwrap(), - ip: ntp1_pip.into(), - mac: macs.next().unwrap(), - slot: 0, - }, + + // Add services for our sleds to the inventory (which will cause them to + // be present in the blueprint we'll generate from it). + inventory_builder + .found_sled_omicron_zones( + "sled1", + SledUuid::from_untyped_uuid(sled1.id()), + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![ + OmicronZoneConfig { + id: external_dns_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::ExternalDns { + dataset: random_dataset(), + http_address: "[::1]:80".to_string(), + dns_address: SocketAddr::new( + external_dns_ip, + 53, + ) + .to_string(), + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: external_dns_id, + }, + name: "external-dns".parse().unwrap(), + ip: external_dns_pip.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **DNS_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + }, + }, + OmicronZoneConfig { + id: ntp1_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::BoundaryNtp { + address: "[::1]:80".to_string(), + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: ntp1_id, + }, + name: "ntp1".parse().unwrap(), + ip: ntp1_pip.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **NTP_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + snat_cfg: SourceNatConfig { + ip: ntp1_ip, + first_port: 16384, + last_port: 32767, + }, + }, + }, + ], }, - }, - internal_params::ServicePutRequest { - service_id: nexus_id, - sled_id: sled2.id(), - zone_id: Some(nexus_id), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 456, 0, 0), - kind: internal_params::ServiceKind::Nexus { - external_address: nexus_ip, - nic: ServiceNic { - id: Uuid::new_v4(), - name: "nexus".parse().unwrap(), - ip: nexus_pip.into(), - mac: macs.next().unwrap(), - slot: 0, - }, + ) + .expect("recording Omicron zones"); + inventory_builder + .found_sled_omicron_zones( + "sled2", + SledUuid::from_untyped_uuid(sled2.id()), + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![ + 
OmicronZoneConfig { + id: nexus_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Nexus { + internal_address: "[::1]:80".to_string(), + external_ip: nexus_ip, + external_tls: false, + external_dns_servers: vec![], + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: nexus_id, + }, + name: "nexus".parse().unwrap(), + ip: nexus_pip.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **NEXUS_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + }, + }, + OmicronZoneConfig { + id: ntp2_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::BoundaryNtp { + address: "[::1]:80".to_string(), + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: ntp2_id, + }, + name: "ntp2".parse().unwrap(), + ip: ntp2_pip.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **NTP_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + snat_cfg: SourceNatConfig { + ip: ntp2_ip, + first_port: 0, + last_port: 16383, + }, + }, + }, + ], }, - }, - internal_params::ServicePutRequest { - service_id: ntp2_id, - sled_id: sled2.id(), - zone_id: Some(ntp2_id), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 9090, 0, 0), - kind: internal_params::ServiceKind::BoundaryNtp { - snat: SourceNatConfig { - ip: ntp2_ip, - first_port: 0, - last_port: 16383, - }, - nic: ServiceNic { - id: Uuid::new_v4(), - name: "ntp2".parse().unwrap(), - ip: ntp2_pip.into(), - mac: macs.next().unwrap(), - slot: 0, - }, + ) + .expect("recording Omicron zones"); + inventory_builder + .found_sled_omicron_zones( + "sled3", + SledUuid::from_untyped_uuid(sled3.id()), + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![OmicronZoneConfig { + id: ntp3_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::InternalNtp { + address: "[::1]:80".to_string(), + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, + }], }, - }, - internal_params::ServicePutRequest { - service_id: ntp3_id, - sled_id: sled3.id(), - zone_id: Some(ntp3_id), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 9090, 0, 0), - kind: internal_params::ServiceKind::InternalNtp, - }, - ]; + ) + .expect("recording Omicron zones"); + let blueprint = BlueprintBuilder::build_initial_from_collection_seeded( + &inventory_builder.build(), + *Generation::new(), + *Generation::new(), + planning_input.all_sled_ids(SledFilter::All), + "test suite", + (test_name, "initial blueprint"), + ) + .expect("failed to build blueprint"); let rack = datastore .rack_set_initialized( &opctx, RackInit { - services: services.clone(), + blueprint: blueprint.clone(), service_ip_pool_ranges, ..Default::default() }, @@ -1352,48 +1475,12 @@ mod test { assert_eq!(rack.id(), rack_id()); assert!(rack.initialized); - let observed_services = get_all_services(&datastore).await; - let observed_datasets = get_all_datasets(&datastore).await; - - // We should see all the services we initialized - assert_eq!(observed_services.len(), 5); - let dns_service = observed_services - .iter() - .find(|s| s.id() == external_dns_id) - .unwrap(); - let nexus_service = - observed_services.iter().find(|s| s.id() == nexus_id).unwrap(); - let ntp1_service = - observed_services.iter().find(|s| s.id() == ntp1_id).unwrap(); - let ntp2_service = - observed_services.iter().find(|s| s.id() == ntp2_id).unwrap(); - 
let ntp3_service = - observed_services.iter().find(|s| s.id() == ntp3_id).unwrap(); - - assert_eq!(dns_service.sled_id, sled1.id()); - assert_eq!(dns_service.kind, ServiceKind::ExternalDns); - assert_eq!(*dns_service.ip, Ipv6Addr::LOCALHOST); - assert_eq!(*dns_service.port, 123); - - assert_eq!(nexus_service.sled_id, sled2.id()); - assert_eq!(nexus_service.kind, ServiceKind::Nexus); - assert_eq!(*nexus_service.ip, Ipv6Addr::LOCALHOST); - assert_eq!(*nexus_service.port, 456); - - assert_eq!(ntp1_service.sled_id, sled1.id()); - assert_eq!(ntp1_service.kind, ServiceKind::Ntp); - assert_eq!(*ntp1_service.ip, Ipv6Addr::LOCALHOST); - assert_eq!(*ntp1_service.port, 9090); - - assert_eq!(ntp2_service.sled_id, sled2.id()); - assert_eq!(ntp2_service.kind, ServiceKind::Ntp); - assert_eq!(*ntp2_service.ip, Ipv6Addr::LOCALHOST); - assert_eq!(*ntp2_service.port, 9090); - - assert_eq!(ntp3_service.sled_id, sled3.id()); - assert_eq!(ntp3_service.kind, ServiceKind::Ntp); - assert_eq!(*ntp3_service.ip, Ipv6Addr::LOCALHOST); - assert_eq!(*ntp3_service.port, 9090); + // We should see the blueprint we passed in. + let (_blueprint_target, observed_blueprint) = datastore + .blueprint_target_get_current_full(&opctx) + .await + .expect("failed to read blueprint"); + assert_eq!(observed_blueprint, blueprint); // We should also see the single external IP allocated for each service // save for the non-boundary NTP service. @@ -1419,21 +1506,17 @@ mod test { .iter() .any(|e| e.parent_id == Some(ntp3_id))); - assert_eq!(dns_external_ip.parent_id, Some(dns_service.id())); assert!(dns_external_ip.is_service); assert_eq!(dns_external_ip.kind, IpKind::Floating); - assert_eq!(nexus_external_ip.parent_id, Some(nexus_service.id())); assert!(nexus_external_ip.is_service); assert_eq!(nexus_external_ip.kind, IpKind::Floating); - assert_eq!(ntp1_external_ip.parent_id, Some(ntp1_service.id())); assert!(ntp1_external_ip.is_service); assert_eq!(ntp1_external_ip.kind, IpKind::SNat); assert_eq!(ntp1_external_ip.first_port.0, 16384); assert_eq!(ntp1_external_ip.last_port.0, 32767); - assert_eq!(ntp2_external_ip.parent_id, Some(ntp2_service.id())); assert!(ntp2_external_ip.is_service); assert_eq!(ntp2_external_ip.kind, IpKind::SNat); assert_eq!(ntp2_external_ip.first_port.0, 0); @@ -1478,6 +1561,7 @@ mod test { ); assert_eq!(ntp2_external_ip.ip.ip(), ntp2_ip); + let observed_datasets = get_all_datasets(&datastore).await; assert!(observed_datasets.is_empty()); db.cleanup().await.unwrap(); @@ -1486,9 +1570,8 @@ mod test { #[tokio::test] async fn rack_set_initialized_with_many_nexus_services() { - let logctx = dev::test_setup_log( - "rack_set_initialized_with_many_nexus_services", - ); + let test_name = "rack_set_initialized_with_many_nexus_services"; + let logctx = dev::test_setup_log(test_name); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; @@ -1497,6 +1580,25 @@ mod test { // Ask for two Nexus services, with different external IPs. 
let nexus_ip_start = Ipv4Addr::new(1, 2, 3, 4); let nexus_ip_end = Ipv4Addr::new(1, 2, 3, 5); + let service_ip_pool_ranges = + vec![IpRange::try_from((nexus_ip_start, nexus_ip_end)) + .expect("Cannot create IP Range")]; + + let mut system = SystemDescription::new(); + system + .service_ip_pool_ranges(service_ip_pool_ranges.clone()) + .sled( + SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled.id())), + ) + .expect("failed to add sled"); + let planning_input = system + .to_planning_input_builder() + .expect("failed to make planning input") + .build(); + let mut inventory_builder = system + .to_collection_builder() + .expect("failed to make collection builder"); + let nexus_id1 = Uuid::new_v4(); let nexus_id2 = Uuid::new_v4(); let nexus_pip1 = NEXUS_OPTE_IPV4_SUBNET @@ -1506,47 +1608,72 @@ mod test { .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 2) .unwrap(); let mut macs = MacAddr::iter_system(); - let mut services = vec![ - internal_params::ServicePutRequest { - service_id: nexus_id1, - sled_id: sled.id(), - zone_id: Some(nexus_id1), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0), - kind: internal_params::ServiceKind::Nexus { - external_address: IpAddr::V4(nexus_ip_start), - nic: ServiceNic { - id: Uuid::new_v4(), - name: "nexus1".parse().unwrap(), - ip: nexus_pip1.into(), - mac: macs.next().unwrap(), - slot: 0, - }, - }, - }, - internal_params::ServicePutRequest { - service_id: nexus_id2, - sled_id: sled.id(), - zone_id: Some(nexus_id2), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 456, 0, 0), - kind: internal_params::ServiceKind::Nexus { - external_address: IpAddr::V4(nexus_ip_end), - nic: ServiceNic { - id: Uuid::new_v4(), - name: "nexus2".parse().unwrap(), - ip: nexus_pip2.into(), - mac: macs.next().unwrap(), - slot: 0, - }, + + inventory_builder + .found_sled_omicron_zones( + "sled", + SledUuid::from_untyped_uuid(sled.id()), + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![ + OmicronZoneConfig { + id: nexus_id1, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Nexus { + internal_address: "[::1]:80".to_string(), + external_ip: nexus_ip_start.into(), + external_tls: false, + external_dns_servers: vec![], + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: nexus_id1, + }, + name: "nexus1".parse().unwrap(), + ip: nexus_pip1.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **NEXUS_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + }, + }, + OmicronZoneConfig { + id: nexus_id2, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Nexus { + internal_address: "[::1]:80".to_string(), + external_ip: nexus_ip_end.into(), + external_tls: false, + external_dns_servers: vec![], + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: nexus_id2, + }, + name: "nexus2".parse().unwrap(), + ip: nexus_pip2.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **NEXUS_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + }, + }, + ], }, - }, - ]; - services - .sort_by(|a, b| a.service_id.partial_cmp(&b.service_id).unwrap()); + ) + .expect("recording Omicron zones"); let datasets = vec![]; - let service_ip_pool_ranges = - vec![IpRange::try_from((nexus_ip_start, nexus_ip_end)) - .expect("Cannot create IP Range")]; let internal_records = vec![ DnsRecord::Aaaa("fe80::1:2:3:4".parse().unwrap()), @@ -1570,11 +1697,21 @@ mod test { 
HashMap::from([("api.sys".to_string(), external_records.clone())]), ); + let blueprint = BlueprintBuilder::build_initial_from_collection_seeded( + &inventory_builder.build(), + *Generation::new(), + *Generation::new(), + planning_input.all_sled_ids(SledFilter::All), + "test suite", + (test_name, "initial blueprint"), + ) + .expect("failed to build blueprint"); + let rack = datastore .rack_set_initialized( &opctx, RackInit { - services: services.clone(), + blueprint: blueprint.clone(), datasets: datasets.clone(), service_ip_pool_ranges, internal_dns, @@ -1588,21 +1725,20 @@ mod test { assert_eq!(rack.id(), rack_id()); assert!(rack.initialized); - let mut observed_services = get_all_services(&datastore).await; - let observed_datasets = get_all_datasets(&datastore).await; + // We should see the blueprint we passed in. + let (_blueprint_target, observed_blueprint) = datastore + .blueprint_target_get_current_full(&opctx) + .await + .expect("failed to read blueprint"); + assert_eq!(observed_blueprint, blueprint); // We should see both of the Nexus services we provisioned. - assert_eq!(observed_services.len(), 2); - observed_services.sort_by(|a, b| a.id().partial_cmp(&b.id()).unwrap()); - - assert_eq!(observed_services[0].sled_id, sled.id()); - assert_eq!(observed_services[1].sled_id, sled.id()); - assert_eq!(observed_services[0].kind, ServiceKind::Nexus); - assert_eq!(observed_services[1].kind, ServiceKind::Nexus); - assert_eq!(*observed_services[0].ip, Ipv6Addr::LOCALHOST); - assert_eq!(*observed_services[1].ip, Ipv6Addr::LOCALHOST); - assert_eq!(*observed_services[0].port, services[0].address.port()); - assert_eq!(*observed_services[1].port, services[1].address.port()); + let mut observed_zones: Vec<_> = observed_blueprint + .all_omicron_zones(BlueprintZoneFilter::All) + .map(|(_, z)| z) + .collect(); + observed_zones.sort_by_key(|z| z.id); + assert_eq!(observed_zones.len(), 2); // We should see both IPs allocated for these services. let observed_external_ips = get_all_external_ips(&datastore).await; @@ -1619,25 +1755,29 @@ mod test { // The address allocated for the service should match the input. assert_eq!( - observed_external_ips[&observed_services[0].id()].ip.ip(), - if let internal_params::ServiceKind::Nexus { - external_address, - .. - } = services[0].kind + observed_external_ips[&observed_zones[0].id].ip.ip(), + if let OmicronZoneType::Nexus { external_ip, .. } = &blueprint + .all_omicron_zones(BlueprintZoneFilter::All) + .next() + .unwrap() + .1 + .zone_type { - external_address + *external_ip } else { - panic!("Unexpected service kind") + panic!("Unexpected zone type") } ); assert_eq!( - observed_external_ips[&observed_services[1].id()].ip.ip(), - if let internal_params::ServiceKind::Nexus { - external_address, - .. - } = services[1].kind + observed_external_ips[&observed_zones[1].id].ip.ip(), + if let OmicronZoneType::Nexus { external_ip, .. } = &blueprint + .all_omicron_zones(BlueprintZoneFilter::All) + .nth(1) + .unwrap() + .1 + .zone_type { - external_address + *external_ip } else { panic!("Unexpected service kind") } @@ -1653,6 +1793,7 @@ mod test { assert_eq!(observed_ip_pool_ranges.len(), 1); assert_eq!(observed_ip_pool_ranges[0].ip_pool_id, svc_pool.id()); + let observed_datasets = get_all_datasets(&datastore).await; assert!(observed_datasets.is_empty()); // Verify the internal and external DNS configurations. 
@@ -1692,41 +1833,84 @@ mod test { #[tokio::test] async fn rack_set_initialized_missing_service_pool_ip_throws_error() { - let logctx = dev::test_setup_log( - "rack_set_initialized_missing_service_pool_ip_throws_error", - ); + let test_name = + "rack_set_initialized_missing_service_pool_ip_throws_error"; + let logctx = dev::test_setup_log(test_name); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; let sled = create_test_sled(&datastore).await; + let mut system = SystemDescription::new(); + system + .sled( + SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled.id())), + ) + .expect("failed to add sled"); + let planning_input = system + .to_planning_input_builder() + .expect("failed to make planning input") + .build(); + let mut inventory_builder = system + .to_collection_builder() + .expect("failed to make collection builder"); + let nexus_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)); let nexus_pip = NEXUS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) .unwrap(); let nexus_id = Uuid::new_v4(); let mut macs = MacAddr::iter_system(); - let services = vec![internal_params::ServicePutRequest { - service_id: nexus_id, - sled_id: sled.id(), - zone_id: Some(nexus_id), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0), - kind: internal_params::ServiceKind::Nexus { - external_address: nexus_ip, - nic: ServiceNic { - id: Uuid::new_v4(), - name: "nexus".parse().unwrap(), - ip: nexus_pip.into(), - mac: macs.next().unwrap(), - slot: 0, + inventory_builder + .found_sled_omicron_zones( + "sled", + SledUuid::from_untyped_uuid(sled.id()), + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![OmicronZoneConfig { + id: nexus_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Nexus { + internal_address: "[::1]:80".to_string(), + external_ip: nexus_ip, + external_tls: false, + external_dns_servers: vec![], + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: nexus_id, + }, + name: "nexus".parse().unwrap(), + ip: nexus_pip.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **NEXUS_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + }, + }], }, - }, - }]; + ) + .expect("recording Omicron zones"); + + let blueprint = BlueprintBuilder::build_initial_from_collection_seeded( + &inventory_builder.build(), + *Generation::new(), + *Generation::new(), + planning_input.all_sled_ids(SledFilter::All), + "test suite", + (test_name, "initial blueprint"), + ) + .expect("failed to build blueprint"); let result = datastore .rack_set_initialized( &opctx, - RackInit { services: services.clone(), ..Default::default() }, + RackInit { blueprint: blueprint.clone(), ..Default::default() }, ) .await; assert!(result.is_err()); @@ -1735,7 +1919,6 @@ mod test { "Invalid Request: Requested external IP address not available" ); - assert!(get_all_services(&datastore).await.is_empty()); assert!(get_all_datasets(&datastore).await.is_empty()); assert!(get_all_external_ips(&datastore).await.is_empty()); @@ -1745,16 +1928,32 @@ mod test { #[tokio::test] async fn rack_set_initialized_overlapping_ips_throws_error() { - let logctx = dev::test_setup_log( - "rack_set_initialized_overlapping_ips_throws_error", - ); + let test_name = "rack_set_initialized_overlapping_ips_throws_error"; + let logctx = dev::test_setup_log(test_name); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = 
datastore_test(&logctx, &db).await; let sled = create_test_sled(&datastore).await; - // Request two services which happen to be using the same IP address. let ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)); + let service_ip_pool_ranges = vec![IpRange::from(ip)]; + + let mut system = SystemDescription::new(); + system + .service_ip_pool_ranges(service_ip_pool_ranges.clone()) + .sled( + SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled.id())), + ) + .expect("failed to add sled"); + let planning_input = system + .to_planning_input_builder() + .expect("failed to make planning input") + .build(); + let mut inventory_builder = system + .to_collection_builder() + .expect("failed to make collection builder"); + + // Request two services which happen to be using the same IP address. let external_dns_id = Uuid::new_v4(); let external_dns_pip = DNS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) @@ -1765,48 +1964,86 @@ mod test { .unwrap(); let mut macs = MacAddr::iter_system(); - let services = vec![ - internal_params::ServicePutRequest { - service_id: external_dns_id, - sled_id: sled.id(), - zone_id: Some(external_dns_id), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0), - kind: internal_params::ServiceKind::ExternalDns { - external_address: ip, - nic: ServiceNic { - id: Uuid::new_v4(), - name: "external-dns".parse().unwrap(), - ip: external_dns_pip.into(), - mac: macs.next().unwrap(), - slot: 0, - }, - }, - }, - internal_params::ServicePutRequest { - service_id: nexus_id, - sled_id: sled.id(), - zone_id: Some(nexus_id), - address: SocketAddrV6::new(Ipv6Addr::LOCALHOST, 123, 0, 0), - kind: internal_params::ServiceKind::Nexus { - external_address: ip, - nic: ServiceNic { - id: Uuid::new_v4(), - name: "nexus".parse().unwrap(), - ip: nexus_pip.into(), - mac: macs.next().unwrap(), - slot: 0, - }, + inventory_builder + .found_sled_omicron_zones( + "sled", + SledUuid::from_untyped_uuid(sled.id()), + OmicronZonesConfig { + generation: Generation::new().next(), + zones: vec![ + OmicronZoneConfig { + id: external_dns_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::ExternalDns { + dataset: random_dataset(), + http_address: "[::1]:80".to_string(), + dns_address: SocketAddr::new(ip, 53) + .to_string(), + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: external_dns_id, + }, + name: "external-dns".parse().unwrap(), + ip: external_dns_pip.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **DNS_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + }, + }, + OmicronZoneConfig { + id: nexus_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: OmicronZoneType::Nexus { + internal_address: "[::1]:80".to_string(), + external_ip: ip, + external_tls: false, + external_dns_servers: vec![], + nic: NetworkInterface { + id: Uuid::new_v4(), + kind: NetworkInterfaceKind::Service { + id: nexus_id, + }, + name: "nexus".parse().unwrap(), + ip: nexus_pip.into(), + mac: macs.next().unwrap(), + subnet: IpNetwork::from( + **NEXUS_OPTE_IPV4_SUBNET, + ) + .into(), + vni: Vni::SERVICES_VNI, + primary: true, + slot: 0, + }, + }, + }, + ], }, - }, - ]; - let service_ip_pool_ranges = vec![IpRange::from(ip)]; + ) + .expect("recording Omicron zones"); + + let blueprint = BlueprintBuilder::build_initial_from_collection_seeded( + &inventory_builder.build(), + *Generation::new(), + *Generation::new(), + planning_input.all_sled_ids(SledFilter::All), + "test suite", + (test_name, 
"initial blueprint"), + ) + .expect("failed to build blueprint"); let result = datastore .rack_set_initialized( &opctx, RackInit { rack_id: rack_id(), - services: services.clone(), + blueprint: blueprint.clone(), service_ip_pool_ranges, ..Default::default() }, @@ -1818,7 +2055,6 @@ mod test { "Invalid Request: Requested external IP address not available", ); - assert!(get_all_services(&datastore).await.is_empty()); assert!(get_all_datasets(&datastore).await.is_empty()); assert!(get_all_external_ips(&datastore).await.is_empty()); diff --git a/nexus/db-queries/src/db/datastore/service.rs b/nexus/db-queries/src/db/datastore/service.rs deleted file mode 100644 index df7ed27a6d..0000000000 --- a/nexus/db-queries/src/db/datastore/service.rs +++ /dev/null @@ -1,115 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! [`DataStore`] methods on [`Service`]s. - -use super::DataStore; -use crate::authz; -use crate::context::OpContext; -use crate::db; -use crate::db::collection_insert::AsyncInsertError; -use crate::db::collection_insert::DatastoreCollection; -use crate::db::error::public_error_from_diesel; -use crate::db::error::retryable; -use crate::db::error::ErrorHandler; -use crate::db::error::TransactionError; -use crate::db::identity::Asset; -use crate::db::model::Service; -use crate::db::model::Sled; -use crate::db::pagination::paginated; -use crate::db::pool::DbConnection; -use async_bb8_diesel::AsyncRunQueryDsl; -use chrono::Utc; -use diesel::prelude::*; -use diesel::upsert::excluded; -use nexus_db_model::ServiceKind; -use omicron_common::api::external::CreateResult; -use omicron_common::api::external::DataPageParams; -use omicron_common::api::external::Error; -use omicron_common::api::external::ListResultVec; -use omicron_common::api::external::LookupType; -use omicron_common::api::external::ResourceType; -use uuid::Uuid; - -impl DataStore { - /// Stores a new service in the database. - pub async fn service_upsert( - &self, - opctx: &OpContext, - service: Service, - ) -> CreateResult { - let conn = self.pool_connection_authorized(opctx).await?; - self.service_upsert_conn(&conn, service).await.map_err(|e| match e { - TransactionError::CustomError(err) => err, - TransactionError::Database(err) => { - public_error_from_diesel(err, ErrorHandler::Server) - } - }) - } - - /// Stores a new service in the database (using an existing db connection). 
- pub(crate) async fn service_upsert_conn( - &self, - conn: &async_bb8_diesel::Connection, - service: Service, - ) -> Result> { - use db::schema::service::dsl; - - let service_id = service.id(); - let sled_id = service.sled_id; - Sled::insert_resource( - sled_id, - diesel::insert_into(dsl::service) - .values(service) - .on_conflict(dsl::id) - .do_update() - .set(( - dsl::time_modified.eq(Utc::now()), - dsl::sled_id.eq(excluded(dsl::sled_id)), - dsl::ip.eq(excluded(dsl::ip)), - dsl::port.eq(excluded(dsl::port)), - dsl::kind.eq(excluded(dsl::kind)), - )), - ) - .insert_and_get_result_async(conn) - .await - .map_err(|e| match e { - AsyncInsertError::CollectionNotFound => { - TransactionError::CustomError(Error::ObjectNotFound { - type_name: ResourceType::Sled, - lookup_type: LookupType::ById(sled_id), - }) - } - AsyncInsertError::DatabaseError(e) => { - if retryable(&e) { - return TransactionError::Database(e); - } - TransactionError::CustomError(public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::Service, - &service_id.to_string(), - ), - )) - } - }) - } - - /// List services of a given kind - pub async fn services_list_kind( - &self, - opctx: &OpContext, - kind: ServiceKind, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - use db::schema::service::dsl; - paginated(dsl::service, dsl::id, pagparams) - .filter(dsl::kind.eq(kind)) - .select(Service::as_select()) - .load_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } -} diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index f08854734d..079f52ba8c 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -655,7 +655,7 @@ impl DataStore { // Sleds to notify when firewall rules change. use db::schema::{ bp_omicron_zone, bp_target, instance, instance_network_interface, - service, service_network_interface, sled, vmm, + service_network_interface, sled, vmm, }; // Diesel requires us to use aliases in order to refer to the // `bp_target` table twice in the same query. @@ -677,25 +677,7 @@ impl DataStore { .filter(vmm::time_deleted.is_null()) .select(Sled::as_select()); - // When Nexus accepts the rack initialization handoff from RSS, it - // populates the `service` table. We eventually want to retire it - // (https://github.com/oxidecomputer/omicron/issues/4947), and the - // Reconfigurator does not add new entries to it. We still need to query - // it for systems that are not yet under Reconfigurator control... - let rss_service_query = service_network_interface::table - .inner_join( - service::table - .on(service::id.eq(service_network_interface::service_id)), - ) - .inner_join(sled::table.on(sled::id.eq(service::sled_id))) - .filter(service_network_interface::vpc_id.eq(vpc_id)) - .filter(service_network_interface::time_deleted.is_null()) - .select(Sled::as_select()); - - // ... and we also need to query for the current target blueprint to - // support systems that _are_ under Reconfigurator control. 
- - - let reconfig_service_query = service_network_interface::table + let service_query = service_network_interface::table .inner_join(bp_omicron_zone::table.on( bp_omicron_zone::id.eq(service_network_interface::service_id), )) @@ -740,11 +722,7 @@ impl DataStore { let conn = self.pool_connection_unauthorized().await?; sleds - .intersect( - instance_query - .union(rss_service_query) - .union(reconfig_service_query), - ) + .intersect(instance_query.union(service_query)) .get_results_async(&*conn) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) @@ -1255,8 +1233,6 @@ mod tests { use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use crate::db::model::Project; use crate::db::queries::vpc::MAX_VNI_SEARCH_RANGE_SIZE; - use async_bb8_diesel::AsyncConnection; - use async_bb8_diesel::AsyncSimpleConnection; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_db_model::SledUpdate; use nexus_test_utils::db::test_setup_database; @@ -1511,6 +1487,7 @@ mod tests { #[derive(Debug)] struct HarnessNexus { + sled_id: SledUuid, id: Uuid, ip: IpAddr, mac: MacAddr, @@ -1528,8 +1505,11 @@ mod tests { .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) .map(IpAddr::from); let mut nexus_macs = MacAddr::iter_system(); - let nexuses = (0..num_sleds) - .map(|_| HarnessNexus { + let nexuses = sled_ids + .iter() + .copied() + .map(|sled_id| HarnessNexus { + sled_id, id: Uuid::new_v4(), ip: nexus_ips.next().unwrap(), mac: nexus_macs.next().unwrap(), @@ -1552,21 +1532,13 @@ mod tests { }) } - fn db_services( + fn db_nics( &self, - ) -> impl Iterator< - Item = (db::model::Service, db::model::IncompleteNetworkInterface), - > + '_ { - self.sled_ids.iter().zip(&self.nexuses).map(|(sled_id, nexus)| { - let service = db::model::Service::new( - nexus.id, - sled_id.into_untyped_uuid(), - Some(nexus.id), - "[::1]:0".parse().unwrap(), - db::model::ServiceKind::Nexus, - ); + ) -> impl Iterator<Item = db::model::IncompleteNetworkInterface> + '_ + { + self.nexuses.iter().map(|nexus| { let name = format!("test-nexus-{}", nexus.id); - let nic = db::model::IncompleteNetworkInterface::new_service( + db::model::IncompleteNetworkInterface::new_service( nexus.nic_id, nexus.id, NEXUS_VPC_SUBNET.clone(), @@ -1578,17 +1550,16 @@ mod tests { nexus.mac, 0, ) - .expect("failed to create incomplete Nexus NIC"); - (service, nic) + .expect("failed to create incomplete Nexus NIC") }) } fn blueprint_zone_configs( &self, ) -> impl Iterator<Item = (Uuid, BlueprintZoneConfig)> + '_ { - self.db_services().map(|(service, nic)| { + self.nexuses.iter().zip(self.db_nics()).map(|(nexus, nic)| { let config = OmicronZoneConfig { - id: service.id(), + id: nexus.id, underlay_address: "::1".parse().unwrap(), zone_type: OmicronZoneType::Nexus { internal_address: "[::1]:0".to_string(), @@ -1596,7 +1567,7 @@ mod tests { nic: NetworkInterface { id: nic.identity.id, kind: NetworkInterfaceKind::Service { - id: service.id(), + id: nexus.id, }, name: format!("test-nic-{}", nic.identity.id) .parse() @@ -1616,11 +1587,26 @@ mod tests { config, disposition: BlueprintZoneDisposition::InService, }; - (service.sled_id, zone_config) + (nexus.sled_id.into_untyped_uuid(), zone_config) }) } } + async fn assert_service_sled_ids( + datastore: &DataStore, + expected_sled_ids: &[SledUuid], + ) { + let mut service_sled_ids = datastore + .vpc_resolve_to_sleds(*SERVICES_VPC_ID, &[]) + .await + .expect("failed to resolve to sleds") + .into_iter() + .map(|sled| SledUuid::from_untyped_uuid(sled.id())) + .collect::<Vec<_>>(); + service_sled_ids.sort(); + assert_eq!(expected_sled_ids, service_sled_ids); + } + #[tokio::test] async fn
test_vpc_resolve_to_sleds_uses_current_target_blueprint() { // Test setup. @@ -1631,40 +1617,15 @@ mod tests { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - // Helper function to fetch and sort the IDs of sleds we've resolved the - // SERVICES_VPC_ID to. - let fetch_service_sled_ids = || async { - let mut service_sled_ids = datastore - .vpc_resolve_to_sleds(*SERVICES_VPC_ID, &[]) - .await - .expect("failed to resolve to sleds") - .into_iter() - .map(|sled| SledUuid::from_untyped_uuid(sled.id())) - .collect::>(); - service_sled_ids.sort(); - service_sled_ids - }; - // Create five sleds. let harness = Harness::new(5); for sled in harness.db_sleds() { datastore.sled_upsert(sled).await.expect("failed to upsert sled"); } - // Insert two Nexus records into `service`, emulating RSS. - for (service, nic) in harness.db_services().take(2) { - datastore - .service_upsert(&opctx, service) - .await - .expect("failed to insert RSS-like service"); - datastore - .service_create_network_interface_raw(&opctx, nic) - .await - .expect("failed to insert Nexus NIC"); - } - - // Ensure we find the two sleds we expect after adding Nexus records. - assert_eq!(&harness.sled_ids[..2], fetch_service_sled_ids().await); + // We don't have a blueprint yet, so we shouldn't find any services on + // sleds. + assert_service_sled_ids(&datastore, &[]).await; // Create a blueprint that has a Nexus on our third sled. (This // blueprint is completely invalid in many ways, but all we care about @@ -1701,9 +1662,9 @@ mod tests { .await .expect("failed to insert blueprint"); - // We haven't set a blueprint target yet, so we should still only see - // the two RSS-inserted service-running sleds. - assert_eq!(&harness.sled_ids[..2], fetch_service_sled_ids().await); + // We haven't set a blueprint target yet, so we should still fail to see + // any services on sleds. + assert_service_sled_ids(&datastore, &[]).await; // Make bp1 the current target. datastore @@ -1719,21 +1680,21 @@ mod tests { .expect("failed to set blueprint target"); // bp1 is the target, but we haven't yet inserted a vNIC record, so - // we'll still only see the original 2 sleds. - assert_eq!(&harness.sled_ids[..2], fetch_service_sled_ids().await); + // we still won't see any services on sleds. + assert_service_sled_ids(&datastore, &[]).await; // Insert the relevant service NIC record (normally performed by the // reconfigurator's executor). datastore .service_create_network_interface_raw( &opctx, - harness.db_services().nth(2).unwrap().1, + harness.db_nics().nth(2).unwrap(), ) .await .expect("failed to insert service VNIC"); - // We should now see _three_ sleds running services. - assert_eq!(&harness.sled_ids[..3], fetch_service_sled_ids().await); + // We should now see our third sled running a service. + assert_service_sled_ids(&datastore, &[harness.sled_ids[2]]).await; // Create another blueprint with no services and make it the target. let bp2_id = Uuid::new_v4(); @@ -1765,20 +1726,19 @@ mod tests { .expect("failed to set blueprint target"); // We haven't removed the service NIC record, but we should no longer - // see the third sled here, because we should be back to just the - // original two services in the `service` table. - assert_eq!(&harness.sled_ids[..2], fetch_service_sled_ids().await); + // see the third sled here. We should be back to no sleds with services. + assert_service_sled_ids(&datastore, &[]).await; // Insert a service NIC record for our fourth sled's Nexus. 
This // shouldn't change our VPC resolution. datastore .service_create_network_interface_raw( &opctx, - harness.db_services().nth(3).unwrap().1, + harness.db_nics().nth(3).unwrap(), ) .await .expect("failed to insert service VNIC"); - assert_eq!(&harness.sled_ids[..2], fetch_service_sled_ids().await); + assert_service_sled_ids(&datastore, &[]).await; // Create a blueprint that has a Nexus on our fourth sled. This // shouldn't change our VPC resolution. @@ -1813,7 +1773,7 @@ mod tests { .blueprint_insert(&opctx, &bp3) .await .expect("failed to insert blueprint"); - assert_eq!(&harness.sled_ids[..2], fetch_service_sled_ids().await); + assert_service_sled_ids(&datastore, &[]).await; // Make this blueprint the target. We've already created the service // VNIC, so we should immediately see our fourth sled in VPC resolution. @@ -1828,11 +1788,7 @@ mod tests { ) .await .expect("failed to set blueprint target"); - assert_eq!( - &[harness.sled_ids[0], harness.sled_ids[1], harness.sled_ids[3]] - as &[SledUuid], - fetch_service_sled_ids().await - ); + assert_service_sled_ids(&datastore, &[harness.sled_ids[3]]).await; // --- @@ -1842,7 +1798,7 @@ mod tests { datastore .service_create_network_interface_raw( &opctx, - harness.db_services().nth(4).unwrap().1, + harness.db_nics().nth(4).unwrap(), ) .await .expect("failed to insert service VNIC"); @@ -1888,7 +1844,7 @@ mod tests { ) .await .expect("failed to set blueprint target"); - assert_eq!(harness.sled_ids, fetch_service_sled_ids().await); + assert_service_sled_ids(&datastore, &harness.sled_ids[2..]).await; // --- @@ -1904,8 +1860,7 @@ mod tests { .setup(&opctx, &datastore) .await .expect("failed to set up ineligible sleds"); - - assert_eq!(&harness.sled_ids[3..=4], fetch_service_sled_ids().await); + assert_service_sled_ids(&datastore, &harness.sled_ids[3..=4]).await; // --- @@ -1915,35 +1870,6 @@ mod tests { .await .expect("failed to undo ineligible sleds"); - // Clear out the service table entirely so we're only testing - // blueprints. (The services table is going to go away soon so this is - // an easy workaround for now.) - { - use db::schema::service::dsl; - - let conn = datastore - .pool_connection_authorized(&opctx) - .await - .expect("getting a connection succeeded"); - conn.transaction_async(|conn| async move { - // Need to do a full table scan for a full delete. - conn.batch_execute_async( - nexus_test_utils::db::ALLOW_FULL_TABLE_SCAN_SQL, - ) - .await - .expect("allowing full table scan succeeded"); - - diesel::delete(dsl::service) - .execute_async(&conn) - .await - .expect("failed to delete services"); - - Ok::<_, DieselError>(()) - }) - .await - .expect("transaction succeed"); - } - // Make a new blueprint marking one of the zones as quiesced and one as // expunged. Ensure that the sled with *quiesced* zone is returned by // vpc_resolve_to_sleds, but the sled with the *expunged* zone is not. @@ -1963,6 +1889,15 @@ mod tests { }, ); + // We never created a vNIC record for sled 1; do so now. + datastore + .service_create_network_interface_raw( + &opctx, + harness.db_nics().nth(1).unwrap(), + ) + .await + .expect("failed to insert service VNIC"); + // Sled index 2's zone is quiesced (should be included). 
let (sled_id, mut zone_config) = iter.next().unwrap(); zone_config.disposition = BlueprintZoneDisposition::Quiesced; @@ -2018,7 +1953,7 @@ mod tests { ) .await .expect("failed to set blueprint target"); - assert_eq!(&harness.sled_ids[1..=2], fetch_service_sled_ids().await); + assert_service_sled_ids(&datastore, &harness.sled_ids[1..=2]).await; db.cleanup().await.unwrap(); logctx.cleanup_successful(); diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index fe44d5c25a..5165dcf3ea 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -1198,8 +1198,7 @@ mod test { let (_blueprint_target, blueprint) = datastore .blueprint_target_get_current_full(&opctx) .await - .expect("failed to read current target blueprint") - .expect("no target blueprint set"); + .expect("failed to read current target blueprint"); eprintln!("blueprint: {}", blueprint.display()); // Now, execute the initial blueprint. diff --git a/nexus/src/app/background/blueprint_load.rs b/nexus/src/app/background/blueprint_load.rs index d9f6411721..9c13d8e70a 100644 --- a/nexus/src/app/background/blueprint_load.rs +++ b/nexus/src/app/background/blueprint_load.rs @@ -54,12 +54,15 @@ impl BackgroundTask for TargetBlueprintLoader { }; // Retrieve the latest target blueprint - let result = - self.datastore.blueprint_target_get_current_full(opctx).await; - - // Decide what to do with the result - match (&mut self.last, result) { - (_, Err(error)) => { + let (new_bp_target, new_blueprint) = match self + .datastore + .blueprint_target_get_current_full(opctx) + .await + { + Ok((new_bp_target, new_blueprint)) => { + (new_bp_target, new_blueprint) + } + Err(error) => { // We failed to read the blueprint. There's nothing to do // but log an error. We'll retry when we're activated again. let message = format!("{:#}", error); @@ -70,40 +73,84 @@ impl BackgroundTask for TargetBlueprintLoader { ); let e = format!("failed to read target blueprint: {message}"); - json!({"error": e}) - } - (None, Ok(None)) => { - // We haven't found a blueprint yet. Do nothing. - json!({"status": "no target blueprint"}) + return json!({"error": e}); } - (Some(old), Ok(None)) => { - // We have transitioned from having a blueprint to not - // having one. This should not happen. + }; + + // Decide what to do with the new blueprint + let Some((old_bp_target, old_blueprint)) = self.last.as_deref() + else { + // We've found a target blueprint for the first time. + // Save it and notify any watchers. 
+ let target_id = new_blueprint.id; + let time_created = new_blueprint.time_created; + info!( + log, + "found new target blueprint (first find)"; + "target_id" => %target_id, + "time_created" => %time_created + ); + self.last = Some(Arc::new((new_bp_target, new_blueprint))); + self.tx.send_replace(self.last.clone()); + return json!({ + "target_id": target_id, + "time_created": time_created, + "time_found": chrono::Utc::now(), + "status": "first target blueprint", + }); + }; + + let target_id = new_blueprint.id; + let time_created = new_blueprint.time_created; + if old_blueprint.id != new_blueprint.id { + // The current target blueprint has been updated + info!( + log, + "found new target blueprint"; + "target_id" => %target_id, + "time_created" => %time_created + ); + self.last = Some(Arc::new((new_bp_target, new_blueprint))); + self.tx.send_replace(self.last.clone()); + json!({ + "target_id": target_id, + "time_created": time_created, + "time_found": chrono::Utc::now(), + "status": "target blueprint updated" + }) + } else { + // The new target id matches the old target id + // + // Let's see if the blueprints hold the same contents. + // It should not be possible for the contents of a + // blueprint to change, but we check to catch possible + // bugs further up the stack. + if *old_blueprint != new_blueprint { let message = format!( - "target blueprint with id {} was removed. There is no \ - longer any target blueprint", - old.1.id + "blueprint for id {} changed. \ + Blueprints are supposed to be immutable.", + target_id ); - let old_id = old.1.id; - self.last = None; - self.tx.send_replace(self.last.clone()); - error!(&log, "{message:?}"); + error!(&log, "{}", message); json!({ - "removed_target_id": old_id, - "status": "no target blueprint (removed)", + "target_id": target_id, + "status": "target blueprint unchanged (error)", "error": message }) - } - (None, Ok(Some((new_bp_target, new_blueprint)))) => { - // We've found a target blueprint for the first time. - // Save it and notify any watchers. - let target_id = new_blueprint.id; - let time_created = new_blueprint.time_created; + } else if old_bp_target.enabled != new_bp_target.enabled { + // The blueprints have the same contents, but its + // enabled bit has flipped. + let status = if new_bp_target.enabled { + "enabled" + } else { + "disabled" + }; info!( log, - "found new target blueprint (first find)"; + "target blueprint enabled state changed"; "target_id" => %target_id, - "time_created" => %time_created + "time_created" => %time_created, + "state" => status, ); self.last = Some(Arc::new((new_bp_target, new_blueprint))); self.tx.send_replace(self.last.clone()); @@ -111,89 +158,23 @@ impl BackgroundTask for TargetBlueprintLoader { "target_id": target_id, "time_created": time_created, "time_found": chrono::Utc::now(), - "status": "first target blueprint", + "status": format!("target blueprint {status}"), + }) + } else { + // We found a new target blueprint that exactly + // matches the old target blueprint. This is the + // common case when we're activated by a timeout. 
+ debug!( + log, + "found latest target blueprint (unchanged)"; + "target_id" => %target_id, + "time_created" => %time_created.clone() + ); + json!({ + "target_id": target_id, + "time_created": time_created, + "status": "target blueprint unchanged" }) - } - (Some(old), Ok(Some((new_bp_target, new_blueprint)))) => { - let target_id = new_blueprint.id; - let time_created = new_blueprint.time_created; - if old.1.id != new_blueprint.id { - // The current target blueprint has been updated - info!( - log, - "found new target blueprint"; - "target_id" => %target_id, - "time_created" => %time_created - ); - self.last = - Some(Arc::new((new_bp_target, new_blueprint))); - self.tx.send_replace(self.last.clone()); - json!({ - "target_id": target_id, - "time_created": time_created, - "time_found": chrono::Utc::now(), - "status": "target blueprint updated" - }) - } else { - // The new target id matches the old target id - // - // Let's see if the blueprints hold the same contents. - // It should not be possible for the contents of a - // blueprint to change, but we check to catch possible - // bugs further up the stack. - if old.1 != new_blueprint { - let message = format!( - "blueprint for id {} changed. \ - Blueprints are supposed to be immutable.", - target_id - ); - error!(&log, "{}", message); - json!({ - "target_id": target_id, - "status": "target blueprint unchanged (error)", - "error": message - }) - } else if old.0.enabled != new_bp_target.enabled { - // The blueprints have the same contents, but its - // enabled bit has flipped. - let status = if new_bp_target.enabled { - "enabled" - } else { - "disabled" - }; - info!( - log, - "target blueprint enabled state changed"; - "target_id" => %target_id, - "time_created" => %time_created, - "state" => status, - ); - self.last = - Some(Arc::new((new_bp_target, new_blueprint))); - self.tx.send_replace(self.last.clone()); - json!({ - "target_id": target_id, - "time_created": time_created, - "time_found": chrono::Utc::now(), - "status": format!("target blueprint {status}"), - }) - } else { - // We found a new target blueprint that exactly - // matches the old target blueprint. This is the - // common case when we're activated by a timeout. 
- debug!( - log, - "found latest target blueprint (unchanged)"; - "target_id" => %target_id, - "time_created" => %time_created.clone() - ); - json!({ - "target_id": target_id, - "time_created": time_created, - "status": "target blueprint unchanged" - }) - } - } } } } diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 0d8a6834ba..5f2d316efd 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -75,7 +75,7 @@ impl super::Nexus { pub async fn blueprint_target_view( &self, opctx: &OpContext, - ) -> Result<Option<BlueprintTarget>, Error> { + ) -> Result<BlueprintTarget, Error> { self.db_datastore.blueprint_target_get_current(opctx).await } @@ -238,13 +238,8 @@ impl super::Nexus { &self, opctx: &OpContext, ) -> CreateResult<Blueprint> { - let maybe_target = + let (_, parent_blueprint) = self.db_datastore.blueprint_target_get_current_full(opctx).await?; - let Some((_, parent_blueprint)) = maybe_target else { - return Err(Error::conflict( - "cannot regenerate blueprint without existing target", - )); - }; let planning_context = self.blueprint_planning_context(opctx).await?; let inventory = planning_context.inventory.ok_or_else(|| { diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 5b85acb929..29feeb6181 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -20,6 +20,7 @@ use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; use nexus_db_queries::db::datastore::RackInit; use nexus_db_queries::db::lookup::LookupPath; use nexus_reconfigurator_execution::silo_dns_name; +use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::external_api::params::Address; use nexus_types::external_api::params::AddressConfig; use nexus_types::external_api::params::AddressLotBlockCreate; @@ -191,15 +192,15 @@ impl super::Nexus { let silo_name = &request.recovery_silo.silo_name; let dns_records = request - .services - .iter() - .filter_map(|s| match &s.kind { - nexus_types::internal_api::params::ServiceKind::Nexus { - external_address, + .blueprint + .all_omicron_zones(BlueprintZoneFilter::ShouldBeExternallyReachable) + .filter_map(|(_, zc)| match zc.zone_type { + nexus_types::deployment::OmicronZoneType::Nexus { + external_ip, .. - } => Some(match external_address { - IpAddr::V4(addr) => DnsRecord::A(*addr), - IpAddr::V6(addr) => DnsRecord::Aaaa(*addr), + } => Some(match external_ip { + IpAddr::V4(addr) => DnsRecord::A(addr), + IpAddr::V6(addr) => DnsRecord::Aaaa(addr), }), _ => None, }) @@ -613,7 +614,6 @@ impl super::Nexus { rack_subnet: rack_network_config.rack_subnet.into(), rack_id, blueprint, - services: request.services, physical_disks, zpools, datasets, diff --git a/nexus/src/app/silo.rs b/nexus/src/app/silo.rs index d07dc7013a..efde55cbd1 100644 --- a/nexus/src/app/silo.rs +++ b/nexus/src/app/silo.rs @@ -101,18 +101,12 @@ impl super::Nexus { .dns_zones_list_all(nexus_opctx, DnsGroup::External) .await .internal_context("listing external DNS zones")?; - let target_blueprint = datastore + let (_, target_blueprint) = datastore .blueprint_target_get_current_full(opctx) .await .internal_context("loading target blueprint")?; - let nexus_external_ips = match target_blueprint { - Some((_, blueprint)) => blueprint_nexus_external_ips(&blueprint), - None => { - datastore - .nexus_external_addresses_from_service_table(nexus_opctx) - .await?
- } - }; + let nexus_external_ips = + blueprint_nexus_external_ips(&target_blueprint); let dns_records: Vec = nexus_external_ips .into_iter() .map(|addr| match addr { diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 401220431a..35ec5167f9 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -40,7 +40,6 @@ use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::PaginatedById; use omicron_common::api::external::http_pagination::ScanById; use omicron_common::api::external::http_pagination::ScanParams; -use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::DownstairsClientStopRequest; use omicron_common::api::internal::nexus::DownstairsClientStopped; @@ -909,10 +908,7 @@ async fn blueprint_target_view( let handler = async { let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let nexus = &apictx.nexus; - let target = - nexus.blueprint_target_view(&opctx).await?.ok_or_else(|| { - Error::conflict("no target blueprint has been configured") - })?; + let target = nexus.blueprint_target_view(&opctx).await?; Ok(HttpResponseOk(target)) }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 80c972363f..6c4fe4e91e 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -28,9 +28,11 @@ use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use nexus_config::NexusConfig; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::OmicronZoneType; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::internal_api::params::{ - PhysicalDiskPutRequest, ServiceKind, ZpoolPutRequest, + PhysicalDiskPutRequest, ZpoolPutRequest, }; use nexus_types::inventory::Collection; use omicron_common::address::IpRange; @@ -238,7 +240,6 @@ impl nexus_test_interface::NexusServer for Server { internal_server: InternalServer, config: &NexusConfig, blueprint: Blueprint, - services: Vec, physical_disks: Vec< nexus_types::internal_api::params::PhysicalDiskPutRequest, >, @@ -268,12 +269,19 @@ impl nexus_test_interface::NexusServer for Server { // it's 127.0.0.1, having come straight from the stock testing config // file. Whatever it is, we fake up an IP pool range for use by system // services that includes solely this IP. - let internal_services_ip_pool_ranges = services - .iter() - .filter_map(|s| match s.kind { - ServiceKind::ExternalDns { external_address, .. } - | ServiceKind::Nexus { external_address, .. } => { - Some(IpRange::from(external_address)) + let internal_services_ip_pool_ranges = blueprint + .all_omicron_zones(BlueprintZoneFilter::ShouldBeExternallyReachable) + .filter_map(|(_, zc)| match &zc.zone_type { + OmicronZoneType::ExternalDns { dns_address, .. } => { + // Work around + // https://github.com/oxidecomputer/omicron/issues/4988 + let dns_address: SocketAddr = dns_address + .parse() + .expect("invalid DNS socket address"); + Some(IpRange::from(dns_address.ip())) + } + OmicronZoneType::Nexus { external_ip, .. 
} => { + Some(IpRange::from(*external_ip)) } _ => None, }) @@ -287,7 +295,6 @@ impl nexus_test_interface::NexusServer for Server { config.deployment.rack_id, internal_api::params::RackInitializationRequest { blueprint, - services, physical_disks, zpools, datasets, diff --git a/nexus/test-interface/src/lib.rs b/nexus/test-interface/src/lib.rs index 54478c0876..2c7f0989ea 100644 --- a/nexus/test-interface/src/lib.rs +++ b/nexus/test-interface/src/lib.rs @@ -57,7 +57,6 @@ pub trait NexusServer: Send + Sync + 'static { internal_server: Self::InternalServer, config: &NexusConfig, blueprint: Blueprint, - services: Vec, physical_disks: Vec, zpools: Vec, datasets: Vec, diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 6392c729ce..42f1f12546 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -35,9 +35,6 @@ use nexus_types::internal_api::params::DatasetCreateRequest; use nexus_types::internal_api::params::DatasetKind; use nexus_types::internal_api::params::DatasetPutRequest; use nexus_types::internal_api::params::RecoverySiloConfig; -use nexus_types::internal_api::params::ServiceKind; -use nexus_types::internal_api::params::ServiceNic; -use nexus_types::internal_api::params::ServicePutRequest; use nexus_types::inventory::OmicronZoneConfig; use nexus_types::inventory::OmicronZoneDataset; use nexus_types::inventory::OmicronZoneType; @@ -185,7 +182,6 @@ pub async fn test_setup( } struct RackInitRequestBuilder { - services: Vec<ServicePutRequest>, datasets: Vec<DatasetCreateRequest>, internal_dns_config: internal_dns::DnsConfigBuilder, mac_addrs: Box<dyn Iterator<Item = MacAddr> + Send>, } impl RackInitRequestBuilder { fn new() -> Self { Self { - services: vec![], datasets: vec![], internal_dns_config: internal_dns::DnsConfigBuilder::new(), mac_addrs: Box::new(MacAddr::iter_system()), } } - // Keeps track of: - // - The "ServicePutRequest" (for handoff to Nexus) - // - The internal DNS configuration for this service - fn add_service_with_id( + fn add_service_to_dns( &mut self, zone_id: Uuid, address: SocketAddrV6, - kind: ServiceKind, service_name: internal_dns::ServiceName, - sled_id: Uuid, ) { - self.services.push(ServicePutRequest { - address, - kind, - service_id: zone_id, - sled_id, - zone_id: Some(zone_id), - }); let zone = self .internal_dns_config .host_zone( @@ -232,22 +215,6 @@ impl RackInitRequestBuilder { .expect("Failed to set up DNS for {kind}"); } - fn add_service_without_dns( - &mut self, - zone_id: Uuid, - address: SocketAddrV6, - kind: ServiceKind, - sled_id: Uuid, - ) { - self.services.push(ServicePutRequest { - address, - kind, - service_id: zone_id, - sled_id, - zone_id: Some(zone_id), - }); - } - // Keeps track of: // - The "DatasetPutRequest" (for handoff to Nexus) // - The internal DNS configuration for this service @@ -539,19 +506,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { // NOTE: If dendrite is started after Nexus, this is ignored.
let config = DpdConfig { address: std::net::SocketAddr::V6(address) }; self.config.pkg.dendrite.insert(switch_location, config); - - let sled_id = Uuid::parse_str(match switch_location { - SwitchLocation::Switch0 => SLED_AGENT_UUID, - SwitchLocation::Switch1 => SLED_AGENT2_UUID, - }) - .unwrap(); - - self.rack_init_builder.add_service_without_dns( - sled_id, - address, - ServiceKind::Dendrite, - sled_id, - ); } pub async fn start_mgd(&mut self, switch_location: SwitchLocation) { @@ -568,19 +522,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let config = MgdConfig { address: std::net::SocketAddr::V6(address) }; self.config.pkg.mgd.insert(switch_location, config); - - let sled_id = Uuid::parse_str(match switch_location { - SwitchLocation::Switch0 => SLED_AGENT_UUID, - SwitchLocation::Switch1 => SLED_AGENT2_UUID, - }) - .unwrap(); - - self.rack_init_builder.add_service_without_dns( - sled_id, - address, - ServiceKind::Mgd, - sled_id, - ); } pub async fn record_switch_dns(&mut self) { @@ -689,7 +630,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { 0, ); - let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); let mac = self .rack_init_builder .mac_addrs @@ -698,24 +638,10 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { let external_address = self.config.deployment.dropshot_external.dropshot.bind_address.ip(); let nexus_id = self.config.deployment.id; - self.rack_init_builder.add_service_with_id( + self.rack_init_builder.add_service_to_dns( nexus_id, address, - ServiceKind::Nexus { - external_address, - nic: ServiceNic { - id: Uuid::new_v4(), - name: "nexus".parse().unwrap(), - ip: NEXUS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) - .unwrap() - .into(), - mac, - slot: 0, - }, - }, internal_dns::ServiceName::Nexus, - sled_id, ); self.omicron_zones.push(OmicronZoneConfig { @@ -732,7 +658,10 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { internal_address: address.to_string(), nic: NetworkInterface { id: Uuid::new_v4(), - ip: external_address, + ip: NEXUS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) + .unwrap() + .into(), kind: NetworkInterfaceKind::Service { id: nexus_id }, mac, name: format!("nexus-{}", nexus_id).parse().unwrap(), @@ -864,7 +793,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .expect("Must launch internal nexus first"), self.config, blueprint, - self.rack_init_builder.services.clone(), // NOTE: We should probably hand off // "self.rack_init_builder.datasets" here, but Nexus won't be happy // if we pass it right now: @@ -998,14 +926,11 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { panic!("Expected IPv6 Pantry Address"); }; - let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); let zone_id = Uuid::new_v4(); - self.rack_init_builder.add_service_with_id( + self.rack_init_builder.add_service_to_dns( zone_id, address, - ServiceKind::CruciblePantry, internal_dns::ServiceName::CruciblePantry, - sled_id, ); self.omicron_zones.push(OmicronZoneConfig { id: zone_id, @@ -1019,7 +944,6 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { // Set up an external DNS server. 
pub async fn start_external_dns(&mut self) { let log = self.logctx.log.new(o!("component" => "external_dns_server")); - let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); let dns = dns_server::TransientServer::new(&log).await.unwrap(); @@ -1037,24 +961,10 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { .next() .expect("ran out of MAC addresses"); let zone_id = Uuid::new_v4(); - self.rack_init_builder.add_service_with_id( + self.rack_init_builder.add_service_to_dns( zone_id, dropshot_address, - ServiceKind::ExternalDns { - external_address: (*dns_address.ip()).into(), - nic: ServiceNic { - id: Uuid::new_v4(), - name: "external-dns".parse().unwrap(), - ip: DNS_OPTE_IPV4_SUBNET - .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) - .unwrap() - .into(), - mac, - slot: 0, - }, - }, internal_dns::ServiceName::ExternalDns, - sled_id, ); let zpool_id = ZpoolUuid::new_v4(); @@ -1071,7 +981,10 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { http_address: dropshot_address.to_string(), nic: NetworkInterface { id: Uuid::new_v4(), - ip: (*dns_address.ip()).into(), + ip: DNS_OPTE_IPV4_SUBNET + .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) + .unwrap() + .into(), kind: NetworkInterfaceKind::Service { id: zone_id }, mac, name: format!("external-dns-{}", zone_id).parse().unwrap(), @@ -1089,19 +1002,16 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { // Set up an internal DNS server. pub async fn start_internal_dns(&mut self) { let log = self.logctx.log.new(o!("component" => "internal_dns_server")); - let sled_id = Uuid::parse_str(SLED_AGENT_UUID).unwrap(); let dns = dns_server::TransientServer::new(&log).await.unwrap(); let SocketAddr::V6(address) = dns.dropshot_server.local_addr() else { panic!("Unsupported IPv4 DNS address"); }; let zone_id = Uuid::new_v4(); - self.rack_init_builder.add_service_with_id( + self.rack_init_builder.add_service_to_dns( zone_id, address, - ServiceKind::InternalDns, internal_dns::ServiceName::InternalDns, - sled_id, ); let zpool_id = ZpoolUuid::new_v4(); diff --git a/nexus/types/src/internal_api/params.rs b/nexus/types/src/internal_api/params.rs index a811106c2c..15ab154952 100644 --- a/nexus/types/src/internal_api/params.rs +++ b/nexus/types/src/internal_api/params.rs @@ -193,20 +193,6 @@ impl fmt::Display for ServiceKind { } } -/// Describes a service on a sled -#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] -pub struct ServicePutRequest { - pub service_id: Uuid, - pub sled_id: Uuid, - pub zone_id: Option, - - /// Address on which a service is responding to requests. - pub address: SocketAddrV6, - - /// Type of service being inserted. - pub kind: ServiceKind, -} - #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] pub struct DatasetCreateRequest { pub zpool_id: Uuid, @@ -233,8 +219,6 @@ impl std::fmt::Debug for Certificate { pub struct RackInitializationRequest { /// Blueprint describing services initialized by RSS. pub blueprint: Blueprint, - /// Services on the rack which have been created by RSS. 
- pub services: Vec, /// "Managed" physical disks owned by the control plane pub physical_disks: Vec, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index c4eb89f0f9..ca3eeb0f2d 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -6463,13 +6463,6 @@ } ] }, - "services": { - "description": "Services on the rack which have been created by RSS.", - "type": "array", - "items": { - "$ref": "#/components/schemas/ServicePutRequest" - } - }, "zpools": { "description": "Zpools created within the physical disks created by the control plane.", "type": "array", @@ -6489,7 +6482,6 @@ "physical_disks", "rack_network_config", "recovery_silo", - "services", "zpools" ] }, @@ -6912,326 +6904,6 @@ "timeseries_name" ] }, - "ServiceKind": { - "description": "Describes the purpose of the service.", - "oneOf": [ - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "clickhouse" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "clickhouse_keeper" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "cockroach" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "crucible" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "crucible_pantry" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "content": { - "type": "object", - "properties": { - "external_address": { - "type": "string", - "format": "ip" - }, - "nic": { - "$ref": "#/components/schemas/ServiceNic" - } - }, - "required": [ - "external_address", - "nic" - ] - }, - "type": { - "type": "string", - "enum": [ - "external_dns" - ] - } - }, - "required": [ - "content", - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "internal_dns" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "content": { - "type": "object", - "properties": { - "external_address": { - "type": "string", - "format": "ip" - }, - "nic": { - "$ref": "#/components/schemas/ServiceNic" - } - }, - "required": [ - "external_address", - "nic" - ] - }, - "type": { - "type": "string", - "enum": [ - "nexus" - ] - } - }, - "required": [ - "content", - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "oximeter" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "dendrite" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "tfport" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - "properties": { - "content": { - "type": "object", - "properties": { - "nic": { - "$ref": "#/components/schemas/ServiceNic" - }, - "snat": { - "$ref": "#/components/schemas/SourceNatConfig" - } - }, - "required": [ - "nic", - "snat" - ] - }, - "type": { - "type": "string", - "enum": [ - "boundary_ntp" - ] - } - }, - "required": [ - "content", - "type" - ] - }, - { - "type": "object", - "properties": { - "type": { - "type": "string", - "enum": [ - "internal_ntp" - ] - } - }, - "required": [ - "type" - ] - }, - { - "type": "object", - 
"properties": { - "type": { - "type": "string", - "enum": [ - "mgd" - ] - } - }, - "required": [ - "type" - ] - } - ] - }, - "ServiceNic": { - "description": "Describes the RSS allocated values for a service vnic", - "type": "object", - "properties": { - "id": { - "type": "string", - "format": "uuid" - }, - "ip": { - "type": "string", - "format": "ip" - }, - "mac": { - "$ref": "#/components/schemas/MacAddr" - }, - "name": { - "$ref": "#/components/schemas/Name" - }, - "slot": { - "type": "integer", - "format": "uint8", - "minimum": 0 - } - }, - "required": [ - "id", - "ip", - "mac", - "name", - "slot" - ] - }, - "ServicePutRequest": { - "description": "Describes a service on a sled", - "type": "object", - "properties": { - "address": { - "description": "Address on which a service is responding to requests.", - "type": "string" - }, - "kind": { - "description": "Type of service being inserted.", - "allOf": [ - { - "$ref": "#/components/schemas/ServiceKind" - } - ] - }, - "service_id": { - "type": "string", - "format": "uuid" - }, - "sled_id": { - "type": "string", - "format": "uuid" - }, - "zone_id": { - "nullable": true, - "type": "string", - "format": "uuid" - } - }, - "required": [ - "address", - "kind", - "service_id", - "sled_id" - ] - }, "SledAgentInfo": { "description": "Sent by a sled agent to Nexus to inform about resources", "type": "object", diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index ca9eea49b3..545150d5fa 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -164,30 +164,21 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_sled_by_rack ON omicron.public.sled ( ) WHERE time_deleted IS NULL; CREATE TYPE IF NOT EXISTS omicron.public.sled_resource_kind AS ENUM ( - -- omicron.public.dataset - 'dataset', - -- omicron.public.service - 'service', -- omicron.public.instance - 'instance', - -- omicron.public.sled - -- - -- reserved as an approximation of sled internal usage, such as "by the OS - -- and all unaccounted services". - 'reserved' + 'instance' + -- We expect to other resource kinds here in the future; e.g., to track + -- resources used by control plane services. For now, we only track + -- instances. ); -- Accounting for programs using resources on a sled CREATE TABLE IF NOT EXISTS omicron.public.sled_resource ( - -- Should match the UUID of the corresponding service + -- Should match the UUID of the corresponding resource id UUID PRIMARY KEY, -- The sled where resources are being consumed sled_id UUID NOT NULL, - -- Identifies the type of the resource - kind omicron.public.sled_resource_kind NOT NULL, - -- The maximum number of hardware threads usable by this resource hardware_threads INT8 NOT NULL, @@ -195,7 +186,10 @@ CREATE TABLE IF NOT EXISTS omicron.public.sled_resource ( rss_ram INT8 NOT NULL, -- The maximum amount of Reservoir RAM provisioned to this resource - reservoir_ram INT8 NOT NULL + reservoir_ram INT8 NOT NULL, + + -- Identifies the type of the resource + kind omicron.public.sled_resource_kind NOT NULL ); -- Allow looking up all resources which reside on a sled @@ -296,36 +290,6 @@ CREATE TYPE IF NOT EXISTS omicron.public.service_kind AS ENUM ( 'mgd' ); -CREATE TABLE IF NOT EXISTS omicron.public.service ( - /* Identity metadata (asset) */ - id UUID PRIMARY KEY, - time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - - /* FK into the Sled table */ - sled_id UUID NOT NULL, - /* For services in illumos zones, the zone's unique id (for debugging) */ - zone_id UUID, - /* The IP address of the service. 
*/ - ip INET NOT NULL, - /* The UDP or TCP port on which the service listens. */ - port INT4 CHECK (port BETWEEN 0 AND 65535) NOT NULL, - /* Indicates the type of service. */ - kind omicron.public.service_kind NOT NULL -); - -/* Add an index which lets us look up the services on a sled */ -CREATE UNIQUE INDEX IF NOT EXISTS lookup_service_by_sled ON omicron.public.service ( - sled_id, - id -); - -/* Look up (and paginate) services of a given kind. */ -CREATE UNIQUE INDEX IF NOT EXISTS lookup_service_by_kind ON omicron.public.service ( - kind, - id -); - CREATE TYPE IF NOT EXISTS omicron.public.physical_disk_kind AS ENUM ( 'm2', 'u2' @@ -1300,7 +1264,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.oximeter ( CREATE TYPE IF NOT EXISTS omicron.public.producer_kind AS ENUM ( -- A sled agent for an entry in the sled table. 'sled_agent', - -- A service in the omicron.public.service table + -- A service in a blueprint (typically the current target blueprint, but it + -- may reference a prior blueprint if the service is in the process of being + -- removed). 'service', -- A Propolis VMM for an instance in the omicron.public.instance table 'instance' @@ -3790,7 +3756,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '52.0.0', NULL) + ( TRUE, NOW(), NOW(), '53.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/drop-service-table/up1.sql b/schema/crdb/drop-service-table/up1.sql new file mode 100644 index 0000000000..dfb402ba4d --- /dev/null +++ b/schema/crdb/drop-service-table/up1.sql @@ -0,0 +1,17 @@ +-- Ensure there are no `sled_resource` rows with a `kind` other than 'instance' + +-- This is a full table scan, but the sled_resource table does not track +-- historical, deleted resources, so is at most the size of the number of +-- currently-running instances (which should be zero during a schema update). +SET + LOCAL disallow_full_table_scans = OFF; + +WITH count_non_instance_resources AS ( + SELECT COUNT(*) AS num + FROM omicron.public.sled_resource + WHERE kind != 'instance' +) +SELECT CAST( + IF(num = 0, 'true', 'sled_resource contains non-instance rows') + AS bool +) FROM count_non_instance_resources; diff --git a/schema/crdb/drop-service-table/up2.sql b/schema/crdb/drop-service-table/up2.sql new file mode 100644 index 0000000000..3d723a4876 --- /dev/null +++ b/schema/crdb/drop-service-table/up2.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.service; diff --git a/schema/crdb/drop-service-table/up3.sql b/schema/crdb/drop-service-table/up3.sql new file mode 100644 index 0000000000..8b546821a7 --- /dev/null +++ b/schema/crdb/drop-service-table/up3.sql @@ -0,0 +1,3 @@ +-- We are dropping `kind` so that we can drop the `sled_resource` kind; we'll +-- then recreate it (with some variants removed) and add this column back. 
+ALTER TABLE omicron.public.sled_resource DROP COLUMN IF EXISTS kind; diff --git a/schema/crdb/drop-service-table/up4.sql b/schema/crdb/drop-service-table/up4.sql new file mode 100644 index 0000000000..bbf5a605a4 --- /dev/null +++ b/schema/crdb/drop-service-table/up4.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.sled_resource_kind; diff --git a/schema/crdb/drop-service-table/up5.sql b/schema/crdb/drop-service-table/up5.sql new file mode 100644 index 0000000000..9903bb28df --- /dev/null +++ b/schema/crdb/drop-service-table/up5.sql @@ -0,0 +1,3 @@ +CREATE TYPE IF NOT EXISTS omicron.public.sled_resource_kind AS ENUM ( + 'instance' +); diff --git a/schema/crdb/drop-service-table/up6.sql b/schema/crdb/drop-service-table/up6.sql new file mode 100644 index 0000000000..28241f96da --- /dev/null +++ b/schema/crdb/drop-service-table/up6.sql @@ -0,0 +1,5 @@ +ALTER TABLE omicron.public.sled_resource + ADD COLUMN IF NOT EXISTS + kind omicron.public.sled_resource_kind + NOT NULL + DEFAULT 'instance'; diff --git a/schema/crdb/drop-service-table/up7.sql b/schema/crdb/drop-service-table/up7.sql new file mode 100644 index 0000000000..1eeea65813 --- /dev/null +++ b/schema/crdb/drop-service-table/up7.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.sled_resource ALTER COLUMN kind DROP DEFAULT; diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 627fb11aa0..1393934031 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -376,152 +376,6 @@ impl OmicronZoneConfig { Some(self.id), ) } - - /// Returns the structure that describes this zone to Nexus during rack - /// initialization - pub fn to_nexus_service_req( - &self, - sled_id: Uuid, - ) -> nexus_client::types::ServicePutRequest { - use nexus_client::types as NexusTypes; - - let service_id = self.id; - let zone_id = Some(self.id); - match &self.zone_type { - OmicronZoneType::Nexus { - external_ip, - internal_address, - nic, - .. - } => NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: internal_address.to_string(), - kind: NexusTypes::ServiceKind::Nexus { - external_address: *external_ip, - nic: NexusTypes::ServiceNic { - id: nic.id, - name: nic.name.clone(), - ip: nic.ip, - mac: nic.mac, - slot: nic.slot, - }, - }, - }, - OmicronZoneType::ExternalDns { - http_address, - dns_address, - nic, - .. - } => NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: http_address.to_string(), - kind: NexusTypes::ServiceKind::ExternalDns { - external_address: dns_address.ip(), - nic: NexusTypes::ServiceNic { - id: nic.id, - name: nic.name.clone(), - ip: nic.ip, - mac: nic.mac, - slot: nic.slot, - }, - }, - }, - OmicronZoneType::InternalDns { http_address, .. } => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: http_address.to_string(), - kind: NexusTypes::ServiceKind::InternalDns, - } - } - OmicronZoneType::Oximeter { address } => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::Oximeter, - } - } - OmicronZoneType::CruciblePantry { address } => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::CruciblePantry, - } - } - OmicronZoneType::BoundaryNtp { address, snat_cfg, nic, .. 
} => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::BoundaryNtp { - snat: snat_cfg.into(), - nic: NexusTypes::ServiceNic { - id: nic.id, - name: nic.name.clone(), - ip: nic.ip, - mac: nic.mac, - slot: nic.slot, - }, - }, - } - } - OmicronZoneType::InternalNtp { address, .. } => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::InternalNtp, - } - } - OmicronZoneType::Clickhouse { address, .. } => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::Clickhouse, - } - } - OmicronZoneType::ClickhouseKeeper { address, .. } => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::ClickhouseKeeper, - } - } - OmicronZoneType::Crucible { address, .. } => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::Crucible, - } - } - OmicronZoneType::CockroachDb { address, .. } => { - NexusTypes::ServicePutRequest { - service_id, - zone_id, - sled_id, - address: address.to_string(), - kind: NexusTypes::ServiceKind::Cockroach, - } - } - } - } } /// Describes a persistent ZFS dataset associated with an Omicron zone diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index ce5cb3fa2d..076ccbd44c 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -678,11 +678,16 @@ impl ServiceInner { ) -> Result<(), SetupServiceError> { info!(self.log, "Handing off control to Nexus"); - // Build a Blueprint describing our service plan. This should never - // fail, unless we've set up an invalid plan. - let blueprint = - build_initial_blueprint_from_plan(sled_plan, service_plan) + // Remap our plan into an easier-to-use type... + let sled_configs_by_id = + build_sled_configs_by_id(sled_plan, service_plan) .map_err(SetupServiceError::ConvertPlanToBlueprint)?; + // ... and use that to derive the initial blueprint from our plan. + let blueprint = build_initial_blueprint_from_plan( + &sled_configs_by_id, + service_plan, + ) + .map_err(SetupServiceError::ConvertPlanToBlueprint)?; info!(self.log, "Nexus address: {}", nexus_address.to_string()); @@ -699,30 +704,10 @@ impl ServiceInner { self.log.new(o!("component" => "NexusClient")), ); - // Ensure we can quickly look up "Sled Agent Address" -> "UUID of sled". - // - // We need the ID when passing info to Nexus. - let mut id_map = HashMap::new(); - for (_, sled_request) in sled_plan.sleds.iter() { - id_map.insert( - get_sled_address(sled_request.body.subnet), - sled_request.body.id, - ); - } - - // Convert all the information we have about services and datasets into - // a format which can be processed by Nexus. - let mut services: Vec = vec![]; + // Convert all the information we have about datasets into a format + // which can be processed by Nexus. 
let mut datasets: Vec<NexusTypes::DatasetCreateRequest> = vec![]; - for (addr, sled_config) in service_plan.services.iter() { - let sled_id = *id_map - .get(addr) - .expect("Sled address in service plan, but not sled plan"); - - for zone in &sled_config.zones { - services.push(zone.to_nexus_service_req(sled_id)); - } - + for sled_config in service_plan.services.values() { for zone in &sled_config.zones { if let Some((dataset_name, dataset_address)) = zone.dataset_name_and_address() @@ -817,11 +802,9 @@ impl ServiceInner { info!(self.log, "rack_network_config: {:#?}", rack_network_config); - let physical_disks: Vec<_> = service_plan - .services + let physical_disks: Vec<_> = sled_configs_by_id .iter() - .flat_map(|(addr, config)| { - let sled_id = id_map.get(addr).expect("Missing sled"); + .flat_map(|(sled_id, config)| { config.disks.disks.iter().map(|config| { NexusTypes::PhysicalDiskPutRequest { id: config.id, @@ -835,11 +818,9 @@ impl ServiceInner { }) .collect(); - let zpools = service_plan - .services + let zpools = sled_configs_by_id .iter() - .flat_map(|(addr, config)| { - let sled_id = id_map.get(addr).expect("Missing sled"); + .flat_map(|(sled_id, config)| { config.disks.disks.iter().map(|config| { NexusTypes::ZpoolPutRequest { id: config.pool_id.into_untyped_uuid(), @@ -852,7 +833,6 @@ impl ServiceInner { let request = NexusTypes::RackInitializationRequest { blueprint, - services, physical_disks, zpools, datasets, @@ -1290,14 +1270,14 @@ impl DeployStepVersion { const V5_EVERYTHING: Generation = Self::V4_COCKROACHDB.next(); } -fn build_initial_blueprint_from_plan( +// Build a map of sled ID to `SledConfig` based on the two plan types we +// generate. This is a bit of a code smell (why doesn't the plan generate this +// on its own if we need it?); we should be able to get rid of it when +// we get to https://github.com/oxidecomputer/omicron/issues/5272. +fn build_sled_configs_by_id( sled_plan: &SledPlan, service_plan: &ServicePlan, -) -> anyhow::Result<Blueprint> { - let internal_dns_version = - Generation::try_from(service_plan.dns_config.generation) - .context("invalid internal dns version")?; - +) -> anyhow::Result<BTreeMap<Uuid, SledConfig>> { let mut sled_configs = BTreeMap::new(); for sled_request in sled_plan.sleds.values() { let sled_addr = get_sled_address(sled_request.body.subnet); @@ -1320,18 +1300,41 @@ fn build_initial_blueprint_from_plan( entry.insert(sled_config.clone()); } - Ok(build_initial_blueprint_from_sled_configs( - sled_configs, + if sled_configs.len() != service_plan.services.len() { + bail!( + "error mapping service plan to sled IDs; converted {} sled \ + addresses into {} sled configs", + service_plan.services.len(), + sled_configs.len(), + ); + } + + Ok(sled_configs) +} + +// Build an initial blueprint +fn build_initial_blueprint_from_plan( + sled_configs_by_id: &BTreeMap<Uuid, SledConfig>, + service_plan: &ServicePlan, +) -> anyhow::Result<Blueprint> { + let internal_dns_version = + Generation::try_from(service_plan.dns_config.generation) + .context("invalid internal dns version")?; + + let blueprint = build_initial_blueprint_from_sled_configs( + &sled_configs_by_id, internal_dns_version, - )) + ); + + Ok(blueprint) } pub(crate) fn build_initial_blueprint_from_sled_configs( - sled_configs: BTreeMap<Uuid, SledConfig>, + sled_configs_by_id: &BTreeMap<Uuid, SledConfig>, internal_dns_version: Generation, ) -> Blueprint { let mut blueprint_zones = BTreeMap::new(); - for (sled_id, sled_config) in &sled_configs { + for (sled_id, sled_config) in sled_configs_by_id { let zones_config = BlueprintZonesConfig { // This is a bit of a hack.
We only construct a blueprint after // completing RSS, so we need to know the final generation value @@ -1359,7 +1362,7 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( } let mut blueprint_disks = BTreeMap::new(); - for (sled_id, sled_config) in &sled_configs { + for (sled_id, sled_config) in sled_configs_by_id { blueprint_disks.insert( SledUuid::from_untyped_uuid(*sled_id), BlueprintPhysicalDisksConfig { diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 089760740a..6f931ea629 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -501,17 +501,14 @@ pub async fn run_standalone_server( }; let disks = server.sled_agent.omicron_physical_disks_list().await?; - let services = - zones.iter().map(|z| z.to_nexus_service_req(config.id)).collect(); let mut sled_configs = BTreeMap::new(); sled_configs.insert(config.id, SledConfig { disks, zones }); let rack_init_request = NexusTypes::RackInitializationRequest { blueprint: build_initial_blueprint_from_sled_configs( - sled_configs, + &sled_configs, internal_dns_version, ), - services, physical_disks, zpools, datasets, From 508ad412d7723d295675fec08b7f8307e9109e64 Mon Sep 17 00:00:00 2001 From: iliana etaoin Date: Tue, 16 Apr 2024 11:18:31 -0700 Subject: [PATCH 152/334] Set version to 8.0.0 (#5432) --- .github/buildomat/jobs/package.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index 566c345f76..d28f0b86db 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -37,7 +37,7 @@ rustc --version # trampoline global zone images. # COMMIT=$(git rev-parse HEAD) -VERSION="7.0.0-0.ci+git${COMMIT:0:11}" +VERSION="8.0.0-0.ci+git${COMMIT:0:11}" echo "$VERSION" >/work/version.txt ptime -m ./tools/install_builder_prerequisites.sh -yp From 40e8eb34f8edae3c9855d570e2b1d9a326493ee1 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 16 Apr 2024 15:45:10 -0400 Subject: [PATCH 153/334] Grab artifacts from permission-slip (#5368) --- .github/buildomat/jobs/ci-tools.sh | 18 +++++ .github/buildomat/jobs/tuf-repo.sh | 109 +++++------------------------ tools/permslip_commit | 1 + tools/permslip_production | 1 + tools/permslip_staging | 4 ++ 5 files changed, 42 insertions(+), 91 deletions(-) create mode 100644 tools/permslip_commit create mode 100644 tools/permslip_production create mode 100644 tools/permslip_staging diff --git a/.github/buildomat/jobs/ci-tools.sh b/.github/buildomat/jobs/ci-tools.sh index ce17d4fb30..4c58731e24 100755 --- a/.github/buildomat/jobs/ci-tools.sh +++ b/.github/buildomat/jobs/ci-tools.sh @@ -9,6 +9,11 @@ #: "=/work/caboose-util.gz", #: "=/work/tufaceous.gz", #: "=/work/commtest", +#: "=/work/permslip.gz", +#: ] +#: access_repos = [ +#: "oxidecomputer/permission-slip", +#: "oxidecomputer/sshauth" #: ] set -o errexit @@ -57,3 +62,16 @@ banner tufaceous ptime -m cargo build --locked -p tufaceous --release ptime -m gzip < target/release/tufaceous > /work/tufaceous.gz + +########## permission-slip ########## + +banner permission-slip + +source "./tools/permslip_commit" +git init /work/permission-slip-build +pushd /work/permission-slip-build +git remote add origin https://github.com/oxidecomputer/permission-slip.git +ptime -m git fetch --depth 1 origin "$COMMIT" +git checkout FETCH_HEAD +ptime -m cargo build --locked -p permission-slip-client --release +ptime -m gzip < target/release/permslip > /work/permslip.gz diff --git a/.github/buildomat/jobs/tuf-repo.sh 
b/.github/buildomat/jobs/tuf-repo.sh index 31b9d157ed..89928a0030 100755 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -8,9 +8,6 @@ #: "=/work/repo-*.zip", #: "=/work/repo-*.zip.sha256.txt", #: ] -#: access_repos = [ -#: "oxidecomputer/dvt-dock", -#: ] #: #: [dependencies.ci-tools] #: job = "helios / CI tools" @@ -41,12 +38,10 @@ set -o errexit set -o pipefail set -o xtrace -ALL_BOARDS=(gimlet-{c..f} psc-{b..c} sidecar-{b..c}) - TOP=$PWD VERSION=$(< /input/package/work/version.txt) -for bin in caboose-util tufaceous; do +for bin in caboose-util tufaceous permslip; do ptime -m gunzip < /input/ci-tools/work/$bin.gz > /work/$bin chmod a+x /work/$bin done @@ -110,94 +105,26 @@ path = "/input/host/work/helios/upload/os-$kind.tar.gz" EOF done -# Fetch SP images from a Hubris release. +download_region_manifests() { + url=$1 + name=$2 + mkdir $2 + pushd $2 + while read -r manifest_hash manifest_name; do + /work/permslip --url=$url --anonymous get-artifact $manifest_hash --out $manifest_name + # hash refers to the hash in permission slip + grep -F "hash =" $manifest_name | cut -d "=" -f 2 | tr -d "\" " | xargs -L 1 -I {} /work/permslip --url=$1 --anonymous get-artifact {} --out {}.zip + # turn the hash entry into the path we just downloaded in the manifest + sed "s|hash = \"\(.*\)\"|path = \"$PWD\/\1.zip\"|" $manifest_name >> /work/manifest.toml + done < $TOP/tools/permslip_$name + popd +} + mkdir /work/hubris pushd /work/hubris -source "$TOP/tools/hubris_version" -for tag in "${TAGS[@]}"; do - for board in "${ALL_BOARDS[@]}"; do - if [[ "${tag%-*}" = "${board%-*}" ]]; then - file=build-${board}-image-default-${tag#*-}.zip - curl -fLOsS "https://github.com/oxidecomputer/hubris/releases/download/$tag/$file" - grep -F "$file" "$TOP/tools/hubris_checksums" | shasum -a 256 -c - - mv "$file" "$board.zip" - fi - done -done +download_region_manifests https://permslip-staging.corp.oxide.computer staging +download_region_manifests https://signer-us-west.corp.oxide.computer production popd -# Fetch signed ROT images from dvt-dock. -source "$TOP/tools/dvt_dock_version" -git init /work/dvt-dock -( - cd /work/dvt-dock - git remote add origin https://github.com/oxidecomputer/dvt-dock.git - git fetch --depth 1 origin "$COMMIT" - git checkout FETCH_HEAD -) - -caboose_util_rot() { - # usage: caboose_util_rot ACTION IMAGE_A IMAGE_B - output_a=$(/work/caboose-util "$1" "$2") - output_b=$(/work/caboose-util "$1" "$3") - if [[ "$output_a" != "$output_b" ]]; then - >&2 echo "\`caboose-util $1\` mismatch:" - >&2 echo " $2: $output_a" - >&2 echo " $3: $output_b" - exit 1 - fi - echo "$output_a" -} - -# Add the SP images. 
-for board_rev in "${ALL_BOARDS[@]}"; do - board=${board_rev%-?} - tufaceous_board=${board//sidecar/switch} - sp_image="/work/hubris/${board_rev}.zip" - sp_caboose_version=$(/work/caboose-util read-version "$sp_image") - sp_caboose_board=$(/work/caboose-util read-board "$sp_image") - - cat >>/work/manifest.toml <>/work/manifest.toml < /work/repo-rot-all.zip.sha256.txt diff --git a/tools/permslip_commit b/tools/permslip_commit new file mode 100644 index 0000000000..58140df7da --- /dev/null +++ b/tools/permslip_commit @@ -0,0 +1 @@ +COMMIT=5d44e0065f90051a28881c75e3574142ada9b695 diff --git a/tools/permslip_production b/tools/permslip_production new file mode 100644 index 0000000000..331209b1f0 --- /dev/null +++ b/tools/permslip_production @@ -0,0 +1 @@ +394b0bb7c759eead2e41cec98c2376e5e558d6b401418b56ca0db50d55d434ad manifest-oxide-rot-1-v1.0.9.toml diff --git a/tools/permslip_staging b/tools/permslip_staging new file mode 100644 index 0000000000..7b4e5f161a --- /dev/null +++ b/tools/permslip_staging @@ -0,0 +1,4 @@ +b1b0d63a179652fcc80fabbb49307c0fe28cf52744f58f7b8a768f14d6721a3f manifest-gimlet-v1.0.15.toml +686f5fff41ed3b33ba0be38d2becdeb67847705fd590f05f6d8f7c600db87fb7 manifest-oxide-rot-1-v1.0.9.toml +7d26b9f719a7f2c22e091d7d80de66933c11bdb9ae174ae59552b376400d63db manifest-psc-v1.0.14.toml +cd8c1bb64990573b9d29dcc2312d9c8cb4b08bc59873196ac50ce2b506037594 manifest-sidecar-v1.0.14.toml From dbcdb6a1deae24d6180dbf5f3b3723f9af8be2d1 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 16 Apr 2024 15:10:20 -0700 Subject: [PATCH 154/334] [nexus-types] make PlanningInput's ExternalIp only support a single IP (#5540) This simplifies bookkeeping within the blueprint builder considerably -- services always get a single IP, and I don't think that's going to change any time soon. 
--- dev-tools/reconfigurator-cli/src/main.rs | 15 ++--- nexus/db-model/src/external_ip.rs | 9 +-- nexus/db-model/src/instance.rs | 2 + nexus/db-model/src/network_interface.rs | 4 +- nexus/db-model/src/typed_uuid.rs | 3 +- nexus/reconfigurator/planning/src/example.rs | 13 ++-- nexus/reconfigurator/preparation/src/lib.rs | 10 ++-- nexus/types/src/deployment.rs | 4 +- nexus/types/src/deployment/planning_input.rs | 62 ++++++++++++++------ uuid-kinds/src/lib.rs | 1 + 10 files changed, 76 insertions(+), 47 deletions(-) diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 28e757af93..94ff6d77bd 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -21,9 +21,9 @@ use nexus_reconfigurator_planning::system::{ SledBuilder, SledHwInventory, SystemDescription, }; use nexus_types::deployment::BlueprintZoneFilter; -use nexus_types::deployment::ExternalIp; +use nexus_types::deployment::OmicronZoneExternalIp; +use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; -use nexus_types::deployment::ServiceNetworkInterface; use nexus_types::deployment::SledFilter; use nexus_types::deployment::{Blueprint, UnstableReconfiguratorState}; use nexus_types::internal_api::params::DnsConfigParams; @@ -33,6 +33,7 @@ use nexus_types::inventory::SledRole; use omicron_common::api::external::Generation; use omicron_common::api::external::Name; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; @@ -67,7 +68,7 @@ struct ReconfiguratorSim { /// CRDB - they're not part of the zone config sent from Reconfigurator to /// sled-agent. This mimics the minimal bit of the CRDB `external_ip` table /// we need. - external_ips: RefCell>, + external_ips: RefCell>, /// internal DNS configurations internal_dns: BTreeMap, @@ -154,20 +155,20 @@ impl ReconfiguratorSim { { let zone_id = OmicronZoneUuid::from_untyped_uuid(zone.id); if let Ok(Some(ip)) = zone.zone_type.external_ip() { - let external_ip = ExternalIp { + let external_ip = OmicronZoneExternalIp { id: *self .external_ips .borrow_mut() .entry(ip) - .or_insert_with(Uuid::new_v4), - ip: ip.into(), + .or_insert_with(ExternalIpUuid::new_v4), + ip, }; builder .add_omicron_zone_external_ip(zone_id, external_ip) .context("adding omicron zone external IP")?; } if let Some(nic) = zone.zone_type.service_vnic() { - let nic = ServiceNetworkInterface { + let nic = OmicronZoneNic { id: nic.id, mac: nic.mac, ip: nic.ip.into(), diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index f290fdcd0f..93af08fdee 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -22,6 +22,7 @@ use nexus_types::external_api::views; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadata; +use omicron_uuid_kinds::GenericUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -130,12 +131,6 @@ pub struct ExternalIp { pub is_probe: bool, } -impl From for nexus_types::deployment::ExternalIp { - fn from(ext_ip: ExternalIp) -> Self { - Self { id: ext_ip.id, ip: ext_ip.ip } - } -} - /// A view type constructed from `ExternalIp` used to represent Floating IP /// objects in user-facing APIs. 
/// @@ -537,7 +532,7 @@ impl TryFrom for FloatingIp { ))?; let identity = FloatingIpIdentity { - id: ip.id, + id: ip.id.into_untyped_uuid(), name, description, time_created: ip.time_created, diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index f7731ff903..286c68ac7c 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -107,6 +107,8 @@ impl DatastoreAttachTargetConfig for Instance { } impl DatastoreAttachTargetConfig for Instance { + // TODO-cleanup ideally this would be an ExternalIpUuid, haven't quite + // figured out how to make that work type Id = Uuid; type CollectionIdColumn = instance::dsl::id; diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index a632772043..108232275d 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -146,9 +146,7 @@ pub struct ServiceNetworkInterface { pub primary: bool, } -impl From - for nexus_types::deployment::ServiceNetworkInterface -{ +impl From for nexus_types::deployment::OmicronZoneNic { fn from(nic: ServiceNetworkInterface) -> Self { Self { id: nic.id(), diff --git a/nexus/db-model/src/typed_uuid.rs b/nexus/db-model/src/typed_uuid.rs index 7785b8c7dc..1e54e242f3 100644 --- a/nexus/db-model/src/typed_uuid.rs +++ b/nexus/db-model/src/typed_uuid.rs @@ -10,6 +10,7 @@ use diesel::deserialize::{self, FromSql}; use diesel::serialize::{self, ToSql}; use diesel::sql_types; use omicron_uuid_kinds::{GenericUuid, TypedUuid, TypedUuidKind}; +use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use std::fmt; use std::str::FromStr; @@ -31,7 +32,7 @@ pub fn to_db_typed_uuid(id: TypedUuid) -> DbTypedUuid { /// `db-model` crate (this type is not exported at the top level). External /// users must use omicron-common's `TypedUuid`. 
#[derive_where(Clone, Copy, Eq, Ord, PartialEq, PartialOrd, Hash)] -#[derive(AsExpression, FromSqlRow, Serialize, Deserialize)] +#[derive(AsExpression, FromSqlRow, Serialize, Deserialize, JsonSchema)] #[diesel(sql_type = sql_types::Uuid)] #[serde(transparent, bound = "")] pub struct DbTypedUuid(pub(crate) TypedUuid); diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 908afea535..e740abfcad 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -9,18 +9,18 @@ use crate::system::SledBuilder; use crate::system::SystemDescription; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; -use nexus_types::deployment::ExternalIp; +use nexus_types::deployment::OmicronZoneExternalIp; +use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; -use nexus_types::deployment::ServiceNetworkInterface; use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; +use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledKind; use sled_agent_client::types::OmicronZonesConfig; use typed_rng::TypedUuidRng; -use uuid::Uuid; pub struct ExampleSystem { pub system: SystemDescription, @@ -131,7 +131,10 @@ impl ExampleSystem { input_builder .add_omicron_zone_external_ip( service_id, - ExternalIp { id: Uuid::new_v4(), ip: ip.into() }, + OmicronZoneExternalIp { + id: ExternalIpUuid::new_v4(), + ip, + }, ) .expect("failed to add Omicron zone external IP"); } @@ -139,7 +142,7 @@ impl ExampleSystem { input_builder .add_omicron_zone_nic( service_id, - ServiceNetworkInterface { + OmicronZoneNic { id: nic.id, mac: nic.mac, ip: nic.ip.into(), diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 52f3d3fecb..75482128a0 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -33,6 +33,7 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Error; use omicron_common::api::external::LookupType; use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -129,12 +130,11 @@ impl PlanningInputFromDb<'_> { }; let zone_id = OmicronZoneUuid::from_untyped_uuid(zone_id); builder - .add_omicron_zone_external_ip( + .add_omicron_zone_external_ip_network( zone_id, - nexus_types::deployment::ExternalIp { - id: external_ip_row.id, - ip: external_ip_row.ip, - }, + // TODO-cleanup use `TypedUuid` everywhere + ExternalIpUuid::from_untyped_uuid(external_ip_row.id), + external_ip_row.ip, ) .map_err(|e| { Error::internal_error(&format!( diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index ead7e025e3..52e285e81c 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -39,12 +39,12 @@ use uuid::Uuid; mod planning_input; pub use planning_input::DiskFilter; -pub use planning_input::ExternalIp; +pub use planning_input::OmicronZoneExternalIp; +pub use planning_input::OmicronZoneNic; pub use planning_input::PlanningInput; pub use planning_input::PlanningInputBuildError; pub use planning_input::PlanningInputBuilder; pub use planning_input::Policy; -pub use planning_input::ServiceNetworkInterface; 
pub use planning_input::SledDetails; pub use planning_input::SledDisk; pub use planning_input::SledFilter; diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 5b4bf2538e..9c0714ffab 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -11,12 +11,14 @@ use crate::external_api::views::SledPolicy; use crate::external_api::views::SledProvisionPolicy; use crate::external_api::views::SledState; use ipnetwork::IpNetwork; +use ipnetwork::NetworkSize; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::MacAddr; use omicron_common::disk::DiskIdentity; +use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; @@ -25,6 +27,7 @@ use serde::Deserialize; use serde::Serialize; use std::collections::btree_map::Entry; use std::collections::BTreeMap; +use std::net::IpAddr; use strum::IntoEnumIterator; use uuid::Uuid; @@ -94,22 +97,23 @@ impl SledResources { } } -/// External IP allocated to a service +/// External IP allocated to an Omicron-managed zone. /// /// This is a slimmer `nexus_db_model::ExternalIp` that only stores the fields -/// necessary for blueprint planning. +/// necessary for blueprint planning, and requires that the zone have a single +/// IP. #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ExternalIp { - pub id: Uuid, - pub ip: IpNetwork, +pub struct OmicronZoneExternalIp { + pub id: ExternalIpUuid, + pub ip: IpAddr, } -/// Network interface allocated to a service +/// Network interface allocated to an Omicron-managed zone. /// /// This is a slimmer `nexus_db_model::ServiceNetworkInterface` that only stores /// the fields necessary for blueprint planning. #[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ServiceNetworkInterface { +pub struct OmicronZoneNic { pub id: Uuid, pub mac: MacAddr, pub ip: IpNetwork, @@ -313,10 +317,10 @@ pub struct PlanningInput { sleds: BTreeMap, /// external IPs allocated to Omicron zones - omicron_zone_external_ips: BTreeMap, + omicron_zone_external_ips: BTreeMap, /// vNICs allocated to Omicron zones - omicron_zone_nics: BTreeMap, + omicron_zone_nics: BTreeMap, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -400,13 +404,15 @@ impl PlanningInput { pub enum PlanningInputBuildError { #[error("duplicate sled ID: {0}")] DuplicateSledId(SledUuid), + #[error("Omicron zone {zone_id} has a range of IPs ({ip:?}), only a single IP is supported")] + NotSingleIp { zone_id: OmicronZoneUuid, ip: IpNetwork }, #[error("Omicron zone {zone_id} already has an external IP ({ip:?})")] - DuplicateOmicronZoneExternalIp { zone_id: OmicronZoneUuid, ip: ExternalIp }, - #[error("Omicron zone {zone_id} already has a NIC ({nic:?})")] - DuplicateOmicronZoneNic { + DuplicateOmicronZoneExternalIp { zone_id: OmicronZoneUuid, - nic: ServiceNetworkInterface, + ip: OmicronZoneExternalIp, }, + #[error("Omicron zone {zone_id} already has a NIC ({nic:?})")] + DuplicateOmicronZoneNic { zone_id: OmicronZoneUuid, nic: OmicronZoneNic }, } /// Constructor for [`PlanningInput`]. 
@@ -416,8 +422,8 @@ pub struct PlanningInputBuilder { internal_dns_version: Generation, external_dns_version: Generation, sleds: BTreeMap, - omicron_zone_external_ips: BTreeMap, - omicron_zone_nics: BTreeMap, + omicron_zone_external_ips: BTreeMap, + omicron_zone_nics: BTreeMap, } impl PlanningInputBuilder { @@ -466,10 +472,32 @@ impl PlanningInputBuilder { } } + /// Like `add_omicron_zone_external_ip`, but can accept an [`IpNetwork`], + /// validating that the IP is a single address. + pub fn add_omicron_zone_external_ip_network( + &mut self, + zone_id: OmicronZoneUuid, + ip_id: ExternalIpUuid, + ip: IpNetwork, + ) -> Result<(), PlanningInputBuildError> { + let size = match ip.size() { + NetworkSize::V4(n) => u128::from(n), + NetworkSize::V6(n) => n, + }; + if size != 1 { + return Err(PlanningInputBuildError::NotSingleIp { zone_id, ip }); + } + + self.add_omicron_zone_external_ip( + zone_id, + OmicronZoneExternalIp { id: ip_id, ip: ip.ip() }, + ) + } + pub fn add_omicron_zone_external_ip( &mut self, zone_id: OmicronZoneUuid, - ip: ExternalIp, + ip: OmicronZoneExternalIp, ) -> Result<(), PlanningInputBuildError> { match self.omicron_zone_external_ips.entry(zone_id) { Entry::Vacant(slot) => { @@ -488,7 +516,7 @@ impl PlanningInputBuilder { pub fn add_omicron_zone_nic( &mut self, zone_id: OmicronZoneUuid, - nic: ServiceNetworkInterface, + nic: OmicronZoneNic, ) -> Result<(), PlanningInputBuildError> { match self.omicron_zone_nics.entry(zone_id) { Entry::Vacant(slot) => { diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 9d33d433d2..41d1bfc1f6 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -52,6 +52,7 @@ impl_typed_uuid_kind! { Collection => "collection", Downstairs => "downstairs", DownstairsRegion => "downstairs_region", + ExternalIp => "external_ip", LoopbackAddress => "loopback_address", OmicronZone => "service", PhysicalDisk => "physical_disk", From fe7b87efad4d715feb11dc52521e193b13f05a06 Mon Sep 17 00:00:00 2001 From: Rain Date: Tue, 16 Apr 2024 15:45:56 -0700 Subject: [PATCH 155/334] [clippy] warn on lossless casts (#5544) Originally brought up in #5540. It might seem strange to only warn on lossless casts for now, but: - if the somewhat-illegible "from" type changes, they can suddenly become lossy, and - these are the casts that are easiest to convert over In the future, it would be nice to also warn on lossy casts. 
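As a small illustration (not taken from this change) of why the lossless
spelling is preferred: `T::from(x)` only compiles while the conversion cannot
lose information, so if the source type is ever widened the call site fails to
build, whereas `x as T` keeps compiling and silently truncates.

```rust
// Illustrative sketch only. u16 -> i32 is lossless, so From is implemented.
// If `port` later became a u64, `i32::from(port)` would stop compiling,
// while `port as i32` would keep compiling and truncate large values.
fn port_to_db_value(port: u16) -> i32 {
    i32::from(port)
}

fn main() {
    assert_eq!(port_to_db_value(8080), 8080_i32);
}
```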
--- clients/dpd-client/src/lib.rs | 14 +++++++------- clients/nexus-client/src/lib.rs | 4 +++- dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs | 4 ++-- dev-tools/xtask/src/clippy.rs | 8 +++++++- nexus/db-model/src/bytecount.rs | 2 +- nexus/db-model/src/saga_types.rs | 2 +- nexus/db-queries/src/db/datastore/external_ip.rs | 2 +- .../db-queries/src/db/datastore/ipv4_nat_entry.rs | 4 ++-- nexus/db-queries/src/db/datastore/region.rs | 14 +++++++------- nexus/db-queries/src/db/datastore/vmm.rs | 2 +- .../db-queries/src/db/queries/region_allocation.rs | 2 +- nexus/src/app/disk.rs | 4 ++-- nexus/src/app/image.rs | 2 +- nexus/src/app/instance.rs | 4 ++-- nexus/src/app/sagas/instance_migrate.rs | 2 +- nexus/src/app/sagas/instance_start.rs | 2 +- nexus/tests/integration_tests/disks.rs | 2 +- nexus/tests/integration_tests/snapshots.rs | 2 +- nexus/types/src/external_api/params.rs | 2 +- oximeter/db/src/model.rs | 2 +- oximeter/db/src/oxql/ast/grammar.rs | 2 +- sled-agent/src/sim/storage.rs | 4 +++- sled-agent/src/vmm_reservoir.rs | 3 ++- sled-hardware/types/src/underlay.rs | 6 +++--- sp-sim/src/update.rs | 2 +- wicket/src/cli/rack_setup/config_toml.rs | 8 ++++---- wicket/src/ui/panes/update.rs | 5 +++-- wicket/src/ui/widgets/popup.rs | 4 ++-- wicket/src/ui/widgets/rack.rs | 8 +++++--- wicket/src/wicketd.rs | 6 +++--- wicketd/src/update_tracker.rs | 8 ++++---- 31 files changed, 75 insertions(+), 61 deletions(-) diff --git a/clients/dpd-client/src/lib.rs b/clients/dpd-client/src/lib.rs index a898c31781..556a8493d7 100644 --- a/clients/dpd-client/src/lib.rs +++ b/clients/dpd-client/src/lib.rs @@ -479,7 +479,7 @@ impl From for Ipv4Cidr { impl From for u64 { fn from(x: Ipv4Cidr) -> Self { let prefix: u32 = x.prefix.into(); - ((prefix as u64) << 32) | (x.prefix_len as u64) + (u64::from(prefix) << 32) | u64::from(x.prefix_len) } } @@ -762,12 +762,12 @@ impl fmt::Debug for MacAddr { impl From for u64 { fn from(mac: MacAddr) -> u64 { - ((mac.a[0] as u64) << 40) - | ((mac.a[1] as u64) << 32) - | ((mac.a[2] as u64) << 24) - | ((mac.a[3] as u64) << 16) - | ((mac.a[4] as u64) << 8) - | (mac.a[5] as u64) + (u64::from(mac.a[0]) << 40) + | (u64::from(mac.a[1]) << 32) + | (u64::from(mac.a[2]) << 24) + | (u64::from(mac.a[3]) << 16) + | (u64::from(mac.a[4]) << 8) + | u64::from(mac.a[5]) } } diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index e083f5372e..cd04b8233f 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -256,7 +256,9 @@ impl From for types::Duration { impl From for std::time::Duration { fn from(s: types::Duration) -> Self { - std::time::Duration::from_nanos(s.secs * 1000000000 + s.nanos as u64) + std::time::Duration::from_nanos( + s.secs * 1000000000 + u64::from(s.nanos), + ) } } diff --git a/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs b/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs index 153618b7c0..08ecaf3101 100644 --- a/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs +++ b/dev-tools/omdb/src/bin/omdb/mgs/dashboard.rs @@ -357,7 +357,7 @@ impl Graph { for (_ndx, s) in &mut self.series.iter_mut().enumerate() { if let Some(datum) = s.raw[offs] { - let point = (i as f64, datum as f64); + let point = (i as f64, f64::from(datum)); if self.interpolate != 0 { if let Some(last) = s.data.last() { @@ -374,7 +374,7 @@ impl Graph { } } - s.data.push((i as f64, datum as f64)); + s.data.push((i as f64, f64::from(datum))); } } } diff --git a/dev-tools/xtask/src/clippy.rs b/dev-tools/xtask/src/clippy.rs index babb86cdaf..a89aaa9cc6 100644 --- 
a/dev-tools/xtask/src/clippy.rs +++ b/dev-tools/xtask/src/clippy.rs @@ -67,7 +67,13 @@ pub fn run_cmd(args: ClippyArgs) -> Result<()> { .arg("--warn") .arg("clippy::len_zero") .arg("--warn") - .arg("clippy::redundant_field_names"); + .arg("clippy::redundant_field_names") + // Also warn on casts, preferring explicit conversions instead. + // + // We'd like to warn on lossy casts in the future, but lossless casts + // are the easiest ones to convert over. + .arg("--warn") + .arg("clippy::cast_lossless"); eprintln!( "running: {:?} {}", diff --git a/nexus/db-model/src/bytecount.rs b/nexus/db-model/src/bytecount.rs index 92a01db43f..53e00eb78d 100644 --- a/nexus/db-model/src/bytecount.rs +++ b/nexus/db-model/src/bytecount.rs @@ -93,7 +93,7 @@ impl TryFrom for ByteCount { let mut multiplier = 1; for digit in digits.iter().rev() { - result += *digit as i64 * multiplier; + result += i64::from(*digit) * multiplier; multiplier *= 10000; } diff --git a/nexus/db-model/src/saga_types.rs b/nexus/db-model/src/saga_types.rs index bb21e803bc..3ad3e2603c 100644 --- a/nexus/db-model/src/saga_types.rs +++ b/nexus/db-model/src/saga_types.rs @@ -123,7 +123,7 @@ impl ToSql for SagaNodeId { out: &mut serialize::Output<'a, '_, Pg>, ) -> serialize::Result { // Diesel newtype -> steno type -> u32 -> i64 -> SQL - let id = u32::from(self.0) as i64; + let id = i64::from(u32::from(self.0)); >::to_sql(&id, &mut out.reborrow()) } } diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index cc5ddc50d5..28fc5de884 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -590,7 +590,7 @@ impl DataStore { attach will be safe to retry once start/stop completes" )), state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => { - if attached_count >= MAX_EXTERNAL_IPS_PLUS_SNAT as i64 { + if attached_count >= i64::from(MAX_EXTERNAL_IPS_PLUS_SNAT) { Error::invalid_request(&format!( "an instance may not have more than \ {MAX_EXTERNAL_IPS_PER_INSTANCE} external IP addresses", diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs index 670ca08960..fa3939b8ac 100644 --- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs +++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs @@ -303,7 +303,7 @@ impl DataStore { .gt(version) .or(dsl::version_removed.gt(version)), ) - .limit(limit as i64) + .limit(i64::from(limit)) .select(Ipv4NatEntry::as_select()) .load_async(&*self.pool_connection_authorized(opctx).await?) .await @@ -322,7 +322,7 @@ impl DataStore { let nat_changes = dsl::ipv4_nat_changes .filter(dsl::version.gt(version)) - .limit(limit as i64) + .limit(i64::from(limit)) .order_by(dsl::version) .select(Ipv4NatChange::as_select()) .load_async(&*self.pool_connection_authorized(opctx).await?) 
diff --git a/nexus/db-queries/src/db/datastore/region.rs b/nexus/db-queries/src/db/datastore/region.rs index 113fc51ee5..6e152cb9f2 100644 --- a/nexus/db-queries/src/db/datastore/region.rs +++ b/nexus/db-queries/src/db/datastore/region.rs @@ -112,7 +112,7 @@ impl DataStore { size: external::ByteCount, ) -> (u64, u64) { let blocks_per_extent = - Self::EXTENT_SIZE / block_size.to_bytes() as u64; + Self::EXTENT_SIZE / u64::from(block_size.to_bytes()); let size = size.to_bytes(); @@ -175,7 +175,7 @@ impl DataStore { let query = crate::db::queries::region_allocation::allocation_query( volume_id, - block_size.to_bytes() as u64, + u64::from(block_size.to_bytes()), blocks_per_extent, extent_count, allocation_strategy, @@ -378,7 +378,7 @@ mod test { // Note that i64::MAX bytes is an invalid disk size as it's not // divisible by 4096. Create the maximum sized disk here. let max_disk_size = i64::MAX - - (i64::MAX % (BlockSize::AdvancedFormat.to_bytes() as i64)); + - (i64::MAX % i64::from(BlockSize::AdvancedFormat.to_bytes())); let (blocks_per_extent, extent_count) = DataStore::get_crucible_allocation( &BlockSize::AdvancedFormat, @@ -387,16 +387,16 @@ mod test { // We should still be rounding up to the nearest extent size. assert_eq!( - extent_count as u128 * DataStore::EXTENT_SIZE as u128, + u128::from(extent_count) * u128::from(DataStore::EXTENT_SIZE), i64::MAX as u128 + 1, ); // Assert that the regions allocated will fit this disk assert!( max_disk_size as u128 - <= extent_count as u128 - * blocks_per_extent as u128 - * DataStore::EXTENT_SIZE as u128 + <= u128::from(extent_count) + * u128::from(blocks_per_extent) + * u128::from(DataStore::EXTENT_SIZE) ); } } diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs index b9bfd7697e..a837d1289b 100644 --- a/nexus/db-queries/src/db/datastore/vmm.rs +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -155,7 +155,7 @@ impl DataStore { .filter(dsl::id.eq(*vmm_id)) .set(( dsl::propolis_ip.eq(new_ip), - dsl::propolis_port.eq(new_port as i32), + dsl::propolis_port.eq(i32::from(new_port)), )) .returning(Vmm::as_returning()) .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
diff --git a/nexus/db-queries/src/db/queries/region_allocation.rs b/nexus/db-queries/src/db/queries/region_allocation.rs index a6f9dbb2ca..cc201dac30 100644 --- a/nexus/db-queries/src/db/queries/region_allocation.rs +++ b/nexus/db-queries/src/db/queries/region_allocation.rs @@ -90,7 +90,7 @@ pub fn allocation_query( .unwrap() .as_nanos() }, - |seed| seed as u128, + |seed| u128::from(seed), ), distinct_sleds, ) diff --git a/nexus/src/app/disk.rs b/nexus/src/app/disk.rs index 5dd49a2efb..2286d2f183 100644 --- a/nexus/src/app/disk.rs +++ b/nexus/src/app/disk.rs @@ -151,7 +151,7 @@ impl super::Nexus { // Reject disks where the size isn't at least // MIN_DISK_SIZE_BYTES - if params.size.to_bytes() < MIN_DISK_SIZE_BYTES as u64 { + if params.size.to_bytes() < u64::from(MIN_DISK_SIZE_BYTES) { return Err(Error::invalid_value( "size", format!( @@ -163,7 +163,7 @@ impl super::Nexus { // Reject disks where the MIN_DISK_SIZE_BYTES doesn't evenly // divide the size - if (params.size.to_bytes() % MIN_DISK_SIZE_BYTES as u64) != 0 { + if (params.size.to_bytes() % u64::from(MIN_DISK_SIZE_BYTES)) != 0 { return Err(Error::invalid_value( "size", format!( diff --git a/nexus/src/app/image.rs b/nexus/src/app/image.rs index 96a3e6b06f..03c9c9d6a4 100644 --- a/nexus/src/app/image.rs +++ b/nexus/src/app/image.rs @@ -150,7 +150,7 @@ impl super::Nexus { // allow users to boot that. This should go away when that blob // does. let db_block_size = db::model::BlockSize::Traditional; - let block_size: u64 = db_block_size.to_bytes() as u64; + let block_size: u64 = u64::from(db_block_size.to_bytes()); let image_id = Uuid::new_v4(); diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index a82a53331e..4008d33736 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -294,7 +294,7 @@ impl super::Nexus { // Reject instances where the memory is not at least // MIN_MEMORY_BYTES_PER_INSTANCE - if params.memory.to_bytes() < MIN_MEMORY_BYTES_PER_INSTANCE as u64 { + if params.memory.to_bytes() < u64::from(MIN_MEMORY_BYTES_PER_INSTANCE) { return Err(Error::invalid_value( "size", format!( @@ -306,7 +306,7 @@ impl super::Nexus { // Reject instances where the memory is not divisible by // MIN_MEMORY_BYTES_PER_INSTANCE - if (params.memory.to_bytes() % MIN_MEMORY_BYTES_PER_INSTANCE as u64) + if (params.memory.to_bytes() % u64::from(MIN_MEMORY_BYTES_PER_INSTANCE)) != 0 { return Err(Error::invalid_value( diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index e4bdd989cc..a727debbea 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -153,7 +153,7 @@ async fn sim_reserve_sled_resources( let resource = super::instance_common::reserve_vmm_resources( osagactx.nexus(), propolis_id, - params.instance.ncpus.0 .0 as u32, + u32::from(params.instance.ncpus.0 .0), params.instance.memory, constraints, ) diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 98fcec13a7..55c00d8707 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -132,7 +132,7 @@ async fn sis_alloc_server( let resource = super::instance_common::reserve_vmm_resources( osagactx.nexus(), propolis_id, - hardware_threads.0 as u32, + u32::from(hardware_threads.0), reservoir_ram, db::model::SledReservationConstraints::none(), ) diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index 6acd542061..4a1dbc8379 100644 --- 
a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -801,7 +801,7 @@ async fn test_disk_reject_total_size_not_divisible_by_block_size( // divisible by block size. assert!( disk_size.to_bytes() - < DiskTest::DEFAULT_ZPOOL_SIZE_GIB as u64 * 1024 * 1024 * 1024 + < u64::from(DiskTest::DEFAULT_ZPOOL_SIZE_GIB) * 1024 * 1024 * 1024 ); let disks_url = get_disks_url(); diff --git a/nexus/tests/integration_tests/snapshots.rs b/nexus/tests/integration_tests/snapshots.rs index 251b729f98..058c59a501 100644 --- a/nexus/tests/integration_tests/snapshots.rs +++ b/nexus/tests/integration_tests/snapshots.rs @@ -857,7 +857,7 @@ async fn test_cannot_snapshot_if_no_space(cptestctx: &ControlPlaneTestContext) { let disks_url = get_disks_url(); // Create a disk at just over half the capacity of what DiskTest allocates - let gibibytes: u64 = DiskTest::DEFAULT_ZPOOL_SIZE_GIB as u64 / 2 + 1; + let gibibytes: u64 = u64::from(DiskTest::DEFAULT_ZPOOL_SIZE_GIB) / 2 + 1; let disk_size = ByteCount::try_from(gibibytes * 1024 * 1024 * 1024).unwrap(); let base_disk_name: Name = "base-disk".parse().unwrap(); diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 3829484a27..101d1eaf33 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1289,7 +1289,7 @@ impl Into for BlockSize { impl From for u64 { fn from(bs: BlockSize) -> u64 { - bs.0 as u64 + u64::from(bs.0) } } diff --git a/oximeter/db/src/model.rs b/oximeter/db/src/model.rs index 414ad25ba7..ef2a7ad422 100644 --- a/oximeter/db/src/model.rs +++ b/oximeter/db/src/model.rs @@ -64,7 +64,7 @@ impl From for DbBool { impl From for DbBool { fn from(b: bool) -> Self { - DbBool { inner: b as _ } + DbBool { inner: u8::from(b) } } } diff --git a/oximeter/db/src/oxql/ast/grammar.rs b/oximeter/db/src/oxql/ast/grammar.rs index c9e646e58d..a644dff41d 100644 --- a/oximeter/db/src/oxql/ast/grammar.rs +++ b/oximeter/db/src/oxql/ast/grammar.rs @@ -708,7 +708,7 @@ mod tests { } assert!(query_parser::duration_literal_impl("-1m").is_err()); - let too_big: i64 = u32::MAX as i64 + 1; + let too_big: i64 = i64::from(u32::MAX) + 1; assert!(query_parser::duration_literal_impl(&format!("{too_big}s")) .is_err()); } diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index 56cf771b2a..b21edf0915 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -882,7 +882,9 @@ impl Pantry { .. 
} => ( block_size, - block_size * blocks_per_extent * (extent_count as u64), + block_size + * blocks_per_extent + * u64::from(extent_count), ), _ => { diff --git a/sled-agent/src/vmm_reservoir.rs b/sled-agent/src/vmm_reservoir.rs index caa1d88254..0fa7bc14af 100644 --- a/sled-agent/src/vmm_reservoir.rs +++ b/sled-agent/src/vmm_reservoir.rs @@ -240,7 +240,8 @@ impl VmmReservoirManager { percent ))); }; - (hardware_physical_ram_bytes as f64 * (percent as f64 / 100.0)) + (hardware_physical_ram_bytes as f64 + * (f64::from(percent) / 100.0)) .floor() as u64 } }; diff --git a/sled-hardware/types/src/underlay.rs b/sled-hardware/types/src/underlay.rs index bbeb43bd4d..ca380c08c2 100644 --- a/sled-hardware/types/src/underlay.rs +++ b/sled-hardware/types/src/underlay.rs @@ -45,9 +45,9 @@ fn mac_to_bootstrap_ip(mac: MacAddr, interface_id: u64) -> Ipv6Addr { Ipv6Addr::new( BOOTSTRAP_PREFIX, - ((mac_bytes[0] as u16) << 8) | mac_bytes[1] as u16, - ((mac_bytes[2] as u16) << 8) | mac_bytes[3] as u16, - ((mac_bytes[4] as u16) << 8) | mac_bytes[5] as u16, + (u16::from(mac_bytes[0]) << 8) | u16::from(mac_bytes[1]), + (u16::from(mac_bytes[2]) << 8) | u16::from(mac_bytes[3]), + (u16::from(mac_bytes[4]) << 8) | u16::from(mac_bytes[5]), (interface_id >> 48 & 0xffff).try_into().unwrap(), (interface_id >> 32 & 0xffff).try_into().unwrap(), (interface_id >> 16 & 0xffff).try_into().unwrap(), diff --git a/sp-sim/src/update.rs b/sp-sim/src/update.rs index 0efa730a26..2530f4ccba 100644 --- a/sp-sim/src/update.rs +++ b/sp-sim/src/update.rs @@ -89,7 +89,7 @@ impl SimSpUpdate { if chunk.id != *id || chunk.component != *component { return Err(SpError::InvalidUpdateId { sp_update_id: *id }); }; - if data.position() != chunk.offset as u64 { + if data.position() != u64::from(chunk.offset) { return Err(SpError::UpdateInProgress( self.state.to_message(), )); diff --git a/wicket/src/cli/rack_setup/config_toml.rs b/wicket/src/cli/rack_setup/config_toml.rs index f898a8ece4..e0a519dc36 100644 --- a/wicket/src/cli/rack_setup/config_toml.rs +++ b/wicket/src/cli/rack_setup/config_toml.rs @@ -273,7 +273,7 @@ fn populate_network_table( ); peer.insert( "asn", - Value::Integer(Formatted::new(p.asn as i64)), + Value::Integer(Formatted::new(i64::from(p.asn))), ); peer.insert( "port", @@ -326,9 +326,9 @@ fn populate_network_table( let mut bgp = Table::new(); bgp.insert( "asn", - Item::Value(Value::Integer(Formatted::new( - cfg.asn as i64, - ))), + Item::Value(Value::Integer(Formatted::new(i64::from( + cfg.asn, + )))), ); let mut originate = Array::new(); diff --git a/wicket/src/ui/panes/update.rs b/wicket/src/ui/panes/update.rs index c009d597c8..664c647eac 100644 --- a/wicket/src/ui/panes/update.rs +++ b/wicket/src/ui/panes/update.rs @@ -2014,8 +2014,9 @@ impl ComponentUpdateListState { progress_event.kind.progress_counter() { if let Some(total) = counter.total { - let percentage = - (counter.current as u128 * 100) / total as u128; + let percentage = (u128::from(counter.current) + * 100) + / u128::from(total); item_spans.push(Span::styled( format!("[{:>2}%] ", percentage), style::selected(), diff --git a/wicket/src/ui/widgets/popup.rs b/wicket/src/ui/widgets/popup.rs index fb8c0f1f24..8e666ab003 100644 --- a/wicket/src/ui/widgets/popup.rs +++ b/wicket/src/ui/widgets/popup.rs @@ -195,7 +195,7 @@ impl PopupScrollability for Scrollable { /// /// This is currently 80% of screen width. 
pub fn popup_max_width(full_screen_width: u16) -> u16 { - (full_screen_width as u32 * 4 / 5) as u16 + (u32::from(full_screen_width) * 4 / 5) as u16 } /// Returns the maximum width that this popup can have, not including outer @@ -210,7 +210,7 @@ pub fn popup_max_content_width(full_screen_width: u16) -> u16 { /// /// This is currently 80% of screen height. pub fn popup_max_height(full_screen_height: u16) -> u16 { - (full_screen_height as u32 * 4 / 5) as u16 + (u32::from(full_screen_height) * 4 / 5) as u16 } /// Returns the wrap options that should be used in most cases for popups. diff --git a/wicket/src/ui/widgets/rack.rs b/wicket/src/ui/widgets/rack.rs index 0ffeab6439..7aa0c7d652 100644 --- a/wicket/src/ui/widgets/rack.rs +++ b/wicket/src/ui/widgets/rack.rs @@ -318,7 +318,9 @@ fn resize(rect: Rect) -> ComponentRects { for i in [17, 18] { let shelf_rect = Rect { x: rack_rect.x, - y: rack_rect.y + sled_height * 8 + other_height * (i as u16 - 16), + y: rack_rect.y + + sled_height * 8 + + other_height * (u16::from(i) - 16), width: sled_width * 2, height: other_height, }; @@ -379,9 +381,9 @@ fn size_sled( rack.x + sled_width }; let y = if index < 16 { - rack.y + sled_height * (index as u16 / 2) + rack.y + sled_height * (u16::from(index) / 2) } else { - rack.y + sled_height * (index as u16 / 2) + other_height * 4 + rack.y + sled_height * (u16::from(index) / 2) + other_height * 4 }; let height = if (index == 30 || index == 31) && sled_height == 2 { // We saved space for a bottom border diff --git a/wicket/src/wicketd.rs b/wicket/src/wicketd.rs index a951bf428b..c0ee3d9b14 100644 --- a/wicket/src/wicketd.rs +++ b/wicket/src/wicketd.rs @@ -26,13 +26,13 @@ impl From for SpIdentifier { fn from(id: ComponentId) -> Self { match id { ComponentId::Sled(i) => { - SpIdentifier { type_: SpType::Sled, slot: i as u32 } + SpIdentifier { type_: SpType::Sled, slot: u32::from(i) } } ComponentId::Psc(i) => { - SpIdentifier { type_: SpType::Power, slot: i as u32 } + SpIdentifier { type_: SpType::Power, slot: u32::from(i) } } ComponentId::Switch(i) => { - SpIdentifier { type_: SpType::Switch, slot: i as u32 } + SpIdentifier { type_: SpType::Switch, slot: u32::from(i) } } } } diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index eec3ee5868..42853a4076 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -2159,8 +2159,8 @@ impl UpdateContext { if let Some(progress) = progress { cx.send_progress( StepProgress::with_current_and_total( - progress.current as u64, - progress.total as u64, + u64::from(progress.current), + u64::from(progress.total), // The actual units here depend on the // component being updated and are a bit // hard to explain succinctly: @@ -2194,8 +2194,8 @@ impl UpdateContext { ComponentUpdateStage::InProgress => { cx.send_progress( StepProgress::with_current_and_total( - bytes_received as u64, - total_bytes as u64, + u64::from(bytes_received), + u64::from(total_bytes), ProgressUnits::BYTES, Default::default(), ), From baf1466214afaf77a8b9f9a669dbe18fa587ade7 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 17 Apr 2024 02:01:30 +0000 Subject: [PATCH 156/334] fix(deps): update russh monorepo to 0.43.0 (#5448) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [russh](https://togithub.com/warp-tech/russh) | dependencies | minor | `0.42.0` -> 
`0.43.0` | | [russh-keys](https://togithub.com/warp-tech/russh) | dependencies | minor | `0.42.0` -> `0.43.0` | --- ### Release Notes
warp-tech/russh (russh) ### [`v0.43.0`](https://togithub.com/warp-tech/russh/releases/tag/v0.43.0) [Compare Source](https://togithub.com/warp-tech/russh/compare/v0.42.0...v0.43.0) ##### Breaking changes ##### Changes in the `Handler` traits > [`859e685`](https://togithub.com/warp-tech/russh/commit/859e685): refactor `Handler` trait to use mutable reference instead of owned variables (Alessandro Ricottone) [#​247](https://togithub.com/warp-tech/russh/pull/247) The `Handler` traits no longer take ownership of both `self` and `Session` or have to return them. These have been replaced with normal `&mut` references. You will need to update your `Handler` impls to match the new method signatures, for example: ```diff async fn channel_open_session( - self, + &mut self, channel: Channel, - session: Session, + session: &mut Session, - ) -> Result<(Self, bool, Session), Self::Error> { + ) -> Result { ... - Ok((self, true, session)) + Ok(true) } async fn auth_publickey( - self, + &mut self, _: &str, _: &key::PublicKey, - ) -> Result<(Self, server::Auth), Self::Error> { + ) -> Result { ... - Ok((self, server::Auth::Accept)) + Ok(server::Auth::Accept) } ``` ##### `russh::server::run` moved into the `Server` trait > [`a592366`](https://togithub.com/warp-tech/russh/commit/a592366): Move run and run_on_socket to Server trait (Alessandro Ricottone) [#​247](https://togithub.com/warp-tech/russh/pull/247) You'll need to replace the call to `run` with a call to `Server::run_on_address`, for example: ```diff - russh::server::run(config, ("0.0.0.0", 2222), &mut server).await?; + server.run_on_address(config, ("0.0.0.0", 2222)).await?; } ``` ##### Changes - [`1d7dab8`](https://togithub.com/warp-tech/russh/commit/1d7dab8): Better disconnect event handling (Adrian Müller) [#​255](https://togithub.com/warp-tech/russh/pull/255) - added [Handler::disconnected](https://docs.rs/russh/latest/russh/client/trait.Handler.html#method.disconnected) - [`45edb29`](https://togithub.com/warp-tech/russh/commit/45edb29): added specific error types for keepalive and inactivity timeouts - [`0fcb1ec`](https://togithub.com/warp-tech/russh/commit/0fcb1ec): Allow retrieving peer SSH Protocol Version String ([#​260](https://togithub.com/warp-tech/russh/issues/260)) (Adrian Müller (DTT)) [#​260](https://togithub.com/warp-tech/russh/pull/260) - [`5c60d30`](https://togithub.com/warp-tech/russh/commit/5c60d30): Actually process global request results (Adrian Müller) [#​250](https://togithub.com/warp-tech/russh/pull/250) - [`dcbe4ba`](https://togithub.com/warp-tech/russh/commit/dcbe4ba): update examples to new APIs (Alessandro Ricottone) [#​249](https://togithub.com/warp-tech/russh/pull/249) ##### Fixes - [`62366e9`](https://togithub.com/warp-tech/russh/commit/62366e9): [#​259](https://togithub.com/warp-tech/russh/issues/259), [#​245](https://togithub.com/warp-tech/russh/issues/245), ref [#​227](https://togithub.com/warp-tech/russh/issues/227) - fixed host key algo selection when `Preferred::key` and the available host keys don't match ([#​262](https://togithub.com/warp-tech/russh/issues/262)) [#​262](https://togithub.com/warp-tech/russh/pull/262)
--- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about these updates again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). --------- Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> Co-authored-by: iliana etaoin --- Cargo.lock | 8 ++++---- end-to-end-tests/Cargo.toml | 4 ++-- end-to-end-tests/src/instance_launch.rs | 7 +++---- 3 files changed, 9 insertions(+), 10 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e9ff658f80..f02f0e2b83 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7785,9 +7785,9 @@ dependencies = [ [[package]] name = "russh" -version = "0.42.0" +version = "0.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "394cc2733c5b5ca9f342d9532b78599849633ccabdbf40f1af094cacf4d86b62" +checksum = "1c9534703dc13be1eefc5708618f4c346da8e4f04f260218613f351ed5e94259" dependencies = [ "aes", "aes-gcm", @@ -7830,9 +7830,9 @@ dependencies = [ [[package]] name = "russh-keys" -version = "0.42.0" +version = "0.43.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e98aa03d476f8d2bf6e4525291c1eb8e22f4ae9653d7a5458fd53cb0191c741" +checksum = "aa4a5afa2fab6fd49d0c470a3b75c3c70a4f363c38db32df5ae3b44a3abf5ab9" dependencies = [ "aes", "async-trait", diff --git a/end-to-end-tests/Cargo.toml b/end-to-end-tests/Cargo.toml index 0fb9efd5cc..754d85512c 100644 --- a/end-to-end-tests/Cargo.toml +++ b/end-to-end-tests/Cargo.toml @@ -16,8 +16,8 @@ omicron-test-utils.workspace = true oxide-client.workspace = true rand.workspace = true reqwest = { workspace = true, features = ["cookies"] } -russh = "0.42.0" -russh-keys = "0.42.0" +russh = "0.43.0" +russh-keys = "0.43.0" serde.workspace = true serde_json.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index 377fef4c0b..be30f89492 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -303,10 +303,9 @@ impl russh::client::Handler for SshClient { type Error = anyhow::Error; async fn check_server_key( - self, + &mut self, server_public_key: &PublicKey, - ) -> Result<(Self, bool), Self::Error> { - let b = &self.host_key == server_public_key; - Ok((self, b)) + ) -> Result { + Ok(&self.host_key == server_public_key) } } From b596c9b5e36bfee07279d02bcb111099178ecb91 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 17 Apr 2024 04:34:28 +0000 Subject: [PATCH 157/334] chore(deps): update taiki-e/install-action digest to 37b71c3 (#5547) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`5c256d5` -> `37b71c3`](https://togithub.com/taiki-e/install-action/compare/5c256d5...37b71c3) | --- ### Configuration 📅 **Schedule**: Branch creation 
- "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 05e25dc9dc..fde2e9139b 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@5c256d5a578917d032b8adcd9802cfa432265631 # v2 + uses: taiki-e/install-action@37b71c39b208369698511b6530dcb4b7d141be64 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 64fcc162c89add645c3c33f9410dcd195e5ae00e Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 17 Apr 2024 14:12:17 -0400 Subject: [PATCH 158/334] Separate `BlueprintZoneConfig` and `OmicronZoneConfig` (#5523) This PR should make no changes to behavior and is solely a beefy refactor (although the bulk of the diff volume is just in the openapi spec; the actual changes are closer to +1000,-500). The motivating case here is that `OmicronZoneConfig` (and particularly `OmicronZoneType`) does not have all the information Reconfigurator needs to clean up expunged zones; in particular, we need the CRDB ID of external IP addresses to delete them, and `OmicronZoneType` only stores the raw IP address. In this case we could clean this up by querying by IP to get the ID and then delete the ID, but the plan has always been that these types would diverge over time. We already passed that line when we added `disposition`, but we didn't really separate the types at that point. This ended up a little messier than I expected, because there are several places that want to construct a `BlueprintZoneConfig` from an `OmicronZoneConfig`+disposition. The "+ disposition" will grow to "+ disposition + external IP ID" soon, which may make this conversion unwieldy enough to consider doing something else in all those places? I'm not sure about this and plan to cross that bridge over the next day or two. 
--- Cargo.lock | 2 + dev-tools/omdb/src/bin/omdb/db.rs | 33 +- dev-tools/reconfigurator-cli/src/main.rs | 25 +- nexus/db-model/src/deployment.rs | 21 +- nexus/db-model/src/inventory.rs | 14 +- nexus/db-model/src/omicron_zone_config.rs | 68 +- .../db-queries/src/db/datastore/deployment.rs | 4 +- nexus/db-queries/src/db/datastore/rack.rs | 121 +- nexus/db-queries/src/db/datastore/vpc.rs | 60 +- .../reconfigurator/execution/src/datasets.rs | 76 +- nexus/reconfigurator/execution/src/dns.rs | 171 +- .../execution/src/omicron_zones.rs | 42 +- .../execution/src/resource_allocation.rs | 274 +- .../planning/src/blueprint_builder.rs | 170 +- nexus/reconfigurator/planning/src/example.rs | 9 +- nexus/reconfigurator/planning/src/planner.rs | 47 +- .../src/app/background/blueprint_execution.rs | 26 +- nexus/src/app/rack.rs | 6 +- nexus/src/lib.rs | 21 +- nexus/test-utils/src/lib.rs | 11 +- nexus/types/Cargo.toml | 2 + nexus/types/src/deployment.rs | 386 ++- nexus/types/src/deployment/zone_type.rs | 334 ++ openapi/nexus-internal.json | 2693 ++++++++--------- sled-agent/src/rack_setup/service.rs | 71 +- sled-agent/src/sim/server.rs | 3 +- 26 files changed, 2645 insertions(+), 2045 deletions(-) create mode 100644 nexus/types/src/deployment/zone_type.rs diff --git a/Cargo.lock b/Cargo.lock index f02f0e2b83..fc532cd19c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4911,6 +4911,8 @@ dependencies = [ "serde_json", "serde_with", "sled-agent-client", + "slog", + "slog-error-chain", "steno", "strum", "tabled", diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index ba2c8aea09..e1fd4387c6 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -80,7 +80,7 @@ use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; -use nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsRecord; use nexus_types::internal_api::params::Srv; @@ -92,6 +92,7 @@ use omicron_common::api::external::Generation; use omicron_common::api::external::InstanceState; use omicron_common::api::external::MacAddr; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::GenericUuid; use sled_agent_client::types::VolumeConstructionRequest; use std::borrow::Cow; use std::cmp::Ordering; @@ -667,9 +668,9 @@ async fn lookup_service_info( blueprint: &Blueprint, ) -> anyhow::Result> { let Some(zone_config) = blueprint - .all_blueprint_zones(BlueprintZoneFilter::All) + .all_omicron_zones(BlueprintZoneFilter::All) .find_map(|(_sled_id, zone_config)| { - if zone_config.config.id == service_id { + if zone_config.id.into_untyped_uuid() == service_id { Some(zone_config) } else { None @@ -679,20 +680,18 @@ async fn lookup_service_info( return Ok(None); }; - let service_kind = match &zone_config.config.zone_type { - OmicronZoneType::BoundaryNtp { .. } - | OmicronZoneType::InternalNtp { .. } => ServiceKind::Ntp, - OmicronZoneType::Clickhouse { .. } => ServiceKind::Clickhouse, - OmicronZoneType::ClickhouseKeeper { .. } => { - ServiceKind::ClickhouseKeeper - } - OmicronZoneType::CockroachDb { .. } => ServiceKind::Cockroach, - OmicronZoneType::Crucible { .. } => ServiceKind::Crucible, - OmicronZoneType::CruciblePantry { .. } => ServiceKind::CruciblePantry, - OmicronZoneType::ExternalDns { .. } => ServiceKind::ExternalDns, - OmicronZoneType::InternalDns { .. 
} => ServiceKind::InternalDns, - OmicronZoneType::Nexus { .. } => ServiceKind::Nexus, - OmicronZoneType::Oximeter { .. } => ServiceKind::Oximeter, + let service_kind = match &zone_config.zone_type { + BlueprintZoneType::BoundaryNtp(_) + | BlueprintZoneType::InternalNtp(_) => ServiceKind::Ntp, + BlueprintZoneType::Clickhouse(_) => ServiceKind::Clickhouse, + BlueprintZoneType::ClickhouseKeeper(_) => ServiceKind::ClickhouseKeeper, + BlueprintZoneType::CockroachDb(_) => ServiceKind::Cockroach, + BlueprintZoneType::Crucible(_) => ServiceKind::Crucible, + BlueprintZoneType::CruciblePantry(_) => ServiceKind::CruciblePantry, + BlueprintZoneType::ExternalDns(_) => ServiceKind::ExternalDns, + BlueprintZoneType::InternalDns(_) => ServiceKind::InternalDns, + BlueprintZoneType::Nexus(_) => ServiceKind::Nexus, + BlueprintZoneType::Oximeter(_) => ServiceKind::Oximeter, }; Ok(Some(ServiceInfo { service_kind, disposition: zone_config.disposition })) diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 94ff6d77bd..abf8cf4441 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -34,8 +34,6 @@ use omicron_common::api::external::Generation; use omicron_common::api::external::Name; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::ExternalIpUuid; -use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use reedline::{Reedline, Signal}; use std::cell::RefCell; @@ -153,8 +151,7 @@ impl ReconfiguratorSim { for (_, zone) in parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) { - let zone_id = OmicronZoneUuid::from_untyped_uuid(zone.id); - if let Ok(Some(ip)) = zone.zone_type.external_ip() { + if let Some(ip) = zone.zone_type.external_ip() { let external_ip = OmicronZoneExternalIp { id: *self .external_ips @@ -164,10 +161,10 @@ impl ReconfiguratorSim { ip, }; builder - .add_omicron_zone_external_ip(zone_id, external_ip) + .add_omicron_zone_external_ip(zone.id, external_ip) .context("adding omicron zone external IP")?; } - if let Some(nic) = zone.zone_type.service_vnic() { + if let Some(nic) = zone.zone_type.opte_vnic() { let nic = OmicronZoneNic { id: nic.id, mac: nic.mac, @@ -176,7 +173,7 @@ impl ReconfiguratorSim { primary: nic.primary, }; builder - .add_omicron_zone_nic(zone_id, nic) + .add_omicron_zone_nic(zone.id, nic) .context("adding omicron zone NIC")?; } } @@ -851,12 +848,12 @@ fn cmd_blueprint_diff( &blueprint1, &sleds_by_id, &Default::default(), - )?; + ); let internal_dns_config2 = blueprint_internal_dns_config( &blueprint2, &sleds_by_id, &Default::default(), - )?; + ); let dns_diff = DnsDiff::new(&internal_dns_config1, &internal_dns_config2) .context("failed to assemble DNS diff")?; swriteln!(rv, "internal DNS:\n{}", dns_diff); @@ -927,19 +924,13 @@ fn cmd_blueprint_diff_dns( CliDnsGroup::Internal => { let sleds_by_id = make_sleds_by_id(sim)?; blueprint_internal_dns_config( - &blueprint, + blueprint, &sleds_by_id, &Default::default(), ) - .with_context(|| { - format!( - "computing internal DNS config for blueprint {}", - blueprint_id - ) - })? 
} CliDnsGroup::External => blueprint_external_dns_config( - &blueprint, + blueprint, &sim.silo_names, sim.external_dns_zone_name.clone(), ), diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs index 1046da18f6..f6500d198b 100644 --- a/nexus/db-model/src/deployment.rs +++ b/nexus/db-model/src/deployment.rs @@ -15,6 +15,7 @@ use crate::typed_uuid::DbTypedUuid; use crate::{ impl_enum_type, ipv6, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8, }; +use anyhow::Context; use chrono::{DateTime, Utc}; use ipnetwork::IpNetwork; use nexus_types::deployment::BlueprintPhysicalDiskConfig; @@ -230,7 +231,12 @@ impl BpOmicronZone { sled_id: SledUuid, blueprint_zone: &BlueprintZoneConfig, ) -> Result { - let zone = OmicronZone::new(sled_id, &blueprint_zone.config)?; + let zone = OmicronZone::new( + sled_id, + blueprint_zone.id.into_untyped_uuid(), + blueprint_zone.underlay_address, + &blueprint_zone.zone_type.clone().into(), + )?; Ok(Self { blueprint_id, sled_id: zone.sled_id.into(), @@ -285,7 +291,11 @@ impl BpOmicronZone { }; let config = zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from))?; - Ok(BlueprintZoneConfig { config, disposition: self.disposition.into() }) + BlueprintZoneConfig::from_omicron_zone_config( + config, + self.disposition.into(), + ) + .context("failed to convert OmicronZoneConfig") } } @@ -368,8 +378,11 @@ impl BpOmicronZoneNic { blueprint_id: Uuid, zone: &BlueprintZoneConfig, ) -> Result, anyhow::Error> { - let zone_nic = OmicronZoneNic::new(&zone.config)?; - Ok(zone_nic.map(|nic| Self { + let Some(nic) = zone.zone_type.opte_vnic() else { + return Ok(None); + }; + let nic = OmicronZoneNic::new(zone.id.into_untyped_uuid(), nic)?; + Ok(Some(Self { blueprint_id, id: nic.id, name: nic.name, diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 1a993df49f..59a6b29dc8 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -882,7 +882,12 @@ impl InvOmicronZone { sled_id: SledUuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result { - let zone = OmicronZone::new(sled_id, zone)?; + let zone = OmicronZone::new( + sled_id, + zone.id, + zone.underlay_address, + &zone.zone_type, + )?; Ok(Self { inv_collection_id: inv_collection_id.into(), sled_id: zone.sled_id.into(), @@ -972,8 +977,11 @@ impl InvOmicronZoneNic { inv_collection_id: CollectionUuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result, anyhow::Error> { - let zone_nic = OmicronZoneNic::new(zone)?; - Ok(zone_nic.map(|nic| Self { + let Some(nic) = zone.zone_type.service_vnic() else { + return Ok(None); + }; + let nic = OmicronZoneNic::new(zone.id, nic)?; + Ok(Some(Self { inv_collection_id: inv_collection_id.into(), id: nic.id, name: nic.name, diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs index b0fd3356fe..1310d553d2 100644 --- a/nexus/db-model/src/omicron_zone_config.rs +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -11,7 +11,7 @@ //! collecting extra metadata like uptime). This module provides conversion //! helpers for the parts of those tables that are common between the two. 
-use std::net::SocketAddrV6; +use std::net::{Ipv6Addr, SocketAddrV6}; use crate::inventory::ZoneType; use crate::{ipv6, MacAddr, Name, SqlU16, SqlU32, SqlU8}; @@ -51,10 +51,12 @@ pub(crate) struct OmicronZone { impl OmicronZone { pub(crate) fn new( sled_id: SledUuid, - zone: &nexus_types::inventory::OmicronZoneConfig, + zone_id: Uuid, + zone_underlay_address: Ipv6Addr, + zone_type: &nexus_types::inventory::OmicronZoneType, ) -> anyhow::Result { - let id = zone.id; - let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address); + let id = zone_id; + let underlay_address = ipv6::Ipv6Addr::from(zone_underlay_address); let mut nic_id = None; let mut dns_gz_address = None; let mut dns_gz_address_index = None; @@ -69,8 +71,7 @@ impl OmicronZone { let mut second_service_ip = None; let mut second_service_port = None; - let (zone_type, primary_service_sockaddr_str, dataset) = match &zone - .zone_type + let (zone_type, primary_service_sockaddr_str, dataset) = match zone_type { OmicronZoneType::BoundaryNtp { address, @@ -402,38 +403,31 @@ pub(crate) struct OmicronZoneNic { impl OmicronZoneNic { pub(crate) fn new( - zone: &nexus_types::inventory::OmicronZoneConfig, - ) -> anyhow::Result> { - match &zone.zone_type { - OmicronZoneType::ExternalDns { nic, .. } - | OmicronZoneType::BoundaryNtp { nic, .. } - | OmicronZoneType::Nexus { nic, .. } => { - // We do not bother storing the NIC's kind and associated id - // because it should be inferrable from the other information - // that we have. Verify that here. - ensure!( - matches!( - nic.kind, - NetworkInterfaceKind::Service{ id } if id == zone.id - ), - "expected zone's NIC kind to be \"service\" and the \ - id to match the zone's id ({})", - zone.id - ); + zone_id: Uuid, + nic: &nexus_types::inventory::NetworkInterface, + ) -> anyhow::Result { + // We do not bother storing the NIC's kind and associated id + // because it should be inferrable from the other information + // that we have. Verify that here. 
+ ensure!( + matches!( + nic.kind, + NetworkInterfaceKind::Service{ id } if id == zone_id + ), + "expected zone's NIC kind to be \"service\" and the \ + id to match the zone's id ({zone_id})", + ); - Ok(Some(Self { - id: nic.id, - name: Name::from(nic.name.clone()), - ip: IpNetwork::from(nic.ip), - mac: MacAddr::from(nic.mac), - subnet: IpNetwork::from(nic.subnet), - vni: SqlU32::from(u32::from(nic.vni)), - is_primary: nic.primary, - slot: SqlU8::from(nic.slot), - })) - } - _ => Ok(None), - } + Ok(Self { + id: nic.id, + name: Name::from(nic.name.clone()), + ip: IpNetwork::from(nic.ip), + mac: MacAddr::from(nic.mac), + subnet: IpNetwork::from(nic.subnet), + vni: SqlU32::from(u32::from(nic.vni)), + is_primary: nic.primary, + slot: SqlU8::from(nic.slot), + }) } pub(crate) fn into_network_interface_for_zone( diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index d31428e319..4d5b753c7f 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -164,7 +164,7 @@ impl DataStore { .flat_map(|zones_config| { zones_config.zones.iter().filter_map(|zone| { BpOmicronZoneNic::new(blueprint_id, zone) - .with_context(|| format!("zone {:?}", zone.config.id)) + .with_context(|| format!("zone {}", zone.id)) .map_err(|e| Error::internal_error(&format!("{:#}", e))) .transpose() }) @@ -2020,7 +2020,7 @@ mod tests { fn assert_all_zones_in_service(blueprint: &Blueprint) { let not_in_service = blueprint - .all_blueprint_zones(BlueprintZoneFilter::All) + .all_omicron_zones(BlueprintZoneFilter::All) .filter(|(_, z)| { z.disposition != BlueprintZoneDisposition::InService }) diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 3dff04cc11..7beb957917 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -41,11 +41,12 @@ use nexus_db_model::PasswordHashString; use nexus_db_model::SiloUser; use nexus_db_model::SiloUserPasswordHash; use nexus_db_model::SledUnderlaySubnetAllocation; +use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintTarget; +use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneFilter; -use nexus_types::deployment::OmicronZoneConfig; -use nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::external_api::params as external_params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::IdentityType; @@ -60,6 +61,7 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; use omicron_common::bail_unless; +use omicron_uuid_kinds::GenericUuid; use slog_error_chain::InlineErrorChain; use std::sync::{Arc, OnceLock}; use uuid::Uuid; @@ -464,56 +466,58 @@ impl DataStore { conn: &async_bb8_diesel::Connection, log: &slog::Logger, service_pool: &db::model::IpPool, - zone_config: &OmicronZoneConfig, + zone_config: &BlueprintZoneConfig, ) -> Result<(), RackInitError> { // For services with external connectivity, we record their // explicit IP allocation and create a service NIC as well. let zone_type = &zone_config.zone_type; let service_ip_nic = match zone_type { - OmicronZoneType::ExternalDns { nic, .. } - | OmicronZoneType::Nexus { nic, .. 
} => { + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { nic, dns_address, .. }, + ) => { + let external_ip = dns_address.ip(); let service_kind = format!("{}", zone_type.kind()); - let external_ip = match zone_type.external_ip() { - Ok(Some(ip)) => ip, - Ok(None) => { - let message = format!( - "missing external IP in blueprint for {} zone {}", - service_kind, zone_config.id - ); - return Err(RackInitError::AddingNic( - Error::internal_error(&message), - )); - } - Err(err) => { - let message = format!( - "error parsing external IP in blueprint for \ - {} zone {}: {err:#}", - service_kind, zone_config.id - ); - return Err(RackInitError::AddingNic( - Error::internal_error(&message), - )); - } - }; let db_ip = IncompleteExternalIp::for_service_explicit( Uuid::new_v4(), &db::model::Name(nic.name.clone()), &service_kind, - zone_config.id, + zone_config.id.into_untyped_uuid(), service_pool.id(), external_ip, ); - let vpc_subnet = match zone_type { - OmicronZoneType::ExternalDns { .. } => { - DNS_VPC_SUBNET.clone() - } - OmicronZoneType::Nexus { .. } => NEXUS_VPC_SUBNET.clone(), - _ => unreachable!(), - }; let db_nic = IncompleteNetworkInterface::new_service( nic.id, - zone_config.id, - vpc_subnet, + zone_config.id.into_untyped_uuid(), + DNS_VPC_SUBNET.clone(), + IdentityMetadataCreateParams { + name: nic.name.clone(), + description: format!("{service_kind} service vNIC"), + }, + nic.ip, + nic.mac, + nic.slot, + ) + .map_err(|e| RackInitError::AddingNic(e))?; + Some((db_ip, db_nic)) + } + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + external_ip, + .. + }) => { + let service_kind = format!("{}", zone_type.kind()); + let db_ip = IncompleteExternalIp::for_service_explicit( + Uuid::new_v4(), + &db::model::Name(nic.name.clone()), + &service_kind, + zone_config.id.into_untyped_uuid(), + service_pool.id(), + *external_ip, + ); + let db_nic = IncompleteNetworkInterface::new_service( + nic.id, + zone_config.id.into_untyped_uuid(), + NEXUS_VPC_SUBNET.clone(), IdentityMetadataCreateParams { name: nic.name.clone(), description: format!("{service_kind} service vNIC"), @@ -525,17 +529,19 @@ impl DataStore { .map_err(|e| RackInitError::AddingNic(e))?; Some((db_ip, db_nic)) } - OmicronZoneType::BoundaryNtp { snat_cfg, ref nic, .. } => { + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { snat_cfg, nic, .. }, + ) => { let db_ip = IncompleteExternalIp::for_service_explicit_snat( Uuid::new_v4(), - zone_config.id, + zone_config.id.into_untyped_uuid(), service_pool.id(), snat_cfg.ip, (snat_cfg.first_port, snat_cfg.last_port), ); let db_nic = IncompleteNetworkInterface::new_service( nic.id, - zone_config.id, + zone_config.id.into_untyped_uuid(), NTP_VPC_SUBNET.clone(), IdentityMetadataCreateParams { name: nic.name.clone(), @@ -551,14 +557,14 @@ impl DataStore { .map_err(|e| RackInitError::AddingNic(e))?; Some((db_ip, db_nic)) } - OmicronZoneType::InternalNtp { .. } - | OmicronZoneType::Clickhouse { .. } - | OmicronZoneType::ClickhouseKeeper { .. } - | OmicronZoneType::CockroachDb { .. } - | OmicronZoneType::Crucible { .. } - | OmicronZoneType::CruciblePantry { .. } - | OmicronZoneType::InternalDns { .. } - | OmicronZoneType::Oximeter { .. 
} => None, + BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::Oximeter(_) => None, }; let Some((db_ip, db_nic)) = service_ip_nic else { info!( @@ -964,6 +970,7 @@ mod test { use nexus_types::internal_api::params::DnsRecord; use nexus_types::inventory::NetworkInterface; use nexus_types::inventory::NetworkInterfaceKind; + use nexus_types::inventory::OmicronZoneType; use omicron_common::address::{ DNS_OPTE_IPV4_SUBNET, NEXUS_OPTE_IPV4_SUBNET, NTP_OPTE_IPV4_SUBNET, }; @@ -1755,8 +1762,13 @@ mod test { // The address allocated for the service should match the input. assert_eq!( - observed_external_ips[&observed_zones[0].id].ip.ip(), - if let OmicronZoneType::Nexus { external_ip, .. } = &blueprint + observed_external_ips[observed_zones[0].id.as_untyped_uuid()] + .ip + .ip(), + if let BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + .. + }) = &blueprint .all_omicron_zones(BlueprintZoneFilter::All) .next() .unwrap() @@ -1769,8 +1781,13 @@ mod test { } ); assert_eq!( - observed_external_ips[&observed_zones[1].id].ip.ip(), - if let OmicronZoneType::Nexus { external_ip, .. } = &blueprint + observed_external_ips[observed_zones[1].id.as_untyped_uuid()] + .ip + .ip(), + if let BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + .. + }) = &blueprint .all_omicron_zones(BlueprintZoneFilter::All) .nth(1) .unwrap() diff --git a/nexus/db-queries/src/db/datastore/vpc.rs b/nexus/db-queries/src/db/datastore/vpc.rs index 079f52ba8c..69d9993f86 100644 --- a/nexus/db-queries/src/db/datastore/vpc.rs +++ b/nexus/db-queries/src/db/datastore/vpc.rs @@ -1236,13 +1236,13 @@ mod tests { use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_db_model::SledUpdate; use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; + use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::BlueprintZonesConfig; - use nexus_types::deployment::OmicronZoneConfig; - use nexus_types::deployment::OmicronZoneType; use nexus_types::external_api::params; use nexus_types::identity::Asset; use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; @@ -1255,6 +1255,7 @@ mod tests { use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_test_utils::dev; use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use slog::info; use std::collections::BTreeMap; @@ -1488,7 +1489,7 @@ mod tests { #[derive(Debug)] struct HarnessNexus { sled_id: SledUuid, - id: Uuid, + id: OmicronZoneUuid, ip: IpAddr, mac: MacAddr, nic_id: Uuid, @@ -1510,7 +1511,7 @@ mod tests { .copied() .map(|sled_id| HarnessNexus { sled_id, - id: Uuid::new_v4(), + id: OmicronZoneUuid::new_v4(), ip: nexus_ips.next().unwrap(), mac: nexus_macs.next().unwrap(), nic_id: Uuid::new_v4(), @@ -1540,7 +1541,7 @@ mod tests { let name = format!("test-nexus-{}", nexus.id); db::model::IncompleteNetworkInterface::new_service( nexus.nic_id, - nexus.id, + nexus.id.into_untyped_uuid(), NEXUS_VPC_SUBNET.clone(), IdentityMetadataCreateParams { name: name.parse().unwrap(), @@ -1558,36 +1559,35 @@ mod 
tests { &self, ) -> impl Iterator + '_ { self.nexuses.iter().zip(self.db_nics()).map(|(nexus, nic)| { - let config = OmicronZoneConfig { + let config = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id: nexus.id, underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::Nexus { - internal_address: "[::1]:0".to_string(), - external_ip: "::1".parse().unwrap(), - nic: NetworkInterface { - id: nic.identity.id, - kind: NetworkInterfaceKind::Service { - id: nexus.id, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: "[::1]:0".parse().unwrap(), + external_ip: "::1".parse().unwrap(), + nic: NetworkInterface { + id: nic.identity.id, + kind: NetworkInterfaceKind::Service { + id: nexus.id.into_untyped_uuid(), + }, + name: format!("test-nic-{}", nic.identity.id) + .parse() + .unwrap(), + ip: nic.ip.unwrap(), + mac: nic.mac.unwrap(), + subnet: IpNet::from(*NEXUS_OPTE_IPV4_SUBNET), + vni: Vni::SERVICES_VNI, + primary: true, + slot: nic.slot.unwrap(), }, - name: format!("test-nic-{}", nic.identity.id) - .parse() - .unwrap(), - ip: nic.ip.unwrap(), - mac: nic.mac.unwrap(), - subnet: IpNet::from(*NEXUS_OPTE_IPV4_SUBNET), - vni: Vni::SERVICES_VNI, - primary: true, - slot: nic.slot.unwrap(), + external_tls: false, + external_dns_servers: Vec::new(), }, - external_tls: false, - external_dns_servers: Vec::new(), - }, - }; - let zone_config = BlueprintZoneConfig { - config, - disposition: BlueprintZoneDisposition::InService, + ), }; - (nexus.sled_id.into_untyped_uuid(), zone_config) + (nexus.sled_id.into_untyped_uuid(), config) }) } } diff --git a/nexus/reconfigurator/execution/src/datasets.rs b/nexus/reconfigurator/execution/src/datasets.rs index c22a56b1b4..9dd3da50df 100644 --- a/nexus/reconfigurator/execution/src/datasets.rs +++ b/nexus/reconfigurator/execution/src/datasets.rs @@ -10,15 +10,15 @@ use nexus_db_model::Dataset; use nexus_db_model::DatasetKind; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; -use nexus_types::deployment::OmicronZoneConfig; -use nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::blueprint_zone_type; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::identity::Asset; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use slog::info; use slog::warn; -use slog_error_chain::InlineErrorChain; use std::collections::BTreeSet; -use std::net::SocketAddrV6; /// For each crucible zone in `all_omicron_zones`, ensure that a corresponding /// dataset record exists in `datastore` @@ -28,7 +28,7 @@ use std::net::SocketAddrV6; pub(crate) async fn ensure_crucible_dataset_records_exist( opctx: &OpContext, datastore: &DataStore, - all_omicron_zones: impl Iterator, + all_omicron_zones: impl Iterator, ) -> anyhow::Result { // Before attempting to insert any datasets, first query for any existing // dataset records so we can filter them out. This looks like a typical @@ -45,14 +45,17 @@ pub(crate) async fn ensure_crucible_dataset_records_exist( .await .context("failed to list all datasets")? 
.into_iter() - .map(|dataset| dataset.id()) + .map(|dataset| OmicronZoneUuid::from_untyped_uuid(dataset.id())) .collect::>(); let mut num_inserted = 0; let mut num_already_exist = 0; for zone in all_omicron_zones { - let OmicronZoneType::Crucible { address, dataset } = &zone.zone_type + let BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + address, + dataset, + }) = &zone.zone_type else { continue; }; @@ -67,17 +70,6 @@ pub(crate) async fn ensure_crucible_dataset_records_exist( // Map progenitor client strings into the types we need. We never // expect these to fail. - let addr: SocketAddrV6 = match address.parse() { - Ok(addr) => addr, - Err(err) => { - warn!( - opctx.log, "failed to parse crucible zone address"; - "address" => address, - "err" => InlineErrorChain::new(&err), - ); - continue; - } - }; let zpool_name: ZpoolName = match dataset.pool_name.parse() { Ok(name) => name, Err(err) => { @@ -92,9 +84,9 @@ pub(crate) async fn ensure_crucible_dataset_records_exist( let pool_id = zpool_name.id(); let dataset = Dataset::new( - id, + id.into_untyped_uuid(), pool_id.into_untyped_uuid(), - addr, + *address, DatasetKind::Crucible, ); let maybe_inserted = datastore @@ -150,9 +142,11 @@ mod tests { use nexus_db_model::SledUpdate; use nexus_db_model::Zpool; use nexus_test_utils_macros::nexus_test; + use nexus_types::deployment::BlueprintZoneDisposition; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::ZpoolUuid; use sled_agent_client::types::OmicronZoneDataset; + use sled_agent_client::types::OmicronZoneType; use uuid::Uuid; type ControlPlaneTestContext = @@ -231,10 +225,23 @@ mod tests { .len(), 0 ); + + // Convert the collection zones into blueprint zones. + let all_omicron_zones = collection + .all_omicron_zones() + .map(|z| { + BlueprintZoneConfig::from_omicron_zone_config( + z.clone(), + BlueprintZoneDisposition::InService, + ) + .expect("failed to convert to blueprint zone config") + }) + .collect::>(); + let ndatasets_inserted = ensure_crucible_dataset_records_exist( opctx, datastore, - collection.all_omicron_zones(), + all_omicron_zones.iter(), ) .await .expect("failed to ensure crucible datasets"); @@ -255,7 +262,7 @@ mod tests { let ndatasets_inserted = ensure_crucible_dataset_records_exist( opctx, datastore, - collection.all_omicron_zones(), + all_omicron_zones.iter(), ) .await .expect("failed to ensure crucible datasets"); @@ -286,23 +293,26 @@ mod tests { // Call `ensure_crucible_dataset_records_exist` again, adding a new // crucible zone. It should insert only this new zone. 
- let new_zone = OmicronZoneConfig { - id: Uuid::new_v4(), + let new_zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: OmicronZoneUuid::new_v4(), underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::Crucible { - address: "[::1]:0".to_string(), - dataset: OmicronZoneDataset { - pool_name: ZpoolName::new_external(new_zpool_id) - .to_string() - .parse() - .unwrap(), + zone_type: BlueprintZoneType::Crucible( + blueprint_zone_type::Crucible { + address: "[::1]:0".parse().unwrap(), + dataset: OmicronZoneDataset { + pool_name: ZpoolName::new_external(new_zpool_id) + .to_string() + .parse() + .unwrap(), + }, }, - }, + ), }; let ndatasets_inserted = ensure_crucible_dataset_records_exist( opctx, datastore, - collection.all_omicron_zones().chain(std::iter::once(&new_zone)), + all_omicron_zones.iter().chain(std::iter::once(&new_zone)), ) .await .expect("failed to ensure crucible datasets"); diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 5165dcf3ea..79eb86fe09 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -6,7 +6,6 @@ use crate::overridables::Overridables; use crate::Sled; -use anyhow::Context; use dns_service_client::DnsDiff; use internal_dns::DnsConfigBuilder; use internal_dns::ServiceName; @@ -16,9 +15,10 @@ use nexus_db_queries::db::datastore::Discoverability; use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; use nexus_db_queries::db::fixed_data::silo::DEFAULT_SILO; use nexus_db_queries::db::DataStore; +use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; -use nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::internal_api::params::DnsConfigZone; @@ -28,14 +28,11 @@ use omicron_common::api::external::Generation; use omicron_common::api::external::InternalContext; use omicron_common::api::external::Name; use omicron_common::bail_unless; -use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use slog::{debug, info, o}; use std::collections::BTreeMap; use std::collections::HashMap; use std::net::IpAddr; -use std::net::SocketAddrV6; pub(crate) async fn deploy_dns( opctx: &OpContext, @@ -63,13 +60,7 @@ pub(crate) async fn deploy_dns( // Next, construct the DNS config represented by the blueprint. let internal_dns_zone_blueprint = - blueprint_internal_dns_config(blueprint, sleds_by_id, overrides) - .map_err(|e| { - Error::internal_error(&format!( - "error constructing internal DNS config: {:#}", - e - )) - })?; + blueprint_internal_dns_config(blueprint, sleds_by_id, overrides); let silos = datastore .silo_list_all_batched(opctx, Discoverability::All) .await @@ -259,7 +250,7 @@ pub fn blueprint_internal_dns_config( blueprint: &Blueprint, sleds_by_id: &BTreeMap, overrides: &Overridables, -) -> Result { +) -> DnsConfigZone { // The DNS names configured here should match what RSS configures for the // same zones. It's tricky to have RSS share the same code because it uses // Sled Agent's _internal_ `OmicronZoneConfig` (and friends), whereas we're @@ -268,86 +259,53 @@ pub fn blueprint_internal_dns_config( // the details. 
let mut dns_builder = DnsConfigBuilder::new(); - // It's annoying that we have to parse this because it really should be - // valid already. See oxidecomputer/omicron#4988. - fn parse_port(address: &str) -> Result { - address - .parse::() - .with_context(|| format!("parsing socket address {:?}", address)) - .map(|addr| addr.port()) - } - - for (_, zone) in blueprint - .all_blueprint_zones(BlueprintZoneFilter::ShouldBeInInternalDns) + for (_, zone) in + blueprint.all_omicron_zones(BlueprintZoneFilter::ShouldBeInInternalDns) { - let context = || { - format!( - "parsing {} zone with id {}", - zone.config.zone_type.kind(), - zone.config.id - ) - }; - - let (service_name, port) = match &zone.config.zone_type { - OmicronZoneType::BoundaryNtp { address, .. } => { - let port = parse_port(&address).with_context(context)?; - (ServiceName::BoundaryNtp, port) - } - OmicronZoneType::InternalNtp { address, .. } => { - let port = parse_port(&address).with_context(context)?; - (ServiceName::InternalNtp, port) - } - OmicronZoneType::Clickhouse { address, .. } => { - let port = parse_port(&address).with_context(context)?; - (ServiceName::Clickhouse, port) - } - OmicronZoneType::ClickhouseKeeper { address, .. } => { - let port = parse_port(&address).with_context(context)?; - (ServiceName::ClickhouseKeeper, port) - } - OmicronZoneType::CockroachDb { address, .. } => { - let port = parse_port(&address).with_context(context)?; - (ServiceName::Cockroach, port) - } - OmicronZoneType::Nexus { internal_address, .. } => { - let port = - parse_port(internal_address).with_context(context)?; - (ServiceName::Nexus, port) - } - OmicronZoneType::Crucible { address, .. } => { - let port = parse_port(address).with_context(context)?; - ( - ServiceName::Crucible(OmicronZoneUuid::from_untyped_uuid( - zone.config.id, - )), - port, - ) - } - OmicronZoneType::CruciblePantry { address } => { - let port = parse_port(address).with_context(context)?; - (ServiceName::CruciblePantry, port) - } - OmicronZoneType::Oximeter { address } => { - let port = parse_port(address).with_context(context)?; - (ServiceName::Oximeter, port) - } - OmicronZoneType::ExternalDns { http_address, .. } => { - let port = parse_port(http_address).with_context(context)?; - (ServiceName::ExternalDns, port) - } - OmicronZoneType::InternalDns { http_address, .. } => { - let port = parse_port(http_address).with_context(context)?; - (ServiceName::InternalDns, port) - } + let (service_name, port) = match &zone.zone_type { + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { address, .. }, + ) => (ServiceName::BoundaryNtp, address.port()), + BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { address, .. }, + ) => (ServiceName::InternalNtp, address.port()), + BlueprintZoneType::Clickhouse( + blueprint_zone_type::Clickhouse { address, .. }, + ) => (ServiceName::Clickhouse, address.port()), + BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { address, .. }, + ) => (ServiceName::ClickhouseKeeper, address.port()), + BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { address, .. }, + ) => (ServiceName::Cockroach, address.port()), + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + .. + }) => (ServiceName::Nexus, internal_address.port()), + BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + address, + .. 
+ }) => (ServiceName::Crucible(zone.id), address.port()), + BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { address }, + ) => (ServiceName::CruciblePantry, address.port()), + BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter { + address, + }) => (ServiceName::Oximeter, address.port()), + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { http_address, .. }, + ) => (ServiceName::ExternalDns, http_address.port()), + BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { http_address, .. }, + ) => (ServiceName::InternalDns, http_address.port()), }; // This unwrap is safe because this function only fails if we provide // the same zone id twice, which should not be possible here. dns_builder .host_zone_with_one_backend( - // TODO-cleanup use `TypedUuid` everywhere - OmicronZoneUuid::from_untyped_uuid(zone.config.id), - zone.config.underlay_address, + zone.id, + zone.underlay_address, service_name, port, ) @@ -370,7 +328,7 @@ pub fn blueprint_internal_dns_config( .unwrap(); } - Ok(dns_builder.build_zone()) + dns_builder.build_zone() } pub fn blueprint_external_dns_config( @@ -484,7 +442,10 @@ pub fn blueprint_nexus_external_ips(blueprint: &Blueprint) -> Vec { blueprint .all_omicron_zones(BlueprintZoneFilter::ShouldBeExternallyReachable) .filter_map(|(_, z)| match z.zone_type { - OmicronZoneType::Nexus { external_ip, .. } => Some(external_ip), + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + .. + }) => Some(external_ip), _ => None, }) .collect() @@ -515,8 +476,6 @@ mod test { use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; - use nexus_types::deployment::OmicronZoneConfig; - use nexus_types::deployment::OmicronZoneType; use nexus_types::deployment::SledDisk; use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; @@ -540,6 +499,7 @@ mod test { use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::disk::DiskIdentity; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::ZpoolUuid; use std::collections::BTreeMap; @@ -550,7 +510,6 @@ mod test { use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::sync::Arc; - use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -587,8 +546,7 @@ mod test { &blueprint, &BTreeMap::new(), &Default::default(), - ) - .unwrap(); + ); assert!(blueprint_dns.records.is_empty()); } @@ -650,24 +608,23 @@ mod test { // To make things slightly more interesting, let's add a zone that's // not currently in service. 
- let out_of_service_id = Uuid::new_v4(); + let out_of_service_id = OmicronZoneUuid::new_v4(); let out_of_service_addr = Ipv6Addr::LOCALHOST; blueprint.blueprint_zones.values_mut().next().unwrap().zones.push( BlueprintZoneConfig { - config: OmicronZoneConfig { - id: out_of_service_id, - underlay_address: out_of_service_addr, - zone_type: OmicronZoneType::Oximeter { + disposition: BlueprintZoneDisposition::Quiesced, + id: out_of_service_id, + underlay_address: out_of_service_addr, + zone_type: BlueprintZoneType::Oximeter( + blueprint_zone_type::Oximeter { address: SocketAddrV6::new( out_of_service_addr, 12345, 0, 0, - ) - .to_string(), + ), }, - }, - disposition: BlueprintZoneDisposition::Quiesced, + ), }, ); @@ -692,8 +649,7 @@ mod test { &blueprint, &sleds_by_id, &Default::default(), - ) - .unwrap(); + ); assert_eq!(blueprint_dns_zone.zone_name, DNS_ZONE); // Now, verify a few different properties about the generated DNS @@ -958,7 +914,7 @@ mod test { let nexus_zone = bp_zones_config .zones .iter_mut() - .find(|z| z.config.zone_type.is_nexus()) + .find(|z| z.zone_type.is_nexus()) .unwrap(); nexus_zone.disposition = BlueprintZoneDisposition::Quiesced; @@ -1360,10 +1316,7 @@ mod test { panic!("did not find expected AAAA record for new Nexus zone"); }; let new_zone_host = internal_dns::config::Host::for_zone( - // TODO-cleanup use `TypedUuid` everywhere - internal_dns::config::Zone::Other( - OmicronZoneUuid::from_untyped_uuid(new_zone_id), - ), + internal_dns::config::Zone::Other(new_zone_id), ); assert!(new_zone_host.fqdn().starts_with(new_name)); diff --git a/nexus/reconfigurator/execution/src/omicron_zones.rs b/nexus/reconfigurator/execution/src/omicron_zones.rs index 38b04d0e13..a54e2b7211 100644 --- a/nexus/reconfigurator/execution/src/omicron_zones.rs +++ b/nexus/reconfigurator/execution/src/omicron_zones.rs @@ -89,16 +89,18 @@ mod test { use httptest::Expectation; use nexus_db_queries::context::OpContext; use nexus_test_utils_macros::nexus_test; - use nexus_types::deployment::OmicronZonesConfig; + use nexus_types::deployment::{ + blueprint_zone_type, BlueprintZoneType, OmicronZonesConfig, + }; use nexus_types::deployment::{ Blueprint, BlueprintTarget, BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZonesConfig, }; - use nexus_types::inventory::{ - OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, - }; + use nexus_types::inventory::OmicronZoneDataset; use omicron_common::api::external::Generation; - use omicron_uuid_kinds::{GenericUuid, SledUuid}; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::OmicronZoneUuid; + use omicron_uuid_kinds::SledUuid; use std::collections::BTreeMap; use std::net::SocketAddr; use uuid::Uuid; @@ -178,22 +180,22 @@ mod test { BlueprintZonesConfig { generation: Generation::new(), zones: vec![BlueprintZoneConfig { - config: OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalDns { + disposition: BlueprintZoneDisposition::InService, + id: OmicronZoneUuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { dataset: OmicronZoneDataset { pool_name: format!("oxp_{}", Uuid::new_v4()) .parse() .unwrap(), }, - dns_address: "oh-hello-internal-dns".into(), + dns_address: "[::1]:0".parse().unwrap(), gz_address: "::1".parse().unwrap(), gz_address_index: 0, - http_address: "some-ipv6-address".into(), + http_address: "[::1]:0".parse().unwrap(), }, - }, - disposition: 
BlueprintZoneDisposition::InService, + ), }], } } @@ -284,17 +286,17 @@ mod test { disposition: BlueprintZoneDisposition, ) { zones.zones.push(BlueprintZoneConfig { - config: OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalNtp { - address: "::1".into(), + disposition, + id: OmicronZoneUuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address: "[::1]:0".parse().unwrap(), dns_servers: vec!["::1".parse().unwrap()], domain: None, ntp_servers: vec!["some-ntp-server-addr".into()], }, - }, - disposition, + ), }); } diff --git a/nexus/reconfigurator/execution/src/resource_allocation.rs b/nexus/reconfigurator/execution/src/resource_allocation.rs index 2803482058..86eeb8af13 100644 --- a/nexus/reconfigurator/execution/src/resource_allocation.rs +++ b/nexus/reconfigurator/execution/src/resource_allocation.rs @@ -15,12 +15,15 @@ use nexus_db_queries::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET; use nexus_db_queries::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; use nexus_db_queries::db::fixed_data::vpc_subnet::NTP_VPC_SUBNET; use nexus_db_queries::db::DataStore; -use nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::blueprint_zone_type; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::SourceNatConfig; -use nexus_types::inventory::OmicronZoneConfig; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use slog::info; use slog::warn; use std::net::IpAddr; @@ -30,13 +33,17 @@ use uuid::Uuid; pub(crate) async fn ensure_zone_resources_allocated( opctx: &OpContext, datastore: &DataStore, - all_omicron_zones: impl Iterator, + all_omicron_zones: impl Iterator, ) -> anyhow::Result<()> { let allocator = ResourceAllocator { opctx, datastore }; for z in all_omicron_zones { match &z.zone_type { - OmicronZoneType::Nexus { external_ip, nic, .. } => { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + nic, + .. + }) => { allocator .ensure_nexus_external_networking_allocated( z.id, @@ -45,30 +52,34 @@ pub(crate) async fn ensure_zone_resources_allocated( ) .await?; } - OmicronZoneType::ExternalDns { dns_address, nic, .. } => { + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { dns_address, nic, .. }, + ) => { allocator .ensure_external_dns_external_networking_allocated( z.id, - dns_address, + *dns_address, nic, ) .await?; } - OmicronZoneType::BoundaryNtp { snat_cfg, nic, .. } => { + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { snat_cfg, nic, .. }, + ) => { allocator .ensure_boundary_ntp_external_networking_allocated( z.id, snat_cfg, nic, ) .await?; } - OmicronZoneType::InternalNtp { .. } - | OmicronZoneType::Clickhouse { .. } - | OmicronZoneType::ClickhouseKeeper { .. } - | OmicronZoneType::CockroachDb { .. } - | OmicronZoneType::Crucible { .. } - | OmicronZoneType::CruciblePantry { .. } - | OmicronZoneType::InternalDns { .. } - | OmicronZoneType::Oximeter { .. 
} => (), + BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::Oximeter(_) => (), } } @@ -86,7 +97,7 @@ impl<'a> ResourceAllocator<'a> { async fn is_external_ip_already_allocated( &self, zone_type: &'static str, - zone_id: Uuid, + zone_id: OmicronZoneUuid, external_ip: IpAddr, port_range: Option<(u16, u16)>, ) -> anyhow::Result { @@ -100,7 +111,7 @@ impl<'a> ResourceAllocator<'a> { let allocated_ips = self .datastore - .external_ip_list_service(self.opctx, zone_id) + .external_ip_list_service(self.opctx, zone_id.into_untyped_uuid()) .await .with_context(|| { format!( @@ -159,7 +170,7 @@ impl<'a> ResourceAllocator<'a> { async fn is_nic_already_allocated( &self, zone_type: &'static str, - zone_id: Uuid, + zone_id: OmicronZoneUuid, nic: &NetworkInterface, ) -> anyhow::Result { // See the comment in is_external_ip_already_allocated(). @@ -169,7 +180,10 @@ impl<'a> ResourceAllocator<'a> { let allocated_nics = self .datastore - .service_list_network_interfaces(self.opctx, zone_id) + .service_list_network_interfaces( + self.opctx, + zone_id.into_untyped_uuid(), + ) .await .with_context(|| { format!("failed to look up NICs for {zone_type} {zone_id}") @@ -228,7 +242,7 @@ impl<'a> ResourceAllocator<'a> { async fn ensure_external_service_ip( &self, zone_type: &'static str, - service_id: Uuid, + service_id: OmicronZoneUuid, external_ip: IpAddr, ip_name: &Name, ) -> anyhow::Result<()> { @@ -263,7 +277,7 @@ impl<'a> ResourceAllocator<'a> { ip_id, ip_name, description, - service_id, + service_id.into_untyped_uuid(), external_ip, ) .await @@ -290,7 +304,7 @@ impl<'a> ResourceAllocator<'a> { async fn ensure_external_service_snat_ip( &self, zone_type: &'static str, - service_id: Uuid, + service_id: OmicronZoneUuid, snat: &SourceNatConfig, ) -> anyhow::Result<()> { // Only attempt to allocate `external_ip` if it isn't already assigned @@ -316,7 +330,7 @@ impl<'a> ResourceAllocator<'a> { .external_ip_allocate_service_explicit_snat( self.opctx, ip_id, - service_id, + service_id.into_untyped_uuid(), snat.ip, (snat.first_port, snat.last_port), ) @@ -343,7 +357,7 @@ impl<'a> ResourceAllocator<'a> { async fn ensure_service_nic( &self, zone_type: &'static str, - service_id: Uuid, + service_id: OmicronZoneUuid, nic: &NetworkInterface, nic_subnet: &VpcSubnet, ) -> anyhow::Result<()> { @@ -369,7 +383,7 @@ impl<'a> ResourceAllocator<'a> { } let nic_arg = IncompleteNetworkInterface::new_service( nic.id, - service_id, + service_id.into_untyped_uuid(), nic_subnet.clone(), IdentityMetadataCreateParams { name: nic.name.clone(), @@ -442,7 +456,7 @@ impl<'a> ResourceAllocator<'a> { async fn ensure_nexus_external_networking_allocated( &self, - zone_id: Uuid, + zone_id: OmicronZoneUuid, external_ip: IpAddr, nic: &NetworkInterface, ) -> anyhow::Result<()> { @@ -460,14 +474,10 @@ impl<'a> ResourceAllocator<'a> { async fn ensure_external_dns_external_networking_allocated( &self, - zone_id: Uuid, - dns_address: &str, + zone_id: OmicronZoneUuid, + dns_address: SocketAddr, nic: &NetworkInterface, ) -> anyhow::Result<()> { - let dns_address = - dns_address.parse::().with_context(|| { - format!("failed to parse ExternalDns address {dns_address}") - })?; self.ensure_external_service_ip( "external_dns", zone_id, @@ -482,7 +492,7 @@ impl<'a> ResourceAllocator<'a> { async fn 
ensure_boundary_ntp_external_networking_allocated( &self, - zone_id: Uuid, + zone_id: OmicronZoneUuid, snat: &SourceNatConfig, nic: &NetworkInterface, ) -> anyhow::Result<()> { @@ -497,7 +507,8 @@ mod tests { use super::*; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_test_utils_macros::nexus_test; - use nexus_types::deployment::OmicronZoneConfig; + use nexus_types::deployment::BlueprintZoneConfig; + use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::OmicronZoneDataset; use nexus_types::identity::Resource; use omicron_common::address::IpRange; @@ -510,7 +521,6 @@ mod tests { use omicron_common::api::external::Vni; use std::net::IpAddr; use std::net::Ipv6Addr; - use std::net::SocketAddrV6; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -549,12 +559,14 @@ mod tests { // that we don't care about will be filled in below arbitrarily.) // Nexus: - let nexus_id = Uuid::new_v4(); + let nexus_id = OmicronZoneUuid::new_v4(); let nexus_external_ip = external_ips.next().expect("exhausted external_ips"); let nexus_nic = NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id: nexus_id }, + kind: NetworkInterfaceKind::Service { + id: nexus_id.into_untyped_uuid(), + }, name: "test-nexus".parse().expect("bad name"), ip: NEXUS_OPTE_IPV4_SUBNET .iter() @@ -569,12 +581,14 @@ mod tests { }; // External DNS: - let dns_id = Uuid::new_v4(); + let dns_id = OmicronZoneUuid::new_v4(); let dns_external_ip = external_ips.next().expect("exhausted external_ips"); let dns_nic = NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id: dns_id }, + kind: NetworkInterfaceKind::Service { + id: dns_id.into_untyped_uuid(), + }, name: "test-external-dns".parse().expect("bad name"), ip: DNS_OPTE_IPV4_SUBNET .iter() @@ -589,7 +603,7 @@ mod tests { }; // Boundary NTP: - let ntp_id = Uuid::new_v4(); + let ntp_id = OmicronZoneUuid::new_v4(); let ntp_snat = SourceNatConfig { ip: external_ips.next().expect("exhausted external_ips"), first_port: NUM_SOURCE_NAT_PORTS, @@ -597,7 +611,9 @@ mod tests { }; let ntp_nic = NetworkInterface { id: Uuid::new_v4(), - kind: NetworkInterfaceKind::Service { id: ntp_id }, + kind: NetworkInterfaceKind::Service { + id: ntp_id.into_untyped_uuid(), + }, name: "test-external-ntp".parse().expect("bad name"), ip: NTP_OPTE_IPV4_SUBNET .iter() @@ -614,49 +630,51 @@ mod tests { // Build the `zones` map needed by `ensure_zone_resources_allocated`, // with an arbitrary sled_id. 
let zones = vec![ - OmicronZoneConfig { + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id: nexus_id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Nexus { - internal_address: Ipv6Addr::LOCALHOST.to_string(), - external_ip: nexus_external_ip, - nic: nexus_nic.clone(), - external_tls: false, - external_dns_servers: Vec::new(), - }, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: "[::1]:0".parse().unwrap(), + external_ip: nexus_external_ip, + nic: nexus_nic.clone(), + external_tls: false, + external_dns_servers: Vec::new(), + }, + ), }, - OmicronZoneConfig { + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id: dns_id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::ExternalDns { - dataset: OmicronZoneDataset { - pool_name: format!("oxp_{}", Uuid::new_v4()) - .parse() - .expect("bad name"), + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset: OmicronZoneDataset { + pool_name: format!("oxp_{}", Uuid::new_v4()) + .parse() + .expect("bad name"), + }, + http_address: "[::1]:0".parse().unwrap(), + dns_address: SocketAddr::new(dns_external_ip, 0), + nic: dns_nic.clone(), }, - http_address: SocketAddrV6::new( - Ipv6Addr::LOCALHOST, - 0, - 0, - 0, - ) - .to_string(), - dns_address: SocketAddr::new(dns_external_ip, 0) - .to_string(), - nic: dns_nic.clone(), - }, + ), }, - OmicronZoneConfig { + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id: ntp_id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::BoundaryNtp { - address: SocketAddr::new(dns_external_ip, 0).to_string(), - ntp_servers: Vec::new(), - dns_servers: Vec::new(), - domain: None, - nic: ntp_nic.clone(), - snat_cfg: ntp_snat, - }, + zone_type: BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address: "[::1]:0".parse().unwrap(), + ntp_servers: Vec::new(), + dns_servers: Vec::new(), + domain: None, + nic: ntp_nic.clone(), + snat_cfg: ntp_snat, + }, + ), }, ]; @@ -669,46 +687,52 @@ mod tests { // Check that the external IP records were created. 
let db_nexus_ips = datastore - .external_ip_list_service(&opctx, nexus_id) + .external_ip_list_service(&opctx, nexus_id.into_untyped_uuid()) .await .expect("failed to get external IPs"); assert_eq!(db_nexus_ips.len(), 1); assert!(db_nexus_ips[0].is_service); - assert_eq!(db_nexus_ips[0].parent_id, Some(nexus_id)); + assert_eq!( + db_nexus_ips[0].parent_id, + Some(nexus_id.into_untyped_uuid()) + ); assert_eq!(db_nexus_ips[0].ip, nexus_external_ip.into()); assert_eq!(db_nexus_ips[0].first_port, SqlU16(0)); assert_eq!(db_nexus_ips[0].last_port, SqlU16(65535)); let db_dns_ips = datastore - .external_ip_list_service(&opctx, dns_id) + .external_ip_list_service(&opctx, dns_id.into_untyped_uuid()) .await .expect("failed to get external IPs"); assert_eq!(db_dns_ips.len(), 1); assert!(db_dns_ips[0].is_service); - assert_eq!(db_dns_ips[0].parent_id, Some(dns_id)); + assert_eq!(db_dns_ips[0].parent_id, Some(dns_id.into_untyped_uuid())); assert_eq!(db_dns_ips[0].ip, dns_external_ip.into()); assert_eq!(db_dns_ips[0].first_port, SqlU16(0)); assert_eq!(db_dns_ips[0].last_port, SqlU16(65535)); let db_ntp_ips = datastore - .external_ip_list_service(&opctx, ntp_id) + .external_ip_list_service(&opctx, ntp_id.into_untyped_uuid()) .await .expect("failed to get external IPs"); assert_eq!(db_ntp_ips.len(), 1); assert!(db_ntp_ips[0].is_service); - assert_eq!(db_ntp_ips[0].parent_id, Some(ntp_id)); + assert_eq!(db_ntp_ips[0].parent_id, Some(ntp_id.into_untyped_uuid())); assert_eq!(db_ntp_ips[0].ip, ntp_snat.ip.into()); assert_eq!(db_ntp_ips[0].first_port, SqlU16(ntp_snat.first_port)); assert_eq!(db_ntp_ips[0].last_port, SqlU16(ntp_snat.last_port)); // Check that the NIC records were created. let db_nexus_nics = datastore - .service_list_network_interfaces(&opctx, nexus_id) + .service_list_network_interfaces( + &opctx, + nexus_id.into_untyped_uuid(), + ) .await .expect("failed to get NICs"); assert_eq!(db_nexus_nics.len(), 1); assert_eq!(db_nexus_nics[0].id(), nexus_nic.id); - assert_eq!(db_nexus_nics[0].service_id, nexus_id); + assert_eq!(db_nexus_nics[0].service_id, nexus_id.into_untyped_uuid()); assert_eq!(db_nexus_nics[0].vpc_id, NEXUS_VPC_SUBNET.vpc_id); assert_eq!(db_nexus_nics[0].subnet_id, NEXUS_VPC_SUBNET.id()); assert_eq!(*db_nexus_nics[0].mac, nexus_nic.mac); @@ -717,12 +741,12 @@ mod tests { assert_eq!(db_nexus_nics[0].primary, nexus_nic.primary); let db_dns_nics = datastore - .service_list_network_interfaces(&opctx, dns_id) + .service_list_network_interfaces(&opctx, dns_id.into_untyped_uuid()) .await .expect("failed to get NICs"); assert_eq!(db_dns_nics.len(), 1); assert_eq!(db_dns_nics[0].id(), dns_nic.id); - assert_eq!(db_dns_nics[0].service_id, dns_id); + assert_eq!(db_dns_nics[0].service_id, dns_id.into_untyped_uuid()); assert_eq!(db_dns_nics[0].vpc_id, DNS_VPC_SUBNET.vpc_id); assert_eq!(db_dns_nics[0].subnet_id, DNS_VPC_SUBNET.id()); assert_eq!(*db_dns_nics[0].mac, dns_nic.mac); @@ -731,12 +755,12 @@ mod tests { assert_eq!(db_dns_nics[0].primary, dns_nic.primary); let db_ntp_nics = datastore - .service_list_network_interfaces(&opctx, ntp_id) + .service_list_network_interfaces(&opctx, ntp_id.into_untyped_uuid()) .await .expect("failed to get NICs"); assert_eq!(db_ntp_nics.len(), 1); assert_eq!(db_ntp_nics[0].id(), ntp_nic.id); - assert_eq!(db_ntp_nics[0].service_id, ntp_id); + assert_eq!(db_ntp_nics[0].service_id, ntp_id.into_untyped_uuid()); assert_eq!(db_ntp_nics[0].vpc_id, NTP_VPC_SUBNET.vpc_id); assert_eq!(db_ntp_nics[0].subnet_id, NTP_VPC_SUBNET.id()); assert_eq!(*db_ntp_nics[0].mac, 
ntp_nic.mac); @@ -753,42 +777,51 @@ mod tests { assert_eq!( db_nexus_ips, datastore - .external_ip_list_service(&opctx, nexus_id) + .external_ip_list_service(&opctx, nexus_id.into_untyped_uuid()) .await .expect("failed to get external IPs") ); assert_eq!( db_dns_ips, datastore - .external_ip_list_service(&opctx, dns_id) + .external_ip_list_service(&opctx, dns_id.into_untyped_uuid()) .await .expect("failed to get external IPs") ); assert_eq!( db_ntp_ips, datastore - .external_ip_list_service(&opctx, ntp_id) + .external_ip_list_service(&opctx, ntp_id.into_untyped_uuid()) .await .expect("failed to get external IPs") ); assert_eq!( db_nexus_nics, datastore - .service_list_network_interfaces(&opctx, nexus_id) + .service_list_network_interfaces( + &opctx, + nexus_id.into_untyped_uuid() + ) .await .expect("failed to get NICs") ); assert_eq!( db_dns_nics, datastore - .service_list_network_interfaces(&opctx, dns_id) + .service_list_network_interfaces( + &opctx, + dns_id.into_untyped_uuid() + ) .await .expect("failed to get NICs") ); assert_eq!( db_ntp_nics, datastore - .service_list_network_interfaces(&opctx, ntp_id) + .service_list_network_interfaces( + &opctx, + ntp_id.into_untyped_uuid() + ) .await .expect("failed to get NICs") ); @@ -799,11 +832,13 @@ mod tests { let bogus_ip = external_ips.next().expect("exhausted external_ips"); for mutate_zones_fn in [ // non-matching IP on Nexus - (&|zones: &mut [OmicronZoneConfig]| { + (&|zones: &mut [BlueprintZoneConfig]| { for zone in zones { - if let OmicronZoneType::Nexus { - ref mut external_ip, .. - } = &mut zone.zone_type + if let BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + ref mut external_ip, .. + }, + ) = &mut zone.zone_type { *external_ip = bogus_ip; return format!( @@ -814,16 +849,18 @@ mod tests { } panic!("didn't find expected zone"); - }) as &dyn Fn(&mut [OmicronZoneConfig]) -> String, + }) as &dyn Fn(&mut [BlueprintZoneConfig]) -> String, // non-matching IP on External DNS &|zones| { for zone in zones { - if let OmicronZoneType::ExternalDns { - ref mut dns_address, - .. - } = &mut zone.zone_type + if let BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + ref mut dns_address, + .. + }, + ) = &mut zone.zone_type { - *dns_address = SocketAddr::new(bogus_ip, 0).to_string(); + *dns_address = SocketAddr::new(bogus_ip, 0); return format!( "zone {} already has 1 non-matching IP", zone.id @@ -835,10 +872,12 @@ mod tests { // non-matching SNAT port range on Boundary NTP &|zones| { for zone in zones { - if let OmicronZoneType::BoundaryNtp { - ref mut snat_cfg, - .. - } = &mut zone.zone_type + if let BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + ref mut snat_cfg, + .. + }, + ) = &mut zone.zone_type { snat_cfg.first_port += NUM_SOURCE_NAT_PORTS; snat_cfg.last_port += NUM_SOURCE_NAT_PORTS; @@ -881,7 +920,7 @@ mod tests { // three to ensure we get the errors we expect no matter the zone type. for mutate_nic_fn in [ // switch kind from Service to Instance - (&|_: Uuid, nic: &mut NetworkInterface| { + (&|_: OmicronZoneUuid, nic: &mut NetworkInterface| { match &nic.kind { NetworkInterfaceKind::Instance { .. } => { panic!( @@ -899,7 +938,8 @@ mod tests { } } "invalid NIC kind".to_string() - }) as &dyn Fn(Uuid, &mut NetworkInterface) -> String, + }) + as &dyn Fn(OmicronZoneUuid, &mut NetworkInterface) -> String, // non-matching IP &|zone_id, nic| { nic.ip = bogus_ip; @@ -909,8 +949,10 @@ mod tests { // Try this NIC mutation on Nexus... 
let mut mutated_zones = zones.clone(); for zone in &mut mutated_zones { - if let OmicronZoneType::Nexus { ref mut nic, .. } = - &mut zone.zone_type + if let BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + ref mut nic, + .. + }) = &mut zone.zone_type { let expected_error = mutate_nic_fn(zone.id, nic); @@ -934,8 +976,9 @@ mod tests { // ... and again on ExternalDns let mut mutated_zones = zones.clone(); for zone in &mut mutated_zones { - if let OmicronZoneType::ExternalDns { ref mut nic, .. } = - &mut zone.zone_type + if let BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { ref mut nic, .. }, + ) = &mut zone.zone_type { let expected_error = mutate_nic_fn(zone.id, nic); @@ -959,8 +1002,9 @@ mod tests { // ... and again on BoundaryNtp let mut mutated_zones = zones.clone(); for zone in &mut mutated_zones { - if let OmicronZoneType::BoundaryNtp { ref mut nic, .. } = - &mut zone.zone_type + if let BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { ref mut nic, .. }, + ) = &mut zone.zone_type { let expected_error = mutate_nic_fn(zone.id, nic); diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index 6e5b893180..f024652332 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -12,17 +12,18 @@ use internal_dns::config::Zone; use ipnet::IpAdd; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_inventory::now_db_precision; +use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintPhysicalDiskConfig; use nexus_types::deployment::BlueprintPhysicalDisksConfig; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::DiskFilter; -use nexus_types::deployment::OmicronZoneConfig; +use nexus_types::deployment::InvalidOmicronZoneType; use nexus_types::deployment::OmicronZoneDataset; -use nexus_types::deployment::OmicronZoneType; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; @@ -43,7 +44,7 @@ use omicron_common::api::external::Vni; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::OmicronZoneKind; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; @@ -61,6 +62,7 @@ use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::str::FromStr; use thiserror::Error; +use typed_rng::TypedUuidRng; use typed_rng::UuidRng; use uuid::Uuid; @@ -79,6 +81,8 @@ pub enum Error { ExhaustedNexusIps, #[error("programming error in planner")] Planner(#[from] anyhow::Error), + #[error("invalid OmicronZoneType in collection")] + InvalidOmicronZoneType(#[from] InvalidOmicronZoneType), } /// Describes whether an idempotent "ensure" operation resulted in action taken @@ -234,11 +238,13 @@ impl<'a> BlueprintBuilder<'a> { sled_id )) })?; + let config = + BlueprintZonesConfig::initial_from_collection(&zones)?; Ok(( // TODO-cleanup use `TypedUuid` everywhere sled_id.into_untyped_uuid(), - 
BlueprintZonesConfig::initial_from_collection(&zones), + config, )) }) .collect::>()?; @@ -309,8 +315,8 @@ impl<'a> BlueprintBuilder<'a> { parent_blueprint.all_omicron_zones(BlueprintZoneFilter::All) { let zone_type = &z.zone_type; - if let OmicronZoneType::Nexus { nic, .. } = zone_type { - match nic.ip { + if let BlueprintZoneType::Nexus(nexus) = zone_type { + match nexus.nic.ip { IpAddr::V4(ip) => { if !existing_nexus_v4_ips.insert(ip) { bail!("duplicate Nexus NIC IP: {ip}"); @@ -324,7 +330,7 @@ impl<'a> BlueprintBuilder<'a> { } } - if let Some(external_ip) = zone_type.external_ip()? { + if let Some(external_ip) = zone_type.external_ip() { // For the test suite, ignore localhost. It gets reused many // times and that's okay. We don't expect to see localhost // outside the test suite. @@ -334,7 +340,7 @@ impl<'a> BlueprintBuilder<'a> { bail!("duplicate external IP: {external_ip}"); } } - if let Some(nic) = zone_type.service_vnic() { + if let Some(nic) = zone_type.opte_vnic() { if !used_macs.insert(nic.mac) { bail!("duplicate service vNIC MAC: {}", nic.mac); } @@ -513,7 +519,7 @@ impl<'a> BlueprintBuilder<'a> { let has_ntp = self .zones .current_sled_zones(sled_id) - .any(|z| z.config.zone_type.is_ntp()); + .any(|z| z.zone_type.is_ntp()); if has_ntp { return Ok(Ensure::NotNeeded); } @@ -542,33 +548,26 @@ impl<'a> BlueprintBuilder<'a> { .parent_blueprint .all_omicron_zones(BlueprintZoneFilter::All) .filter_map(|(_, z)| { - if matches!(z.zone_type, OmicronZoneType::BoundaryNtp { .. }) { - Some( - Host::for_zone(Zone::Other( - // TODO-cleanup use `TypedUuid` everywhere - OmicronZoneUuid::from_untyped_uuid(z.id), - )) - .fqdn(), - ) + if matches!(z.zone_type, BlueprintZoneType::BoundaryNtp(_)) { + Some(Host::for_zone(Zone::Other(z.id)).fqdn()) } else { None } }) .collect(); - let zone = OmicronZoneConfig { - id: self.rng.zone_rng.next(), - underlay_address: ip, - zone_type: OmicronZoneType::InternalNtp { - address: ntp_address.to_string(), - ntp_servers, - dns_servers, - domain: None, - }, - }; let zone = BlueprintZoneConfig { - config: zone, disposition: BlueprintZoneDisposition::InService, + id: self.rng.zone_rng.next(), + underlay_address: ip, + zone_type: BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address: ntp_address, + ntp_servers, + dns_servers, + domain: None, + }, + ), }; self.sled_add_zone(sled_id, zone)?; @@ -586,8 +585,11 @@ impl<'a> BlueprintBuilder<'a> { let has_crucible_on_this_pool = self.zones.current_sled_zones(sled_id).any(|z| { matches!( - &z.config.zone_type, - OmicronZoneType::Crucible { dataset, .. } + &z.zone_type, + BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + dataset, + .. 
+ }) if dataset.pool_name == pool_name ) }); @@ -607,20 +609,19 @@ impl<'a> BlueprintBuilder<'a> { let ip = self.sled_alloc_ip(sled_id)?; let port = omicron_common::address::CRUCIBLE_PORT; - let address = SocketAddrV6::new(ip, port, 0, 0).to_string(); - let zone = OmicronZoneConfig { + let address = SocketAddrV6::new(ip, port, 0, 0); + let zone = BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, id: self.rng.zone_rng.next(), underlay_address: ip, - zone_type: OmicronZoneType::Crucible { - address, - dataset: OmicronZoneDataset { pool_name }, - }, + zone_type: BlueprintZoneType::Crucible( + blueprint_zone_type::Crucible { + address, + dataset: OmicronZoneDataset { pool_name }, + }, + ), }; - let zone = BlueprintZoneConfig { - config: zone, - disposition: BlueprintZoneDisposition::InService, - }; self.sled_add_zone(sled_id, zone)?; Ok(Ensure::Added) } @@ -633,7 +634,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_num_nexus_zones(&self, sled_id: SledUuid) -> usize { self.zones .current_sled_zones(sled_id) - .filter(|z| z.config.zone_type.is_nexus()) + .filter(|z| z.zone_type.is_nexus()) .count() } @@ -657,11 +658,10 @@ impl<'a> BlueprintBuilder<'a> { .parent_blueprint .all_omicron_zones(BlueprintZoneFilter::All) .find_map(|(_, z)| match &z.zone_type { - OmicronZoneType::Nexus { - external_tls, - external_dns_servers, - .. - } => Some((*external_tls, external_dns_servers.clone())), + BlueprintZoneType::Nexus(nexus) => Some(( + nexus.external_tls, + nexus.external_dns_servers.clone(), + )), _ => None, }) .ok_or(Error::NoNexusZonesInParentBlueprint)?; @@ -725,7 +725,9 @@ impl<'a> BlueprintBuilder<'a> { .ok_or(Error::NoSystemMacAddressAvailable)?; NetworkInterface { id: self.rng.network_interface_rng.next(), - kind: NetworkInterfaceKind::Service { id: nexus_id }, + kind: NetworkInterfaceKind::Service { + id: nexus_id.into_untyped_uuid(), + }, name: format!("nexus-{nexus_id}").parse().unwrap(), ip, mac, @@ -738,22 +740,20 @@ impl<'a> BlueprintBuilder<'a> { let ip = self.sled_alloc_ip(sled_id)?; let port = omicron_common::address::NEXUS_INTERNAL_PORT; - let internal_address = - SocketAddrV6::new(ip, port, 0, 0).to_string(); - let zone = OmicronZoneConfig { - id: nexus_id, - underlay_address: ip, - zone_type: OmicronZoneType::Nexus { - internal_address, - external_ip, - nic, - external_tls, - external_dns_servers: external_dns_servers.clone(), - }, - }; + let internal_address = SocketAddrV6::new(ip, port, 0, 0); let zone = BlueprintZoneConfig { - config: zone, disposition: BlueprintZoneDisposition::InService, + id: nexus_id, + underlay_address: ip, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers: external_dns_servers.clone(), + }, + ), }; self.sled_add_zone(sled_id, zone)?; } @@ -772,10 +772,10 @@ impl<'a> BlueprintBuilder<'a> { let sled_zones = self.zones.change_sled_zones(sled_id); // A sled should have a small number (< 20) of zones so a linear search // should be very fast. - if sled_zones.zones.iter().any(|z| z.config.id == zone.config.id) { + if sled_zones.zones.iter().any(|z| z.id == zone.id) { return Err(Error::Planner(anyhow!( "attempted to add zone that already exists: {}", - zone.config.id + zone.id ))); } sled_zones.zones.push(zone); @@ -812,7 +812,7 @@ impl<'a> BlueprintBuilder<'a> { // Record each of the sled's zones' underlay addresses as // allocated. 
for z in self.zones.current_sled_zones(sled_id) { - allocator.reserve(z.config.underlay_address); + allocator.reserve(z.underlay_address); } allocator @@ -843,7 +843,7 @@ struct BlueprintBuilderRng { // In the future, when we switch to typed UUIDs, each of these will be // associated with a specific `TypedUuidKind`. blueprint_rng: UuidRng, - zone_rng: UuidRng, + zone_rng: TypedUuidRng, network_interface_rng: UuidRng, } @@ -854,7 +854,7 @@ impl BlueprintBuilderRng { fn new_from_parent(mut parent: StdRng) -> Self { let blueprint_rng = UuidRng::from_parent_rng(&mut parent, "blueprint"); - let zone_rng = UuidRng::from_parent_rng(&mut parent, "zone"); + let zone_rng = TypedUuidRng::from_parent_rng(&mut parent, "zone"); let network_interface_rng = UuidRng::from_parent_rng(&mut parent, "network_interface"); @@ -1071,23 +1071,27 @@ pub mod test { use nexus_types::deployment::BlueprintZoneFilter; use omicron_common::address::IpRange; use omicron_test_utils::dev::test_setup_log; - use sled_agent_client::types::{OmicronZoneConfig, OmicronZoneType}; + use sled_agent_client::types::OmicronZoneType; use std::collections::BTreeSet; pub const DEFAULT_N_SLEDS: usize = 3; /// Checks various conditions that should be true for all blueprints pub fn verify_blueprint(blueprint: &Blueprint) { - let mut underlay_ips: BTreeMap = + let mut underlay_ips: BTreeMap = BTreeMap::new(); for (_, zone) in blueprint.all_omicron_zones(BlueprintZoneFilter::All) { if let Some(previous) = underlay_ips.insert(zone.underlay_address, zone) { panic!( - "found duplicate underlay IP {} in zones {} and \ - {}\n\nblueprint: {:#?}", - zone.underlay_address, zone.id, previous.id, blueprint + "found duplicate underlay IP {} in zones {} and {}\ + \n\n\ + blueprint: {}", + zone.underlay_address, + zone.id, + previous.id, + blueprint.display(), ); } } @@ -1234,20 +1238,20 @@ pub mod test { assert!(new_sled_resources .subnet .net() - .contains(z.config.underlay_address)); + .contains(z.underlay_address)); } // Check for an NTP zone. Its sockaddr's IP should also be on the // sled's subnet. assert!(new_sled_zones.zones.iter().any(|z| { - if let OmicronZoneType::InternalNtp { address, .. } = - &z.config.zone_type + if let BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { address, .. }, + ) = &z.zone_type { - let sockaddr = address.parse::().unwrap(); assert!(new_sled_resources .subnet .net() - .contains(*sockaddr.ip())); + .contains(*address.ip())); true } else { false @@ -1257,11 +1261,11 @@ pub mod test { .zones .iter() .filter_map(|z| { - if let OmicronZoneType::Crucible { address, dataset } = - &z.config.zone_type + if let BlueprintZoneType::Crucible( + blueprint_zone_type::Crucible { address, dataset }, + ) = &z.zone_type { - let sockaddr = address.parse::().unwrap(); - let ip = sockaddr.ip(); + let ip = address.ip(); assert!(new_sled_resources.subnet.net().contains(*ip)); Some(dataset.pool_name.clone()) } else { @@ -1472,11 +1476,7 @@ pub mod test { // Nexus with no remaining external IPs should fail. 
let mut used_ip_ranges = Vec::new(); for (_, z) in parent.all_omicron_zones(BlueprintZoneFilter::All) { - if let Some(ip) = z - .zone_type - .external_ip() - .expect("failed to check for external IP") - { + if let Some(ip) = z.zone_type.external_ip() { used_ip_ranges.push(IpRange::from(ip)); } } diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index e740abfcad..c269d4ccd2 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -17,7 +17,6 @@ use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::GenericUuid; -use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledKind; use sled_agent_client::types::OmicronZonesConfig; use typed_rng::TypedUuidRng; @@ -125,9 +124,9 @@ impl ExampleSystem { else { continue; }; - for zone in zones.zones.iter().map(|z| &z.config) { - let service_id = OmicronZoneUuid::from_untyped_uuid(zone.id); - if let Ok(Some(ip)) = zone.zone_type.external_ip() { + for zone in zones.zones.iter() { + let service_id = zone.id; + if let Some(ip) = zone.zone_type.external_ip() { input_builder .add_omicron_zone_external_ip( service_id, @@ -138,7 +137,7 @@ impl ExampleSystem { ) .expect("failed to add Omicron zone external IP"); } - if let Some(nic) = zone.zone_type.service_vnic() { + if let Some(nic) = zone.zone_type.opte_vnic() { input_builder .add_omicron_zone_nic( service_id, diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 0b853a943d..1c054de646 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -344,13 +344,14 @@ mod test { use chrono::Utc; use expectorate::assert_contents; use nexus_inventory::now_db_precision; + use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; + use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::SledFilter; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; - use nexus_types::inventory::OmicronZoneType; use nexus_types::inventory::OmicronZonesFound; use omicron_common::api::external::Generation; use omicron_test_utils::dev::test_setup_log; @@ -443,8 +444,8 @@ mod test { assert_eq!(sled_id, new_sled_id); assert_eq!(sled_zones.zones.len(), 1); assert!(matches!( - sled_zones.zones[0].config.zone_type, - OmicronZoneType::InternalNtp { .. 
} + sled_zones.zones[0].zone_type, + BlueprintZoneType::InternalNtp(_), )); assert_eq!(diff.sleds_removed().len(), 0); assert_eq!(diff.sleds_modified().count(), 0); @@ -526,7 +527,7 @@ mod test { let zones = sled_changes.zones_added().collect::>(); assert_eq!(zones.len(), 10); for zone in &zones { - if !zone.config.zone_type.is_crucible() { + if !zone.zone_type.is_crucible() { panic!("unexpectedly added a non-Crucible zone: {zone:?}"); } } @@ -608,7 +609,7 @@ mod test { .expect("missing kept sled") .zones .iter() - .filter(|z| z.config.zone_type.is_nexus()) + .filter(|z| z.zone_type.is_nexus()) .count(), 1 ); @@ -644,7 +645,7 @@ mod test { let zones = sled_changes.zones_added().collect::>(); assert_eq!(zones.len(), input.target_nexus_zone_count() - 1); for zone in &zones { - if !zone.config.zone_type.is_nexus() { + if !zone.zone_type.is_nexus() { panic!("unexpectedly added a non-Nexus zone: {zone:?}"); } } @@ -683,7 +684,7 @@ mod test { sled_config .zones .iter() - .filter(|z| z.config.zone_type.is_nexus()) + .filter(|z| z.zone_type.is_nexus()) .count(), 1 ); @@ -729,7 +730,7 @@ mod test { } } for zone in &zones { - if !zone.config.zone_type.is_nexus() { + if !zone.zone_type.is_nexus() { panic!("unexpectedly added a non-Nexus zone: {zone:?}"); } } @@ -774,7 +775,7 @@ mod test { sled_config .zones .iter() - .filter(|z| z.config.zone_type.is_nexus()) + .filter(|z| z.zone_type.is_nexus()) .count(), 1 ); @@ -861,8 +862,7 @@ mod test { assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); for zone in &zones { - let OmicronZoneType::Nexus { .. } = zone.config.zone_type - else { + let BlueprintZoneType::Nexus(_) = zone.zone_type else { panic!("unexpectedly added a non-Crucible zone: {zone:?}"); }; } @@ -908,16 +908,17 @@ mod test { .zones; zones.retain_mut(|zone| { - if let OmicronZoneType::Nexus { internal_address, .. } = - &mut zone.config.zone_type + if let BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + .. + }) = &mut zone.zone_type { - // Change one of these params to ensure that the diff output - // makes sense. - *internal_address = format!("{internal_address}foo"); + // Change the internal address. + let mut segments = internal_address.ip().segments(); + segments[0] = segments[0].wrapping_add(1); + internal_address.set_ip(segments.into()); true - } else if let OmicronZoneType::Crucible { .. } = - zone.config.zone_type - { + } else if let BlueprintZoneType::Crucible(_) = zone.zone_type { match next { NextCrucibleMutate::Modify => { zone.disposition = BlueprintZoneDisposition::Quiesced; @@ -930,13 +931,13 @@ mod test { } NextCrucibleMutate::Done => true, } - } else if let OmicronZoneType::InternalNtp { .. } = - &mut zone.config.zone_type + } else if let BlueprintZoneType::InternalNtp(_) = + &mut zone.zone_type { // Change the underlay IP. 
- let mut segments = zone.config.underlay_address.segments(); + let mut segments = zone.underlay_address.segments(); segments[0] += 1; - zone.config.underlay_address = segments.into(); + zone.underlay_address = segments.into(); true } else { true diff --git a/nexus/src/app/background/blueprint_execution.rs b/nexus/src/app/background/blueprint_execution.rs index b46e918bb8..51898b9b7b 100644 --- a/nexus/src/app/background/blueprint_execution.rs +++ b/nexus/src/app/background/blueprint_execution.rs @@ -121,13 +121,13 @@ mod test { use nexus_db_queries::context::OpContext; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::{ - Blueprint, BlueprintPhysicalDisksConfig, BlueprintTarget, - BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZonesConfig, - }; - use nexus_types::inventory::{ - OmicronZoneConfig, OmicronZoneDataset, OmicronZoneType, + blueprint_zone_type, Blueprint, BlueprintPhysicalDisksConfig, + BlueprintTarget, BlueprintZoneConfig, BlueprintZoneDisposition, + BlueprintZoneType, BlueprintZonesConfig, }; + use nexus_types::inventory::OmicronZoneDataset; use omicron_common::api::external::Generation; + use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledKind; use omicron_uuid_kinds::TypedUuid; use serde::Deserialize; @@ -254,22 +254,22 @@ mod test { BlueprintZonesConfig { generation: Generation::new(), zones: vec![BlueprintZoneConfig { - config: OmicronZoneConfig { - id: Uuid::new_v4(), - underlay_address: "::1".parse().unwrap(), - zone_type: OmicronZoneType::InternalDns { + disposition, + id: OmicronZoneUuid::new_v4(), + underlay_address: "::1".parse().unwrap(), + zone_type: BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { dataset: OmicronZoneDataset { pool_name: format!("oxp_{}", Uuid::new_v4()) .parse() .unwrap(), }, - dns_address: "oh-hello-internal-dns".into(), + dns_address: "[::1]:0".parse().unwrap(), gz_address: "::1".parse().unwrap(), gz_address_index: 0, - http_address: "[::1]:12345".into(), + http_address: "[::1]:12345".parse().unwrap(), }, - }, - disposition, + ), }], } } diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 29feeb6181..17c5fbe7fd 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -20,7 +20,9 @@ use nexus_db_queries::db::datastore::DnsVersionUpdateBuilder; use nexus_db_queries::db::datastore::RackInit; use nexus_db_queries::db::lookup::LookupPath; use nexus_reconfigurator_execution::silo_dns_name; +use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::external_api::params::Address; use nexus_types::external_api::params::AddressConfig; use nexus_types::external_api::params::AddressLotBlockCreate; @@ -195,10 +197,10 @@ impl super::Nexus { .blueprint .all_omicron_zones(BlueprintZoneFilter::ShouldBeExternallyReachable) .filter_map(|(_, zc)| match zc.zone_type { - nexus_types::deployment::OmicronZoneType::Nexus { + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { external_ip, .. 
- } => Some(match external_ip { + }) => Some(match external_ip { IpAddr::V4(addr) => DnsRecord::A(addr), IpAddr::V6(addr) => DnsRecord::Aaaa(addr), }), diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 6c4fe4e91e..67c63de5a1 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -27,9 +27,10 @@ use dropshot::ConfigDropshot; use external_api::http_entrypoints::external_api; use internal_api::http_entrypoints::internal_api; use nexus_config::NexusConfig; +use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; -use nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::BlueprintZoneType; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::internal_api::params::{ PhysicalDiskPutRequest, ZpoolPutRequest, @@ -272,17 +273,13 @@ impl nexus_test_interface::NexusServer for Server { let internal_services_ip_pool_ranges = blueprint .all_omicron_zones(BlueprintZoneFilter::ShouldBeExternallyReachable) .filter_map(|(_, zc)| match &zc.zone_type { - OmicronZoneType::ExternalDns { dns_address, .. } => { - // Work around - // https://github.com/oxidecomputer/omicron/issues/4988 - let dns_address: SocketAddr = dns_address - .parse() - .expect("invalid DNS socket address"); - Some(IpRange::from(dns_address.ip())) - } - OmicronZoneType::Nexus { external_ip, .. } => { - Some(IpRange::from(*external_ip)) - } + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { dns_address, .. }, + ) => Some(IpRange::from(dns_address.ip())), + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + .. + }) => Some(IpRange::from(*external_ip)), _ => None, }) .collect(); diff --git a/nexus/test-utils/src/lib.rs b/nexus/test-utils/src/lib.rs index 42f1f12546..cd71bd2cb8 100644 --- a/nexus/test-utils/src/lib.rs +++ b/nexus/test-utils/src/lib.rs @@ -752,12 +752,11 @@ impl<'a, N: NexusServer> ControlPlaneTestContextBuilder<'a, N> { zones: zones .iter() .map(|z| { - BlueprintZoneConfig { - config: z.clone(), - // All initial zones are in-service - disposition: - BlueprintZoneDisposition::InService, - } + // All initial zones are in-service + BlueprintZoneConfig::from_omicron_zone_config( + z.clone(), + BlueprintZoneDisposition::InService, + ).unwrap() }) .collect(), }, diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 68b1444cc1..ddd95ccf9b 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -18,6 +18,8 @@ schemars = { workspace = true, features = ["chrono", "uuid1"] } serde.workspace = true serde_json.workspace = true serde_with.workspace = true +slog.workspace = true +slog-error-chain.workspace = true steno.workspace = true strum.workspace = true tabled.workspace = true diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 52e285e81c..cb7cc29ffc 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -23,20 +23,25 @@ pub use crate::inventory::ZpoolName; use newtype_uuid::GenericUuid; use omicron_common::api::external::Generation; use omicron_uuid_kinds::CollectionUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::SledUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use sled_agent_client::ZoneKind; +use slog_error_chain::SlogInlineError; use std::collections::BTreeMap; use std::collections::HashMap; use std::fmt; +use std::net::AddrParseError; +use std::net::Ipv6Addr; use strum::EnumIter; use strum::IntoEnumIterator; use thiserror::Error; 
use uuid::Uuid; mod planning_input; +mod zone_type; pub use planning_input::DiskFilter; pub use planning_input::OmicronZoneExternalIp; @@ -49,6 +54,8 @@ pub use planning_input::SledDetails; pub use planning_input::SledDisk; pub use planning_input::SledFilter; pub use planning_input::SledResources; +pub use zone_type::blueprint_zone_type; +pub use zone_type::BlueprintZoneType; /// Describes a complete set of software and configuration for the system // Blueprints are a fundamental part of how the system modifies itself. Each @@ -140,7 +147,7 @@ impl Blueprint { /// Iterate over the [`BlueprintZoneConfig`] instances in the blueprint /// that match the provided filter, along with the associated sled id. - pub fn all_blueprint_zones( + pub fn all_omicron_zones( &self, filter: BlueprintZoneFilter, ) -> impl Iterator { @@ -151,31 +158,6 @@ impl Blueprint { }) } - /// Iterate over all the [`OmicronZoneConfig`] instances in the blueprint, - /// along with the associated sled id. - pub fn all_omicron_zones( - &self, - filter: BlueprintZoneFilter, - ) -> impl Iterator { - self.all_blueprint_zones(filter) - .map(|(sled_id, z)| (sled_id, &z.config)) - } - - // Temporary method that provides the list of Omicron zones using - // `TypedUuid`. - // - // In the future, `all_omicron_zones` will return `SledUuid`, - // and this method will go away. - pub fn all_omicron_zones_typed( - &self, - ) -> impl Iterator { - self.blueprint_zones.iter().flat_map(|(sled_id, z)| { - z.zones.iter().map(move |z| { - (SledUuid::from_untyped_uuid(*sled_id), &z.config) - }) - }) - } - /// Iterate over the ids of all sleds in the blueprint pub fn sleds(&self) -> impl Iterator + '_ { self.blueprint_zones.keys().copied().map(SledUuid::from_untyped_uuid) @@ -224,18 +206,29 @@ impl Blueprint { .zones .zones .iter() - .map(|z| BlueprintZoneConfig { - config: z.clone(), - disposition: BlueprintZoneDisposition::InService, + .map(|z| { + BlueprintZoneConfig::from_omicron_zone_config( + z.clone(), + BlueprintZoneDisposition::InService, + ) + .map_err(|err| { + BlueprintDiffError { + before_meta: DiffBeforeMetadata::Collection { + id: before.id, + }, + after_meta: Box::new(self.metadata()), + errors: vec![BlueprintDiffSingleError::InvalidOmicronZoneType(err)], + } + }) }) - .collect(); + .collect::, _>>()?; let zones = BlueprintZonesConfig { generation: zones_found.zones.generation, zones, }; - (*sled_id, zones) + Ok((*sled_id, zones)) }) - .collect(); + .collect::>()?; BlueprintDiff::new( DiffBeforeMetadata::Collection { id: before.id }, @@ -322,15 +315,19 @@ impl BlueprintZonesConfig { /// For the initial blueprint, all zones within a collection are assumed to /// have the [`InService`](BlueprintZoneDisposition::InService) /// disposition. - pub fn initial_from_collection(collection: &OmicronZonesConfig) -> Self { + pub fn initial_from_collection( + collection: &OmicronZonesConfig, + ) -> Result { let zones = collection .zones .iter() - .map(|z| BlueprintZoneConfig { - config: z.clone(), - disposition: BlueprintZoneDisposition::InService, + .map(|z| { + BlueprintZoneConfig::from_omicron_zone_config( + z.clone(), + BlueprintZoneDisposition::InService, + ) }) - .collect(); + .collect::>()?; let mut ret = Self { // An initial `BlueprintZonesConfig` reuses the generation from @@ -341,7 +338,7 @@ impl BlueprintZonesConfig { // For testing, it's helpful for zones to be in sorted order. ret.sort(); - ret + Ok(ret) } /// Sorts the list of zones stored in this configuration. 
@@ -366,9 +363,9 @@ impl BlueprintZonesConfig { zones: self .zones .iter() - .filter_map(|z| { - z.disposition.matches(filter).then(|| z.config.clone()) - }) + .filter(|z| z.disposition.matches(filter)) + .cloned() + .map(OmicronZoneConfig::from) .collect(), } } @@ -377,22 +374,266 @@ impl BlueprintZonesConfig { fn zone_sort_key(z: &BlueprintZoneConfig) -> impl Ord { // First sort by kind, then by ID. This makes it so that zones of the same // kind (e.g. Crucible zones) are grouped together. - (z.config.zone_type.kind(), z.config.id) + (z.zone_type.kind(), z.id) +} + +/// "Should never happen" errors from converting an [`OmicronZoneType`] into a +/// [`BlueprintZoneType`]. +// Removing this error type would be a side effect of fixing +// https://github.com/oxidecomputer/omicron/issues/4988. +#[derive(Debug, Clone, Error, SlogInlineError)] +pub enum InvalidOmicronZoneType { + #[error("invalid socket address for {kind} ({addr})")] + ParseSocketAddr { + kind: ZoneKind, + addr: String, + #[source] + err: AddrParseError, + }, } /// Describes one Omicron-managed zone in a blueprint. /// -/// This is a wrapper around an [`OmicronZoneConfig`] that also includes a -/// [`BlueprintZoneDisposition`]. -/// /// Part of [`BlueprintZonesConfig`]. #[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] pub struct BlueprintZoneConfig { - /// The underlying zone configuration. - pub config: OmicronZoneConfig, - /// The disposition (desired state) of this zone recorded in the blueprint. pub disposition: BlueprintZoneDisposition, + + pub id: OmicronZoneUuid, + pub underlay_address: Ipv6Addr, + pub zone_type: BlueprintZoneType, +} + +impl BlueprintZoneConfig { + pub fn from_omicron_zone_config( + config: OmicronZoneConfig, + disposition: BlueprintZoneDisposition, + ) -> Result { + let zone_type = match config.zone_type { + OmicronZoneType::BoundaryNtp { + address, + dns_servers, + domain, + nic, + ntp_servers, + snat_cfg, + } => { + let address = address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::BoundaryNtp, + addr: address.clone(), + err, + } + })?; + BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + }, + ) + } + OmicronZoneType::Clickhouse { address, dataset } => { + let address = address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::Clickhouse, + addr: address.clone(), + err, + } + })?; + BlueprintZoneType::Clickhouse(blueprint_zone_type::Clickhouse { + address, + dataset, + }) + } + OmicronZoneType::ClickhouseKeeper { address, dataset } => { + let address = address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::ClickhouseKeeper, + addr: address.clone(), + err, + } + })?; + BlueprintZoneType::ClickhouseKeeper( + blueprint_zone_type::ClickhouseKeeper { address, dataset }, + ) + } + OmicronZoneType::CockroachDb { address, dataset } => { + let address = address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::CockroachDb, + addr: address.clone(), + err, + } + })?; + BlueprintZoneType::CockroachDb( + blueprint_zone_type::CockroachDb { address, dataset }, + ) + } + OmicronZoneType::Crucible { address, dataset } => { + let address = address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::Crucible, + addr: address.clone(), + err, + } + })?; + BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { + address, 
+ dataset, + }) + } + OmicronZoneType::CruciblePantry { address } => { + let address = address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::CruciblePantry, + addr: address.clone(), + err, + } + })?; + BlueprintZoneType::CruciblePantry( + blueprint_zone_type::CruciblePantry { address }, + ) + } + OmicronZoneType::ExternalDns { + dataset, + dns_address, + http_address, + nic, + } => { + let dns_address = dns_address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::ExternalDns, + addr: dns_address.clone(), + err, + } + })?; + let http_address = http_address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::ExternalDns, + addr: http_address.clone(), + err, + } + })?; + BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { + dataset, + http_address, + dns_address, + nic, + }, + ) + } + OmicronZoneType::InternalDns { + dataset, + dns_address, + gz_address, + gz_address_index, + http_address, + } => { + let dns_address = dns_address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::InternalDns, + addr: dns_address.clone(), + err, + } + })?; + let http_address = http_address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::InternalDns, + addr: http_address.clone(), + err, + } + })?; + BlueprintZoneType::InternalDns( + blueprint_zone_type::InternalDns { + dataset, + http_address, + dns_address, + gz_address, + gz_address_index, + }, + ) + } + OmicronZoneType::InternalNtp { + address, + dns_servers, + domain, + ntp_servers, + } => { + let address = address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::InternalNtp, + addr: address.clone(), + err, + } + })?; + BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + }, + ) + } + OmicronZoneType::Nexus { + external_dns_servers, + external_ip, + external_tls, + internal_address, + nic, + } => { + let internal_address = + internal_address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::Nexus, + addr: internal_address.clone(), + err, + } + })?; + BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + }) + } + OmicronZoneType::Oximeter { address } => { + let address = address.parse().map_err(|err| { + InvalidOmicronZoneType::ParseSocketAddr { + kind: ZoneKind::Oximeter, + addr: address.clone(), + err, + } + })?; + BlueprintZoneType::Oximeter(blueprint_zone_type::Oximeter { + address, + }) + } + }; + Ok(Self { + disposition, + id: OmicronZoneUuid::from_untyped_uuid(config.id), + underlay_address: config.underlay_address, + zone_type, + }) + } +} + +impl From for OmicronZoneConfig { + fn from(z: BlueprintZoneConfig) -> Self { + Self { + id: z.id.into_untyped_uuid(), + underlay_address: z.underlay_address, + zone_type: z.zone_type.into(), + } + } } /// The desired state of an Omicron-managed zone in a blueprint. 
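For illustration only (not part of this patch), a minimal sketch of the round trip that `BlueprintZoneConfig::from_omicron_zone_config` and the `From<BlueprintZoneConfig> for OmicronZoneConfig` impl above provide; `omicron_zone` is a hypothetical `OmicronZoneConfig` taken from an inventory collection, and the enclosing function is assumed to convert from `InvalidOmicronZoneType`:

    // Hypothetical input: an OmicronZoneConfig reported by sled-agent.
    let bp_zone = BlueprintZoneConfig::from_omicron_zone_config(
        omicron_zone.clone(),
        BlueprintZoneDisposition::InService,
    )?; // fails only if one of the stringly-typed addresses does not parse
    // Converting back recovers the sled-agent representation, with the
    // typed socket addresses rendered back into strings.
    let round_tripped = OmicronZoneConfig::from(bp_zone);
    assert_eq!(round_tripped.id, omicron_zone.id);
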
@@ -793,6 +1034,7 @@ pub enum BlueprintDiffSingleError { before: ZoneKind, after: ZoneKind, }, + InvalidOmicronZoneType(InvalidOmicronZoneType), } impl fmt::Display for BlueprintDiffSingleError { @@ -805,9 +1047,12 @@ impl fmt::Display for BlueprintDiffSingleError { after, } => write!( f, - "on sled {}, zone {} changed type from {} to {}", - zone_id, sled_id, before, after + "on sled {sled_id}, zone {zone_id} changed type \ + from {before} to {after}", ), + BlueprintDiffSingleError::InvalidOmicronZoneType(err) => { + write!(f, "invalid OmicronZoneType in collection: {err}") + } } } } @@ -852,16 +1097,10 @@ impl DiffSledModified { errors: &mut Vec, ) -> Self { // Assemble separate summaries of the zones, indexed by zone id. - let before_by_id: HashMap<_, _> = before - .zones - .into_iter() - .map(|zone| (zone.config.id, zone)) - .collect(); - let mut after_by_id: HashMap<_, _> = after - .zones - .into_iter() - .map(|zone| (zone.config.id, zone)) - .collect(); + let before_by_id: HashMap<_, _> = + before.zones.into_iter().map(|zone| (zone.id, zone)).collect(); + let mut after_by_id: HashMap<_, _> = + after.zones.into_iter().map(|zone| (zone.id, zone)).collect(); let mut zones_removed = Vec::new(); let mut zones_common = Vec::new(); @@ -869,13 +1108,13 @@ impl DiffSledModified { // Now go through each zone and compare them. for (zone_id, zone_before) in before_by_id { if let Some(zone_after) = after_by_id.remove(&zone_id) { - let before_kind = zone_before.config.zone_type.kind(); - let after_kind = zone_after.config.zone_type.kind(); + let before_kind = zone_before.zone_type.kind(); + let after_kind = zone_after.zone_type.kind(); if before_kind != after_kind { errors.push(BlueprintDiffSingleError::ZoneTypeChanged { sled_id, - zone_id, + zone_id: zone_id.into_untyped_uuid(), before: before_kind, after: after_kind, }); @@ -971,7 +1210,8 @@ impl DiffZoneCommon { /// changed. #[inline] pub fn config_changed(&self) -> bool { - self.zone_before.config != self.zone_after.config + self.zone_before.underlay_address != self.zone_after.underlay_address + || self.zone_before.zone_type != self.zone_after.zone_type } /// Returns true if the [`BlueprintZoneDisposition`] for the zone changed. 
@@ -1409,10 +1649,10 @@ mod table_display { ) { section.push_record(vec![ first_column, - zone.config.zone_type.kind().to_string(), - zone.config.id.to_string(), + zone.zone_type.kind().to_string(), + zone.id.to_string(), zone.disposition.to_string(), - zone.config.underlay_address.to_string(), + zone.underlay_address.to_string(), ]); } @@ -1424,10 +1664,10 @@ mod table_display { ) { section.push_record(vec![ first_column, - zone.config.zone_type.kind().to_string(), - zone.config.id.to_string(), + zone.zone_type.kind().to_string(), + zone.id.to_string(), zone.disposition.to_string(), - zone.config.underlay_address.to_string(), + zone.underlay_address.to_string(), status.to_string(), ]); } @@ -1453,13 +1693,13 @@ mod table_display { ); let mut what_changed = Vec::new(); - if before.config.zone_type != after.config.zone_type { + if before.zone_type != after.zone_type { what_changed.push(ZONE_TYPE_CONFIG); } if before.disposition != after.disposition { what_changed.push(DISPOSITION); } - if before.config.underlay_address != after.config.underlay_address { + if before.underlay_address != after.underlay_address { what_changed.push(UNDERLAY_IP); } debug_assert!( @@ -1476,7 +1716,7 @@ mod table_display { format!(" {SUB_NOT_LAST}"), "".to_string(), after.disposition.to_string(), - after.config.underlay_address.to_string(), + after.underlay_address.to_string(), ]; section.push_record(record); diff --git a/nexus/types/src/deployment/zone_type.rs b/nexus/types/src/deployment/zone_type.rs new file mode 100644 index 0000000000..a258ab53a1 --- /dev/null +++ b/nexus/types/src/deployment/zone_type.rs @@ -0,0 +1,334 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types representing types of Omicron zones managed by blueprints +//! +//! These types are closely related to the `OmicronZoneType` in sled-agent's +//! internal API, but include additional information needed by Reconfigurator +//! that is not needed by sled-agent. 
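As a usage sketch (not part of this patch), the helpers defined below (`kind`, `external_ip`, `opte_vnic`) let callers ask reachability questions without matching on every variant; the `describe_zone` function here is hypothetical:

    use nexus_types::deployment::BlueprintZoneType;

    // Hypothetical helper: only Nexus, ExternalDns, and BoundaryNtp zones
    // carry an external IP and an OPTE vNIC, so both options are Some
    // exactly for those variants.
    fn describe_zone(zone_type: &BlueprintZoneType) -> String {
        match (zone_type.external_ip(), zone_type.opte_vnic()) {
            (Some(ip), Some(nic)) => format!(
                "{} zone reachable at {} (vNIC MAC {})",
                zone_type.kind(),
                ip,
                nic.mac,
            ),
            _ => format!("{} zone (no external address)", zone_type.kind()),
        }
    }
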
+ +use omicron_common::api::internal::shared::NetworkInterface; +use schemars::JsonSchema; +use serde::Deserialize; +use serde::Serialize; +use sled_agent_client::types::OmicronZoneType; +use sled_agent_client::ZoneKind; +use std::net::IpAddr; + +#[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum BlueprintZoneType { + BoundaryNtp(blueprint_zone_type::BoundaryNtp), + Clickhouse(blueprint_zone_type::Clickhouse), + ClickhouseKeeper(blueprint_zone_type::ClickhouseKeeper), + CockroachDb(blueprint_zone_type::CockroachDb), + Crucible(blueprint_zone_type::Crucible), + CruciblePantry(blueprint_zone_type::CruciblePantry), + ExternalDns(blueprint_zone_type::ExternalDns), + InternalDns(blueprint_zone_type::InternalDns), + InternalNtp(blueprint_zone_type::InternalNtp), + Nexus(blueprint_zone_type::Nexus), + Oximeter(blueprint_zone_type::Oximeter), +} + +impl BlueprintZoneType { + pub fn external_ip(&self) -> Option { + match self { + BlueprintZoneType::Nexus(nexus) => Some(nexus.external_ip), + BlueprintZoneType::ExternalDns(dns) => Some(dns.dns_address.ip()), + BlueprintZoneType::BoundaryNtp(ntp) => Some(ntp.snat_cfg.ip), + BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Oximeter(_) => None, + } + } + + pub fn opte_vnic(&self) -> Option<&NetworkInterface> { + match self { + BlueprintZoneType::Nexus(nexus) => Some(&nexus.nic), + BlueprintZoneType::ExternalDns(dns) => Some(&dns.nic), + BlueprintZoneType::BoundaryNtp(ntp) => Some(&ntp.nic), + BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Oximeter(_) => None, + } + } + + /// Identifies whether this is an NTP zone (any flavor) + pub fn is_ntp(&self) -> bool { + match self { + BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::BoundaryNtp(_) => true, + BlueprintZoneType::Nexus(_) + | BlueprintZoneType::ExternalDns(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::Oximeter(_) => false, + } + } + + /// Identifies whether this is a Nexus zone + pub fn is_nexus(&self) -> bool { + match self { + BlueprintZoneType::Nexus(_) => true, + BlueprintZoneType::BoundaryNtp(_) + | BlueprintZoneType::ExternalDns(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::Crucible(_) + | BlueprintZoneType::CruciblePantry(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Oximeter(_) => false, + } + } + + /// Identifies whether this a Crucible (not Crucible pantry) zone + pub fn is_crucible(&self) -> bool { + match self { + BlueprintZoneType::Crucible(_) => true, + BlueprintZoneType::BoundaryNtp(_) + | BlueprintZoneType::Clickhouse(_) + | BlueprintZoneType::ClickhouseKeeper(_) + | BlueprintZoneType::CockroachDb(_) + | BlueprintZoneType::CruciblePantry(_) + | 
BlueprintZoneType::ExternalDns(_) + | BlueprintZoneType::InternalDns(_) + | BlueprintZoneType::InternalNtp(_) + | BlueprintZoneType::Nexus(_) + | BlueprintZoneType::Oximeter(_) => false, + } + } +} + +impl From for OmicronZoneType { + fn from(zone_type: BlueprintZoneType) -> Self { + match zone_type { + BlueprintZoneType::BoundaryNtp(zone) => Self::BoundaryNtp { + address: zone.address.to_string(), + ntp_servers: zone.ntp_servers, + dns_servers: zone.dns_servers, + domain: zone.domain, + nic: zone.nic, + snat_cfg: zone.snat_cfg, + }, + BlueprintZoneType::Clickhouse(zone) => Self::Clickhouse { + address: zone.address.to_string(), + dataset: zone.dataset, + }, + BlueprintZoneType::ClickhouseKeeper(zone) => { + Self::ClickhouseKeeper { + address: zone.address.to_string(), + dataset: zone.dataset, + } + } + BlueprintZoneType::CockroachDb(zone) => Self::CockroachDb { + address: zone.address.to_string(), + dataset: zone.dataset, + }, + BlueprintZoneType::Crucible(zone) => Self::Crucible { + address: zone.address.to_string(), + dataset: zone.dataset, + }, + BlueprintZoneType::CruciblePantry(zone) => { + Self::CruciblePantry { address: zone.address.to_string() } + } + BlueprintZoneType::ExternalDns(zone) => Self::ExternalDns { + dataset: zone.dataset, + http_address: zone.http_address.to_string(), + dns_address: zone.dns_address.to_string(), + nic: zone.nic, + }, + BlueprintZoneType::InternalDns(zone) => Self::InternalDns { + dataset: zone.dataset, + http_address: zone.http_address.to_string(), + dns_address: zone.dns_address.to_string(), + gz_address: zone.gz_address, + gz_address_index: zone.gz_address_index, + }, + BlueprintZoneType::InternalNtp(zone) => Self::InternalNtp { + address: zone.address.to_string(), + ntp_servers: zone.ntp_servers, + dns_servers: zone.dns_servers, + domain: zone.domain, + }, + BlueprintZoneType::Nexus(zone) => Self::Nexus { + internal_address: zone.internal_address.to_string(), + external_ip: zone.external_ip, + nic: zone.nic, + external_tls: zone.external_tls, + external_dns_servers: zone.external_dns_servers, + }, + BlueprintZoneType::Oximeter(zone) => { + Self::Oximeter { address: zone.address.to_string() } + } + } + } +} + +impl BlueprintZoneType { + /// Returns the [`ZoneKind`] corresponding to this variant. + pub fn kind(&self) -> ZoneKind { + match self { + Self::BoundaryNtp(_) => ZoneKind::BoundaryNtp, + Self::Clickhouse(_) => ZoneKind::Clickhouse, + Self::ClickhouseKeeper(_) => ZoneKind::ClickhouseKeeper, + Self::CockroachDb(_) => ZoneKind::CockroachDb, + Self::Crucible(_) => ZoneKind::Crucible, + Self::CruciblePantry(_) => ZoneKind::CruciblePantry, + Self::ExternalDns(_) => ZoneKind::ExternalDns, + Self::InternalDns(_) => ZoneKind::InternalDns, + Self::InternalNtp(_) => ZoneKind::InternalNtp, + Self::Nexus(_) => ZoneKind::Nexus, + Self::Oximeter(_) => ZoneKind::Oximeter, + } + } +} + +pub mod blueprint_zone_type { + use crate::inventory::OmicronZoneDataset; + use omicron_common::api::internal::shared::NetworkInterface; + use omicron_common::api::internal::shared::SourceNatConfig; + use schemars::JsonSchema; + use serde::Deserialize; + use serde::Serialize; + use std::net::IpAddr; + use std::net::Ipv6Addr; + use std::net::SocketAddr; + use std::net::SocketAddrV6; + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct BoundaryNtp { + pub address: SocketAddrV6, + pub ntp_servers: Vec, + pub dns_servers: Vec, + pub domain: Option, + /// The service vNIC providing outbound connectivity using OPTE. 
+ pub nic: NetworkInterface, + /// The SNAT configuration for outbound connections. + pub snat_cfg: SourceNatConfig, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct Clickhouse { + pub address: SocketAddrV6, + pub dataset: OmicronZoneDataset, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct ClickhouseKeeper { + pub address: SocketAddrV6, + pub dataset: OmicronZoneDataset, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct CockroachDb { + pub address: SocketAddrV6, + pub dataset: OmicronZoneDataset, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct Crucible { + pub address: SocketAddrV6, + pub dataset: OmicronZoneDataset, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct CruciblePantry { + pub address: SocketAddrV6, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct ExternalDns { + pub dataset: OmicronZoneDataset, + /// The address at which the external DNS server API is reachable. + pub http_address: SocketAddrV6, + /// The address at which the external DNS server is reachable. + pub dns_address: SocketAddr, + /// The service vNIC providing external connectivity using OPTE. + pub nic: NetworkInterface, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct InternalDns { + pub dataset: OmicronZoneDataset, + pub http_address: SocketAddrV6, + pub dns_address: SocketAddrV6, + /// The addresses in the global zone which should be created + /// + /// For the DNS service, which exists outside the sleds's typical subnet + /// - adding an address in the GZ is necessary to allow inter-zone + /// traffic routing. + pub gz_address: Ipv6Addr, + + /// The address is also identified with an auxiliary bit of information + /// to ensure that the created global zone address can have a unique + /// name. + pub gz_address_index: u32, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct InternalNtp { + pub address: SocketAddrV6, + pub ntp_servers: Vec, + pub dns_servers: Vec, + pub domain: Option, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct Nexus { + /// The address at which the internal nexus server is reachable. + pub internal_address: SocketAddrV6, + /// The address at which the external nexus server is reachable. + pub external_ip: IpAddr, + /// The service vNIC providing external connectivity using OPTE. + pub nic: NetworkInterface, + /// Whether Nexus's external endpoint should use TLS + pub external_tls: bool, + /// External DNS servers Nexus can use to resolve external hosts. 
+ pub external_dns_servers: Vec, + } + + #[derive( + Debug, Clone, Eq, PartialEq, JsonSchema, Deserialize, Serialize, + )] + pub struct Oximeter { + pub address: SocketAddrV6, + } +} diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index ca3eeb0f2d..0383c9cbd2 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -2754,17 +2754,9 @@ ] }, "BlueprintZoneConfig": { - "description": "Describes one Omicron-managed zone in a blueprint.\n\nThis is a wrapper around an [`OmicronZoneConfig`] that also includes a [`BlueprintZoneDisposition`].\n\nPart of [`BlueprintZonesConfig`].", + "description": "Describes one Omicron-managed zone in a blueprint.\n\nPart of [`BlueprintZonesConfig`].", "type": "object", "properties": { - "config": { - "description": "The underlying zone configuration.", - "allOf": [ - { - "$ref": "#/components/schemas/OmicronZoneConfig" - } - ] - }, "disposition": { "description": "The disposition (desired state) of this zone recorded in the blueprint.", "allOf": [ @@ -2772,11 +2764,23 @@ "$ref": "#/components/schemas/BlueprintZoneDisposition" } ] + }, + "id": { + "$ref": "#/components/schemas/TypedUuidForOmicronZoneKind" + }, + "underlay_address": { + "type": "string", + "format": "ipv6" + }, + "zone_type": { + "$ref": "#/components/schemas/BlueprintZoneType" } }, "required": [ - "config", - "disposition" + "disposition", + "id", + "underlay_address", + "zone_type" ] }, "BlueprintZoneDisposition": { @@ -2805,446 +2809,790 @@ } ] }, - "BlueprintZonesConfig": { - "description": "Information about an Omicron zone as recorded in a blueprint.\n\nCurrently, this is similar to [`OmicronZonesConfig`], but also contains a per-zone [`BlueprintZoneDisposition`].\n\nPart of [`Blueprint`].", - "type": "object", - "properties": { - "generation": { - "description": "Generation number of this configuration.\n\nThis generation number is owned by the control plane. 
See [`OmicronZonesConfig::generation`] for more details.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - }, - "zones": { - "description": "The list of running zones.", - "type": "array", - "items": { - "$ref": "#/components/schemas/BlueprintZoneConfig" - } - } - }, - "required": [ - "generation", - "zones" - ] - }, - "ByteCount": { - "description": "Byte count to express memory or storage capacity.", - "type": "integer", - "format": "uint64", - "minimum": 0 - }, - "Certificate": { - "type": "object", - "properties": { - "cert": { - "type": "string" - }, - "key": { - "type": "string" - } - }, - "required": [ - "cert", - "key" - ] - }, - "CollectionId": { - "type": "object", - "properties": { - "collection_id": { - "$ref": "#/components/schemas/TypedUuidForCollectionKind" - } - }, - "required": [ - "collection_id" - ] - }, - "Cumulativedouble": { - "description": "A cumulative or counter data type.", - "type": "object", - "properties": { - "start_time": { - "type": "string", - "format": "date-time" - }, - "value": { - "type": "number", - "format": "double" - } - }, - "required": [ - "start_time", - "value" - ] - }, - "Cumulativefloat": { - "description": "A cumulative or counter data type.", - "type": "object", - "properties": { - "start_time": { - "type": "string", - "format": "date-time" - }, - "value": { - "type": "number", - "format": "float" - } - }, - "required": [ - "start_time", - "value" - ] - }, - "Cumulativeint64": { - "description": "A cumulative or counter data type.", - "type": "object", - "properties": { - "start_time": { - "type": "string", - "format": "date-time" - }, - "value": { - "type": "integer", - "format": "int64" - } - }, - "required": [ - "start_time", - "value" - ] - }, - "Cumulativeuint64": { - "description": "A cumulative or counter data type.", - "type": "object", - "properties": { - "start_time": { - "type": "string", - "format": "date-time" - }, - "value": { - "type": "integer", - "format": "uint64", - "minimum": 0 - } - }, - "required": [ - "start_time", - "value" - ] - }, - "CurrentStatus": { - "description": "Describes the current status of a background task", + "BlueprintZoneType": { "oneOf": [ { - "description": "The background task is not running\n\nTypically, the task would be waiting for its next activation, which would happen after a timeout or some other event that triggers activation", "type": "object", "properties": { - "current_status": { + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "nullable": true, + "type": "string" + }, + "nic": { + "description": "The service vNIC providing outbound connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } + }, + "snat_cfg": { + "description": "The SNAT configuration for outbound connections.", + "allOf": [ + { + "$ref": "#/components/schemas/SourceNatConfig" + } + ] + }, + "type": { "type": "string", "enum": [ - "idle" + "boundary_ntp" ] } }, "required": [ - "current_status" + "address", + "dns_servers", + "nic", + "ntp_servers", + "snat_cfg", + "type" ] }, { - "description": "The background task is currently running\n\nMore precisely, the task has been activated and has not yet finished this activation", "type": "object", "properties": { - "current_status": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": 
"#/components/schemas/OmicronZoneDataset" + }, + "type": { "type": "string", "enum": [ - "running" + "clickhouse" ] - }, - "details": { - "$ref": "#/components/schemas/CurrentStatusRunning" } }, "required": [ - "current_status", - "details" + "address", + "dataset", + "type" ] - } - ] - }, - "CurrentStatusRunning": { - "type": "object", - "properties": { - "iteration": { - "description": "which iteration this was (counter)", - "type": "integer", - "format": "uint64", - "minimum": 0 }, - "reason": { - "description": "what kind of event triggered this activation", - "allOf": [ - { - "$ref": "#/components/schemas/ActivationReason" + { + "type": "object", + "properties": { + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "type": { + "type": "string", + "enum": [ + "clickhouse_keeper" + ] } + }, + "required": [ + "address", + "dataset", + "type" ] }, - "start_time": { - "description": "wall-clock time when the current activation started", - "type": "string", - "format": "date-time" - } - }, - "required": [ - "iteration", - "reason", - "start_time" - ] - }, - "DatasetCreateRequest": { - "type": "object", - "properties": { - "dataset_id": { - "type": "string", - "format": "uuid" - }, - "request": { - "$ref": "#/components/schemas/DatasetPutRequest" - }, - "zpool_id": { - "type": "string", - "format": "uuid" - } - }, - "required": [ - "dataset_id", - "request", - "zpool_id" - ] - }, - "DatasetKind": { - "description": "Describes the purpose of the dataset.", - "type": "string", - "enum": [ - "crucible", - "cockroach", - "clickhouse", - "clickhouse_keeper", - "external_dns", - "internal_dns" - ] - }, - "DatasetPutRequest": { - "description": "Describes a dataset within a pool.", - "type": "object", - "properties": { - "address": { - "description": "Address on which a service is responding to requests for the dataset.", - "type": "string" - }, - "kind": { - "description": "Type of dataset being inserted.", - "allOf": [ - { - "$ref": "#/components/schemas/DatasetKind" - } - ] - } - }, - "required": [ - "address", - "kind" - ] - }, - "Datum": { - "description": "A `Datum` is a single sampled data point from a metric.", - "oneOf": [ { "type": "object", "properties": { - "datum": { - "type": "boolean" + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" }, "type": { "type": "string", "enum": [ - "bool" + "cockroach_db" ] } }, "required": [ - "datum", + "address", + "dataset", "type" ] }, { "type": "object", "properties": { - "datum": { - "type": "integer", - "format": "int8" + "address": { + "type": "string" + }, + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" }, "type": { "type": "string", "enum": [ - "i8" + "crucible" ] } }, "required": [ - "datum", + "address", + "dataset", "type" ] }, { "type": "object", "properties": { - "datum": { - "type": "integer", - "format": "uint8", - "minimum": 0 + "address": { + "type": "string" }, "type": { "type": "string", "enum": [ - "u8" + "crucible_pantry" ] } }, "required": [ - "datum", + "address", "type" ] }, { "type": "object", "properties": { - "datum": { - "type": "integer", - "format": "int16" + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "dns_address": { + "description": "The address at which the external DNS server is reachable.", + "type": "string" + }, + "http_address": { + "description": "The address at which the external DNS server API is reachable.", + "type": "string" + }, + "nic": { + 
"description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } + ] }, "type": { "type": "string", "enum": [ - "i16" + "external_dns" ] } }, "required": [ - "datum", + "dataset", + "dns_address", + "http_address", + "nic", "type" ] }, { "type": "object", "properties": { - "datum": { + "dataset": { + "$ref": "#/components/schemas/OmicronZoneDataset" + }, + "dns_address": { + "type": "string" + }, + "gz_address": { + "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", + "type": "string", + "format": "ipv6" + }, + "gz_address_index": { + "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", "type": "integer", - "format": "uint16", + "format": "uint32", "minimum": 0 }, + "http_address": { + "type": "string" + }, "type": { "type": "string", "enum": [ - "u16" + "internal_dns" ] } }, "required": [ - "datum", + "dataset", + "dns_address", + "gz_address", + "gz_address_index", + "http_address", "type" ] }, { "type": "object", "properties": { - "datum": { - "type": "integer", - "format": "int32" + "address": { + "type": "string" + }, + "dns_servers": { + "type": "array", + "items": { + "type": "string", + "format": "ip" + } + }, + "domain": { + "nullable": true, + "type": "string" + }, + "ntp_servers": { + "type": "array", + "items": { + "type": "string" + } }, "type": { "type": "string", "enum": [ - "i32" + "internal_ntp" ] } }, "required": [ - "datum", + "address", + "dns_servers", + "ntp_servers", "type" ] }, { "type": "object", "properties": { - "datum": { - "type": "integer", - "format": "uint32", - "minimum": 0 + "external_dns_servers": { + "description": "External DNS servers Nexus can use to resolve external hosts.", + "type": "array", + "items": { + "type": "string", + "format": "ip" + } }, - "type": { + "external_ip": { + "description": "The address at which the external nexus server is reachable.", "type": "string", - "enum": [ - "u32" + "format": "ip" + }, + "external_tls": { + "description": "Whether Nexus's external endpoint should use TLS", + "type": "boolean" + }, + "internal_address": { + "description": "The address at which the internal nexus server is reachable.", + "type": "string" + }, + "nic": { + "description": "The service vNIC providing external connectivity using OPTE.", + "allOf": [ + { + "$ref": "#/components/schemas/NetworkInterface" + } ] - } - }, - "required": [ - "datum", - "type" - ] - }, - { - "type": "object", - "properties": { - "datum": { - "type": "integer", - "format": "int64" }, "type": { "type": "string", "enum": [ - "i64" + "nexus" ] } }, "required": [ - "datum", + "external_dns_servers", + "external_ip", + "external_tls", + "internal_address", + "nic", "type" ] }, { "type": "object", "properties": { - "datum": { - "type": "integer", - "format": "uint64", - "minimum": 0 + "address": { + "type": "string" }, "type": { "type": "string", "enum": [ - "u64" + "oximeter" ] } }, "required": [ - "datum", + "address", "type" ] - }, - { - "type": "object", - "properties": { - "datum": { - "type": "number", - "format": "float" - }, - "type": { - "type": "string", - "enum": [ + } + ] + }, + "BlueprintZonesConfig": { + "description": "Information about an Omicron zone as recorded in a blueprint.\n\nCurrently, this 
is similar to [`OmicronZonesConfig`], but also contains a per-zone [`BlueprintZoneDisposition`].\n\nPart of [`Blueprint`].", + "type": "object", + "properties": { + "generation": { + "description": "Generation number of this configuration.\n\nThis generation number is owned by the control plane. See [`OmicronZonesConfig::generation`] for more details.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "zones": { + "description": "The list of running zones.", + "type": "array", + "items": { + "$ref": "#/components/schemas/BlueprintZoneConfig" + } + } + }, + "required": [ + "generation", + "zones" + ] + }, + "ByteCount": { + "description": "Byte count to express memory or storage capacity.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "Certificate": { + "type": "object", + "properties": { + "cert": { + "type": "string" + }, + "key": { + "type": "string" + } + }, + "required": [ + "cert", + "key" + ] + }, + "CollectionId": { + "type": "object", + "properties": { + "collection_id": { + "$ref": "#/components/schemas/TypedUuidForCollectionKind" + } + }, + "required": [ + "collection_id" + ] + }, + "Cumulativedouble": { + "description": "A cumulative or counter data type.", + "type": "object", + "properties": { + "start_time": { + "type": "string", + "format": "date-time" + }, + "value": { + "type": "number", + "format": "double" + } + }, + "required": [ + "start_time", + "value" + ] + }, + "Cumulativefloat": { + "description": "A cumulative or counter data type.", + "type": "object", + "properties": { + "start_time": { + "type": "string", + "format": "date-time" + }, + "value": { + "type": "number", + "format": "float" + } + }, + "required": [ + "start_time", + "value" + ] + }, + "Cumulativeint64": { + "description": "A cumulative or counter data type.", + "type": "object", + "properties": { + "start_time": { + "type": "string", + "format": "date-time" + }, + "value": { + "type": "integer", + "format": "int64" + } + }, + "required": [ + "start_time", + "value" + ] + }, + "Cumulativeuint64": { + "description": "A cumulative or counter data type.", + "type": "object", + "properties": { + "start_time": { + "type": "string", + "format": "date-time" + }, + "value": { + "type": "integer", + "format": "uint64", + "minimum": 0 + } + }, + "required": [ + "start_time", + "value" + ] + }, + "CurrentStatus": { + "description": "Describes the current status of a background task", + "oneOf": [ + { + "description": "The background task is not running\n\nTypically, the task would be waiting for its next activation, which would happen after a timeout or some other event that triggers activation", + "type": "object", + "properties": { + "current_status": { + "type": "string", + "enum": [ + "idle" + ] + } + }, + "required": [ + "current_status" + ] + }, + { + "description": "The background task is currently running\n\nMore precisely, the task has been activated and has not yet finished this activation", + "type": "object", + "properties": { + "current_status": { + "type": "string", + "enum": [ + "running" + ] + }, + "details": { + "$ref": "#/components/schemas/CurrentStatusRunning" + } + }, + "required": [ + "current_status", + "details" + ] + } + ] + }, + "CurrentStatusRunning": { + "type": "object", + "properties": { + "iteration": { + "description": "which iteration this was (counter)", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "reason": { + "description": "what kind of event triggered this activation", + "allOf": [ + { + "$ref": 
"#/components/schemas/ActivationReason" + } + ] + }, + "start_time": { + "description": "wall-clock time when the current activation started", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "iteration", + "reason", + "start_time" + ] + }, + "DatasetCreateRequest": { + "type": "object", + "properties": { + "dataset_id": { + "type": "string", + "format": "uuid" + }, + "request": { + "$ref": "#/components/schemas/DatasetPutRequest" + }, + "zpool_id": { + "type": "string", + "format": "uuid" + } + }, + "required": [ + "dataset_id", + "request", + "zpool_id" + ] + }, + "DatasetKind": { + "description": "Describes the purpose of the dataset.", + "type": "string", + "enum": [ + "crucible", + "cockroach", + "clickhouse", + "clickhouse_keeper", + "external_dns", + "internal_dns" + ] + }, + "DatasetPutRequest": { + "description": "Describes a dataset within a pool.", + "type": "object", + "properties": { + "address": { + "description": "Address on which a service is responding to requests for the dataset.", + "type": "string" + }, + "kind": { + "description": "Type of dataset being inserted.", + "allOf": [ + { + "$ref": "#/components/schemas/DatasetKind" + } + ] + } + }, + "required": [ + "address", + "kind" + ] + }, + "Datum": { + "description": "A `Datum` is a single sampled data point from a metric.", + "oneOf": [ + { + "type": "object", + "properties": { + "datum": { + "type": "boolean" + }, + "type": { + "type": "string", + "enum": [ + "bool" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "integer", + "format": "int8" + }, + "type": { + "type": "string", + "enum": [ + "i8" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "u8" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "integer", + "format": "int16" + }, + "type": { + "type": "string", + "enum": [ + "i16" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "integer", + "format": "uint16", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "u16" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "integer", + "format": "int32" + }, + "type": { + "type": "string", + "enum": [ + "i32" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "integer", + "format": "uint32", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "u32" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "integer", + "format": "int64" + }, + "type": { + "type": "string", + "enum": [ + "i64" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "type": { + "type": "string", + "enum": [ + "u64" + ] + } + }, + "required": [ + "datum", + "type" + ] + }, + { + "type": "object", + "properties": { + "datum": { + "type": "number", + "format": "float" + }, + "type": { + "type": "string", + "enum": [ "f32" ] } @@ -4503,211 +4851,55 @@ }, "n_counts": { "type": "integer", - "format": "uint", - "minimum": 0 - } - }, 
- "required": [ - "n_bins", - "n_counts" - ] - }, - "type": { - "type": "string", - "enum": [ - "array_size_mismatch" - ] - } - }, - "required": [ - "content", - "type" - ] - }, - { - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/QuantizationError" - }, - "type": { - "type": "string", - "enum": [ - "quantization" - ] - } - }, - "required": [ - "content", - "type" - ] - } - ] - }, - "Histogramdouble": { - "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", - "type": "object", - "properties": { - "bins": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Bindouble" - } - }, - "n_samples": { - "type": "integer", - "format": "uint64", - "minimum": 0 - }, - "start_time": { - "type": "string", - "format": "date-time" - } - }, - "required": [ - "bins", - "n_samples", - "start_time" - ] - }, - "Histogramfloat": { - "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", - "type": "object", - "properties": { - "bins": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Binfloat" - } - }, - "n_samples": { - "type": "integer", - "format": "uint64", - "minimum": 0 - }, - "start_time": { - "type": "string", - "format": "date-time" - } - }, - "required": [ - "bins", - "n_samples", - "start_time" - ] - }, - "Histogramint16": { - "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", - "type": "object", - "properties": { - "bins": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Binint16" - } - }, - "n_samples": { - "type": "integer", - "format": "uint64", - "minimum": 0 - }, - "start_time": { - "type": "string", - "format": "date-time" - } - }, - "required": [ - "bins", - "n_samples", - "start_time" - ] - }, - "Histogramint32": { - "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. 
There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", - "type": "object", - "properties": { - "bins": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Binint32" - } - }, - "n_samples": { - "type": "integer", - "format": "uint64", - "minimum": 0 - }, - "start_time": { - "type": "string", - "format": "date-time" - } - }, - "required": [ - "bins", - "n_samples", - "start_time" - ] - }, - "Histogramint64": { - "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", - "type": "object", - "properties": { - "bins": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Binint64" - } - }, - "n_samples": { - "type": "integer", - "format": "uint64", - "minimum": 0 - }, - "start_time": { - "type": "string", - "format": "date-time" - } - }, - "required": [ - "bins", - "n_samples", - "start_time" - ] - }, - "Histogramint8": { - "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", - "type": "object", - "properties": { - "bins": { - "type": "array", - "items": { - "$ref": "#/components/schemas/Binint8" - } - }, - "n_samples": { - "type": "integer", - "format": "uint64", - "minimum": 0 + "format": "uint", + "minimum": 0 + } + }, + "required": [ + "n_bins", + "n_counts" + ] + }, + "type": { + "type": "string", + "enum": [ + "array_size_mismatch" + ] + } + }, + "required": [ + "content", + "type" + ] }, - "start_time": { - "type": "string", - "format": "date-time" + { + "type": "object", + "properties": { + "content": { + "$ref": "#/components/schemas/QuantizationError" + }, + "type": { + "type": "string", + "enum": [ + "quantization" + ] + } + }, + "required": [ + "content", + "type" + ] } - }, - "required": [ - "bins", - "n_samples", - "start_time" ] }, - "Histogramuint16": { + "Histogramdouble": { "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", "type": "object", "properties": { "bins": { "type": "array", "items": { - "$ref": "#/components/schemas/Binuint16" + "$ref": "#/components/schemas/Bindouble" } }, "n_samples": { @@ -4726,14 +4918,14 @@ "start_time" ] }, - "Histogramuint32": { + "Histogramfloat": { "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. 
Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", "type": "object", "properties": { "bins": { "type": "array", "items": { - "$ref": "#/components/schemas/Binuint32" + "$ref": "#/components/schemas/Binfloat" } }, "n_samples": { @@ -4752,14 +4944,14 @@ "start_time" ] }, - "Histogramuint64": { + "Histogramint16": { "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", "type": "object", "properties": { "bins": { "type": "array", "items": { - "$ref": "#/components/schemas/Binuint64" + "$ref": "#/components/schemas/Binint16" } }, "n_samples": { @@ -4778,1183 +4970,972 @@ "start_time" ] }, - "Histogramuint8": { + "Histogramint32": { "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", "type": "object", "properties": { "bins": { "type": "array", "items": { - "$ref": "#/components/schemas/Binuint8" + "$ref": "#/components/schemas/Binint32" } }, "n_samples": { "type": "integer", - "format": "uint64", - "minimum": 0 - }, - "start_time": { - "type": "string", - "format": "date-time" - } - }, - "required": [ - "bins", - "n_samples", - "start_time" - ] - }, - "InstanceRuntimeState": { - "description": "The dynamic runtime properties of an instance: its current VMM ID (if any), migration information (if any), and the instance state to report if there is no active VMM.", - "type": "object", - "properties": { - "dst_propolis_id": { - "nullable": true, - "description": "If a migration is active, the ID of the target VMM.", - "type": "string", - "format": "uuid" - }, - "gen": { - "description": "Generation number for this state.", - "allOf": [ - { - "$ref": "#/components/schemas/Generation" - } - ] - }, - "migration_id": { - "nullable": true, - "description": "If a migration is active, the ID of that migration.", - "type": "string", - "format": "uuid" - }, - "propolis_id": { - "nullable": true, - "description": "The instance's currently active VMM ID.", - "type": "string", - "format": "uuid" - }, - "time_updated": { - "description": "Timestamp for this information.", - "type": "string", - "format": "date-time" - } - }, - "required": [ - "gen", - "time_updated" - ] - }, - "InstanceState": { - "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", - "oneOf": [ - { - "description": "The instance is being created.", - "type": "string", - "enum": [ - "creating" - ] - }, - { - "description": "The instance is currently starting up.", - 
"type": "string", - "enum": [ - "starting" - ] - }, - { - "description": "The instance is currently running.", - "type": "string", - "enum": [ - "running" - ] - }, - { - "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", - "type": "string", - "enum": [ - "stopping" - ] - }, - { - "description": "The instance is currently stopped.", - "type": "string", - "enum": [ - "stopped" - ] - }, - { - "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", - "type": "string", - "enum": [ - "rebooting" - ] - }, - { - "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis is ready to continue execution.", - "type": "string", - "enum": [ - "migrating" - ] - }, - { - "description": "The instance is attempting to recover from a failure.", - "type": "string", - "enum": [ - "repairing" - ] - }, - { - "description": "The instance has encountered a failure.", - "type": "string", - "enum": [ - "failed" - ] - }, - { - "description": "The instance has been deleted.", - "type": "string", - "enum": [ - "destroyed" - ] - } - ] - }, - "IpKind": { - "type": "string", - "enum": [ - "snat", - "floating", - "ephemeral" - ] - }, - "IpNet": { - "oneOf": [ - { - "title": "v4", - "allOf": [ - { - "$ref": "#/components/schemas/Ipv4Net" - } - ] - }, - { - "title": "v6", - "allOf": [ - { - "$ref": "#/components/schemas/Ipv6Net" - } - ] - } - ] - }, - "IpNetwork": { - "oneOf": [ - { - "title": "v4", - "allOf": [ - { - "$ref": "#/components/schemas/Ipv4Network" - } - ] - }, - { - "title": "v6", - "allOf": [ - { - "$ref": "#/components/schemas/Ipv6Network" - } - ] - } - ] - }, - "IpRange": { - "oneOf": [ - { - "title": "v4", - "allOf": [ - { - "$ref": "#/components/schemas/Ipv4Range" - } - ] - }, - { - "title": "v6", - "allOf": [ - { - "$ref": "#/components/schemas/Ipv6Range" - } - ] + "format": "uint64", + "minimum": 0 + }, + "start_time": { + "type": "string", + "format": "date-time" } + }, + "required": [ + "bins", + "n_samples", + "start_time" ] }, - "Ipv4NatEntryView": { - "description": "NAT Record", + "Histogramint64": { + "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", "type": "object", "properties": { - "deleted": { - "type": "boolean" - }, - "external_address": { - "type": "string", - "format": "ipv4" + "bins": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Binint64" + } }, - "first_port": { + "n_samples": { "type": "integer", - "format": "uint16", + "format": "uint64", "minimum": 0 }, - "gen": { - "type": "integer", - "format": "int64" + "start_time": { + "type": "string", + "format": "date-time" + } + }, + "required": [ + "bins", + "n_samples", + "start_time" + ] + }, + "Histogramint8": { + "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. 
There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", + "type": "object", + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Binint8" + } }, - "last_port": { + "n_samples": { "type": "integer", - "format": "uint16", + "format": "uint64", "minimum": 0 }, - "mac": { - "$ref": "#/components/schemas/MacAddr" - }, - "sled_address": { + "start_time": { "type": "string", - "format": "ipv6" - }, - "vni": { - "$ref": "#/components/schemas/Vni" + "format": "date-time" } }, "required": [ - "deleted", - "external_address", - "first_port", - "gen", - "last_port", - "mac", - "sled_address", - "vni" + "bins", + "n_samples", + "start_time" ] }, - "Ipv4Net": { - "example": "192.168.1.0/24", - "title": "An IPv4 subnet", - "description": "An IPv4 subnet, including prefix and subnet mask", - "type": "string", - "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$" - }, - "Ipv4Network": { - "type": "string", - "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$" - }, - "Ipv4Range": { - "description": "A non-decreasing IPv4 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.", + "Histogramuint16": { + "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", "type": "object", "properties": { - "first": { - "type": "string", - "format": "ipv4" + "bins": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Binuint16" + } }, - "last": { + "n_samples": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "start_time": { "type": "string", - "format": "ipv4" + "format": "date-time" } }, "required": [ - "first", - "last" + "bins", + "n_samples", + "start_time" ] }, - "Ipv6Net": { - "example": "fd12:3456::/64", - "title": "An IPv6 subnet", - "description": "An IPv6 subnet, including prefix and subnet mask", - "type": "string", - "pattern": "^([fF][dD])[0-9a-fA-F]{2}:(([0-9a-fA-F]{1,4}:){6}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,6}:)([0-9a-fA-F]{1,4})?\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$" - }, - "Ipv6Network": { - "type": "string", - "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$" - }, - "Ipv6Range": { - 
"description": "A non-decreasing IPv6 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.", + "Histogramuint32": { + "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", "type": "object", "properties": { - "first": { - "type": "string", - "format": "ipv6" + "bins": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Binuint32" + } }, - "last": { + "n_samples": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "start_time": { "type": "string", - "format": "ipv6" + "format": "date-time" } }, "required": [ - "first", - "last" + "bins", + "n_samples", + "start_time" ] }, - "LastResult": { - "oneOf": [ - { - "description": "The task has never completed an activation", - "type": "object", - "properties": { - "last_result": { - "type": "string", - "enum": [ - "never_completed" - ] - } - }, - "required": [ - "last_result" - ] + "Histogramuint64": { + "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", + "type": "object", + "properties": { + "bins": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Binuint64" + } }, - { - "description": "The task has completed at least one activation", - "type": "object", - "properties": { - "details": { - "$ref": "#/components/schemas/LastResultCompleted" - }, - "last_result": { - "type": "string", - "enum": [ - "completed" - ] - } - }, - "required": [ - "details", - "last_result" - ] + "n_samples": { + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "start_time": { + "type": "string", + "format": "date-time" } + }, + "required": [ + "bins", + "n_samples", + "start_time" ] }, - "LastResultCompleted": { + "Histogramuint8": { + "description": "Histogram metric\n\nA histogram maintains the count of any number of samples, over a set of bins. Bins are specified on construction via their _left_ edges, inclusive. 
There can't be any \"gaps\" in the bins, and an additional bin may be added to the left, right, or both so that the bins extend to the entire range of the support.\n\nNote that any gaps, unsorted bins, or non-finite values will result in an error.", "type": "object", "properties": { - "details": { - "description": "arbitrary datum emitted by the background task" - }, - "elapsed": { - "description": "total time elapsed during the activation", - "allOf": [ - { - "$ref": "#/components/schemas/Duration" - } - ] + "bins": { + "type": "array", + "items": { + "$ref": "#/components/schemas/Binuint8" + } }, - "iteration": { - "description": "which iteration this was (counter)", + "n_samples": { "type": "integer", "format": "uint64", "minimum": 0 }, - "reason": { - "description": "what kind of event triggered this activation", - "allOf": [ - { - "$ref": "#/components/schemas/ActivationReason" - } - ] - }, "start_time": { - "description": "wall-clock time when the activation started", "type": "string", "format": "date-time" } }, "required": [ - "details", - "elapsed", - "iteration", - "reason", + "bins", + "n_samples", "start_time" ] }, - "MacAddr": { - "example": "ff:ff:ff:ff:ff:ff", - "title": "A MAC address", - "description": "A Media Access Control address, in EUI-48 format", - "type": "string", - "pattern": "^([0-9a-fA-F]{0,2}:){5}[0-9a-fA-F]{0,2}$", - "minLength": 5, - "maxLength": 17 - }, - "Measurement": { - "description": "A `Measurement` is a timestamped datum from a single metric", + "InstanceRuntimeState": { + "description": "The dynamic runtime properties of an instance: its current VMM ID (if any), migration information (if any), and the instance state to report if there is no active VMM.", "type": "object", "properties": { - "datum": { - "$ref": "#/components/schemas/Datum" + "dst_propolis_id": { + "nullable": true, + "description": "If a migration is active, the ID of the target VMM.", + "type": "string", + "format": "uuid" }, - "timestamp": { + "gen": { + "description": "Generation number for this state.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + }, + "migration_id": { + "nullable": true, + "description": "If a migration is active, the ID of that migration.", + "type": "string", + "format": "uuid" + }, + "propolis_id": { + "nullable": true, + "description": "The instance's currently active VMM ID.", + "type": "string", + "format": "uuid" + }, + "time_updated": { + "description": "Timestamp for this information.", "type": "string", "format": "date-time" } }, "required": [ - "datum", - "timestamp" + "gen", + "time_updated" ] }, - "MetricsError": { - "description": "Errors related to the generation or collection of metrics.", + "InstanceState": { + "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", "oneOf": [ { - "description": "An error related to generating metric data points", - "type": "object", - "properties": { - "content": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "datum_error" - ] - } - }, - "required": [ - "content", - "type" + "description": "The instance is being created.", + "type": "string", + "enum": [ + "creating" ] }, { - "description": "An error running an `Oximeter` server", - "type": "object", - "properties": { - "content": { - "type": "string" - }, - "type": { - "type": "string", - "enum": [ - "oximeter_server" - ] - } - }, - "required": [ - 
"content", - "type" + "description": "The instance is currently starting up.", + "type": "string", + "enum": [ + "starting" ] }, { - "description": "An error related to creating or sampling a [`histogram::Histogram`] metric.", - "type": "object", - "properties": { - "content": { - "$ref": "#/components/schemas/HistogramError" - }, - "type": { - "type": "string", - "enum": [ - "histogram_error" - ] - } - }, - "required": [ - "content", - "type" + "description": "The instance is currently running.", + "type": "string", + "enum": [ + "running" ] }, { - "description": "An error parsing a field or measurement from a string.", - "type": "object", - "properties": { - "content": { - "type": "object", - "properties": { - "src": { - "type": "string" - }, - "typ": { - "type": "string" - } - }, - "required": [ - "src", - "typ" - ] - }, - "type": { - "type": "string", - "enum": [ - "parse_error" - ] - } - }, - "required": [ - "content", - "type" + "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", + "type": "string", + "enum": [ + "stopping" ] }, { - "description": "A field name is duplicated between the target and metric.", - "type": "object", - "properties": { - "content": { - "type": "object", - "properties": { - "name": { - "type": "string" - } - }, - "required": [ - "name" - ] - }, - "type": { - "type": "string", - "enum": [ - "duplicate_field_name" - ] - } - }, - "required": [ - "content", - "type" + "description": "The instance is currently stopped.", + "type": "string", + "enum": [ + "stopped" ] }, { - "type": "object", - "properties": { - "content": { - "type": "object", - "properties": { - "datum_type": { - "$ref": "#/components/schemas/DatumType" - } - }, - "required": [ - "datum_type" - ] - }, - "type": { - "type": "string", - "enum": [ - "missing_datum_requires_start_time" - ] + "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis is ready to continue execution.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The instance is attempting to recover from a failure.", + "type": "string", + "enum": [ + "repairing" + ] + }, + { + "description": "The instance has encountered a failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The instance has been deleted.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, + "IpKind": { + "type": "string", + "enum": [ + "snat", + "floating", + "ephemeral" + ] + }, + "IpNet": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv4Net" } - }, - "required": [ - "content", - "type" ] }, { - "type": "object", - "properties": { - "content": { - "type": "object", - "properties": { - "datum_type": { - "$ref": "#/components/schemas/DatumType" - } - }, - "required": [ - "datum_type" - ] - }, - "type": { - "type": "string", - "enum": [ - "missing_datum_cannot_have_start_time" - ] + "title": "v6", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv6Net" + } + ] + } + ] + }, + "IpNetwork": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv4Network" } - }, - "required": [ - "content", - "type" ] }, { - "type": "object", - "properties": { - "type": { - "type": 
"string", - "enum": [ - "invalid_timeseries_name" - ] + "title": "v6", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv6Network" } - }, - "required": [ - "type" ] } ] }, - "MissingDatum": { - "type": "object", - "properties": { - "datum_type": { - "$ref": "#/components/schemas/DatumType" + "IpRange": { + "oneOf": [ + { + "title": "v4", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv4Range" + } + ] }, - "start_time": { - "nullable": true, - "type": "string", - "format": "date-time" + { + "title": "v6", + "allOf": [ + { + "$ref": "#/components/schemas/Ipv6Range" + } + ] } - }, - "required": [ - "datum_type" ] }, - "Name": { - "title": "A name unique within the parent collection", - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", - "type": "string", - "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", - "minLength": 1, - "maxLength": 63 - }, - "NetworkInterface": { - "description": "Information required to construct a virtual network interface", + "Ipv4NatEntryView": { + "description": "NAT Record", "type": "object", "properties": { - "id": { - "type": "string", - "format": "uuid" + "deleted": { + "type": "boolean" }, - "ip": { + "external_address": { "type": "string", - "format": "ip" - }, - "kind": { - "$ref": "#/components/schemas/NetworkInterfaceKind" - }, - "mac": { - "$ref": "#/components/schemas/MacAddr" + "format": "ipv4" }, - "name": { - "$ref": "#/components/schemas/Name" + "first_port": { + "type": "integer", + "format": "uint16", + "minimum": 0 }, - "primary": { - "type": "boolean" + "gen": { + "type": "integer", + "format": "int64" }, - "slot": { + "last_port": { "type": "integer", - "format": "uint8", + "format": "uint16", "minimum": 0 }, - "subnet": { - "$ref": "#/components/schemas/IpNet" + "mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "sled_address": { + "type": "string", + "format": "ipv6" }, "vni": { "$ref": "#/components/schemas/Vni" } }, "required": [ - "id", - "ip", - "kind", + "deleted", + "external_address", + "first_port", + "gen", + "last_port", "mac", - "name", - "primary", - "slot", - "subnet", + "sled_address", "vni" ] }, - "NetworkInterfaceKind": { - "description": "The type of network interface", - "oneOf": [ - { - "description": "A vNIC attached to a guest instance", - "type": "object", - "properties": { - "id": { - "type": "string", - "format": "uuid" - }, - "type": { - "type": "string", - "enum": [ - "instance" - ] - } - }, - "required": [ - "id", - "type" - ] + "Ipv4Net": { + "example": "192.168.1.0/24", + "title": "An IPv4 subnet", + "description": "An IPv4 subnet, including prefix and subnet mask", + "type": "string", + "pattern": "^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$" + }, + "Ipv4Network": { + "type": "string", + "pattern": "^((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\/(3[0-2]|[0-2]?[0-9])$" + }, + "Ipv4Range": { + "description": "A non-decreasing IPv4 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.", + "type": "object", + "properties": { + "first": { + "type": "string", + "format": "ipv4" + }, + "last": { + "type": "string", + "format": "ipv4" + } + }, + "required": [ + 
"first", + "last" + ] + }, + "Ipv6Net": { + "example": "fd12:3456::/64", + "title": "An IPv6 subnet", + "description": "An IPv6 subnet, including prefix and subnet mask", + "type": "string", + "pattern": "^([fF][dD])[0-9a-fA-F]{2}:(([0-9a-fA-F]{1,4}:){6}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,6}:)([0-9a-fA-F]{1,4})?\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$" + }, + "Ipv6Network": { + "type": "string", + "pattern": "^(([0-9a-fA-F]{1,4}:){7,7}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,7}:|([0-9a-fA-F]{1,4}:){1,6}:[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,5}(:[0-9a-fA-F]{1,4}){1,2}|([0-9a-fA-F]{1,4}:){1,4}(:[0-9a-fA-F]{1,4}){1,3}|([0-9a-fA-F]{1,4}:){1,3}(:[0-9a-fA-F]{1,4}){1,4}|([0-9a-fA-F]{1,4}:){1,2}(:[0-9a-fA-F]{1,4}){1,5}|[0-9a-fA-F]{1,4}:((:[0-9a-fA-F]{1,4}){1,6})|:((:[0-9a-fA-F]{1,4}){1,7}|:)|fe80:(:[0-9a-fA-F]{0,4}){0,4}%[0-9a-zA-Z]{1,}|::(ffff(:0{1,4}){0,1}:){0,1}((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])|([0-9a-fA-F]{1,4}:){1,4}:((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\")[/](12[0-8]|1[0-1][0-9]|[0-9]?[0-9])$" + }, + "Ipv6Range": { + "description": "A non-decreasing IPv6 address range, inclusive of both ends.\n\nThe first address must be less than or equal to the last address.", + "type": "object", + "properties": { + "first": { + "type": "string", + "format": "ipv6" }, + "last": { + "type": "string", + "format": "ipv6" + } + }, + "required": [ + "first", + "last" + ] + }, + "LastResult": { + "oneOf": [ { - "description": "A vNIC associated with an internal service", + "description": "The task has never completed an activation", "type": "object", "properties": { - "id": { - "type": "string", - "format": "uuid" - }, - "type": { + "last_result": { "type": "string", "enum": [ - "service" + "never_completed" ] } }, "required": [ - "id", - "type" + "last_result" ] }, { - "description": "A vNIC associated with a probe", + "description": "The task has completed at least one activation", "type": "object", "properties": { - "id": { - "type": "string", - "format": "uuid" - }, - "type": { - "type": "string", - "enum": [ - "probe" - ] - } - }, - "required": [ - "id", - "type" - ] - } - ] - }, - "NewPasswordHash": { - "title": "A password hash in PHC string format", - "description": "Password hashes must be in PHC (Password Hashing Competition) string format. Passwords must be hashed with Argon2id. Password hashes may be rejected if the parameters appear not to be secure enough.", - "type": "string" - }, - "NodeName": { - "description": "Unique name for a saga [`Node`]\n\nEach node requires a string name that's unique within its DAG. The name is used to identify its output. Nodes that depend on a given node (either directly or indirectly) can access the node's output using its name.", - "type": "string" - }, - "OmicronPhysicalDiskConfig": { - "description": "OmicronPhysicalDiskConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"id\", \"identity\", \"pool_id\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"identity\": { \"$ref\": \"#/components/schemas/DiskIdentity\" }, \"pool_id\": { \"$ref\": \"#/components/schemas/TypedUuidForZpoolKind\" } } } ```
", - "type": "object", - "properties": { - "id": { - "type": "string", - "format": "uuid" - }, - "identity": { - "$ref": "#/components/schemas/DiskIdentity" - }, - "pool_id": { - "$ref": "#/components/schemas/TypedUuidForZpoolKind" + "details": { + "$ref": "#/components/schemas/LastResultCompleted" + }, + "last_result": { + "type": "string", + "enum": [ + "completed" + ] + } + }, + "required": [ + "details", + "last_result" + ] } - }, - "required": [ - "id", - "identity", - "pool_id" ] }, - "OmicronPhysicalDisksConfig": { - "description": "OmicronPhysicalDisksConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"disks\", \"generation\" ], \"properties\": { \"disks\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/OmicronPhysicalDiskConfig\" } }, \"generation\": { \"description\": \"generation number of this configuration\\n\\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\\n\\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Generation\" } ] } } } ```
", + "LastResultCompleted": { "type": "object", "properties": { - "disks": { - "type": "array", - "items": { - "$ref": "#/components/schemas/OmicronPhysicalDiskConfig" - } + "details": { + "description": "arbitrary datum emitted by the background task" }, - "generation": { - "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "elapsed": { + "description": "total time elapsed during the activation", "allOf": [ { - "$ref": "#/components/schemas/Generation" + "$ref": "#/components/schemas/Duration" } ] - } - }, - "required": [ - "disks", - "generation" - ] - }, - "OmicronZoneConfig": { - "description": "Describes one Omicron-managed zone running on a sled\n\n
JSON schema\n\n```json { \"description\": \"Describes one Omicron-managed zone running on a sled\", \"type\": \"object\", \"required\": [ \"id\", \"underlay_address\", \"zone_type\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"underlay_address\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"zone_type\": { \"$ref\": \"#/components/schemas/OmicronZoneType\" } } } ```
", - "type": "object", - "properties": { - "id": { - "type": "string", - "format": "uuid" }, - "underlay_address": { - "type": "string", - "format": "ipv6" + "iteration": { + "description": "which iteration this was (counter)", + "type": "integer", + "format": "uint64", + "minimum": 0 }, - "zone_type": { - "$ref": "#/components/schemas/OmicronZoneType" + "reason": { + "description": "what kind of event triggered this activation", + "allOf": [ + { + "$ref": "#/components/schemas/ActivationReason" + } + ] + }, + "start_time": { + "description": "wall-clock time when the activation started", + "type": "string", + "format": "date-time" } }, "required": [ - "id", - "underlay_address", - "zone_type" + "details", + "elapsed", + "iteration", + "reason", + "start_time" ] }, - "OmicronZoneDataset": { - "description": "Describes a persistent ZFS dataset associated with an Omicron zone\n\n
JSON schema\n\n```json { \"description\": \"Describes a persistent ZFS dataset associated with an Omicron zone\", \"type\": \"object\", \"required\": [ \"pool_name\" ], \"properties\": { \"pool_name\": { \"$ref\": \"#/components/schemas/ZpoolName\" } } } ```
", + "MacAddr": { + "example": "ff:ff:ff:ff:ff:ff", + "title": "A MAC address", + "description": "A Media Access Control address, in EUI-48 format", + "type": "string", + "pattern": "^([0-9a-fA-F]{0,2}:){5}[0-9a-fA-F]{0,2}$", + "minLength": 5, + "maxLength": 17 + }, + "Measurement": { + "description": "A `Measurement` is a timestamped datum from a single metric", "type": "object", "properties": { - "pool_name": { - "$ref": "#/components/schemas/ZpoolName" + "datum": { + "$ref": "#/components/schemas/Datum" + }, + "timestamp": { + "type": "string", + "format": "date-time" } }, "required": [ - "pool_name" + "datum", + "timestamp" ] }, - "OmicronZoneType": { - "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration\n\n
JSON schema\n\n```json { \"description\": \"Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"address\", \"dns_servers\", \"nic\", \"ntp_servers\", \"snat_cfg\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dns_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"domain\": { \"type\": [ \"string\", \"null\" ] }, \"nic\": { \"description\": \"The service vNIC providing outbound connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"ntp_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } }, \"snat_cfg\": { \"description\": \"The SNAT configuration for outbound connections.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/SourceNatConfig\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"boundary_ntp\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"clickhouse\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"clickhouse_keeper\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"cockroach_db\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"crucible\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"crucible_pantry\" ] } } }, { \"type\": \"object\", \"required\": [ \"dataset\", \"dns_address\", \"http_address\", \"nic\", \"type\" ], \"properties\": { \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"dns_address\": { \"description\": \"The address at which the external DNS server is reachable.\", \"type\": \"string\" }, \"http_address\": { \"description\": \"The address at which the external DNS server API is reachable.\", \"type\": \"string\" }, \"nic\": { \"description\": \"The service vNIC providing external connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"external_dns\" ] } } }, { \"type\": \"object\", \"required\": [ \"dataset\", \"dns_address\", \"gz_address\", \"gz_address_index\", \"http_address\", \"type\" ], \"properties\": { \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"dns_address\": { \"type\": \"string\" }, \"gz_address\": { \"description\": \"The addresses in the global zone which should be created\\n\\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.\", \"type\": \"string\", \"format\": 
\"ipv6\" }, \"gz_address_index\": { \"description\": \"The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.\", \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"http_address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"internal_dns\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dns_servers\", \"ntp_servers\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dns_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"domain\": { \"type\": [ \"string\", \"null\" ] }, \"ntp_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } }, \"type\": { \"type\": \"string\", \"enum\": [ \"internal_ntp\" ] } } }, { \"type\": \"object\", \"required\": [ \"external_dns_servers\", \"external_ip\", \"external_tls\", \"internal_address\", \"nic\", \"type\" ], \"properties\": { \"external_dns_servers\": { \"description\": \"External DNS servers Nexus can use to resolve external hosts.\", \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"external_ip\": { \"description\": \"The address at which the external nexus server is reachable.\", \"type\": \"string\", \"format\": \"ip\" }, \"external_tls\": { \"description\": \"Whether Nexus's external endpoint should use TLS\", \"type\": \"boolean\" }, \"internal_address\": { \"description\": \"The address at which the internal nexus server is reachable.\", \"type\": \"string\" }, \"nic\": { \"description\": \"The service vNIC providing external connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"nexus\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"oximeter\" ] } } } ] } ```
", + "MetricsError": { + "description": "Errors related to the generation or collection of metrics.", "oneOf": [ { + "description": "An error related to generating metric data points", "type": "object", "properties": { - "address": { - "type": "string" - }, - "dns_servers": { - "type": "array", - "items": { - "type": "string", - "format": "ip" - } - }, - "domain": { - "nullable": true, + "content": { "type": "string" }, - "nic": { - "description": "The service vNIC providing outbound connectivity using OPTE.", - "allOf": [ - { - "$ref": "#/components/schemas/NetworkInterface" - } - ] - }, - "ntp_servers": { - "type": "array", - "items": { - "type": "string" - } - }, - "snat_cfg": { - "description": "The SNAT configuration for outbound connections.", - "allOf": [ - { - "$ref": "#/components/schemas/SourceNatConfig" - } - ] - }, "type": { "type": "string", "enum": [ - "boundary_ntp" + "datum_error" ] } }, "required": [ - "address", - "dns_servers", - "nic", - "ntp_servers", - "snat_cfg", + "content", "type" ] }, { + "description": "An error running an `Oximeter` server", "type": "object", "properties": { - "address": { + "content": { "type": "string" }, - "dataset": { - "$ref": "#/components/schemas/OmicronZoneDataset" - }, "type": { "type": "string", "enum": [ - "clickhouse" + "oximeter_server" ] } }, "required": [ - "address", - "dataset", + "content", "type" ] }, { + "description": "An error related to creating or sampling a [`histogram::Histogram`] metric.", "type": "object", "properties": { - "address": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/OmicronZoneDataset" + "content": { + "$ref": "#/components/schemas/HistogramError" }, "type": { "type": "string", "enum": [ - "clickhouse_keeper" + "histogram_error" ] } }, "required": [ - "address", - "dataset", + "content", "type" ] }, { + "description": "An error parsing a field or measurement from a string.", "type": "object", "properties": { - "address": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/OmicronZoneDataset" + "content": { + "type": "object", + "properties": { + "src": { + "type": "string" + }, + "typ": { + "type": "string" + } + }, + "required": [ + "src", + "typ" + ] }, "type": { "type": "string", "enum": [ - "cockroach_db" + "parse_error" ] } }, "required": [ - "address", - "dataset", + "content", "type" ] }, { + "description": "A field name is duplicated between the target and metric.", "type": "object", "properties": { - "address": { - "type": "string" - }, - "dataset": { - "$ref": "#/components/schemas/OmicronZoneDataset" + "content": { + "type": "object", + "properties": { + "name": { + "type": "string" + } + }, + "required": [ + "name" + ] }, "type": { "type": "string", "enum": [ - "crucible" + "duplicate_field_name" ] } }, "required": [ - "address", - "dataset", + "content", "type" ] }, { "type": "object", "properties": { - "address": { - "type": "string" + "content": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + } + }, + "required": [ + "datum_type" + ] }, "type": { "type": "string", "enum": [ - "crucible_pantry" + "missing_datum_requires_start_time" ] } }, "required": [ - "address", + "content", "type" ] }, { "type": "object", "properties": { - "dataset": { - "$ref": "#/components/schemas/OmicronZoneDataset" - }, - "dns_address": { - "description": "The address at which the external DNS server is reachable.", - "type": "string" - }, - "http_address": { - "description": "The address at which the external DNS 
server API is reachable.", - "type": "string" - }, - "nic": { - "description": "The service vNIC providing external connectivity using OPTE.", - "allOf": [ - { - "$ref": "#/components/schemas/NetworkInterface" + "content": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" } + }, + "required": [ + "datum_type" ] }, "type": { "type": "string", "enum": [ - "external_dns" + "missing_datum_cannot_have_start_time" ] } }, "required": [ - "dataset", - "dns_address", - "http_address", - "nic", + "content", "type" ] }, { "type": "object", "properties": { - "dataset": { - "$ref": "#/components/schemas/OmicronZoneDataset" - }, - "dns_address": { - "type": "string" - }, - "gz_address": { - "description": "The addresses in the global zone which should be created\n\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.", - "type": "string", - "format": "ipv6" - }, - "gz_address_index": { - "description": "The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.", - "type": "integer", - "format": "uint32", - "minimum": 0 - }, - "http_address": { - "type": "string" - }, "type": { "type": "string", "enum": [ - "internal_dns" + "invalid_timeseries_name" ] } }, "required": [ - "dataset", - "dns_address", - "gz_address", - "gz_address_index", - "http_address", "type" ] + } + ] + }, + "MissingDatum": { + "type": "object", + "properties": { + "datum_type": { + "$ref": "#/components/schemas/DatumType" + }, + "start_time": { + "nullable": true, + "type": "string", + "format": "date-time" + } + }, + "required": [ + "datum_type" + ] + }, + "Name": { + "title": "A name unique within the parent collection", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. 
Names cannot be a UUID though they may contain a UUID.", + "type": "string", + "pattern": "^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$", + "minLength": 1, + "maxLength": 63 + }, + "NetworkInterface": { + "description": "Information required to construct a virtual network interface", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "$ref": "#/components/schemas/NetworkInterfaceKind" + }, + "mac": { + "$ref": "#/components/schemas/MacAddr" + }, + "name": { + "$ref": "#/components/schemas/Name" }, + "primary": { + "type": "boolean" + }, + "slot": { + "type": "integer", + "format": "uint8", + "minimum": 0 + }, + "subnet": { + "$ref": "#/components/schemas/IpNet" + }, + "vni": { + "$ref": "#/components/schemas/Vni" + } + }, + "required": [ + "id", + "ip", + "kind", + "mac", + "name", + "primary", + "slot", + "subnet", + "vni" + ] + }, + "NetworkInterfaceKind": { + "description": "The type of network interface", + "oneOf": [ { + "description": "A vNIC attached to a guest instance", "type": "object", "properties": { - "address": { - "type": "string" - }, - "dns_servers": { - "type": "array", - "items": { - "type": "string", - "format": "ip" - } - }, - "domain": { - "nullable": true, - "type": "string" - }, - "ntp_servers": { - "type": "array", - "items": { - "type": "string" - } + "id": { + "type": "string", + "format": "uuid" }, "type": { "type": "string", "enum": [ - "internal_ntp" + "instance" ] } }, "required": [ - "address", - "dns_servers", - "ntp_servers", + "id", "type" ] }, { + "description": "A vNIC associated with an internal service", "type": "object", "properties": { - "external_dns_servers": { - "description": "External DNS servers Nexus can use to resolve external hosts.", - "type": "array", - "items": { - "type": "string", - "format": "ip" - } - }, - "external_ip": { - "description": "The address at which the external nexus server is reachable.", + "id": { "type": "string", - "format": "ip" - }, - "external_tls": { - "description": "Whether Nexus's external endpoint should use TLS", - "type": "boolean" - }, - "internal_address": { - "description": "The address at which the internal nexus server is reachable.", - "type": "string" - }, - "nic": { - "description": "The service vNIC providing external connectivity using OPTE.", - "allOf": [ - { - "$ref": "#/components/schemas/NetworkInterface" - } - ] + "format": "uuid" }, "type": { "type": "string", "enum": [ - "nexus" + "service" ] } }, "required": [ - "external_dns_servers", - "external_ip", - "external_tls", - "internal_address", - "nic", + "id", "type" ] }, { + "description": "A vNIC associated with a probe", "type": "object", "properties": { - "address": { - "type": "string" + "id": { + "type": "string", + "format": "uuid" }, "type": { "type": "string", "enum": [ - "oximeter" + "probe" ] } }, "required": [ - "address", + "id", "type" ] } ] }, + "NewPasswordHash": { + "title": "A password hash in PHC string format", + "description": "Password hashes must be in PHC (Password Hashing Competition) string format. Passwords must be hashed with Argon2id. Password hashes may be rejected if the parameters appear not to be secure enough.", + "type": "string" + }, + "NodeName": { + "description": "Unique name for a saga [`Node`]\n\nEach node requires a string name that's unique within its DAG. The name is used to identify its output. 
Nodes that depend on a given node (either directly or indirectly) can access the node's output using its name.", + "type": "string" + }, + "OmicronPhysicalDiskConfig": { + "description": "OmicronPhysicalDiskConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"id\", \"identity\", \"pool_id\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"identity\": { \"$ref\": \"#/components/schemas/DiskIdentity\" }, \"pool_id\": { \"$ref\": \"#/components/schemas/TypedUuidForZpoolKind\" } } } ```
", + "type": "object", + "properties": { + "id": { + "type": "string", + "format": "uuid" + }, + "identity": { + "$ref": "#/components/schemas/DiskIdentity" + }, + "pool_id": { + "$ref": "#/components/schemas/TypedUuidForZpoolKind" + } + }, + "required": [ + "id", + "identity", + "pool_id" + ] + }, + "OmicronPhysicalDisksConfig": { + "description": "OmicronPhysicalDisksConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"disks\", \"generation\" ], \"properties\": { \"disks\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/OmicronPhysicalDiskConfig\" } }, \"generation\": { \"description\": \"generation number of this configuration\\n\\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\\n\\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Generation\" } ] } } } ```
", + "type": "object", + "properties": { + "disks": { + "type": "array", + "items": { + "$ref": "#/components/schemas/OmicronPhysicalDiskConfig" + } + }, + "generation": { + "description": "generation number of this configuration\n\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\n\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.", + "allOf": [ + { + "$ref": "#/components/schemas/Generation" + } + ] + } + }, + "required": [ + "disks", + "generation" + ] + }, + "OmicronZoneDataset": { + "description": "Describes a persistent ZFS dataset associated with an Omicron zone\n\n
JSON schema\n\n```json { \"description\": \"Describes a persistent ZFS dataset associated with an Omicron zone\", \"type\": \"object\", \"required\": [ \"pool_name\" ], \"properties\": { \"pool_name\": { \"$ref\": \"#/components/schemas/ZpoolName\" } } } ```
", + "type": "object", + "properties": { + "pool_name": { + "$ref": "#/components/schemas/ZpoolName" + } + }, + "required": [ + "pool_name" + ] + }, "OximeterInfo": { "description": "Message used to notify Nexus that this oximeter instance is up and running.", "type": "object", @@ -7205,6 +7186,10 @@ "type": "string", "format": "uuid" }, + "TypedUuidForOmicronZoneKind": { + "type": "string", + "format": "uuid" + }, "TypedUuidForUpstairsRepairKind": { "type": "string", "format": "uuid" diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs index 076ccbd44c..2851b16090 100644 --- a/sled-agent/src/rack_setup/service.rs +++ b/sled-agent/src/rack_setup/service.rs @@ -93,7 +93,7 @@ use nexus_client::{ }; use nexus_types::deployment::{ Blueprint, BlueprintPhysicalDisksConfig, BlueprintZoneConfig, - BlueprintZoneDisposition, BlueprintZonesConfig, + BlueprintZoneDisposition, BlueprintZonesConfig, InvalidOmicronZoneType, }; use omicron_common::address::get_sled_address; use omicron_common::api::external::Generation; @@ -1322,9 +1322,9 @@ fn build_initial_blueprint_from_plan( .context("invalid internal dns version")?; let blueprint = build_initial_blueprint_from_sled_configs( - &sled_configs_by_id, + sled_configs_by_id, internal_dns_version, - ); + )?; Ok(blueprint) } @@ -1332,7 +1332,38 @@ fn build_initial_blueprint_from_plan( pub(crate) fn build_initial_blueprint_from_sled_configs( sled_configs_by_id: &BTreeMap, internal_dns_version: Generation, -) -> Blueprint { +) -> Result { + // Helper to convert an `OmicronZoneConfig` into a `BlueprintZoneConfig`. + // This is separate primarily so rustfmt doesn't lose its mind. + let to_bp_zone_config = |z: &crate::params::OmicronZoneConfig| { + // All initial zones are in-service. + let disposition = BlueprintZoneDisposition::InService; + BlueprintZoneConfig::from_omicron_zone_config( + z.clone().into(), + disposition, + ) + }; + + let mut blueprint_disks = BTreeMap::new(); + for (sled_id, sled_config) in sled_configs_by_id { + blueprint_disks.insert( + SledUuid::from_untyped_uuid(*sled_id), + BlueprintPhysicalDisksConfig { + generation: sled_config.disks.generation, + disks: sled_config + .disks + .disks + .iter() + .map(|d| SledAgentTypes::OmicronPhysicalDiskConfig { + identity: d.identity.clone(), + id: d.id, + pool_id: d.pool_id, + }) + .collect(), + }, + ); + } + let mut blueprint_zones = BTreeMap::new(); for (sled_id, sled_config) in sled_configs_by_id { let zones_config = BlueprintZonesConfig { @@ -1350,38 +1381,14 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( zones: sled_config .zones .iter() - .map(|z| BlueprintZoneConfig { - config: z.clone().into(), - // All initial zones are in-service. 
- disposition: BlueprintZoneDisposition::InService, - }) - .collect(), + .map(to_bp_zone_config) + .collect::>()?, }; blueprint_zones.insert(*sled_id, zones_config); } - let mut blueprint_disks = BTreeMap::new(); - for (sled_id, sled_config) in sled_configs_by_id { - blueprint_disks.insert( - SledUuid::from_untyped_uuid(*sled_id), - BlueprintPhysicalDisksConfig { - generation: sled_config.disks.generation, - disks: sled_config - .disks - .disks - .iter() - .map(|d| SledAgentTypes::OmicronPhysicalDiskConfig { - identity: d.identity.clone(), - id: d.id, - pool_id: d.pool_id, - }) - .collect(), - }, - ); - } - - Blueprint { + Ok(Blueprint { id: Uuid::new_v4(), blueprint_zones, blueprint_disks, @@ -1394,7 +1401,7 @@ pub(crate) fn build_initial_blueprint_from_sled_configs( time_created: Utc::now(), creator: "RSS".to_string(), comment: "initial blueprint from rack setup".to_string(), - } + }) } /// Facilitates creating a sequence of OmicronZonesConfig objects for each sled diff --git a/sled-agent/src/sim/server.rs b/sled-agent/src/sim/server.rs index 6f931ea629..7d12b3bfe0 100644 --- a/sled-agent/src/sim/server.rs +++ b/sled-agent/src/sim/server.rs @@ -508,7 +508,8 @@ pub async fn run_standalone_server( blueprint: build_initial_blueprint_from_sled_configs( &sled_configs, internal_dns_version, - ), + ) + .expect("failed to construct initial blueprint"), physical_disks, zpools, datasets, From 47548c525941dc14ab321a1d5a357f556e455a21 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Wed, 17 Apr 2024 16:06:27 -0400 Subject: [PATCH 159/334] omdb: Accept `current-target` as a blueprint ID to show or diff (#5543) #5287 got rid of `omdb db services ...`, so I've found myself wanting to see the current target blueprint. omdb can do that, but it required listing the blueprints, visually finding the target, then calling `nexus blueprints show `. With this PR, we can accept the string `current-target` instead, and I figured that might also be useful in diffs (e.g., `nexus blueprints diff current-target ` to see changes from the current target to some other blueprint). 
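For reference, a minimal usage sketch (the trailing blueprint ID below is a placeholder; per the new `FromStr` impl, `current-target`, `current`, and `target` are all accepted spellings for the current target):

```
omdb nexus blueprints show current-target
omdb nexus blueprints diff current-target <other-blueprint-id>
```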
--- dev-tools/omdb/src/bin/omdb/nexus.rs | 102 ++++++++++++++++++------ dev-tools/omdb/tests/successes.out | 62 ++++++++++++++ dev-tools/omdb/tests/test_all_output.rs | 8 ++ 3 files changed, 148 insertions(+), 24 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index 31a450f935..a7fcc6badc 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -16,6 +16,7 @@ use chrono::Utc; use clap::Args; use clap::Subcommand; use clap::ValueEnum; +use futures::future::try_join; use futures::TryStreamExt; use nexus_client::types::ActivationReason; use nexus_client::types::BackgroundTask; @@ -35,6 +36,7 @@ use reedline::Reedline; use serde::Deserialize; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; +use std::str::FromStr; use tabled::Tabled; use uuid::Uuid; @@ -89,7 +91,7 @@ enum BlueprintsCommands { List, /// Show a blueprint Show(BlueprintIdArgs), - /// Diff two blueprint + /// Diff two blueprints Diff(BlueprintIdsArgs), /// Delete a blueprint Delete(BlueprintIdArgs), @@ -103,18 +105,72 @@ enum BlueprintsCommands { Import(BlueprintImportArgs), } -#[derive(Debug, Args)] +#[derive(Debug, Clone, Copy)] +enum BlueprintIdOrCurrentTarget { + CurrentTarget, + BlueprintId(Uuid), +} + +impl FromStr for BlueprintIdOrCurrentTarget { + type Err = uuid::Error; + + fn from_str(s: &str) -> Result { + if matches!(s, "current-target" | "current" | "target") { + Ok(Self::CurrentTarget) + } else { + let id = s.parse()?; + Ok(Self::BlueprintId(id)) + } + } +} + +impl BlueprintIdOrCurrentTarget { + async fn resolve_to_id( + &self, + client: &nexus_client::Client, + ) -> anyhow::Result { + match self { + Self::CurrentTarget => { + let target = client + .blueprint_target_view() + .await + .context("getting current blueprint target")?; + Ok(target.target_id) + } + Self::BlueprintId(id) => Ok(*id), + } + } + + async fn resolve_to_blueprint( + &self, + client: &nexus_client::Client, + ) -> anyhow::Result { + let id = self.resolve_to_id(client).await?; + let response = client.blueprint_view(&id).await.with_context(|| { + let suffix = match self { + BlueprintIdOrCurrentTarget::CurrentTarget => { + " (current target)" + } + BlueprintIdOrCurrentTarget::BlueprintId(_) => "", + }; + format!("fetching blueprint {id}{suffix}") + })?; + Ok(response.into_inner()) + } +} + +#[derive(Debug, Clone, Copy, Args)] struct BlueprintIdArgs { - /// id of a blueprint - blueprint_id: Uuid, + /// id of blueprint (or `target` for the current target) + blueprint_id: BlueprintIdOrCurrentTarget, } #[derive(Debug, Args)] struct BlueprintIdsArgs { - /// id of first blueprint - blueprint1_id: Uuid, - /// id of second blueprint - blueprint2_id: Uuid, + /// id of first blueprint (or `target` for the current target) + blueprint1_id: BlueprintIdOrCurrentTarget, + /// id of second blueprint (or `target` for the current target) + blueprint2_id: BlueprintIdOrCurrentTarget, } #[derive(Debug, Args)] @@ -973,10 +1029,7 @@ async fn cmd_nexus_blueprints_show( client: &nexus_client::Client, args: &BlueprintIdArgs, ) -> Result<(), anyhow::Error> { - let blueprint = client - .blueprint_view(&args.blueprint_id) - .await - .with_context(|| format!("fetching blueprint {}", args.blueprint_id))?; + let blueprint = args.blueprint_id.resolve_to_blueprint(client).await?; println!("{}", blueprint.display()); Ok(()) } @@ -985,12 +1038,11 @@ async fn cmd_nexus_blueprints_diff( client: &nexus_client::Client, args: &BlueprintIdsArgs, ) -> Result<(), anyhow::Error> { - 
let b1 = client.blueprint_view(&args.blueprint1_id).await.with_context( - || format!("fetching blueprint {}", args.blueprint1_id), - )?; - let b2 = client.blueprint_view(&args.blueprint2_id).await.with_context( - || format!("fetching blueprint {}", args.blueprint2_id), - )?; + let (b1, b2) = try_join( + args.blueprint1_id.resolve_to_blueprint(client), + args.blueprint2_id.resolve_to_blueprint(client), + ) + .await?; let diff = b2.diff_since_blueprint(&b1).context("diffing blueprints")?; println!("{}", diff.display()); Ok(()) @@ -1001,11 +1053,12 @@ async fn cmd_nexus_blueprints_delete( args: &BlueprintIdArgs, _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { + let blueprint_id = args.blueprint_id.resolve_to_id(client).await?; let _ = client - .blueprint_delete(&args.blueprint_id) + .blueprint_delete(&blueprint_id) .await - .with_context(|| format!("deleting blueprint {}", args.blueprint_id))?; - println!("blueprint {} deleted", args.blueprint_id); + .with_context(|| format!("deleting blueprint {blueprint_id}"))?; + println!("blueprint {blueprint_id} deleted"); Ok(()) } @@ -1064,19 +1117,20 @@ async fn cmd_nexus_blueprints_target_set_enabled( enabled: bool, _destruction_token: DestructiveOperationToken, ) -> Result<(), anyhow::Error> { + let blueprint_id = args.blueprint_id.resolve_to_id(client).await?; let description = if enabled { "enabled" } else { "disabled" }; client .blueprint_target_set_enabled( &nexus_client::types::BlueprintTargetSet { - target_id: args.blueprint_id, + target_id: blueprint_id, enabled, }, ) .await .with_context(|| { - format!("setting blueprint {} to {description}", args.blueprint_id) + format!("setting blueprint {blueprint_id} to {description}") })?; - eprintln!("set target blueprint {} to {description}", args.blueprint_id); + eprintln!("set target blueprint {blueprint_id} to {description}"); Ok(()) } diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 942a5338fb..bfaaa6aad3 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -479,3 +479,65 @@ METADATA: stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ ============================================= +EXECUTING COMMAND: omdb ["nexus", "blueprints", "show", "current-target"] +termination: Exited(0) +--------------------------------------------- +stdout: +blueprint ............. +parent: + + ----------------------------------------------------------------------------------------- + zone type zone ID disposition underlay IP + ----------------------------------------------------------------------------------------- + + sled .....................: zones at generation 2 + (no zones) + + sled .....................: zones at generation 2 + clickhouse ..................... in service ::1 + cockroach_db ..................... in service ::1 + crucible_pantry ..................... in service ::1 + external_dns ..................... in service ::1 + internal_dns ..................... in service ::1 + nexus ..................... 
in service ::ffff:127.0.0.1 + +METADATA: + created by: nexus-test-utils + created at: + comment: initial test blueprint + internal DNS version: 1 + external DNS version: 2 + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= +EXECUTING COMMAND: omdb ["nexus", "blueprints", "diff", ".............", "current-target"] +termination: Exited(0) +--------------------------------------------- +stdout: +from: blueprint ............. +to: blueprint ............. + + --------------------------------------------------------------------------------------------------- + zone type zone ID disposition underlay IP status + --------------------------------------------------------------------------------------------------- + + UNCHANGED SLEDS: + + sled .....................: zones at generation 2 + clickhouse ..................... in service ::1 + cockroach_db ..................... in service ::1 + crucible_pantry ..................... in service ::1 + external_dns ..................... in service ::1 + internal_dns ..................... in service ::1 + nexus ..................... in service ::ffff:127.0.0.1 + + METADATA: + internal DNS version: 1 (unchanged) + external DNS version: 2 (unchanged) + +--------------------------------------------- +stderr: +note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ +============================================= diff --git a/dev-tools/omdb/tests/test_all_output.rs b/dev-tools/omdb/tests/test_all_output.rs index 5f64f9c567..ef4792df62 100644 --- a/dev-tools/omdb/tests/test_all_output.rs +++ b/dev-tools/omdb/tests/test_all_output.rs @@ -95,6 +95,14 @@ async fn test_omdb_success_cases(cptestctx: &ControlPlaneTestContext) { &["nexus", "background-tasks", "show"], &["nexus", "blueprints", "list"], &["nexus", "blueprints", "show", &initial_blueprint_id], + &["nexus", "blueprints", "show", "current-target"], + &[ + "nexus", + "blueprints", + "diff", + &initial_blueprint_id, + "current-target", + ], // We can't easily test the sled agent output because that's only // provided by a real sled agent, which is not available in the // ControlPlaneTestContext. 
From a71ec89d6d49b3c971cca4a8400d8c15a821e4cf Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 17 Apr 2024 15:39:35 -0700 Subject: [PATCH 160/334] chore(deps): update rust crate serde to v1.0.198 (#5549) --- Cargo.lock | 8 ++++---- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fc532cd19c..8bdcbb853f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8277,9 +8277,9 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.197" +version = "1.0.198" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" +checksum = "9846a40c979031340571da2545a4e5b7c4163bdae79b301d5f86d03979451fcc" dependencies = [ "serde_derive", ] @@ -8324,9 +8324,9 @@ dependencies = [ [[package]] name = "serde_derive" -version = "1.0.197" +version = "1.0.198" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" +checksum = "e88edab869b01783ba905e7d0153f9fc1a6505a96e4ad3018011eedb838566d9" dependencies = [ "proc-macro2", "quote", diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 3a7d19d00f..a2ef2eb868 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -91,7 +91,7 @@ reqwest = { version = "0.11.24", features = ["blocking", "cookies", "json", "rus ring = { version = "0.17.8", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid", "uuid1"] } semver = { version = "1.0.22", features = ["serde"] } -serde = { version = "1.0.197", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.198", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.116", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } @@ -198,7 +198,7 @@ reqwest = { version = "0.11.24", features = ["blocking", "cookies", "json", "rus ring = { version = "0.17.8", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid", "uuid1"] } semver = { version = "1.0.22", features = ["serde"] } -serde = { version = "1.0.197", features = ["alloc", "derive", "rc"] } +serde = { version = "1.0.198", features = ["alloc", "derive", "rc"] } serde_json = { version = "1.0.116", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.4.0", features = ["inline", "unicode"] } From 6a4ca56623f4756003f4b8f2704c70e7699210c1 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 17 Apr 2024 15:40:02 -0700 Subject: [PATCH 161/334] chore(deps): update rust crate proc-macro2 to v1.0.81 (#5548) --- Cargo.lock | 4 ++-- workspace-hack/Cargo.toml | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8bdcbb853f..8b72b1e179 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6967,9 +6967,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.80" +version = "1.0.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56dea16b0a29e94408b9aa5e2940a4eedbd128a1ba20e8f7ae60fd3d465af0e" +checksum = "3d1597b0c024618f09a9c3b8655b7e430397a36d23fdafec26d6965e9eec3eba" dependencies = [ "unicode-ident", ] diff --git 
a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a2ef2eb868..654d92869d 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -81,7 +81,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.80" } +proc-macro2 = { version = "1.0.81" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.4" } @@ -188,7 +188,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.80" } +proc-macro2 = { version = "1.0.81" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.4" } From 5f878468e40d28bcfd78ea06916e9b76cdf4989b Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 17 Apr 2024 16:21:08 -0700 Subject: [PATCH 162/334] [wicket] add testing notes to readme (#5551) Also change the main readme to indicate that `cargo test` isn't supported. Apologies for the automatic Markdown formatting, my editor just did that. But I think it's a positive overall. --- README.adoc | 2 +- wicket/README.md | 174 ++++++++++++++++++++++++++++++++--------------- 2 files changed, 120 insertions(+), 56 deletions(-) diff --git a/README.adoc b/README.adoc index 0e09fc39df..9db11f0337 100644 --- a/README.adoc +++ b/README.adoc @@ -47,7 +47,7 @@ To build and run the non-simulated version of Omicron, see: xref:docs/how-to-run The supported way to run tests is via https://nexte.st/[cargo-nextest]. -NOTE: `cargo test` may work, but that can't be guaranteed as `cargo test` isn't run in CI. +NOTE: `cargo test` will not work for many of our tests, since they rely on nextest-specific features. If you don't already have nextest installed, get started by https://nexte.st/book/pre-built-binaries[downloading a pre-built binary] or installing nextest via your package manager. Nextest has pre-built binaries for Linux, macOS and illumos. diff --git a/wicket/README.md b/wicket/README.md index 8735e5f5c5..0a24acbe8e 100644 --- a/wicket/README.md +++ b/wicket/README.md @@ -2,29 +2,30 @@ Wicket is a TUI built for operator usage at the technician port. It is intended to support a limited set of responsibilities including: - * Rack Initialization - * Boundary service setup - * Disaster Recovery - * Minimal rack update / emergency update + +- Rack Initialization +- Boundary service setup +- Disaster Recovery +- Minimal rack update / emergency update Wicket is built on top of [crossterm](https://github.com/crossterm-rs/ -crossterm) and [tui-rs](https://github.com/fdehau/tui-rs). +crossterm) and [tui-rs](https://github.com/fdehau/tui-rs). # Navigating -* `banners` - Files containing "banner-like" output using `#` characters for -glyph drawing -* `src/dispatch.rs` - Setup code for shell management, to allow uploading of -TUF repos or running the TUI. 
-* `src/upload.rs` - Code to upload a TUF repo to wicketd via wicket -* `src/wicketd.rs` - Code for interacting with wicketd -* `src/runner` - The main entrypoint to the TUI. Runs the main loop and spawns -a tokio runtime to interact with wicketd. -* `src/ui` - All code for UI management. This contains the primary types of the -UI: `Controls` and `Widgets` which will be discussed in more detail below. -* `src/state` - Global state managed by wicket. This state is mutated by the -`Runner` mainloop, as well as by `Control::on` methods. It is used immutably to -draw the UI. +- `banners` - Files containing "banner-like" output using `#` characters for + glyph drawing +- `src/dispatch.rs` - Setup code for shell management, to allow uploading of + TUF repos or running the TUI. +- `src/upload.rs` - Code to upload a TUF repo to wicketd via wicket +- `src/wicketd.rs` - Code for interacting with wicketd +- `src/runner` - The main entrypoint to the TUI. Runs the main loop and spawns + a tokio runtime to interact with wicketd. +- `src/ui` - All code for UI management. This contains the primary types of the + UI: `Controls` and `Widgets` which will be discussed in more detail below. +- `src/state` - Global state managed by wicket. This state is mutated by the + `Runner` mainloop, as well as by `Control::on` methods. It is used immutably to + draw the UI. # Design @@ -32,20 +33,21 @@ When wicket starts as a TUI, a `Runner` is created, which is really a bucket of state which can be utilized by the `main_loop`. The `Runner` is in charge of: The main type of the wicket crate is the `Wizard`. The wizard is run by the `wicket` binary and is in charge of: - * Handling user input - * Sending requests to wicketd - * Handling events from downstream services - * Dispatching events to the UI `Screen` - * Triggering terminal rendering + +- Handling user input +- Sending requests to wicketd +- Handling events from downstream services +- Dispatching events to the UI `Screen` +- Triggering terminal rendering There is a main thread that runs an infinite loop in the `Runner::main_loop` method. The loop's job is to receive `Event`s from a single MPSC channel and update internal state, either directly or by forwarding events to the `Screen` by calling its `on` method. The `Screen`'s job is solely to dispatch events to the splash screen at startup (to allow early cancellation of the animation), -and to the `MainScreen` after the splash screen has finished its animation. +and to the `MainScreen` after the splash screen has finished its animation. -The `MainScreen` is *stable* across the TUI, with a sidebar widget that allows +The `MainScreen` is _stable_ across the TUI, with a sidebar widget that allows selecting among a list of `Pane`s. `Pane`s get shown to the right of the sidebar, and are available to render to the rectangle available to them in that space. Each pane is responsible for rendering in its own space, and handling @@ -64,11 +66,11 @@ and never directly inspected by parent Controls, but its always possible this will change. There are only two `Action`s at this point that are handled by the `Runner`. - * `Action::Redraw` - Instructs the `Runner` to call `Screen::draw` and -trigger a terminal render if necessary. This allows us to limit the relatively -expensive operation to those times when it's strictly necessary. - * `Action::Update(ComponentId)` - Instructs the Runner to dispatch an update -command for a given component to `wicketd`. 
+- `Action::Redraw` - Instructs the `Runner` to call `Screen::draw` and + trigger a terminal render if necessary. This allows us to limit the relatively + expensive operation to those times when it's strictly necessary. +- `Action::Update(ComponentId)` - Instructs the Runner to dispatch an update + command for a given component to `wicketd`. It's important to notice that the global `State` of the system is only updated upon event receipt, and that a screen never processes an event that can mutate @@ -104,6 +106,68 @@ know when a screen animation is ongoing, and so it forwards all ticks to the Use these to test out particular scenarios with wicket by hand. (Feel free to add more as needed!) +## Running an end-to-end-ish test + +Part of the edit/compile cycle for wicket mupdates is setting up something +similar to an end-to-end flow. As a reminder, the general way updates work is +that wicket communicates with wicketd, which instructs MGS to send commands to +the individual SPs. + +Based on this, one way to have an end-to-end flow is with: + +- real wicketd +- real MGS +- sp-sim, an in-memory service that simulates how the SP behaves + +Making this simpler is tracked in +[omicron#5550](https://github.com/oxidecomputer/omicron/issues/5550). + +### Running sp-sim and MGS + +The easiest way to do this is to run: + +``` +cargo run -p omicron-dev mgs-run +``` + +This will print out a line similar to `omicron-dev: MGS API: http://[::1]:12225`. Note the address for use below. + +Another option, which may lead to quicker iteration cycles if you're modifying +MGS or sp-sim, is to run the services by hand from the root of omicron: + +``` +cargo run --bin sp-sim -- sp-sim/examples/config.toml +cargo run --bin mgs run --id c19a698f-c6f9-4a17-ae30-20d711b8f7dc --address '[::1]:12225' gateway/examples/config.toml +``` + +The port number in `--address` is arbitrary. + +**Note:** If you're adding new functionality to wicket, it is quite possible +that sp-sim is missing support for it! Generally, sp-sim has features added to +it on an as-needed basis. + +### Using a real SP + +TODO + +### Running wicketd + +Taking the port number mentioned above, run: + +``` +cargo run -p wicketd -- run wicketd/examples/config.toml --address '[::1]:12226' --artifact-address '[::]:12227' --nexus-proxy-address '[::1]:12228' --mgs-address '[::1]:12225' +``` + +In this case, the port number in `--address` provides the interface between +wicketd and wicket. The port number is _not_ arbitrary: wicket connects to port +12226 by default. There is currently no way to specify a different port (but +there probably should be!) + +### Running wicket + +After running the above commands, simply running `cargo run -p wicket` should +connect to the wicketd instance. + ## Adding simulated failures to operations Add a simulated failure while starting an update: @@ -120,11 +184,11 @@ WICKET_TEST_CLEAR_UPDATE_STATE_ERROR= cargo run --bin wicket Here, `` can be: -* `fail`: Simulate a failure for this operation. -* `timeout`: Simulate a timeout for this operation. - * `timeout:`: Specify a custom number of seconds (15 seconds by +- `fail`: Simulate a failure for this operation. +- `timeout`: Simulate a timeout for this operation. 
+ - `timeout:`: Specify a custom number of seconds (15 seconds by default) -* (implement more options as needed) +- (implement more options as needed) ## Adding a test update step @@ -142,15 +206,15 @@ Some individual steps support having simulated results via environment variables Environment variables supported are: -* `WICKET_UPDATE_TEST_SIMULATE_ROT_RESULT`: Simulates a result for the "Updating RoT" step. -* `WICKET_UPDATE_TEST_SIMULATE_SP_RESULT`: Simulates a result for the "Updating SP" step. +- `WICKET_UPDATE_TEST_SIMULATE_ROT_RESULT`: Simulates a result for the "Updating RoT" step. +- `WICKET_UPDATE_TEST_SIMULATE_SP_RESULT`: Simulates a result for the "Updating SP" step. The environment variable can be set to: -* `success`: A success outcome. -* `warning`: Success with warning. -* `failure`: A failure. -* `skipped`: A skipped outcome. +- `success`: A success outcome. +- `warning`: Success with warning. +- `failure`: A failure. +- `skipped`: A skipped outcome. ### Example @@ -183,25 +247,25 @@ ssh user@$IP_ADDRESS upload < my-tuf-repo.zip Wicket is meant to be used as a captive shell over ssh. If you're making changes to the SSH shell support, you'll likely want to test the captive shell support on a local Unix machine. Here's how to do so. 1. Make the `wicket` available globally. For the rest of this section we're going to use the path `/usr/local/bin/wicket`. - * If your build directory is globally readable, create a symlink to `wicket` in a well-known location. From omicron's root, run: `sudo ln -s $(readlink -f target/debug/wicket) /usr/local/bin/wicket` - * If it isn't globally accessible, run `sudo cp target/debug/wicket /usr/local/bin`. (You'll have to copy `wicket` each time you build it.) + - If your build directory is globally readable, create a symlink to `wicket` in a well-known location. From omicron's root, run: `sudo ln -s $(readlink -f target/debug/wicket) /usr/local/bin/wicket` + - If it isn't globally accessible, run `sudo cp target/debug/wicket /usr/local/bin`. (You'll have to copy `wicket` each time you build it.) 2. Add a new user to test against, for example `wicket-test`: - 1. Add a group for the new user: `groupadd wicket-test`. - 2. Add the user: `sudo useradd -m -g wicket-test` + 1. Add a group for the new user: `groupadd wicket-test`. + 2. Add the user: `sudo useradd -m -g wicket-test` 3. Set up SSH authentication for this user, using either passwords or public keys (`.ssh/authorized_keys`). - * To configure SSH keys, you'll need to first log in as the `wicket-test` user. To do so, run `sudo -u wicket-test -i` (Linux) or `pfexec su - wicket-test` (illumos). - * If using `.ssh/authorized_keys`, be sure to set up the correct permissions for `~/.ssh` and its contents. As the `wicket-test` user, run `chmod go-rwx -R ~/.ssh`. + - To configure SSH keys, you'll need to first log in as the `wicket-test` user. To do so, run `sudo -u wicket-test -i` (Linux) or `pfexec su - wicket-test` (illumos). + - If using `.ssh/authorized_keys`, be sure to set up the correct permissions for `~/.ssh` and its contents. As the `wicket-test` user, run `chmod go-rwx -R ~/.ssh`. 4. Test that you can log in as the user: run `ssh wicket-test@localhost`. If it works, move on to step 5. If it doesn't work: - * To debug issues related to logging in, for example `~/.ssh` permissions issues, check the sshd authentication log. - * On Linux, the authentication log is typically at `/var/log/auth.log`. - * On illumos, the authentication log is at `/var/log/authlog`. 
If it is empty, logging needs to be enabled. (If you're an Oxide employee, see [this issue](https://github.com/oxidecomputer/helios-engvm/issues/18) for how to enable logging.) + - To debug issues related to logging in, for example `~/.ssh` permissions issues, check the sshd authentication log. + - On Linux, the authentication log is typically at `/var/log/auth.log`. + - On illumos, the authentication log is at `/var/log/authlog`. If it is empty, logging needs to be enabled. (If you're an Oxide employee, see [this issue](https://github.com/oxidecomputer/helios-engvm/issues/18) for how to enable logging.) 5. Add this to the end of `/etc/ssh/sshd_config`: - ``` - Match User wicket-test - ForceCommand /usr/local/bin/wicket - ``` + ``` + Match User wicket-test + ForceCommand /usr/local/bin/wicket + ``` 6. Restart sshd: - * Linux using systemd: `sudo systemctl restart ssh` - * illumos: `svcadm restart ssh` + - Linux using systemd: `sudo systemctl restart ssh` + - illumos: `svcadm restart ssh` From now on, if you run `ssh wicket-test@localhost`, you should get the wicket captive shell. Also, `ssh wicket-test@localhost upload` should let you upload a zip file as a TUF repository. From 4c3c6b09b3f254a107185ad611d7a89188bffcf8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 18 Apr 2024 04:48:53 +0000 Subject: [PATCH 163/334] chore(deps): update taiki-e/install-action digest to 4820827 (#5556) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`37b71c3` -> `4820827`](https://togithub.com/taiki-e/install-action/compare/37b71c3...4820827) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index fde2e9139b..6e8b323e1f 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@37b71c39b208369698511b6530dcb4b7d141be64 # v2 + uses: taiki-e/install-action@4820827bd312afaf667a328f1d0fe0fb4f6751b1 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From f3e2c929a72e5f1a6fba5243caa9ab09a76c9152 Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Thu, 18 Apr 2024 15:10:01 -0400 Subject: [PATCH 164/334] Collect inventory when a new sled-agent comes online (#5526) Previously, we collected inventory when a sled was added through the nexus external API. 
However, this is too early to gather the physical disks in inventory for the sled, since those require a sled in the `sled` table for each corresponding disk, before they can be added as a control plane physical disk. This specific disk related change is part of https://github.com/oxidecomputer/omicron/pull/5506 and without a proper trigger on sled addition, we'd have to wait another 10 minutes for inventory collection in order to see the disks show up. An alternate workaround would be to manually generate an "extra" blueprint, which also triggers inventory collection, although this would also have to be after the sled-agent registered itself via the nexus internal API and got entered into the `sled` table. Since sled-agent-put is idempotent, and inventory collection is somewhat expensive, we only trigger collection if the sled actually gets updated in the db. We do this via a check on the modified timestamp, which is somewhat hacky but serves our purpose well. H/T to @davepacheco for being [prescient](https://github.com/oxidecomputer/omicron/pull/5066#pullrequestreview-1881455062). --- dev-tools/omdb/tests/successes.out | 36 ++--- nexus/db-model/src/sled.rs | 4 + .../db-queries/src/db/datastore/inventory.rs | 90 ++++++++++--- .../src/db/datastore/physical_disk.rs | 6 +- nexus/db-queries/src/db/datastore/rack.rs | 7 +- nexus/db-queries/src/db/datastore/sled.rs | 48 ++++--- nexus/reconfigurator/preparation/src/lib.rs | 4 +- nexus/src/app/background/init.rs | 2 + .../app/background/inventory_collection.rs | 124 +++++++++++------- .../app/background/physical_disk_adoption.rs | 58 +++++++- nexus/src/app/mod.rs | 1 + nexus/src/app/rack.rs | 4 - nexus/src/app/sled.rs | 12 +- test-utils/src/dev/test_cmds.rs | 5 + 14 files changed, 285 insertions(+), 116 deletions(-) diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index bfaaa6aad3..f09a2715a9 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -281,14 +281,14 @@ stdout: task: "dns_config_internal" configured period: every 1m currently executing: no - last completed activation: iter 3, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last generation found: 1 task: "dns_servers_internal" configured period: every 1m currently executing: no - last completed activation: iter 3, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms servers found: 1 @@ -298,7 +298,7 @@ task: "dns_servers_internal" task: "dns_propagation_internal" configured period: every 1m currently executing: no - last completed activation: iter 4, triggered by a dependent task completing + last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms attempt to propagate generation: 1 @@ -309,14 +309,14 @@ task: "dns_propagation_internal" task: "dns_config_external" configured period: every 1m currently executing: no - last completed activation: iter 3, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last generation found: 2 task: "dns_servers_external" configured period: every 1m currently executing: no - last completed activation: iter 3, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms servers found: 1 @@ -326,7 +326,7 @@ task: "dns_servers_external" 
task: "dns_propagation_external" configured period: every 1m currently executing: no - last completed activation: iter 4, triggered by a dependent task completing + last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms attempt to propagate generation: 2 @@ -337,35 +337,35 @@ task: "dns_propagation_external" task: "nat_v4_garbage_collector" configured period: every 30s currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } task: "blueprint_loader" configured period: every 1m 40s currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last completion reported error: failed to read target blueprint: Internal Error: no target blueprint set task: "blueprint_executor" configured period: every 10m currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last completion reported error: no blueprint task: "bfd_manager" configured period: every 30s currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last completion reported error: failed to resolve addresses for Dendrite services: no record found for Query { name: Name("_dendrite._tcp.control-plane.oxide.internal."), query_type: SRV, query_class: IN } task: "external_endpoints" configured period: every 1m currently executing: no - last completed activation: iter 3, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms external API endpoints: 2 ('*' below marks default) @@ -382,7 +382,7 @@ task: "external_endpoints" task: "inventory_collection" configured period: every 10m currently executing: no - last completed activation: iter 3, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last collection id: ..................... 
last collection started: @@ -391,14 +391,14 @@ task: "inventory_collection" task: "metrics_producer_gc" configured period: every 1m currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last completion reported error: metric producer gc disabled (omicron#5284) task: "phantom_disks" configured period: every 30s currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms number of phantom disks deleted: 0 number of phantom disk delete errors: 0 @@ -406,14 +406,14 @@ task: "phantom_disks" task: "physical_disk_adoption" configured period: every 30s currently executing: no - last completed activation: iter 3, triggered by a dependent task completing + last completed activation: , triggered by a dependent task completing started at (s ago) and ran for ms last completion reported error: task disabled task: "region_replacement" configured period: every 30s currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms number of region replacements started ok: 0 number of region replacement start errors: 0 @@ -421,14 +421,14 @@ task: "region_replacement" task: "service_zone_nat_tracker" configured period: every 30s currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms last completion reported error: inventory collection is None task: "switch_port_config_manager" configured period: every 30s currently executing: no - last completed activation: iter 2, triggered by an explicit signal + last completed activation: , triggered by an explicit signal started at (s ago) and ran for ms warning: unknown background task: "switch_port_config_manager" (don't know how to interpret details: Object {}) diff --git a/nexus/db-model/src/sled.rs b/nexus/db-model/src/sled.rs index e94da5fbbe..5019366733 100644 --- a/nexus/db-model/src/sled.rs +++ b/nexus/db-model/src/sled.rs @@ -114,6 +114,10 @@ impl Sled { pub fn state(&self) -> SledState { self.state } + + pub fn time_modified(&self) -> DateTime { + self.identity.time_modified + } } impl From for views::Sled { diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 02832c5528..6faa8ea251 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -2067,13 +2067,13 @@ pub trait DataStoreInventoryTest: Send + Sync { /// This does not paginate. 
fn inventory_collections( &self, - ) -> BoxFuture>>; + ) -> BoxFuture>>; } impl DataStoreInventoryTest for DataStore { fn inventory_collections( &self, - ) -> BoxFuture>> { + ) -> BoxFuture>> { async { let conn = self .pool_connection_for_tests() @@ -2085,17 +2085,14 @@ impl DataStoreInventoryTest for DataStore { .context("failed to allow table scan")?; use db::schema::inv_collection::dsl; - let uuids = dsl::inv_collection - .select(dsl::id) + let collections = dsl::inv_collection + .select(InvCollection::as_select()) .order_by(dsl::time_started) - .load_async::(&conn) + .load_async(&conn) .await .context("failed to list collections")?; - Ok(uuids - .into_iter() - .map(CollectionUuid::from_untyped_uuid) - .collect()) + Ok(collections) }) .await } @@ -2124,6 +2121,7 @@ mod test { use nexus_types::inventory::RotPageWhich; use omicron_common::api::external::Error; use omicron_test_utils::dev; + use omicron_uuid_kinds::CollectionUuid; use pretty_assertions::assert_eq; use std::num::NonZeroU32; @@ -2386,7 +2384,13 @@ mod test { // `collection1`, which _is_ the only one with no errors. So we should // get back `collection2`. assert_eq!( - datastore.inventory_collections().await.unwrap(), + &datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[ collection1.id, collection2.id, @@ -2410,7 +2414,13 @@ mod test { .await .expect("failed to prune collections"); assert_eq!( - datastore.inventory_collections().await.unwrap(), + datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[collection1.id, collection3.id, collection4.id, collection5.id,] ); // Again, we should skip over collection1 and delete the next oldest: @@ -2420,7 +2430,13 @@ mod test { .await .expect("failed to prune collections"); assert_eq!( - datastore.inventory_collections().await.unwrap(), + datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[collection1.id, collection4.id, collection5.id,] ); // At this point, if we're keeping 3, we don't need to prune anything. @@ -2429,7 +2445,13 @@ mod test { .await .expect("failed to prune collections"); assert_eq!( - datastore.inventory_collections().await.unwrap(), + datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[collection1.id, collection4.id, collection5.id,] ); @@ -2446,7 +2468,13 @@ mod test { .await .expect("failed to insert collection"); assert_eq!( - datastore.inventory_collections().await.unwrap(), + datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[collection1.id, collection4.id, collection5.id, collection6.id,] ); datastore @@ -2454,7 +2482,13 @@ mod test { .await .expect("failed to prune collections"); assert_eq!( - datastore.inventory_collections().await.unwrap(), + datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[collection4.id, collection5.id, collection6.id,] ); // Again, at this point, we should not prune anything. 
@@ -2463,7 +2497,13 @@ mod test { .await .expect("failed to prune collections"); assert_eq!( - datastore.inventory_collections().await.unwrap(), + datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[collection4.id, collection5.id, collection6.id,] ); @@ -2484,7 +2524,13 @@ mod test { .await .expect("failed to prune collections"); assert_eq!( - datastore.inventory_collections().await.unwrap(), + datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[collection5.id, collection6.id, collection7.id,] ); @@ -2518,7 +2564,13 @@ mod test { .await .expect("failed to prune collections"); assert_eq!( - datastore.inventory_collections().await.unwrap(), + datastore + .inventory_collections() + .await + .unwrap() + .iter() + .map(|c| c.id.into()) + .collect::>(), &[collection6.id,] ); @@ -2528,7 +2580,7 @@ mod test { .inventory_delete_collection(&opctx, collection6.id) .await .expect("failed to delete collection"); - assert_eq!(datastore.inventory_collections().await.unwrap(), &[]); + assert!(datastore.inventory_collections().await.unwrap().is_empty()); conn.transaction_async(|conn| async move { conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap(); diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index f26ac782b3..26ebb3fadb 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -316,9 +316,11 @@ mod test { rack_id, Generation::new(), ); - db.sled_upsert(sled_update) + let (sled, _) = db + .sled_upsert(sled_update) .await - .expect("Could not upsert sled during test prep") + .expect("Could not upsert sled during test prep"); + sled } fn list_disk_params() -> DataPageParams<'static, Uuid> { diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 7beb957917..225499c0bf 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -767,6 +767,7 @@ impl DataStore { info!(log, "Inserted service networking records"); for physical_disk in physical_disks { + info!(log, "physical disk upsert in handoff: {physical_disk:#?}"); if let Err(e) = Self::physical_disk_insert_on_connection(&conn, &opctx, physical_disk) .await { if !matches!(e, Error::ObjectAlreadyExists { .. }) { @@ -1180,9 +1181,11 @@ mod test { rack_id(), Generation::new(), ); - db.sled_upsert(sled_update) + let (sled, _) = db + .sled_upsert(sled_update) .await - .expect("Could not upsert sled during test prep") + .expect("Could not upsert sled during test prep"); + sled } // Hacky macro helper to: diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index fa83436e9e..bf56af1249 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -43,22 +43,27 @@ use uuid::Uuid; impl DataStore { /// Stores a new sled in the database. /// + /// Returns the sled, and whether or not it was updated on success. + /// /// Returns an error if `sled_agent_gen` is stale, or the sled is /// decommissioned. 
pub async fn sled_upsert( &self, sled_update: SledUpdate, - ) -> CreateResult { + ) -> CreateResult<(Sled, bool)> { use db::schema::sled::dsl; // required for conditional upsert use diesel::query_dsl::methods::FilterDsl; - diesel::insert_into(dsl::sled) - .values(sled_update.clone().into_insertable()) + let insertable_sled = sled_update.clone().into_insertable(); + let now = insertable_sled.time_modified(); + + let sled = diesel::insert_into(dsl::sled) + .values(insertable_sled) .on_conflict(dsl::id) .do_update() .set(( - dsl::time_modified.eq(Utc::now()), + dsl::time_modified.eq(now), dsl::ip.eq(sled_update.ip), dsl::port.eq(sled_update.port), dsl::rack_id.eq(sled_update.rack_id), @@ -82,7 +87,12 @@ impl DataStore { &sled_update.id().to_string(), ), ) - }) + })?; + + // We compare only seconds since the epoch, because writing to and + // reading from the database causes us to lose precision. + let was_modified = now.timestamp() == sled.time_modified().timestamp(); + Ok((sled, was_modified)) } pub async fn sled_list( @@ -753,7 +763,7 @@ mod test { let (_opctx, datastore) = datastore_test(&logctx, &db).await; let mut sled_update = test_new_sled_update(); - let observed_sled = + let (observed_sled, _) = datastore.sled_upsert(sled_update.clone()).await.unwrap(); assert_eq!( observed_sled.usable_hardware_threads, @@ -786,7 +796,7 @@ mod test { sled_update.sled_agent_gen.0 = sled_update.sled_agent_gen.0.next(); // Test that upserting the sled propagates those changes to the DB. - let observed_sled = datastore + let (observed_sled, _) = datastore .sled_upsert(sled_update.clone()) .await .expect("Could not upsert sled during test prep"); @@ -813,7 +823,7 @@ mod test { let (_opctx, datastore) = datastore_test(&logctx, &db).await; let mut sled_update = test_new_sled_update(); - let observed_sled = + let (observed_sled, _) = datastore.sled_upsert(sled_update.clone()).await.unwrap(); assert_eq!(observed_sled.reservoir_size, sled_update.reservoir_size); @@ -835,7 +845,7 @@ mod test { sled_update.sled_agent_gen.0 = sled_update.sled_agent_gen.0.next(); // Test that upserting the sled propagates those changes to the DB. - let observed_sled = datastore + let (observed_sled, _) = datastore .sled_upsert(sled_update.clone()) .await .expect("Could not upsert sled during test prep"); @@ -857,7 +867,7 @@ mod test { ); sled_update.sled_agent_gen.0 = current_gen.0.next(); // Test that upserting the sled propagates those changes to the DB. - let observed_sled = datastore + let (observed_sled, _) = datastore .sled_upsert(sled_update.clone()) .await .expect("Could not upsert sled during test prep"); @@ -876,7 +886,7 @@ mod test { let (opctx, datastore) = datastore_test(&logctx, &db).await; let mut sled_update = test_new_sled_update(); - let observed_sled = + let (observed_sled, _) = datastore.sled_upsert(sled_update.clone()).await.unwrap(); assert_eq!( observed_sled.usable_hardware_threads, @@ -955,13 +965,13 @@ mod test { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Define some sleds that resources cannot be provisioned on. 
- let non_provisionable_sled = + let (non_provisionable_sled, _) = datastore.sled_upsert(test_new_sled_update()).await.unwrap(); - let expunged_sled = + let (expunged_sled, _) = datastore.sled_upsert(test_new_sled_update()).await.unwrap(); - let decommissioned_sled = + let (decommissioned_sled, _) = datastore.sled_upsert(test_new_sled_update()).await.unwrap(); - let illegal_decommissioned_sled = + let (illegal_decommissioned_sled, _) = datastore.sled_upsert(test_new_sled_update()).await.unwrap(); let ineligible_sleds = IneligibleSleds { @@ -1000,7 +1010,7 @@ mod test { // Now add a provisionable sled and try again. let sled_update = test_new_sled_update(); - let provisionable_sled = + let (provisionable_sled, _) = datastore.sled_upsert(sled_update.clone()).await.unwrap(); // Try a few times to ensure that resources never get allocated to the @@ -1061,7 +1071,8 @@ mod test { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Set up a sled to test against. - let sled = datastore.sled_upsert(test_new_sled_update()).await.unwrap(); + let (sled, _) = + datastore.sled_upsert(test_new_sled_update()).await.unwrap(); let sled_id = sled.id(); // Add a couple disks to this sled. @@ -1230,7 +1241,8 @@ mod test { .enumerate(); // Set up a sled to test against. - let sled = datastore.sled_upsert(test_new_sled_update()).await.unwrap(); + let (sled, _) = + datastore.sled_upsert(test_new_sled_update()).await.unwrap(); let sled_id = sled.id(); for (i, ((policy, state), after)) in all_transitions { diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 75482128a0..8f590d95f4 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -210,7 +210,9 @@ pub async fn reconfigurator_state_load( let collection_ids = datastore .inventory_collections() .await - .context("listing collections")?; + .context("listing collections")? + .into_iter() + .map(|c| c.id()); let collections = futures::stream::iter(collection_ids) .filter_map(|id| async move { let read = datastore diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index 77ef3318f6..9997953921 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -99,6 +99,7 @@ impl BackgroundTasks { opctx: &OpContext, datastore: Arc, config: &BackgroundTaskConfig, + rack_id: Uuid, nexus_id: Uuid, resolver: internal_dns::resolver::Resolver, saga_request: Sender, @@ -283,6 +284,7 @@ impl BackgroundTasks { datastore.clone(), inventory_watcher.clone(), config.physical_disk_adoption.disable, + rack_id, )), opctx.child(BTreeMap::new()), vec![Box::new(inventory_watcher)], diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index 236ba9b197..2c60ff96bb 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/inventory_collection.rs @@ -214,7 +214,8 @@ mod test { use nexus_inventory::SledAgentEnumerator; use nexus_test_utils_macros::nexus_test; use omicron_common::api::external::ByteCount; - use omicron_test_utils::dev::poll; + use omicron_uuid_kinds::CollectionUuid; + use std::collections::BTreeSet; use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::num::NonZeroU32; @@ -234,39 +235,15 @@ mod test { datastore.clone(), ); - // Nexus starts the very background task that we're also testing - // manually here. As a result, we should find a collection in the - // database before too long. 
Wait for it so that after it appears, we - // can assume the rest of the collections came from the instance that - // we're testing. - let mut last_collections = - poll::wait_for_condition::<_, anyhow::Error, _, _>( - || async { - let collections = datastore - .inventory_collections() - .await - .map_err(poll::CondCheckError::Failed)?; - if collections.is_empty() { - Err(poll::CondCheckError::NotYet) - } else { - Ok(collections) - } - }, - &std::time::Duration::from_millis(50), - &std::time::Duration::from_secs(15), - ) - .await - .expect("background task did not populate initial collection"); - let resolver = internal_dns::resolver::Resolver::new_from_addrs( cptestctx.logctx.log.clone(), &[cptestctx.internal_dns.dns_server.local_address()], ) .unwrap(); - // Now we'll create our own copy of the background task and activate it - // a bunch and make sure that it always creates a new collection and - // does not allow a backlog to accumulate. + // Create our own copy of the background task and activate it a bunch + // and make sure that it always creates a new collection and does not + // allow a backlog to accumulate. let nkeep = 3; let mut task = InventoryCollector::new( datastore.clone(), @@ -276,25 +253,60 @@ mod test { false, ); let nkeep = usize::try_from(nkeep).unwrap(); - for i in 0..10 { + let mut all_our_collection_ids = Vec::new(); + for i in 0..20 { let _ = task.activate(&opctx).await; let collections = datastore.inventory_collections().await.unwrap(); + + // Nexus is creating inventory collections concurrently with us, + // so our expectations here have to be flexible to account for the + // fact that there might be collections other than the ones we've + // activated interspersed with the ones we care about. + let num_collections = collections.len(); + + // We should have at least one collection (the one we just + // activated). + assert!(num_collections > 0); + + // Regardless of the activation source, we should have at + // most `nkeep + 1` collections. + assert!(num_collections <= nkeep + 1); + + // Filter down to just the collections we activated. (This could be + // empty if Nexus shoved several collections in!) + let our_collections = collections + .into_iter() + .filter(|c| c.collector == "me") + .map(|c| CollectionUuid::from(c.id)) + .collect::>(); + + // If we have no collections, we have nothing else to check; Nexus + // has pushed us out. + if our_collections.is_empty() { + println!( + "iter {i}: no test collections \ + ({num_collections} Nexus collections)", + ); + continue; + } + + // The most recent collection should be new. + let new_collection_id = our_collections.last().unwrap(); + assert!(!all_our_collection_ids.contains(new_collection_id)); + all_our_collection_ids.push(*new_collection_id); + + // Push this onto the collections we've seen, then assert that the + // tail of all IDs we've seen matches the ones we saw in this + // iteration (i.e., we're pushing out old collections in order). 
println!( - "iter {}: last = {:?}, current = {:?}", - i, last_collections, collections + "iter {i}: saw {our_collections:?}; \ + should match tail of {all_our_collection_ids:?}" + ); + assert_eq!( + all_our_collection_ids + [all_our_collection_ids.len() - our_collections.len()..], + our_collections ); - - let expected_from_last: Vec<_> = if last_collections.len() <= nkeep - { - last_collections - } else { - last_collections.into_iter().skip(1).collect() - }; - let expected_from_current: Vec<_> = - collections.iter().rev().skip(1).rev().cloned().collect(); - assert_eq!(expected_from_last, expected_from_current); - assert_eq!(collections.len(), std::cmp::min(i + 2, nkeep + 1)); - last_collections = collections; } // Create a disabled task and make sure that does nothing. @@ -305,10 +317,27 @@ mod test { 3, true, ); - let previous = datastore.inventory_collections().await.unwrap(); let _ = task.activate(&opctx).await; - let latest = datastore.inventory_collections().await.unwrap(); - assert_eq!(previous, latest); + + // It's possible that Nexus is concurrently running with us still, so + // we'll activate this task and ensure that: + // + // (a) at least one of the collections is from `"me"` above, and + // (b) there is no collection from `"disabled"` + // + // This is technically still racy if Nexus manages to collect `nkeep + + // 1` collections in between the loop above and this check, but we don't + // expect that to be the case. + let latest_collectors = datastore + .inventory_collections() + .await + .unwrap() + .into_iter() + .map(|c| c.collector) + .collect::>(); + println!("latest_collectors: {latest_collectors:?}"); + assert!(latest_collectors.contains("me")); + assert!(!latest_collectors.contains("disabled")); } #[nexus_test(server = crate::Server)] @@ -351,7 +380,8 @@ mod test { rack_id, Generation::new(), ); - sleds.push(datastore.sled_upsert(sled).await.unwrap()); + let (sled, _) = datastore.sled_upsert(sled).await.unwrap(); + sleds.push(sled); } // The same enumerator should immediately find all the new sleds. diff --git a/nexus/src/app/background/physical_disk_adoption.rs b/nexus/src/app/background/physical_disk_adoption.rs index e5e1e89b64..b6e0a83502 100644 --- a/nexus/src/app/background/physical_disk_adoption.rs +++ b/nexus/src/app/background/physical_disk_adoption.rs @@ -18,6 +18,8 @@ use nexus_db_model::PhysicalDisk; use nexus_db_model::Zpool; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; +use nexus_types::identity::Asset; +use omicron_common::api::external::DataPageParams; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; use serde_json::json; @@ -28,6 +30,7 @@ use uuid::Uuid; pub struct PhysicalDiskAdoption { datastore: Arc, disable: bool, + rack_id: Uuid, rx_inventory_collection: watch::Receiver>, } @@ -36,8 +39,14 @@ impl PhysicalDiskAdoption { datastore: Arc, rx_inventory_collection: watch::Receiver>, disable: bool, + rack_id: Uuid, ) -> Self { - PhysicalDiskAdoption { datastore, disable, rx_inventory_collection } + PhysicalDiskAdoption { + datastore, + disable, + rack_id, + rx_inventory_collection, + } } } @@ -51,6 +60,41 @@ impl BackgroundTask for PhysicalDiskAdoption { return json!({ "error": "task disabled" }); } + // Only adopt physical disks after rack handoff has completed. + // + // This prevents a race condition where the same physical disks + // are inserted simultaneously at handoff time and inside this + // background task. 
This is bad because the handoff transaction will + // fail if the same disk already exists. + // + // TODO-multirack: This will only work for clusters smaller than + // a page. + let result = self.datastore.rack_list_initialized( + opctx, + &DataPageParams::max_page() + ).await; + match result { + Ok(racks) => { + if !racks.iter().any(|r| r.identity().id == self.rack_id) { + info!( + &opctx.log, + "Physical Disk Adoption: Rack not yet initialized"; + "rack_id" => %self.rack_id, + ); + let msg = format!("rack not yet initialized: {}", self.rack_id); + return json!({"error": msg}); + } + }, + Err(err) => { + warn!( + &opctx.log, + "Physical Disk Adoption: failed to query for initialized racks"; + "err" => %err, + ); + return json!({ "error": format!("failed to query database: {:#}", err) }); + } + } + let mut disks_added = 0; let log = &opctx.log; warn!(&log, "physical disk adoption task started"); @@ -100,7 +144,7 @@ impl BackgroundTask for PhysicalDiskAdoption { let result = self.datastore.physical_disk_and_zpool_insert( opctx, - disk, + disk.clone(), zpool ).await; @@ -110,14 +154,20 @@ impl BackgroundTask for PhysicalDiskAdoption { "Physical Disk Adoption: failed to insert new disk and zpool"; "err" => %err ); - return json!({ "error": format!("failed to insert disk/zpool: {:#}", err) }); + let msg = format!( + "failed to insert disk/zpool: {:#}; disk = {:#?}", + err, + disk + ); + return json!({ "error": msg}); } disks_added += 1; info!( &opctx.log, - "Physical Disk Adoption: Successfully added a new disk and zpool" + "Physical Disk Adoption: Successfully added a new disk and zpool"; + "disk" => #?disk ); } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 9e0b12d83d..1bb42b20b2 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -379,6 +379,7 @@ impl Nexus { Arc::clone(&db_datastore), &config.pkg.background_tasks, config.deployment.id, + config.deployment.rack_id, resolver.clone(), saga_request, ); diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 17c5fbe7fd..c3803c01af 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -848,10 +848,6 @@ impl super::Nexus { ), })?; - // Trigger an inventory collection so that the newly added sled is known - // about. - self.activate_inventory_collection(); - Ok(()) } diff --git a/nexus/src/app/sled.rs b/nexus/src/app/sled.rs index 55a8c18910..e4bfa1b56b 100644 --- a/nexus/src/app/sled.rs +++ b/nexus/src/app/sled.rs @@ -69,7 +69,17 @@ impl super::Nexus { self.rack_id, info.generation.into(), ); - self.db_datastore.sled_upsert(sled).await?; + let (_, was_modified) = self.db_datastore.sled_upsert(sled).await?; + + // If a new sled-agent just came online we want to trigger inventory + // collection. + // + // This will allow us to learn about disks so that they can be added to + // the control plane. + if was_modified { + self.activate_inventory_collection(); + } + Ok(()) } diff --git a/test-utils/src/dev/test_cmds.rs b/test-utils/src/dev/test_cmds.rs index 5ef2da672b..51ade208f8 100644 --- a/test-utils/src/dev/test_cmds.rs +++ b/test-utils/src/dev/test_cmds.rs @@ -188,6 +188,11 @@ pub fn redact_variable(input: &str) -> String { ) .to_string(); + let s = regex::Regex::new(r"iter \d+,") + .unwrap() + .replace_all(&s, ",") + .to_string(); + s } From 977d00add6aa7f29602ca77c4e9cc0c3bcebd27a Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 18 Apr 2024 15:46:32 -0500 Subject: [PATCH 165/334] Bump web console (dense tables, instance refresh button, etc.) 
(#5562) https://github.com/oxidecomputer/console/compare/2ba444ca...6334f0db * [6334f0db](https://github.com/oxidecomputer/console/commit/6334f0db) bump omicron to latest main (no changes) * [46dbd9c8](https://github.com/oxidecomputer/console/commit/46dbd9c8) oxidecomputer/console#2158 * [e94929ab](https://github.com/oxidecomputer/console/commit/e94929ab) try 300ms minTime on RefreshButton SpinnerLoader * [b6da4154](https://github.com/oxidecomputer/console/commit/b6da4154) oxidecomputer/console#2161 * [788389a8](https://github.com/oxidecomputer/console/commit/788389a8) oxidecomputer/console#2157 * [fed9f4d4](https://github.com/oxidecomputer/console/commit/fed9f4d4) oxidecomputer/console#2159 * [aa75f577](https://github.com/oxidecomputer/console/commit/aa75f577) oxidecomputer/console#2160 * [eeb32a78](https://github.com/oxidecomputer/console/commit/eeb32a78) oxidecomputer/console#2153 * [522d3ab1](https://github.com/oxidecomputer/console/commit/522d3ab1) oxidecomputer/console#2156 * [c726356f](https://github.com/oxidecomputer/console/commit/c726356f) oxidecomputer/console#2155 * [e2b0a7d0](https://github.com/oxidecomputer/console/commit/e2b0a7d0) oxidecomputer/console#2151 * [a5af899a](https://github.com/oxidecomputer/console/commit/a5af899a) oxidecomputer/console#2154 * [73263a4e](https://github.com/oxidecomputer/console/commit/73263a4e) of course I didn't push to main without running the tests, why do you ask * [ea108b7e](https://github.com/oxidecomputer/console/commit/ea108b7e) put firewall rules tab first on vpc detail --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index e3da6505e4..a11cc38868 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="2ba444caf8d19830c41448847f0acf61d4a5d2b2" -SHA2="752ea88d5d3e2f92d32a8d91d3ad7f3a154086d177511a18401cb14a55eb1ea4" +COMMIT="6334f0db4f0efe82de718343a477362d73124a68" +SHA2="a08b40e59a4e67d57a8499622dc0bb7b605416a442a4147d52f69762db1bbc8c" From f557da24e8cf10ce027495c9743eb40a5a2a3e1f Mon Sep 17 00:00:00 2001 From: David Crespo Date: Thu, 18 Apr 2024 17:46:13 -0500 Subject: [PATCH 166/334] Bump web console (fix 404 after instance delete) (#5564) https://github.com/oxidecomputer/console/compare/6334f0db...c008d365 * [c008d365](https://github.com/oxidecomputer/console/commit/c008d365) oxidecomputer/console#2165 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index a11cc38868..4e3adf59cf 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="6334f0db4f0efe82de718343a477362d73124a68" -SHA2="a08b40e59a4e67d57a8499622dc0bb7b605416a442a4147d52f69762db1bbc8c" +COMMIT="c008d365b38b53c962a80e9c352e4e09ec18b436" +SHA2="fed49e7611283f7dc0f006e26d0e111318c8d95f9dca70804590782055615223" From b6f07ab0336562be2bd1b4e982576315134dbccc Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Thu, 18 Apr 2024 21:26:41 -0400 Subject: [PATCH 167/334] Remove diesel OID cache priming (#5565) See #5561 for rationale. We could add this back if we could figure out how to correctly invalidate the cache after a schema upgrade, but there don't seem to be any easy ways to do that. This will ensure correctness for now and give us time to figure out a different path. 
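
For readers skimming the diff below: after this change the per-connection setup boils down to the single full-table-scan guard, and enum OID lookups happen lazily inside Diesel on first use per connection. The following is a condensed sketch, not part of the patch itself; it assumes the surviving `disallow_full_table_scans` helper issues `DISALLOW_FULL_TABLE_SCAN_SQL` via `batch_execute_async`, which this hunk does not show.

```rust
// Illustrative sketch of the remaining per-connection setup (the real
// code is a method on the bb8 `CustomizeConnection` impl).
use async_bb8_diesel::{AsyncSimpleConnection, Connection, ConnectionError};
use diesel::PgConnection;
use diesel_dtrace::DTraceConnection;

type DbConnection = DTraceConnection<PgConnection>;

const DISALLOW_FULL_TABLE_SCAN_SQL: &str =
    "set disallow_full_table_scans = on; set large_full_scan_rows = 0;";

async fn on_acquire(
    conn: &mut Connection<DbConnection>,
) -> Result<(), ConnectionError> {
    // The only remaining per-connection work: forbid full table scans.
    // No OID cache priming happens here any more.
    conn.batch_execute_async(DISALLOW_FULL_TABLE_SCAN_SQL).await?;
    Ok(())
}
```

With priming removed, a freshly acquired connection no longer carries OIDs for enums it has never touched, which is exactly the case the new regression test added to `nexus/tests/integration_tests/schema.rs` below exercises.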
--- nexus/db-queries/src/db/pool_connection.rs | 268 +-------------------- nexus/tests/integration_tests/schema.rs | 56 ++++- 2 files changed, 56 insertions(+), 268 deletions(-) diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index b3311c540a..dae6a0ee51 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -4,207 +4,26 @@ //! Customization that happens on each connection as they're acquired. -use async_bb8_diesel::AsyncConnection; -use async_bb8_diesel::AsyncRunQueryDsl; use async_bb8_diesel::AsyncSimpleConnection; use async_bb8_diesel::Connection; use async_bb8_diesel::ConnectionError; use async_trait::async_trait; use bb8::CustomizeConnection; -use diesel::pg::GetPgMetadataCache; -use diesel::pg::PgMetadataCacheKey; -use diesel::prelude::*; use diesel::PgConnection; use diesel_dtrace::DTraceConnection; -use std::collections::HashMap; -use tokio::sync::Mutex; pub type DbConnection = DTraceConnection; -// This is a list of all user-defined types (ENUMS) in the current DB schema. -// -// Diesel looks up user-defined types as they are encountered, and loads -// them into a metadata cache. Although this cost is amortized over the lifetime -// of a connection, this can be slower than desired: -// - Diesel issues a round-trip database call on each user-defined type -// - The cache of OIDs for user-defined types is "per-connection", so when -// using a connection pool, we redo all these calls for new connections. -// -// To mitigate: We look up a list of user-defined types here on first access -// to the connection, and pre-populate the cache. Furthermore, we save this -// information and use it to populate other connections too, without incurring -// another database lookup. -// -// See https://github.com/oxidecomputer/omicron/issues/4733 for more context. -static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ - "address_lot_kind", - "authentication_mode", - "bfd_mode", - "block_size", - "bp_zone_disposition", - "caboose_which", - "dataset_kind", - "dns_group", - "downstairs_client_stop_request_reason_type", - "downstairs_client_stopped_reason_type", - "hw_power_state", - "hw_rot_slot", - "identity_type", - "instance_state", - "ip_attach_state", - "ip_kind", - "ip_pool_resource_type", - "network_interface_kind", - "physical_disk_kind", - "physical_disk_policy", - "physical_disk_state", - "producer_kind", - "provider_type", - "root_of_trust_page_which", - "router_route_kind", - "saga_state", - "service_kind", - "sled_policy", - "sled_resource_kind", - "sled_role", - "sled_state", - "snapshot_state", - "sp_type", - "switch_interface_kind", - "switch_link_fec", - "switch_link_speed", - "switch_port_geometry", - "upstairs_repair_notification_type", - "upstairs_repair_type", - "user_provision_type", - "vpc_firewall_rule_action", - "vpc_firewall_rule_direction", - "vpc_firewall_rule_protocol", - "vpc_firewall_rule_status", - "vpc_router_kind", - "zone_type", -]; -const CUSTOM_TYPE_SCHEMA: &'static str = "public"; - pub const DISALLOW_FULL_TABLE_SCAN_SQL: &str = "set disallow_full_table_scans = on; set large_full_scan_rows = 0;"; -#[derive(Debug)] -struct OIDCache(HashMap, (u32, u32)>); - -impl OIDCache { - // Populate a new OID cache by pre-filling values - async fn new( - conn: &mut Connection, - ) -> Result { - // Lookup all the OIDs for custom types. 
- // - // As a reminder, this is an optimization: - // - If we supply a value in CUSTOM_TYPE_KEYS that does not - // exist in the schema, the corresponding row won't be - // found, so the value will be ignored. - // - If we don't supply a value in CUSTOM_TYPE_KEYS, even - // though it DOES exist in the schema, it'll likewise not - // get pre-populated into the cache. Diesel would observe - // the cache miss, and perform the lookup later. - let results: Vec = pg_type::table - .select((pg_type::typname, pg_type::oid, pg_type::typarray)) - .inner_join( - pg_namespace::table - .on(pg_type::typnamespace.eq(pg_namespace::oid)), - ) - .filter(pg_type::typname.eq_any(CUSTOM_TYPE_KEYS)) - .filter(pg_namespace::nspname.eq(CUSTOM_TYPE_SCHEMA)) - .load_async(&*conn) - .await?; - - // Convert the OIDs into a ("Cache Key", "OID Tuple") pair, - // and store the result in a HashMap. - // - // We'll iterate over this HashMap to pre-populate the connection-local cache for all - // future connections, including this one. - Ok::<_, ConnectionError>(Self(HashMap::from_iter( - results.into_iter().map( - |PgTypeMetadata { typname, oid, array_oid }| { - ( - PgMetadataCacheKey::new( - Some(CUSTOM_TYPE_SCHEMA.into()), - std::borrow::Cow::Owned(typname), - ), - (oid, array_oid), - ) - }, - ), - ))) - } -} - -// String-based representation of the CockroachDB version. -// -// We currently do minimal parsing of this value, but it should -// be distinct between different revisions of CockroachDB. -// This version includes the semver version of the DB, but also -// build and target information. -#[derive(Debug, Eq, PartialEq, Hash)] -struct CockroachVersion(String); - -impl CockroachVersion { - async fn new( - conn: &Connection, - ) -> Result { - diesel::sql_function!(fn version() -> Text); - - let version = - diesel::select(version()).get_result_async::(conn).await?; - Ok(Self(version)) - } -} - /// A customizer for all new connections made to CockroachDB, from Diesel. #[derive(Debug)] -pub(crate) struct ConnectionCustomizer { - oid_caches: Mutex>, -} +pub(crate) struct ConnectionCustomizer {} impl ConnectionCustomizer { pub(crate) fn new() -> Self { - Self { oid_caches: Mutex::new(HashMap::new()) } - } - - async fn populate_metadata_cache( - &self, - conn: &mut Connection, - ) -> Result<(), ConnectionError> { - // Look up the CockroachDB version for new connections, to ensure - // that OID caches are distinct between different CRDB versions. - // - // This step is performed out of an abundance of caution: OIDs are not - // necessarily stable across major releases of CRDB, and this ensures - // that the OID lookups on custom types do not cross this version - // boundary. - let version = CockroachVersion::new(conn).await?; - - // Lookup the OID cache, or populate it if we haven't previously - // established a connection to this database version. - let mut oid_caches = self.oid_caches.lock().await; - let entry = oid_caches.entry(version); - use std::collections::hash_map::Entry::*; - let oid_cache = match entry { - Occupied(ref entry) => entry.get(), - Vacant(entry) => entry.insert(OIDCache::new(conn).await?), - }; - - // Copy the OID cache into this specific connection. - // - // NOTE: I don't love that this is blocking (due to "as_sync_conn"), but the - // "get_metadata_cache" method does not seem implemented for types that could have a - // non-Postgres backend. 
- let mut sync_conn = conn.as_sync_conn(); - let cache = sync_conn.get_metadata_cache(); - for (k, v) in &oid_cache.0 { - cache.store_type(k.clone(), *v); - } - Ok(()) + Self {} } async fn disallow_full_table_scans( @@ -224,90 +43,7 @@ impl CustomizeConnection, ConnectionError> &self, conn: &mut Connection, ) -> Result<(), ConnectionError> { - self.populate_metadata_cache(conn).await?; self.disallow_full_table_scans(conn).await?; Ok(()) } } - -#[derive(Debug, Clone, Hash, PartialEq, Eq, Queryable)] -pub struct PgTypeMetadata { - typname: String, - oid: u32, - array_oid: u32, -} - -table! { - pg_type (oid) { - oid -> Oid, - typname -> Text, - typarray -> Oid, - typnamespace -> Oid, - } -} - -table! { - pg_namespace (oid) { - oid -> Oid, - nspname -> Text, - } -} - -allow_tables_to_appear_in_same_query!(pg_type, pg_namespace); - -#[cfg(test)] -mod test { - use super::*; - use nexus_test_utils::db::test_setup_database; - use omicron_test_utils::dev; - - // Ensure that the "CUSTOM_TYPE_KEYS" values match the enums - // we find within the database. - // - // If the two are out-of-sync, identify the values causing problems. - #[tokio::test] - async fn all_enums_in_prepopulate_list() { - let logctx = dev::test_setup_log("test_project_creation"); - let mut crdb = test_setup_database(&logctx.log).await; - let client = crdb.connect().await.expect("Failed to connect to CRDB"); - - // https://www.cockroachlabs.com/docs/stable/show-enums - let rows = client - .query("SHOW ENUMS FROM omicron.public;", &[]) - .await - .unwrap_or_else(|_| panic!("failed to list enums")); - client.cleanup().await.expect("cleaning up after listing enums"); - - let mut observed_public_enums = rows - .into_iter() - .map(|row| -> String { - for i in 0..row.len() { - if row.columns()[i].name() == "name" { - return row.get(i); - } - } - panic!("Missing 'name' in row: {row:?}"); - }) - .collect::>(); - observed_public_enums.sort(); - - let mut expected_enums: Vec = - CUSTOM_TYPE_KEYS.into_iter().map(|s| s.to_string()).collect(); - expected_enums.sort(); - - pretty_assertions::assert_eq!( - observed_public_enums, - expected_enums, - "Enums did not match.\n\ - If the type is present on the left, but not the right:\n\ - \tThe enum is in the DB, but not in CUSTOM_TYPE_KEYS.\n\ - \tConsider adding it, so we can pre-populate the OID cache.\n\ - If the type is present on the right, but not the left:\n\ - \tThe enum is not the DB, but it is in CUSTOM_TYPE_KEYS.\n\ - \tConsider removing it, because the type no longer exists" - ); - - crdb.cleanup().await.unwrap(); - logctx.cleanup_successful(); - } -} diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 44c91bbacf..89d2e274c5 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -941,8 +941,28 @@ async fn dbinit_equals_sum_of_all_up() { let all_versions = read_all_schema_versions(); - // Go from the first version to the latest version. - for version in all_versions.iter_versions() { + // Apply the very first schema migration. In particular, this creates the + // `omicron` database, which allows us to construct a `db::Pool` below. + for version in all_versions.iter_versions().take(1) { + apply_update(log, &crdb, version, 1).await; + assert_eq!( + version.semver().to_string(), + query_crdb_schema_version(&crdb).await + ); + } + + // Create a connection pool after we apply the first schema version but + // before applying the rest, and grab a connection from that pool. 
We'll use + // it for an extra check later. + let pool = nexus_db_queries::db::Pool::new( + log, + &nexus_db_queries::db::Config { url: crdb.pg_config().clone() }, + ); + let conn_from_pool = + pool.pool().get().await.expect("failed to get pooled connection"); + + // Go from the second version to the latest version. + for version in all_versions.iter_versions().skip(1) { apply_update(log, &crdb, version, 1).await; assert_eq!( version.semver().to_string(), @@ -957,6 +977,38 @@ async fn dbinit_equals_sum_of_all_up() { // Query the newly constructed DB for information about its schema let observed_schema = InformationSchema::new(&crdb).await; let observed_data = observed_schema.query_all_tables(log, &crdb).await; + + // Using the connection we got from the connection pool prior to applying + // the schema migrations, attempt to insert a sled resource. This involves + // the `sled_resource_kind` enum, whose OID was changed by the schema + // migration in version 53.0.0 (by virtue of the enum being dropped and + // added back with a different set of variants). If the diesel OID cache was + // populated when we acquired the connection from the pool, this will fail + // with a `type with ID $NUM does not exist` error. + { + use async_bb8_diesel::AsyncRunQueryDsl; + use nexus_db_model::schema::sled_resource::dsl; + use nexus_db_model::Resources; + use nexus_db_model::SledResource; + use nexus_db_model::SledResourceKind; + + diesel::insert_into(dsl::sled_resource) + .values(SledResource { + id: Uuid::new_v4(), + sled_id: Uuid::new_v4(), + kind: SledResourceKind::Instance, + resources: Resources { + hardware_threads: 8_u32.into(), + rss_ram: 1024_i64.try_into().unwrap(), + reservoir_ram: 1024_i64.try_into().unwrap(), + }, + }) + .execute_async(&*conn_from_pool) + .await + .expect("failed to insert - did we poison the OID cache?"); + } + std::mem::drop(conn_from_pool); + std::mem::drop(pool); crdb.cleanup().await.unwrap(); // Create a new DB with data populated from dbinit.sql for comparison From 7cb04ceefe628ebb904a8c61789658343b1cf443 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Fri, 19 Apr 2024 22:11:35 +1200 Subject: [PATCH 168/334] Fix dataset unmount during virtual hardware destroy (#5567) Fixes a bug where running `cargo xtask virtual-hardware destroy` after installing and uninstalling omicron would result in the following error: ```console $ cargo xtask virtual-hardware destroy Finished dev [unoptimized + debuginfo] target(s) in 0.81s Running `target/debug/xtask virtual-hardware destroy` destroying virtual hardware Disabling svc:/system/fmd:default Re-enabling svc:/system/fmd:default unloading xde driver ensuring softnpu zone destroyed destroying: oxi_3dfef4dd-139b-4456-b322-deb39e1e86a5 destroyed: oxi_3dfef4dd-139b-4456-b322-deb39e1e86a5 destroying: oxi_9fca391c-2991-4964-90dc-8e5f321c1176 Error: "/usr/bin/pfexec" failed: exit status: 1 (stderr: cannot unmount '/var/fm/fmd': Device busy ) ``` When looking into this further I noticed the code was checking the second column to see if it could not be mounted by asserting if its value was not equal to "yes" and checking the third column to see if it was not mounted by asserting if its value did not equal "noauto": ```rust // Lines 213-216 for dataset in zfs_list_internal("yes", "noauto")? 
{ println!("unmounting: {dataset}"); zfs_umount(&dataset)?; } // Lines 652-675 fn zfs_list_internal(canmount: &str, mounted: &str) -> Result> { let mut cmd = Command::new(ZFS); cmd.args(["list", "-rHpo", "name,canmount,mounted"]); let output = execute(cmd)?; Ok(String::from_utf8(output.stdout) .context("Invalid zfs list output")? .lines() .filter_map(|line| { let mut cols = line.trim().split_whitespace(); let dataset = cols.next()?; if !dataset.starts_with("oxi_") { return None; } if canmount != cols.next()? { return None; } if mounted != cols.next()? { return None; } return Some(dataset.to_string()); }) .collect()) } ``` The values when calling `zfs_list_internal()` on line 213 should be inverted to reflect the ordering of the `$ zfs list -rHpo name,canmount,mounted` command called in lines 653 and 654: ```rust let mut cmd = Command::new(ZFS); cmd.args(["list", "-rHpo", "name,canmount,mounted"]); ``` ### Fix tested on a Helios machine ```console $ cargo xtask virtual-hardware create --gateway-ip "$GATEWAY_IP" --pxa-start "$PXA_START" --pxa-end "$PXA_END" --physical-link "$PHYSICAL_LINK" --gateway-mac "$GATEWAY_MAC" Finished dev [unoptimized + debuginfo] target(s) in 0.42s Running `target/debug/xtask virtual-hardware create --gateway-ip 192.168.1.199 --pxa-start 192.168.1.20 --pxa-end 192.168.1.40 --physical-link fake_external_stub0 --gateway-mac '02:08:20:95:4a:29'` creating virtual hardware creating /var/tmp/m2_0.vdev creating /var/tmp/m2_1.vdev creating /var/tmp/u2_0.vdev creating /var/tmp/u2_1.vdev creating /var/tmp/u2_2.vdev creating /var/tmp/u2_3.vdev creating /var/tmp/u2_4.vdev creating /var/tmp/u2_5.vdev creating /var/tmp/u2_6.vdev creating /var/tmp/u2_7.vdev creating /var/tmp/u2_8.vdev Simnet net0/sc0_0 exists Simnet net1/sc1_0 exists Vnic sc0_1 exists Using 192.168.1.199 as gateway ip using 02:08:20:95:4A:29 as gateway mac configuring SoftNPU ARP entry configuring SoftNPU proxy ARP SoftNPU state: local v6: local v4: router v6: router v4_idx: router v4_routes: resolver v4: 192.168.1.199 -> 02:08:20:95:4a:29 resolver v6: nat v4: nat_v6: port_mac: proxy arp: 192.168.1.20/192.168.1.40: a8:e1:de:01:70:1d created virtual hardware $ pfexec ./target/release/omicron-package -t centzon install Logging to: /home/coatlicue/src/omicron/out/LOG $ zoneadm list global sidecar_softnpu oxz_switch oxz_internal_dns_2ce8fd4d-8a4c-402f-8627-ecebe856cbe5 oxz_internal_dns_cc8e7214-a66d-4ef6-81e7-54745dc0792e oxz_internal_dns_95c58b1a-7e0c-4f44-9600-8a296f718f2c oxz_ntp_20fdc77d-d04f-4ed2-9d3e-e5018e1262ad oxz_cockroachdb_6d5f28d1-0f2c-4a81-a2ec-aa643fbafc1a oxz_cockroachdb_d2f8c0e0-9552-4ce1-89a9-b131e4c4a9e4 oxz_cockroachdb_a019f6c5-a006-42d1-96e6-eeef99705123 oxz_cockroachdb_b9b2b59b-4524-4e82-aa12-1872371ffbbd oxz_cockroachdb_edbff36d-6731-47bc-98f1-a6beebf6a57a oxz_crucible_b645cdec-6978-48d8-b573-6f4278df485b oxz_crucible_acebdb38-d38e-4071-b833-3558b8c5c23f oxz_crucible_fb969ea3-e8e3-4360-83b8-b3e8e4ed1e85 oxz_crucible_e70aeeed-dd41-4f1b-964d-65776d7eba77 oxz_crucible_77380b8b-7ec9-42dd-bd9e-6f3ddaa3bdfc oxz_oximeter_da21473b-c931-4a56-b18f-267d19abc547 oxz_crucible_1faca31c-6284-4cca-a0a6-9e4662d9be4f oxz_crucible_b27a2bfa-b593-4ce6-9b39-0fd74685f5a2 oxz_crucible_pantry_8f15bcb2-1e68-4cd4-973a-b720209eff32 oxz_crucible_c03e85ae-6d0c-4b18-a36e-6bbacbf4b634 oxz_nexus_2daf75d3-cd94-4734-8f7a-074bc84a6732 oxz_nexus_5a3543b7-df7b-4cbe-a493-55e7d9cf74ee oxz_crucible_pantry_c3b8a111-3bbe-49ba-920d-88e6e83a18fb oxz_crucible_pantry_457c44a3-37d2-4125-a695-806331a5d074 
oxz_crucible_7de1e905-dd8c-4785-aa9e-3d796e0c7a0e oxz_external_dns_971460cb-2280-4456-ad5c-33eada634b48 oxz_external_dns_ff27df0a-dece-4cbc-8604-e099f90d9022 oxz_clickhouse_b6d8b8b6-90e8-4abc-ace9-3e792d709130 oxz_nexus_868d1b38-c4b0-49a0-8f18-4e56b07576e5 $ pfexec ./target/release/omicron-package -t centzon uninstall Logging to: /home/coatlicue/src/omicron/out/LOG About to delete the following datasets: [ "oxi_16193889-6446-469f-bd2b-35b1206dc1d7/cluster", "oxi_16193889-6446-469f-bd2b-35b1206dc1d7/config", "oxi_16193889-6446-469f-bd2b-35b1206dc1d7/debug", "oxi_16193889-6446-469f-bd2b-35b1206dc1d7/install", "oxi_7c6f7791-992e-4d87-b26f-4f76602820c8/cluster", "oxi_7c6f7791-992e-4d87-b26f-4f76602820c8/config", "oxi_7c6f7791-992e-4d87-b26f-4f76602820c8/debug", "oxi_7c6f7791-992e-4d87-b26f-4f76602820c8/install", "oxp_1637b74e-5bd3-45c1-8a85-71eb9b8c7158/crucible", "oxp_1637b74e-5bd3-45c1-8a85-71eb9b8c7158/crypt", "oxp_44abd0b7-0592-4406-8e3a-a70ce55f9500/crucible", "oxp_44abd0b7-0592-4406-8e3a-a70ce55f9500/crypt", "oxp_6879b1df-cfdd-4f99-b58c-32e04066c242/crucible", "oxp_6879b1df-cfdd-4f99-b58c-32e04066c242/crypt", "oxp_9deecf63-7fe1-46b3-9cfb-9bc2689345be/crucible", "oxp_9deecf63-7fe1-46b3-9cfb-9bc2689345be/crypt", "oxp_b6c2eae1-9a71-4d3d-953b-41e3f6bfb0c2/crucible", "oxp_b6c2eae1-9a71-4d3d-953b-41e3f6bfb0c2/crypt", "oxp_b9910049-d8fb-41f1-9ec7-b07723fce648/crucible", "oxp_b9910049-d8fb-41f1-9ec7-b07723fce648/crypt", "oxp_deacd5ab-ff26-4728-bcf7-bdb4ab7cf937/crucible", "oxp_deacd5ab-ff26-4728-bcf7-bdb4ab7cf937/crypt", "oxp_df79a8ac-d4f9-47f9-98a0-e7ec958c305b/crucible", "oxp_df79a8ac-d4f9-47f9-98a0-e7ec958c305b/crypt", "oxp_ee7b3860-8fd7-44ac-a7f8-0ee2e9b9432b/crucible", "oxp_ee7b3860-8fd7-44ac-a7f8-0ee2e9b9432b/crypt", "rpool/zone/oxz_switch", ] [yY to confirm] >> y $ cargo xtask virtual-hardware destroy Finished dev [unoptimized + debuginfo] target(s) in 0.82s Running `target/debug/xtask virtual-hardware destroy` destroying virtual hardware Disabling svc:/system/fmd:default unmounting: oxi_7c6f7791-992e-4d87-b26f-4f76602820c8/backing/fmd Re-enabling svc:/system/fmd:default unloading xde driver ensuring softnpu zone destroyed destroying: oxi_16193889-6446-469f-bd2b-35b1206dc1d7 destroyed: oxi_16193889-6446-469f-bd2b-35b1206dc1d7 destroying: oxi_7c6f7791-992e-4d87-b26f-4f76602820c8 destroyed: oxi_7c6f7791-992e-4d87-b26f-4f76602820c8 destroying: oxp_1637b74e-5bd3-45c1-8a85-71eb9b8c7158 destroyed: oxp_1637b74e-5bd3-45c1-8a85-71eb9b8c7158 destroying: oxp_44abd0b7-0592-4406-8e3a-a70ce55f9500 destroyed: oxp_44abd0b7-0592-4406-8e3a-a70ce55f9500 destroying: oxp_6879b1df-cfdd-4f99-b58c-32e04066c242 destroyed: oxp_6879b1df-cfdd-4f99-b58c-32e04066c242 destroying: oxp_9deecf63-7fe1-46b3-9cfb-9bc2689345be destroyed: oxp_9deecf63-7fe1-46b3-9cfb-9bc2689345be destroying: oxp_b6c2eae1-9a71-4d3d-953b-41e3f6bfb0c2 destroyed: oxp_b6c2eae1-9a71-4d3d-953b-41e3f6bfb0c2 destroying: oxp_b9910049-d8fb-41f1-9ec7-b07723fce648 destroyed: oxp_b9910049-d8fb-41f1-9ec7-b07723fce648 destroying: oxp_deacd5ab-ff26-4728-bcf7-bdb4ab7cf937 destroyed: oxp_deacd5ab-ff26-4728-bcf7-bdb4ab7cf937 destroying: oxp_df79a8ac-d4f9-47f9-98a0-e7ec958c305b destroyed: oxp_df79a8ac-d4f9-47f9-98a0-e7ec958c305b destroying: oxp_ee7b3860-8fd7-44ac-a7f8-0ee2e9b9432b destroyed: oxp_ee7b3860-8fd7-44ac-a7f8-0ee2e9b9432b deleted /var/tmp/m2_0.vdev deleted /var/tmp/m2_1.vdev deleted /var/tmp/u2_0.vdev deleted /var/tmp/u2_1.vdev deleted /var/tmp/u2_2.vdev deleted /var/tmp/u2_3.vdev deleted /var/tmp/u2_4.vdev deleted /var/tmp/u2_5.vdev deleted 
/var/tmp/u2_6.vdev deleted /var/tmp/u2_7.vdev deleted /var/tmp/u2_8.vdev destroyed virtual hardware ``` Closes https://github.com/oxidecomputer/omicron/issues/5559 --- dev-tools/xtask/src/virtual_hardware.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-tools/xtask/src/virtual_hardware.rs b/dev-tools/xtask/src/virtual_hardware.rs index c98d350c73..95190ebfde 100644 --- a/dev-tools/xtask/src/virtual_hardware.rs +++ b/dev-tools/xtask/src/virtual_hardware.rs @@ -210,7 +210,7 @@ fn demount_backingfs() -> Result<()> { const BACKED_SERVICES: &str = "svc:/system/fmd:default"; println!("Disabling {BACKED_SERVICES}"); svcadm_temporary_toggle(BACKED_SERVICES, false)?; - for dataset in zfs_list_internal("yes", "noauto")? { + for dataset in zfs_list_internal("noauto", "yes")? { println!("unmounting: {dataset}"); zfs_umount(&dataset)?; } From 287eee0537859dc1fa3bf8aa0fc89d4de35114ee Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Fri, 19 Apr 2024 08:46:07 -0400 Subject: [PATCH 169/334] Reframe Omicron zone external IP creation in terms of `OmicronZoneExternalIp` (#5560) This is PR 1 of N (probably 3?) working toward getting blueprints to store an `OmicronZoneExternalIp` (which includes both the ID and the snat cfg, if applicable) instead of only storing a raw `IpAddr`. It adjusts the database query function(s) that create omicron zone IPs to take an `OmicronZoneExternalIp` instead of the list of arguments they took previously. The changes here include: * `IncompleteExternalIp::for_service_explicit{,_snat}` have been replaced by `IncompleteExternalIp::for_omicron_zone`. It now detects based on the `OmicronZoneExternalIpKind` whether it's floating or SNAT. * `DataStore::external_ip_allocate_service_explicit{,_snat}` have been replaced by `DataStore::external_ip_allocate_omicron_zone`. * `DataStore::external_ip_allocate_service{,_snat}` have been removed. These allowed allocating an IP without specifying what the IP was (i.e., by taking a free IP from the pool, if one is available). We had no non-test callers of these, and with the blueprint system wanting to make planning decisions that assign IPs without committing to anything in CRDB, I don't think we expect to have any callers of this in the future (since we'll always want to try to allocate a specific IP). * `IncompleteExternalIp::for_service{,_snat}` are gone; same reason as above. * `SourceNatConfig` now validates that the port pair it's given (upon creation or deserialization) are aligned correctly; previously this check was deferred to when we inserted the IP in the database, and the check was an `assert!`. This is now a runtime error instead. This is more of a drive-by-cleanup than strictly required, but I didn't like how many places we could be passing around a potentially-invalid snat config. * Populated the Omicron zone NICs when constructing `PlanningInput`. We were already fetching the nic rows from the DB, but forgot to actually add them. This is also more of a drive-by-fix than strictly required, but we'd need it soon anyway. 
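
As a usage note on the `SourceNatConfig` change in the list above: construction (and deserialization) now fails up front if the port pair is not aligned to `NUM_SOURCE_NAT_PORTS`, rather than tripping an `assert!` during the database insert. A minimal sketch follows; the address and the misaligned range are illustrative values, while the aligned range matches what the rack-init tests below pass.

```rust
// Illustrative usage only; values are examples, not from a real deployment.
use std::net::{IpAddr, Ipv4Addr};

use omicron_common::api::internal::shared::SourceNatConfig;

fn snat_config_examples() {
    let ip = IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1));

    // Aligned: starts on a NUM_SOURCE_NAT_PORTS (16384) boundary and
    // spans exactly 16384 ports, so construction succeeds.
    let cfg = SourceNatConfig::new(ip, 16384, 32767).unwrap();
    assert_eq!(cfg.port_range_raw(), (16384, 32767));

    // Misaligned: rejected at construction (or deserialization) time
    // with SourceNatConfigError::UnalignedPortPair, instead of
    // panicking later in the external-IP insert path.
    assert!(SourceNatConfig::new(ip, 1, 100).is_err());
}
```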
--- Cargo.lock | 2 + clients/nexus-client/src/lib.rs | 3 +- common/src/api/internal/shared.rs | 94 +++- dev-tools/reconfigurator-cli/src/main.rs | 7 +- illumos-utils/src/opte/port_manager.rs | 4 +- nexus/db-model/Cargo.toml | 2 + nexus/db-model/src/external_ip.rs | 199 ++++----- nexus/db-model/src/network_interface.rs | 68 ++- nexus/db-model/src/omicron_zone_config.rs | 16 +- .../src/db/datastore/external_ip.rs | 136 ++---- nexus/db-queries/src/db/datastore/rack.rs | 83 ++-- .../db-queries/src/db/queries/external_ip.rs | 403 ++++-------------- .../execution/src/resource_allocation.rs | 259 +++++------ nexus/reconfigurator/planning/src/example.rs | 7 +- nexus/reconfigurator/preparation/src/lib.rs | 42 ++ .../app/background/sync_service_zone_nat.rs | 6 +- nexus/src/app/instance.rs | 7 +- nexus/types/src/deployment.rs | 1 + nexus/types/src/deployment/planning_input.rs | 52 ++- sled-agent/src/instance.rs | 11 +- sled-agent/src/rack_setup/plan/service.rs | 10 +- sled-agent/src/services.rs | 5 +- 22 files changed, 672 insertions(+), 745 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 8b72b1e179..07d94add14 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4554,6 +4554,8 @@ dependencies = [ "serde", "serde_json", "sled-agent-client", + "slog", + "slog-error-chain", "steno", "strum", "thiserror", diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index cd04b8233f..685c83f80c 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -290,7 +290,8 @@ impl From<&omicron_common::api::internal::shared::SourceNatConfig> fn from( r: &omicron_common::api::internal::shared::SourceNatConfig, ) -> Self { - Self { ip: r.ip, first_port: r.first_port, last_port: r.last_port } + let (first_port, last_port) = r.port_range_raw(); + Self { ip: r.ip, first_port, last_port } } } diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index c123e1f9c8..bc7a2d76ba 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -4,7 +4,10 @@ //! Types shared between Nexus and Sled Agent. -use crate::api::external::{self, BfdMode, Name}; +use crate::{ + address::NUM_SOURCE_NAT_PORTS, + api::external::{self, BfdMode, Name}, +}; use ipnetwork::{IpNetwork, Ipv4Network, Ipv6Network}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -58,16 +61,95 @@ pub struct NetworkInterface { /// An IP address and port range used for source NAT, i.e., making /// outbound network connections from guests or services. -#[derive( - Debug, Clone, Copy, Deserialize, Serialize, JsonSchema, PartialEq, Eq, Hash, -)] +// Note that `Deserialize` is manually implemented; if you make any changes to +// the fields of this structure, you must make them to that implementation too. +#[derive(Debug, Clone, Copy, Serialize, JsonSchema, PartialEq, Eq, Hash)] pub struct SourceNatConfig { /// The external address provided to the instance or service. pub ip: IpAddr, /// The first port used for source NAT, inclusive. - pub first_port: u16, + first_port: u16, /// The last port used for source NAT, also inclusive. - pub last_port: u16, + last_port: u16, +} + +// We implement `Deserialize` manually to add validity checking on the port +// range. +impl<'de> Deserialize<'de> for SourceNatConfig { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de>, + { + use serde::de::Error; + + // The fields of `SourceNatConfigShadow` should exactly match the fields + // of `SourceNatConfig`. 
We're not really using serde's remote derive, + // but by adding the attribute we get compile-time checking that all the + // field names and types match. (It doesn't check the _order_, but that + // should be fine as long as we're using JSON or similar formats.) + #[derive(Deserialize)] + #[serde(remote = "SourceNatConfig")] + struct SourceNatConfigShadow { + ip: IpAddr, + first_port: u16, + last_port: u16, + } + + let shadow = SourceNatConfigShadow::deserialize(deserializer)?; + SourceNatConfig::new(shadow.ip, shadow.first_port, shadow.last_port) + .map_err(D::Error::custom) + } +} + +impl SourceNatConfig { + /// Construct a `SourceNatConfig` with the given port range, both inclusive. + /// + /// # Errors + /// + /// Fails if `(first_port, last_port)` is not aligned to + /// [`NUM_SOURCE_NAT_PORTS`]. + pub fn new( + ip: IpAddr, + first_port: u16, + last_port: u16, + ) -> Result { + if first_port % NUM_SOURCE_NAT_PORTS == 0 + && last_port + .checked_sub(first_port) + .and_then(|diff| diff.checked_add(1)) + == Some(NUM_SOURCE_NAT_PORTS) + { + Ok(Self { ip, first_port, last_port }) + } else { + Err(SourceNatConfigError::UnalignedPortPair { + first_port, + last_port, + }) + } + } + + /// Get the port range. + /// + /// Guaranteed to be aligned to [`NUM_SOURCE_NAT_PORTS`]. + pub fn port_range(&self) -> std::ops::RangeInclusive { + self.first_port..=self.last_port + } + + /// Get the port range as a raw tuple; both values are inclusive. + /// + /// Guaranteed to be aligned to [`NUM_SOURCE_NAT_PORTS`]. + pub fn port_range_raw(&self) -> (u16, u16) { + self.port_range().into_inner() + } +} + +#[derive(Debug, thiserror::Error)] +pub enum SourceNatConfigError { + #[error( + "snat port range is not aligned to {NUM_SOURCE_NAT_PORTS}: \ + ({first_port}, {last_port})" + )] + UnalignedPortPair { first_port: u16, last_port: u16 }, } // We alias [`RackNetworkConfig`] to the current version of the protocol, so diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index abf8cf4441..58d310f56e 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -22,6 +22,7 @@ use nexus_reconfigurator_planning::system::{ }; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::OmicronZoneExternalIp; +use nexus_types::deployment::OmicronZoneExternalIpKind; use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; @@ -158,7 +159,9 @@ impl ReconfiguratorSim { .borrow_mut() .entry(ip) .or_insert_with(ExternalIpUuid::new_v4), - ip, + // TODO-cleanup This is potentially wrong; + // zone_type should tell us the IP kind. 
+ kind: OmicronZoneExternalIpKind::Floating(ip), }; builder .add_omicron_zone_external_ip(zone.id, external_ip) @@ -168,7 +171,7 @@ impl ReconfiguratorSim { let nic = OmicronZoneNic { id: nic.id, mac: nic.mac, - ip: nic.ip.into(), + ip: nic.ip, slot: nic.slot, primary: nic.primary, }; diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 2b2f622070..03c51c321d 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -141,7 +141,7 @@ impl PortManager { ); return Err(Error::InvalidPortIpConfig); }; - let ports = snat.first_port..=snat.last_port; + let ports = snat.port_range(); Some($snat_t { external_ip: snat_ip.into(), ports }) } None => None, @@ -428,7 +428,7 @@ impl PortManager { ); return Err(Error::InvalidPortIpConfig); }; - let ports = snat.first_port..=snat.last_port; + let ports = snat.port_range(); Some($snat_t { external_ip: snat_ip.into(), ports }) } None => None, diff --git a/nexus/db-model/Cargo.toml b/nexus/db-model/Cargo.toml index 45a086a5b3..bfe75377c5 100644 --- a/nexus/db-model/Cargo.toml +++ b/nexus/db-model/Cargo.toml @@ -28,6 +28,8 @@ schemars = { workspace = true, features = ["chrono", "uuid1"] } semver.workspace = true serde.workspace = true serde_json.workspace = true +slog.workspace = true +slog-error-chain.workspace = true steno.workspace = true strum.workspace = true thiserror.workspace = true diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index 93af08fdee..5031b12546 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -9,6 +9,7 @@ use crate::impl_enum_type; use crate::schema::external_ip; use crate::schema::floating_ip; use crate::Name; +use crate::ServiceNetworkInterface; use crate::SqlU16; use chrono::DateTime; use chrono::Utc; @@ -16,17 +17,24 @@ use db_macros::Resource; use diesel::Queryable; use diesel::Selectable; use ipnetwork::IpNetwork; +use nexus_types::deployment::OmicronZoneExternalIp; +use nexus_types::deployment::OmicronZoneExternalIpKind; use nexus_types::external_api::params; use nexus_types::external_api::shared; use nexus_types::external_api::views; -use omicron_common::address::NUM_SOURCE_NAT_PORTS; +use nexus_types::inventory::SourceNatConfig; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadata; +use omicron_common::api::internal::shared::SourceNatConfigError; +use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneUuid; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; use sled_agent_client::types::InstanceExternalIpBody; +use sled_agent_client::ZoneKind; +use slog_error_chain::SlogInlineError; use std::convert::TryFrom; use std::net::IpAddr; use uuid::Uuid; @@ -131,6 +139,46 @@ pub struct ExternalIp { pub is_probe: bool, } +#[derive(Debug, thiserror::Error, SlogInlineError)] +pub enum OmicronZoneExternalIpError { + #[error("database IP is for an instance")] + IpIsForInstance, + #[error("invalid SNAT configuration")] + InvalidSnatConfig(#[from] SourceNatConfigError), + #[error( + "database IP is ephemeral; currently unsupported for Omicron zones" + )] + EphemeralIp, +} + +impl TryFrom<&'_ ExternalIp> for OmicronZoneExternalIp { + type Error = OmicronZoneExternalIpError; + + fn try_from(row: &ExternalIp) -> Result { + if !row.is_service { + return Err(OmicronZoneExternalIpError::IpIsForInstance); + } + + let kind = match row.kind { + IpKind::SNat => { + 
OmicronZoneExternalIpKind::Snat(SourceNatConfig::new( + row.ip.ip(), + row.first_port.0, + row.last_port.0, + )?) + } + IpKind::Floating => { + OmicronZoneExternalIpKind::Floating(row.ip.ip()) + } + IpKind::Ephemeral => { + return Err(OmicronZoneExternalIpError::EphemeralIp) + } + }; + + Ok(Self { id: ExternalIpUuid::from_untyped_uuid(row.id), kind }) + } +} + /// A view type constructed from `ExternalIp` used to represent Floating IP /// objects in user-facing APIs. /// @@ -153,15 +201,13 @@ pub struct FloatingIp { pub project_id: Uuid, } -impl From +impl TryFrom for omicron_common::api::internal::shared::SourceNatConfig { - fn from(eip: ExternalIp) -> Self { - Self { - ip: eip.ip.ip(), - first_port: eip.first_port.0, - last_port: eip.last_port.0, - } + type Error = SourceNatConfigError; + + fn try_from(eip: ExternalIp) -> Result { + Self::new(eip.ip.ip(), eip.first_port.0, eip.last_port.0) } } @@ -303,104 +349,65 @@ impl IncompleteExternalIp { } } - pub fn for_service_explicit( - id: Uuid, - name: &Name, - description: &str, - service_id: Uuid, - pool_id: Uuid, - address: IpAddr, - ) -> Self { - Self { - id, - name: Some(name.clone()), - description: Some(description.to_string()), - time_created: Utc::now(), - kind: IpKind::Floating, - is_service: true, - is_probe: false, - parent_id: Some(service_id), - pool_id, - project_id: None, - explicit_ip: Some(IpNetwork::from(address)), - explicit_port_range: None, - state: IpAttachState::Attached, - } - } - - pub fn for_service_explicit_snat( - id: Uuid, - service_id: Uuid, + pub fn for_omicron_zone( pool_id: Uuid, - address: IpAddr, - (first_port, last_port): (u16, u16), + external_ip: OmicronZoneExternalIp, + zone_id: OmicronZoneUuid, + zone_kind: ZoneKind, ) -> Self { - assert!( - (first_port % NUM_SOURCE_NAT_PORTS == 0) - && (last_port - first_port + 1) == NUM_SOURCE_NAT_PORTS, - "explicit port range must be aligned to {}", - NUM_SOURCE_NAT_PORTS, - ); - let explicit_port_range = Some((first_port.into(), last_port.into())); - let kind = IpKind::SNat; - Self { - id, - name: None, - description: None, - time_created: Utc::now(), - kind, - is_service: true, - is_probe: false, - parent_id: Some(service_id), - pool_id, - project_id: None, - explicit_ip: Some(IpNetwork::from(address)), - explicit_port_range, - state: kind.initial_state(), - } - } - - pub fn for_service( - id: Uuid, - name: &Name, - description: &str, - service_id: Uuid, - pool_id: Uuid, - ) -> Self { - let kind = IpKind::Floating; - Self { - id, - name: Some(name.clone()), - description: Some(description.to_string()), - time_created: Utc::now(), - kind, - is_service: true, - is_probe: false, - parent_id: Some(service_id), - pool_id, - project_id: None, - explicit_ip: None, - explicit_port_range: None, - state: IpAttachState::Attached, - } - } + let (kind, ip, port_range, name, description, state) = match external_ip + .kind + { + OmicronZoneExternalIpKind::Floating(ip) => { + // We'll name this external IP the same as we'll name the NIC + // associated with this zone. + let name = ServiceNetworkInterface::name(zone_id, zone_kind); + + // Using `IpAttachState::Attached` preserves existing behavior, + // `IpKind::Floating.initial_state()` is `::Detached`. If/when + // we do more to unify IPs between services and instances, this + // probably needs to be addressed. 
+ let state = IpAttachState::Attached; + + ( + IpKind::Floating, + ip, + None, + Some(name), + Some(zone_kind.to_string()), + state, + ) + } + OmicronZoneExternalIpKind::Snat(snat_cfg) => { + let (first_port, last_port) = snat_cfg.port_range_raw(); + let kind = IpKind::SNat; + ( + kind, + snat_cfg.ip, + Some((first_port.into(), last_port.into())), + // Only floating IPs are allowed to have names and + // descriptions. + None, + None, + kind.initial_state(), + ) + } + }; - pub fn for_service_snat(id: Uuid, service_id: Uuid, pool_id: Uuid) -> Self { - let kind = IpKind::SNat; Self { - id, - name: None, - description: None, + id: external_ip.id.into_untyped_uuid(), + name, + description, time_created: Utc::now(), kind, is_service: true, is_probe: false, - parent_id: Some(service_id), + parent_id: Some(zone_id.into_untyped_uuid()), pool_id, project_id: None, - explicit_ip: None, - explicit_port_range: None, - state: kind.initial_state(), + explicit_ip: Some(IpNetwork::from(ip)), + explicit_port_range: port_range, + state, } } diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index 108232275d..ff774699d6 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -13,9 +13,12 @@ use chrono::DateTime; use chrono::Utc; use db_macros::Resource; use diesel::AsChangeset; +use ipnetwork::NetworkSize; use nexus_types::external_api::params; use nexus_types::identity::Resource; use omicron_common::api::{external, internal}; +use omicron_uuid_kinds::OmicronZoneUuid; +use sled_agent_client::ZoneKind; use uuid::Uuid; /// The max number of interfaces that may be associated with a resource, @@ -146,15 +149,70 @@ pub struct ServiceNetworkInterface { pub primary: bool, } -impl From for nexus_types::deployment::OmicronZoneNic { - fn from(nic: ServiceNetworkInterface) -> Self { - Self { +impl ServiceNetworkInterface { + /// Generate a suitable [`Name`] for the given Omicron zone ID and kind. + pub fn name(zone_id: OmicronZoneUuid, zone_kind: ZoneKind) -> Name { + // Ideally we'd use `zone_kind.to_string()` here, but that uses + // underscores as separators which aren't allowed in `Name`s. We also + // preserve some existing naming behavior where NTP external networking + // is just called "ntp", not "boundary-ntp". + // + // Most of these zone kinds do not get external networking and therefore + // we don't need to be able to generate names for them, but it's simpler + // to give them valid descriptions than worry about error handling here. + let prefix = match zone_kind { + ZoneKind::BoundaryNtp | ZoneKind::InternalNtp => "ntp", + ZoneKind::Clickhouse => "clickhouse", + ZoneKind::ClickhouseKeeper => "clickhouse-keeper", + ZoneKind::CockroachDb => "cockroach", + ZoneKind::Crucible => "crucible", + ZoneKind::CruciblePantry => "crucible-pantry", + ZoneKind::ExternalDns => "external-dns", + ZoneKind::InternalDns => "internal-dns", + ZoneKind::Nexus => "nexus", + ZoneKind::Oximeter => "oximeter", + }; + + // Now that we have a valid prefix, we know this format string + // always produces a valid `Name`, so we'll unwrap here. 
+ let name = format!("{prefix}-{zone_id}") + .parse() + .expect("valid name failed to parse"); + + Name(name) + } +} + +#[derive(Debug, thiserror::Error)] +#[error("Service NIC {nic_id} has a range of IPs ({ip}); only a single IP is supported")] +pub struct ServiceNicNotSingleIpError { + pub nic_id: Uuid, + pub ip: ipnetwork::IpNetwork, +} + +impl TryFrom<&'_ ServiceNetworkInterface> + for nexus_types::deployment::OmicronZoneNic +{ + type Error = ServiceNicNotSingleIpError; + + fn try_from(nic: &ServiceNetworkInterface) -> Result { + let size = match nic.ip.size() { + NetworkSize::V4(n) => u128::from(n), + NetworkSize::V6(n) => n, + }; + if size != 1 { + return Err(ServiceNicNotSingleIpError { + nic_id: nic.id(), + ip: nic.ip, + }); + } + Ok(Self { id: nic.id(), mac: *nic.mac, - ip: nic.ip, + ip: nic.ip.ip(), slot: *nic.slot, primary: nic.primary, - } + }) } } diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs index 1310d553d2..f6d272a1cd 100644 --- a/nexus/db-model/src/omicron_zone_config.rs +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -81,12 +81,13 @@ impl OmicronZone { nic, snat_cfg, } => { + let (first_port, last_port) = snat_cfg.port_range_raw(); ntp_ntp_servers = Some(ntp_servers.clone()); ntp_dns_servers = Some(dns_servers.clone()); ntp_ntp_domain = domain.clone(); snat_ip = Some(IpNetwork::from(snat_cfg.ip)); - snat_first_port = Some(SqlU16::from(snat_cfg.first_port)); - snat_last_port = Some(SqlU16::from(snat_cfg.last_port)); + snat_first_port = Some(SqlU16::from(first_port)); + snat_last_port = Some(SqlU16::from(last_port)); nic_id = Some(nic.id); (ZoneType::BoundaryNtp, address, None) } @@ -304,11 +305,12 @@ impl OmicronZone { self.snat_last_port, ) { (Some(ip), Some(first_port), Some(last_port)) => { - nexus_types::inventory::SourceNatConfig { - ip: ip.ip(), - first_port: *first_port, - last_port: *last_port, - } + nexus_types::inventory::SourceNatConfig::new( + ip.ip(), + *first_port, + *last_port, + ) + .context("bad SNAT config for boundary NTP")? } _ => bail!( "expected non-NULL snat properties, \ diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 28fc5de884..8c54ccd27d 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -40,6 +40,7 @@ use diesel::prelude::*; use nexus_db_model::FloatingIpUpdate; use nexus_db_model::Instance; use nexus_db_model::IpAttachState; +use nexus_types::deployment::OmicronZoneExternalIp; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; @@ -52,7 +53,9 @@ use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; +use omicron_uuid_kinds::OmicronZoneUuid; use ref_cast::RefCast; +use sled_agent_client::ZoneKind; use std::net::IpAddr; use uuid::Uuid; @@ -225,44 +228,6 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } - /// Allocates an IP address for internal service usage. 
- pub async fn external_ip_allocate_service( - &self, - opctx: &OpContext, - ip_id: Uuid, - name: &Name, - description: &str, - service_id: Uuid, - ) -> CreateResult { - let (.., pool) = self.ip_pools_service_lookup(opctx).await?; - - let data = IncompleteExternalIp::for_service( - ip_id, - name, - description, - service_id, - pool.id(), - ); - self.allocate_external_ip(opctx, data).await - } - - /// Allocates an SNAT IP address for internal service usage. - pub async fn external_ip_allocate_service_snat( - &self, - opctx: &OpContext, - ip_id: Uuid, - service_id: Uuid, - ) -> CreateResult { - let (.., pool) = self.ip_pools_service_lookup(opctx).await?; - - let data = IncompleteExternalIp::for_service_snat( - ip_id, - service_id, - pool.id(), - ); - self.allocate_external_ip(opctx, data).await - } - /// Allocates a floating IP address for instance usage. pub async fn allocate_floating_ip( &self, @@ -383,52 +348,21 @@ impl DataStore { }) } - /// Allocates an explicit Floating IP address for an internal service. - /// - /// Unlike the other IP allocation requests, this does not search for an - /// available IP address, it asks for one explicitly. - pub async fn external_ip_allocate_service_explicit( + /// Allocates an explicit IP address for an Omicron zone. + pub async fn external_ip_allocate_omicron_zone( &self, opctx: &OpContext, - ip_id: Uuid, - name: &Name, - description: &str, - service_id: Uuid, - ip: IpAddr, + zone_id: OmicronZoneUuid, + zone_kind: ZoneKind, + external_ip: OmicronZoneExternalIp, ) -> CreateResult { let (authz_pool, pool) = self.ip_pools_service_lookup(opctx).await?; opctx.authorize(authz::Action::CreateChild, &authz_pool).await?; - let data = IncompleteExternalIp::for_service_explicit( - ip_id, - name, - description, - service_id, + let data = IncompleteExternalIp::for_omicron_zone( pool.id(), - ip, - ); - self.allocate_external_ip(opctx, data).await - } - - /// Allocates an explicit SNAT IP address for an internal service. - /// - /// Unlike the other IP allocation requests, this does not search for an - /// available IP address, it asks for one explicitly. - pub async fn external_ip_allocate_service_explicit_snat( - &self, - opctx: &OpContext, - ip_id: Uuid, - service_id: Uuid, - ip: IpAddr, - port_range: (u16, u16), - ) -> CreateResult { - let (authz_pool, pool) = self.ip_pools_service_lookup(opctx).await?; - opctx.authorize(authz::Action::CreateChild, &authz_pool).await?; - let data = IncompleteExternalIp::for_service_explicit_snat( - ip_id, - service_id, - pool.id(), - ip, - port_range, + external_ip, + zone_id, + zone_kind, ); self.allocate_external_ip(opctx, data).await } @@ -1216,9 +1150,12 @@ mod tests { use super::*; use crate::db::datastore::test_utils::datastore_test; use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::OmicronZoneExternalIpKind; use nexus_types::external_api::shared::IpRange; + use nexus_types::inventory::SourceNatConfig; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_test_utils::dev; + use omicron_uuid_kinds::ExternalIpUuid; use std::collections::BTreeSet; use std::net::Ipv4Addr; @@ -1267,32 +1204,27 @@ mod tests { // Allocate a bunch of fake service IPs. 
let mut external_ips = Vec::new(); let mut allocate_snat = false; // flip-flop between regular and snat - for (i, ip) in ip_range.iter().enumerate() { - let name = format!("service-ip-{i}"); - let external_ip = if allocate_snat { - datastore - .external_ip_allocate_service_explicit_snat( - &opctx, - Uuid::new_v4(), - Uuid::new_v4(), - ip, - (0, NUM_SOURCE_NAT_PORTS - 1), - ) - .await - .expect("failed to allocate service IP") + for ip in ip_range.iter() { + let external_ip_kind = if allocate_snat { + OmicronZoneExternalIpKind::Snat( + SourceNatConfig::new(ip, 0, NUM_SOURCE_NAT_PORTS - 1) + .unwrap(), + ) } else { - datastore - .external_ip_allocate_service_explicit( - &opctx, - Uuid::new_v4(), - &Name(name.parse().unwrap()), - &name, - Uuid::new_v4(), - ip, - ) - .await - .expect("failed to allocate service IP") + OmicronZoneExternalIpKind::Floating(ip) }; + let external_ip = datastore + .external_ip_allocate_omicron_zone( + &opctx, + OmicronZoneUuid::new_v4(), + ZoneKind::Nexus, + OmicronZoneExternalIp { + id: ExternalIpUuid::new_v4(), + kind: external_ip_kind, + }, + ) + .await + .expect("failed to allocate service IP"); external_ips.push(external_ip); allocate_snat = !allocate_snat; } diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 225499c0bf..45793d26f7 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -47,6 +47,8 @@ use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; +use nexus_types::deployment::OmicronZoneExternalIp; +use nexus_types::deployment::OmicronZoneExternalIpKind; use nexus_types::external_api::params as external_params; use nexus_types::external_api::shared; use nexus_types::external_api::shared::IdentityType; @@ -61,6 +63,7 @@ use omicron_common::api::external::LookupType; use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; use omicron_common::bail_unless; +use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::GenericUuid; use slog_error_chain::InlineErrorChain; use std::sync::{Arc, OnceLock}; @@ -475,70 +478,62 @@ impl DataStore { BlueprintZoneType::ExternalDns( blueprint_zone_type::ExternalDns { nic, dns_address, .. }, ) => { - let external_ip = dns_address.ip(); - let service_kind = format!("{}", zone_type.kind()); - let db_ip = IncompleteExternalIp::for_service_explicit( - Uuid::new_v4(), - &db::model::Name(nic.name.clone()), - &service_kind, - zone_config.id.into_untyped_uuid(), - service_pool.id(), - external_ip, - ); + let external_ip = OmicronZoneExternalIp { + id: ExternalIpUuid::new_v4(), + kind: OmicronZoneExternalIpKind::Floating(dns_address.ip()), + }; let db_nic = IncompleteNetworkInterface::new_service( nic.id, zone_config.id.into_untyped_uuid(), DNS_VPC_SUBNET.clone(), IdentityMetadataCreateParams { name: nic.name.clone(), - description: format!("{service_kind} service vNIC"), + description: format!( + "{} service vNIC", + zone_type.kind() + ), }, nic.ip, nic.mac, nic.slot, ) .map_err(|e| RackInitError::AddingNic(e))?; - Some((db_ip, db_nic)) + Some((external_ip, db_nic)) } BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { nic, external_ip, .. 
}) => { - let service_kind = format!("{}", zone_type.kind()); - let db_ip = IncompleteExternalIp::for_service_explicit( - Uuid::new_v4(), - &db::model::Name(nic.name.clone()), - &service_kind, - zone_config.id.into_untyped_uuid(), - service_pool.id(), - *external_ip, - ); + let external_ip = OmicronZoneExternalIp { + id: ExternalIpUuid::new_v4(), + kind: OmicronZoneExternalIpKind::Floating(*external_ip), + }; let db_nic = IncompleteNetworkInterface::new_service( nic.id, zone_config.id.into_untyped_uuid(), NEXUS_VPC_SUBNET.clone(), IdentityMetadataCreateParams { name: nic.name.clone(), - description: format!("{service_kind} service vNIC"), + description: format!( + "{} service vNIC", + zone_type.kind() + ), }, nic.ip, nic.mac, nic.slot, ) .map_err(|e| RackInitError::AddingNic(e))?; - Some((db_ip, db_nic)) + Some((external_ip, db_nic)) } BlueprintZoneType::BoundaryNtp( blueprint_zone_type::BoundaryNtp { snat_cfg, nic, .. }, ) => { - let db_ip = IncompleteExternalIp::for_service_explicit_snat( - Uuid::new_v4(), - zone_config.id.into_untyped_uuid(), - service_pool.id(), - snat_cfg.ip, - (snat_cfg.first_port, snat_cfg.last_port), - ); + let external_ip = OmicronZoneExternalIp { + id: ExternalIpUuid::new_v4(), + kind: OmicronZoneExternalIpKind::Snat(*snat_cfg), + }; let db_nic = IncompleteNetworkInterface::new_service( nic.id, zone_config.id.into_untyped_uuid(), @@ -555,7 +550,7 @@ impl DataStore { nic.slot, ) .map_err(|e| RackInitError::AddingNic(e))?; - Some((db_ip, db_nic)) + Some((external_ip, db_nic)) } BlueprintZoneType::InternalNtp(_) | BlueprintZoneType::Clickhouse(_) @@ -566,7 +561,7 @@ impl DataStore { | BlueprintZoneType::InternalDns(_) | BlueprintZoneType::Oximeter(_) => None, }; - let Some((db_ip, db_nic)) = service_ip_nic else { + let Some((external_ip, db_nic)) = service_ip_nic else { info!( log, "No networking records needed for {} service", @@ -574,6 +569,12 @@ impl DataStore { ); return Ok(()); }; + let db_ip = IncompleteExternalIp::for_omicron_zone( + service_pool.id(), + external_ip, + zone_config.id, + zone_config.zone_type.kind(), + ); Self::allocate_external_ip_on_connection(conn, db_ip).await.map_err( |err| { error!( @@ -1362,11 +1363,10 @@ mod test { primary: true, slot: 0, }, - snat_cfg: SourceNatConfig { - ip: ntp1_ip, - first_port: 16384, - last_port: 32767, - }, + snat_cfg: SourceNatConfig::new( + ntp1_ip, 16384, 32767, + ) + .unwrap(), }, }, ], @@ -1430,11 +1430,10 @@ mod test { primary: true, slot: 0, }, - snat_cfg: SourceNatConfig { - ip: ntp2_ip, - first_port: 0, - last_port: 16383, - }, + snat_cfg: SourceNatConfig::new( + ntp2_ip, 0, 16383, + ) + .unwrap(), }, }, ], diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 3969c808f9..e16fcbb3ff 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -879,7 +879,6 @@ mod tests { use crate::db::model::IpKind; use crate::db::model::IpPool; use crate::db::model::IpPoolRange; - use crate::db::model::Name; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use dropshot::test_util::LogContext; @@ -889,13 +888,20 @@ mod tests { use nexus_db_model::IpPoolResource; use nexus_db_model::IpPoolResourceType; use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::OmicronZoneExternalIp; + use nexus_types::deployment::OmicronZoneExternalIpKind; use nexus_types::external_api::params::InstanceCreate; use 
nexus_types::external_api::shared::IpRange; + use nexus_types::inventory::SourceNatConfig; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_test_utils::dev; use omicron_test_utils::dev::db::CockroachInstance; + use omicron_uuid_kinds::ExternalIpUuid; + use omicron_uuid_kinds::GenericUuid; + use omicron_uuid_kinds::OmicronZoneUuid; + use sled_agent_client::ZoneKind; use std::net::IpAddr; use std::net::Ipv4Addr; use std::sync::Arc; @@ -1325,163 +1331,9 @@ mod tests { } #[tokio::test] - async fn test_next_external_ip_for_service() { - let context = - TestContext::new("test_next_external_ip_for_service").await; - - let ip_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 3), - )) - .unwrap(); - context.initialize_ip_pool(SERVICE_IP_POOL_NAME, ip_range).await; - - // Allocate an IP address as we would for an external, rack-associated - // service. - let service1_id = Uuid::new_v4(); - - // Check that `service_lookup_external_ips` returns an empty vector for - // a service with no external IPs. - assert_eq!( - context - .db_datastore - .external_ip_list_service(&context.opctx, service1_id) - .await - .expect("Failed to look up service external IPs"), - Vec::new(), - ); - - let id1 = Uuid::new_v4(); - let ip1 = context - .db_datastore - .external_ip_allocate_service( - &context.opctx, - id1, - &Name("service1-ip".parse().unwrap()), - "service1-ip", - service1_id, - ) - .await - .expect("Failed to allocate service IP address"); - assert!(ip1.is_service); - assert_eq!(ip1.kind, IpKind::Floating); - assert_eq!(ip1.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); - assert_eq!(ip1.first_port.0, 0); - assert_eq!(ip1.last_port.0, u16::MAX); - assert_eq!(ip1.parent_id, Some(service1_id)); - assert_eq!( - context - .db_datastore - .external_ip_list_service(&context.opctx, service1_id) - .await - .expect("Failed to look up service external IPs"), - vec![ip1], - ); - - // Allocate an SNat IP - let service2_id = Uuid::new_v4(); - let id2 = Uuid::new_v4(); - let ip2 = context - .db_datastore - .external_ip_allocate_service_snat(&context.opctx, id2, service2_id) - .await - .expect("Failed to allocate service IP address"); - assert!(ip2.is_service); - assert_eq!(ip2.kind, IpKind::SNat); - assert_eq!(ip2.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2))); - assert_eq!(ip2.first_port.0, 0); - assert_eq!(ip2.last_port.0, 16383); - assert_eq!(ip2.parent_id, Some(service2_id)); - assert_eq!( - context - .db_datastore - .external_ip_list_service(&context.opctx, service2_id) - .await - .expect("Failed to look up service external IPs"), - vec![ip2], - ); - - // Allocate the next IP address - let service3_id = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let ip3 = context - .db_datastore - .external_ip_allocate_service( - &context.opctx, - id3, - &Name("service3-ip".parse().unwrap()), - "service3-ip", - service3_id, - ) - .await - .expect("Failed to allocate service IP address"); - assert!(ip3.is_service); - assert_eq!(ip3.kind, IpKind::Floating); - assert_eq!(ip3.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 3))); - assert_eq!(ip3.first_port.0, 0); - assert_eq!(ip3.last_port.0, u16::MAX); - assert_eq!(ip3.parent_id, Some(service3_id)); - assert_eq!( - context - .db_datastore - .external_ip_list_service(&context.opctx, service3_id) - .await - .expect("Failed to look up service external IPs"), - vec![ip3], - ); - - // Once we're out of IP addresses, test that we see the 
right error. - let service3_id = Uuid::new_v4(); - let id3 = Uuid::new_v4(); - let err = context - .db_datastore - .external_ip_allocate_service( - &context.opctx, - id3, - &Name("service3-ip".parse().unwrap()), - "service3-ip", - service3_id, - ) - .await - .expect_err("Should have failed to allocate after pool exhausted"); - assert_eq!( - err, - Error::insufficient_capacity( - "No external IP addresses available", - "NextExternalIp::new returned NotFound", - ), - ); - - // But we should be able to allocate another SNat IP - let service4_id = Uuid::new_v4(); - let id4 = Uuid::new_v4(); - let ip4 = context - .db_datastore - .external_ip_allocate_service_snat(&context.opctx, id4, service4_id) - .await - .expect("Failed to allocate service IP address"); - assert!(ip4.is_service); - assert_eq!(ip4.kind, IpKind::SNat); - assert_eq!(ip4.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2))); - assert_eq!(ip4.first_port.0, 16384); - assert_eq!(ip4.last_port.0, 32767); - assert_eq!(ip4.parent_id, Some(service4_id)); - assert_eq!( - context - .db_datastore - .external_ip_list_service(&context.opctx, service4_id) - .await - .expect("Failed to look up service external IPs"), - vec![ip4], - ); - - context.success().await; - } - - #[tokio::test] - async fn test_explicit_external_ip_for_service_is_idempotent() { + async fn test_external_ip_allocate_omicron_zone_is_idempotent() { let context = TestContext::new( - "test_explicit_external_ip_for_service_is_idempotent", + "test_external_ip_allocate_omicron_zone_is_idempotent", ) .await; @@ -1492,19 +1344,22 @@ mod tests { .unwrap(); context.initialize_ip_pool(SERVICE_IP_POOL_NAME, ip_range).await; + let ip_10_0_0_2 = + OmicronZoneExternalIpKind::Floating("10.0.0.2".parse().unwrap()); + let ip_10_0_0_3 = + OmicronZoneExternalIpKind::Floating("10.0.0.3".parse().unwrap()); + // Allocate an IP address as we would for an external, rack-associated // service. - let service_id = Uuid::new_v4(); - let id = Uuid::new_v4(); + let service_id = OmicronZoneUuid::new_v4(); + let id = ExternalIpUuid::new_v4(); let ip = context .db_datastore - .external_ip_allocate_service_explicit( + .external_ip_allocate_omicron_zone( &context.opctx, - id, - &Name("service-ip".parse().unwrap()), - "service-ip", service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 3)), + ZoneKind::Nexus, + OmicronZoneExternalIp { id, kind: ip_10_0_0_3 }, ) .await .expect("Failed to allocate service IP address"); @@ -1512,18 +1367,16 @@ mod tests { assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 3))); assert_eq!(ip.first_port.0, 0); assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.parent_id, Some(service_id)); + assert_eq!(ip.parent_id, Some(service_id.into_untyped_uuid())); // Try allocating the same service IP again. let ip_again = context .db_datastore - .external_ip_allocate_service_explicit( + .external_ip_allocate_omicron_zone( &context.opctx, - id, - &Name("service-ip".parse().unwrap()), - "service-ip", service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 3)), + ZoneKind::Nexus, + OmicronZoneExternalIp { id, kind: ip_10_0_0_3 }, ) .await .expect("Failed to allocate service IP address"); @@ -1535,13 +1388,14 @@ mod tests { // different UUID. 
let err = context .db_datastore - .external_ip_allocate_service_explicit( + .external_ip_allocate_omicron_zone( &context.opctx, - Uuid::new_v4(), - &Name("service-ip".parse().unwrap()), - "service-ip", service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 3)), + ZoneKind::Nexus, + OmicronZoneExternalIp { + id: ExternalIpUuid::new_v4(), + kind: ip_10_0_0_3, + }, ) .await .expect_err("Should have failed to re-allocate same IP address (different UUID)"); @@ -1554,13 +1408,14 @@ mod tests { // different input address. let err = context .db_datastore - .external_ip_allocate_service_explicit( + .external_ip_allocate_omicron_zone( &context.opctx, - id, - &Name("service-ip".parse().unwrap()), - "service-ip", service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 2)), + ZoneKind::Nexus, + OmicronZoneExternalIp { + id, + kind: ip_10_0_0_2, + }, ) .await .expect_err("Should have failed to re-allocate different IP address (same UUID)"); @@ -1571,14 +1426,17 @@ mod tests { // Try allocating the same service IP once more, but do it with a // different port range. + let ip_10_0_0_3_snat_0 = OmicronZoneExternalIpKind::Snat( + SourceNatConfig::new("10.0.0.3".parse().unwrap(), 0, 16383) + .unwrap(), + ); let err = context .db_datastore - .external_ip_allocate_service_explicit_snat( + .external_ip_allocate_omicron_zone( &context.opctx, - id, service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 3)), - (0, 16383), + ZoneKind::BoundaryNtp, + OmicronZoneExternalIp { id, kind: ip_10_0_0_3_snat_0 }, ) .await .expect_err("Should have failed to re-allocate different IP address (different port range)"); @@ -1588,16 +1446,22 @@ mod tests { ); // This time start with an explicit SNat - let snat_service_id = Uuid::new_v4(); - let snat_id = Uuid::new_v4(); + let ip_10_0_0_1_snat_32768 = OmicronZoneExternalIpKind::Snat( + SourceNatConfig::new("10.0.0.1".parse().unwrap(), 32768, 49151) + .unwrap(), + ); + let snat_service_id = OmicronZoneUuid::new_v4(); + let snat_id = ExternalIpUuid::new_v4(); let snat_ip = context .db_datastore - .external_ip_allocate_service_explicit_snat( + .external_ip_allocate_omicron_zone( &context.opctx, - snat_id, snat_service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)), - (32768, 49151), + ZoneKind::BoundaryNtp, + OmicronZoneExternalIp { + id: snat_id, + kind: ip_10_0_0_1_snat_32768, + }, ) .await .expect("Failed to allocate service IP address"); @@ -1606,17 +1470,22 @@ mod tests { assert_eq!(snat_ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); assert_eq!(snat_ip.first_port.0, 32768); assert_eq!(snat_ip.last_port.0, 49151); - assert_eq!(snat_ip.parent_id, Some(snat_service_id)); + assert_eq!( + snat_ip.parent_id, + Some(snat_service_id.into_untyped_uuid()) + ); // Try allocating the same service IP again. let snat_ip_again = context .db_datastore - .external_ip_allocate_service_explicit_snat( + .external_ip_allocate_omicron_zone( &context.opctx, - snat_id, snat_service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)), - (32768, 49151), + ZoneKind::BoundaryNtp, + OmicronZoneExternalIp { + id: snat_id, + kind: ip_10_0_0_1_snat_32768, + }, ) .await .expect("Failed to allocate service IP address"); @@ -1628,14 +1497,20 @@ mod tests { // Try allocating the same service IP once more, but do it with a // different port range. 
+ let ip_10_0_0_1_snat_49152 = OmicronZoneExternalIpKind::Snat( + SourceNatConfig::new("10.0.0.1".parse().unwrap(), 49152, 65535) + .unwrap(), + ); let err = context .db_datastore - .external_ip_allocate_service_explicit_snat( + .external_ip_allocate_omicron_zone( &context.opctx, - snat_id, snat_service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1)), - (49152, 65535), + ZoneKind::BoundaryNtp, + OmicronZoneExternalIp { + id: snat_id, + kind: ip_10_0_0_1_snat_49152, + }, ) .await .expect_err("Should have failed to re-allocate different IP address (different port range)"); @@ -1648,9 +1523,9 @@ mod tests { } #[tokio::test] - async fn test_explicit_external_ip_for_service_out_of_range() { + async fn test_external_ip_allocate_omicron_zone_out_of_range() { let context = TestContext::new( - "test_explicit_external_ip_for_service_out_of_range", + "test_external_ip_allocate_omicron_zone_out_of_range", ) .await; @@ -1661,17 +1536,19 @@ mod tests { .unwrap(); context.initialize_ip_pool(SERVICE_IP_POOL_NAME, ip_range).await; - let service_id = Uuid::new_v4(); - let id = Uuid::new_v4(); + let ip_10_0_0_5 = OmicronZoneExternalIpKind::Floating(IpAddr::V4( + Ipv4Addr::new(10, 0, 0, 5), + )); + + let service_id = OmicronZoneUuid::new_v4(); + let id = ExternalIpUuid::new_v4(); let err = context .db_datastore - .external_ip_allocate_service_explicit( + .external_ip_allocate_omicron_zone( &context.opctx, - id, - &Name("service-ip".parse().unwrap()), - "service-ip", service_id, - IpAddr::V4(Ipv4Addr::new(10, 0, 0, 5)), + ZoneKind::Nexus, + OmicronZoneExternalIp { id, kind: ip_10_0_0_5 }, ) .await .expect_err("Should have failed to allocate out-of-bounds IP"); @@ -1683,116 +1560,6 @@ mod tests { context.success().await; } - #[tokio::test] - async fn test_insert_external_ip_for_service_is_idempotent() { - let context = TestContext::new( - "test_insert_external_ip_for_service_is_idempotent", - ) - .await; - - let ip_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 2), - )) - .unwrap(); - context.initialize_ip_pool(SERVICE_IP_POOL_NAME, ip_range).await; - - // Allocate an IP address as we would for an external, rack-associated - // service. - let service_id = Uuid::new_v4(); - let id = Uuid::new_v4(); - let ip = context - .db_datastore - .external_ip_allocate_service( - &context.opctx, - id, - &Name("service-ip".parse().unwrap()), - "service-ip", - service_id, - ) - .await - .expect("Failed to allocate service IP address"); - assert!(ip.is_service); - assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); - assert_eq!(ip.first_port.0, 0); - assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.parent_id, Some(service_id)); - - let ip_again = context - .db_datastore - .external_ip_allocate_service( - &context.opctx, - id, - &Name("service-ip".parse().unwrap()), - "service-ip", - service_id, - ) - .await - .expect("Failed to allocate service IP address"); - - assert_eq!(ip.id, ip_again.id); - assert_eq!(ip.ip.ip(), ip_again.ip.ip()); - - context.success().await; - } - - // This test is identical to "test_insert_external_ip_is_idempotent", - // but tries to make an idempotent allocation after all addresses in the - // pool have been allocated. 
- #[tokio::test] - async fn test_insert_external_ip_for_service_is_idempotent_even_when_full() - { - let context = TestContext::new( - "test_insert_external_ip_is_idempotent_even_when_full", - ) - .await; - - let ip_range = IpRange::try_from(( - Ipv4Addr::new(10, 0, 0, 1), - Ipv4Addr::new(10, 0, 0, 1), - )) - .unwrap(); - context.initialize_ip_pool(SERVICE_IP_POOL_NAME, ip_range).await; - - // Allocate an IP address as we would for an external, rack-associated - // service. - let service_id = Uuid::new_v4(); - let id = Uuid::new_v4(); - let ip = context - .db_datastore - .external_ip_allocate_service( - &context.opctx, - id, - &Name("service-ip".parse().unwrap()), - "service-ip", - service_id, - ) - .await - .expect("Failed to allocate service IP address"); - assert!(ip.is_service); - assert_eq!(ip.ip.ip(), IpAddr::V4(Ipv4Addr::new(10, 0, 0, 1))); - assert_eq!(ip.first_port.0, 0); - assert_eq!(ip.last_port.0, u16::MAX); - assert_eq!(ip.parent_id, Some(service_id)); - - let ip_again = context - .db_datastore - .external_ip_allocate_service( - &context.opctx, - id, - &Name("service-ip".parse().unwrap()), - "service-ip", - service_id, - ) - .await - .expect("Failed to allocate service IP address"); - - assert_eq!(ip.id, ip_again.id); - assert_eq!(ip.ip.ip(), ip_again.ip.ip()); - - context.success().await; - } - #[tokio::test] async fn test_insert_external_ip_is_idempotent() { let context = diff --git a/nexus/reconfigurator/execution/src/resource_allocation.rs b/nexus/reconfigurator/execution/src/resource_allocation.rs index 86eeb8af13..42a3a4f5de 100644 --- a/nexus/reconfigurator/execution/src/resource_allocation.rs +++ b/nexus/reconfigurator/execution/src/resource_allocation.rs @@ -7,8 +7,6 @@ use anyhow::bail; use anyhow::Context; use nexus_db_model::IncompleteNetworkInterface; -use nexus_db_model::Name; -use nexus_db_model::SqlU16; use nexus_db_model::VpcSubnet; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::fixed_data::vpc_subnet::DNS_VPC_SUBNET; @@ -18,17 +16,21 @@ use nexus_db_queries::db::DataStore; use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneType; +use nexus_types::deployment::OmicronZoneExternalIp; +use nexus_types::deployment::OmicronZoneExternalIpKind; use nexus_types::deployment::SourceNatConfig; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; +use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; +use sled_agent_client::ZoneKind; +use slog::error; use slog::info; use slog::warn; use std::net::IpAddr; use std::net::SocketAddr; -use uuid::Uuid; pub(crate) async fn ensure_zone_resources_allocated( opctx: &OpContext, @@ -68,7 +70,7 @@ pub(crate) async fn ensure_zone_resources_allocated( ) => { allocator .ensure_boundary_ntp_external_networking_allocated( - z.id, snat_cfg, nic, + z.id, *snat_cfg, nic, ) .await?; } @@ -96,16 +98,15 @@ impl<'a> ResourceAllocator<'a> { // already allocated to a specific service zone. async fn is_external_ip_already_allocated( &self, - zone_type: &'static str, + zone_kind: ZoneKind, zone_id: OmicronZoneUuid, - external_ip: IpAddr, - port_range: Option<(u16, u16)>, + ip_kind: OmicronZoneExternalIpKind, ) -> anyhow::Result { // localhost is used by many components in the test suite. 
We can't use // the normal path because normally a given external IP must only be // used once. Just treat localhost in the test suite as though it's // already allocated. We do the same in is_nic_already_allocated(). - if cfg!(test) && external_ip.is_loopback() { + if cfg!(test) && ip_kind.ip().is_loopback() { return Ok(true); } @@ -115,54 +116,77 @@ impl<'a> ResourceAllocator<'a> { .await .with_context(|| { format!( - "failed to look up external IPs for {zone_type} {zone_id}" + "failed to look up external IPs for {zone_kind} {zone_id}" ) })?; - if !allocated_ips.is_empty() { - // All the service zones that want external IP addresses only expect - // to have a single IP. This service already has (at least) one: - // make sure this list includes the one we want, or return an error. - for allocated_ip in &allocated_ips { - if allocated_ip.ip.ip() == external_ip - && port_range - .map(|(first, last)| { - allocated_ip.first_port == SqlU16(first) - && allocated_ip.last_port == SqlU16(last) - }) - .unwrap_or(true) - { - info!( - self.opctx.log, "found already-allocated external IP"; - "zone_type" => zone_type, - "zone_id" => %zone_id, - "ip" => %external_ip, - ); - return Ok(true); - } + // We expect to find either 0 or exactly 1 IP for any given zone. If 0, + // we know the IP isn't allocated; if 1, we'll check that it matches + // below. + let existing_ip = match allocated_ips.as_slice() { + [] => { + info!( + self.opctx.log, "external IP allocation required for zone"; + "zone_kind" => %zone_kind, + "zone_id" => %zone_id, + "ip" => ?ip_kind, + ); + + return Ok(false); + } + [ip] => ip, + _ => { + warn!( + self.opctx.log, "zone has multiple IPs allocated"; + "zone_kind" => %zone_kind, + "zone_id" => %zone_id, + "want_ip" => ?ip_kind, + "allocated_ips" => ?allocated_ips, + ); + bail!( + "zone {zone_id} already has {} IPs allocated (expected 1)", + allocated_ips.len() + ); } + }; - warn!( - self.opctx.log, "zone has unexpected IPs allocated"; - "zone_type" => zone_type, + // We expect this to always succeed; a failure here means we've stored + // an Omicron zone IP in the database that can't be converted back to an + // Omicron zone IP! + let existing_ip = match OmicronZoneExternalIp::try_from(existing_ip) { + Ok(existing_ip) => existing_ip, + Err(err) => { + error!( + self.opctx.log, "invalid IP in database for zone"; + "zone_kind" => %zone_kind, + "zone_id" => %zone_id, + "ip" => ?existing_ip, + &err, + ); + bail!("zone {zone_id} has invalid IP database record: {err}"); + } + }; + + // TODO-cleanup The blueprint should store the IP ID, at which point we + // could check full equality here instead of only checking the kind. 
+ if existing_ip.kind == ip_kind { + info!( + self.opctx.log, "found already-allocated external IP"; + "zone_kind" => %zone_kind, "zone_id" => %zone_id, - "want_ip" => %external_ip, - "allocated_ips" => ?allocated_ips, - ); - bail!( - "zone {zone_id} already has {} non-matching IP(s) allocated", - allocated_ips.len() + "ip" => ?ip_kind, ); + return Ok(true); } - info!( - self.opctx.log, "external IP allocation required for zone"; - "zone_type" => zone_type, + warn!( + self.opctx.log, "zone has unexpected IP allocated"; + "zone_kind" => %zone_kind, "zone_id" => %zone_id, - "ip" => %external_ip, + "want_ip" => ?ip_kind, + "allocated_ip" => ?existing_ip, ); - - Ok(false) + bail!("zone {zone_id} has a different IP allocated ({existing_ip:?})",); } // Helper function to determine whether a given NIC is already allocated to @@ -237,14 +261,11 @@ impl<'a> ResourceAllocator<'a> { Ok(false) } - // Nexus and ExternalDns both use non-SNAT service IPs; this method is used - // to allocate external networking for both of them. async fn ensure_external_service_ip( &self, - zone_type: &'static str, - service_id: OmicronZoneUuid, - external_ip: IpAddr, - ip_name: &Name, + zone_kind: ZoneKind, + zone_id: OmicronZoneUuid, + ip_kind: OmicronZoneExternalIpKind, ) -> anyhow::Result<()> { // Only attempt to allocate `external_ip` if it isn't already assigned // to this zone. @@ -259,94 +280,32 @@ impl<'a> ResourceAllocator<'a> { // exactly what we want if two Nexuses try to realize the same // blueprint at the same time. if self - .is_external_ip_already_allocated( - zone_type, - service_id, - external_ip, - None, - ) + .is_external_ip_already_allocated(zone_kind, zone_id, ip_kind) .await? { return Ok(()); } - let ip_id = Uuid::new_v4(); - let description = zone_type; + let ip_id = ExternalIpUuid::new_v4(); self.datastore - .external_ip_allocate_service_explicit( + .external_ip_allocate_omicron_zone( self.opctx, - ip_id, - ip_name, - description, - service_id.into_untyped_uuid(), - external_ip, + zone_id, + zone_kind, + OmicronZoneExternalIp { id: ip_id, kind: ip_kind }, ) .await .with_context(|| { format!( - "failed to allocate IP to {zone_type} {service_id}: \ - {external_ip}" + "failed to allocate IP to {zone_kind} {zone_id}: \ + {ip_kind:?}" ) })?; info!( self.opctx.log, "successfully allocated external IP"; - "zone_type" => zone_type, - "zone_id" => %service_id, - "ip" => %external_ip, - "ip_id" => %ip_id, - ); - - Ok(()) - } - - // BoundaryNtp uses a SNAT service IPs; this method is similar to - // `ensure_external_service_ip` but accounts for that. - async fn ensure_external_service_snat_ip( - &self, - zone_type: &'static str, - service_id: OmicronZoneUuid, - snat: &SourceNatConfig, - ) -> anyhow::Result<()> { - // Only attempt to allocate `external_ip` if it isn't already assigned - // to this zone. - // - // This is subject to the same kind of TOCTOU race as described for IP - // allocation in `ensure_external_service_ip`, and we believe it's okay - // for the same reasons as described there. - if self - .is_external_ip_already_allocated( - zone_type, - service_id, - snat.ip, - Some((snat.first_port, snat.last_port)), - ) - .await? 
- { - return Ok(()); - } - - let ip_id = Uuid::new_v4(); - self.datastore - .external_ip_allocate_service_explicit_snat( - self.opctx, - ip_id, - service_id.into_untyped_uuid(), - snat.ip, - (snat.first_port, snat.last_port), - ) - .await - .with_context(|| { - format!( - "failed to allocate snat IP to {zone_type} {service_id}: \ - {snat:?}" - ) - })?; - - info!( - self.opctx.log, "successfully allocated external SNAT IP"; - "zone_type" => zone_type, - "zone_id" => %service_id, - "snat" => ?snat, + "zone_kind" => %zone_kind, + "zone_id" => %zone_id, + "ip" => ?ip_kind, "ip_id" => %ip_id, ); @@ -461,10 +420,9 @@ impl<'a> ResourceAllocator<'a> { nic: &NetworkInterface, ) -> anyhow::Result<()> { self.ensure_external_service_ip( - "nexus", + ZoneKind::Nexus, zone_id, - external_ip, - &Name(nic.name.clone()), + OmicronZoneExternalIpKind::Floating(external_ip), ) .await?; self.ensure_service_nic("nexus", zone_id, nic, &NEXUS_VPC_SUBNET) @@ -479,10 +437,9 @@ impl<'a> ResourceAllocator<'a> { nic: &NetworkInterface, ) -> anyhow::Result<()> { self.ensure_external_service_ip( - "external_dns", + ZoneKind::ExternalDns, zone_id, - dns_address.ip(), - &Name(nic.name.clone()), + OmicronZoneExternalIpKind::Floating(dns_address.ip()), ) .await?; self.ensure_service_nic("external_dns", zone_id, nic, &DNS_VPC_SUBNET) @@ -493,10 +450,15 @@ impl<'a> ResourceAllocator<'a> { async fn ensure_boundary_ntp_external_networking_allocated( &self, zone_id: OmicronZoneUuid, - snat: &SourceNatConfig, + snat: SourceNatConfig, nic: &NetworkInterface, ) -> anyhow::Result<()> { - self.ensure_external_service_snat_ip("ntp", zone_id, snat).await?; + self.ensure_external_service_ip( + ZoneKind::BoundaryNtp, + zone_id, + OmicronZoneExternalIpKind::Snat(snat), + ) + .await?; self.ensure_service_nic("ntp", zone_id, nic, &NTP_VPC_SUBNET).await?; Ok(()) } @@ -506,6 +468,7 @@ impl<'a> ResourceAllocator<'a> { mod tests { use super::*; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; + use nexus_db_model::SqlU16; use nexus_test_utils_macros::nexus_test; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; @@ -521,6 +484,7 @@ mod tests { use omicron_common::api::external::Vni; use std::net::IpAddr; use std::net::Ipv6Addr; + use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; @@ -604,11 +568,12 @@ mod tests { // Boundary NTP: let ntp_id = OmicronZoneUuid::new_v4(); - let ntp_snat = SourceNatConfig { - ip: external_ips.next().expect("exhausted external_ips"), - first_port: NUM_SOURCE_NAT_PORTS, - last_port: 2 * NUM_SOURCE_NAT_PORTS - 1, - }; + let ntp_snat = SourceNatConfig::new( + external_ips.next().expect("exhausted external_ips"), + NUM_SOURCE_NAT_PORTS, + 2 * NUM_SOURCE_NAT_PORTS - 1, + ) + .unwrap(); let ntp_nic = NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { @@ -719,8 +684,10 @@ mod tests { assert!(db_ntp_ips[0].is_service); assert_eq!(db_ntp_ips[0].parent_id, Some(ntp_id.into_untyped_uuid())); assert_eq!(db_ntp_ips[0].ip, ntp_snat.ip.into()); - assert_eq!(db_ntp_ips[0].first_port, SqlU16(ntp_snat.first_port)); - assert_eq!(db_ntp_ips[0].last_port, SqlU16(ntp_snat.last_port)); + assert_eq!( + db_ntp_ips[0].first_port.0..=db_ntp_ips[0].last_port.0, + ntp_snat.port_range() + ); // Check that the NIC records were created. 
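A short aside on the SourceNatConfig surface these tests now rely on, since both accessors appear in the assertions above. This is a hedged sketch of the apparent contract: new() validates the pair (the rack-setup code later in this series treats SourceNatConfigError::UnalignedPortPair as unreachable for aligned blocks), port_range_raw() returns the raw (first, last) tuple, and port_range() appears to return a RangeInclusive<u16>, which is what the assertion above compares against.

use nexus_types::inventory::SourceNatConfig;
use omicron_common::address::NUM_SOURCE_NAT_PORTS;

// Aligned block starting at 16384, as used for the boundary NTP zone above.
let snat = SourceNatConfig::new(
    "10.0.0.1".parse().unwrap(),
    NUM_SOURCE_NAT_PORTS,
    2 * NUM_SOURCE_NAT_PORTS - 1,
)
.unwrap();
let (first, last) = snat.port_range_raw();
assert_eq!((first, last), (16384, 32767));
assert_eq!(snat.port_range(), 16384..=32767);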
let db_nexus_nics = datastore @@ -842,7 +809,7 @@ mod tests { { *external_ip = bogus_ip; return format!( - "zone {} already has 1 non-matching IP", + "zone {} has a different IP allocated", zone.id ); } @@ -862,7 +829,7 @@ mod tests { { *dns_address = SocketAddr::new(bogus_ip, 0); return format!( - "zone {} already has 1 non-matching IP", + "zone {} has a different IP allocated", zone.id ); } @@ -879,10 +846,14 @@ mod tests { }, ) = &mut zone.zone_type { - snat_cfg.first_port += NUM_SOURCE_NAT_PORTS; - snat_cfg.last_port += NUM_SOURCE_NAT_PORTS; + let (mut first, mut last) = snat_cfg.port_range_raw(); + first += NUM_SOURCE_NAT_PORTS; + last += NUM_SOURCE_NAT_PORTS; + *snat_cfg = + SourceNatConfig::new(snat_cfg.ip, first, last) + .unwrap(); return format!( - "zone {} already has 1 non-matching IP", + "zone {} has a different IP allocated", zone.id ); } diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index c269d4ccd2..2c96e1e5a8 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -10,6 +10,7 @@ use crate::system::SystemDescription; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::OmicronZoneExternalIp; +use nexus_types::deployment::OmicronZoneExternalIpKind; use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; @@ -132,7 +133,9 @@ impl ExampleSystem { service_id, OmicronZoneExternalIp { id: ExternalIpUuid::new_v4(), - ip, + // TODO-cleanup This is potentially wrong; + // zone_type should tell us the IP kind. + kind: OmicronZoneExternalIpKind::Floating(ip), }, ) .expect("failed to add Omicron zone external IP"); @@ -144,7 +147,7 @@ impl ExampleSystem { OmicronZoneNic { id: nic.id, mac: nic.mac, - ip: nic.ip.into(), + ip: nic.ip, slot: nic.slot, primary: nic.primary, }, diff --git a/nexus/reconfigurator/preparation/src/lib.rs b/nexus/reconfigurator/preparation/src/lib.rs index 8f590d95f4..30370edb16 100644 --- a/nexus/reconfigurator/preparation/src/lib.rs +++ b/nexus/reconfigurator/preparation/src/lib.rs @@ -7,6 +7,7 @@ use anyhow::Context; use futures::StreamExt; use nexus_db_model::DnsGroup; +use nexus_db_model::IpKind; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::datastore::DataStoreDnsTest; use nexus_db_queries::db::datastore::DataStoreInventoryTest; @@ -16,7 +17,10 @@ use nexus_db_queries::db::pagination::Paginator; use nexus_db_queries::db::DataStore; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; +use nexus_types::deployment::OmicronZoneExternalIpKind; +use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::PlanningInputBuildError; use nexus_types::deployment::PlanningInputBuilder; use nexus_types::deployment::Policy; use nexus_types::deployment::SledDetails; @@ -26,6 +30,7 @@ use nexus_types::deployment::UnstableReconfiguratorState; use nexus_types::identity::Asset; use nexus_types::identity::Resource; use nexus_types::inventory::Collection; +use nexus_types::inventory::SourceNatConfig; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::NEXUS_REDUNDANCY; @@ -128,13 +133,34 @@ impl PlanningInputFromDb<'_> { ); continue; }; + let zone_id = OmicronZoneUuid::from_untyped_uuid(zone_id); + + let to_kind = |ip| match external_ip_row.kind { + 
IpKind::Floating => Ok(OmicronZoneExternalIpKind::Floating(ip)), + IpKind::SNat => { + let snat = SourceNatConfig::new( + ip, + *external_ip_row.first_port, + *external_ip_row.last_port, + ) + .map_err(|err| { + PlanningInputBuildError::BadSnatConfig { zone_id, err } + })?; + Ok(OmicronZoneExternalIpKind::Snat(snat)) + } + IpKind::Ephemeral => Err( + PlanningInputBuildError::EphemeralIpUnsupported(zone_id), + ), + }; + builder .add_omicron_zone_external_ip_network( zone_id, // TODO-cleanup use `TypedUuid` everywhere ExternalIpUuid::from_untyped_uuid(external_ip_row.id), external_ip_row.ip, + to_kind, ) .map_err(|e| { Error::internal_error(&format!( @@ -144,6 +170,22 @@ impl PlanningInputFromDb<'_> { })?; } + for nic_row in self.service_nic_rows { + let zone_id = + OmicronZoneUuid::from_untyped_uuid(nic_row.service_id); + let nic = OmicronZoneNic::try_from(nic_row).map_err(|e| { + Error::internal_error(&format!( + "invalid Omicron zone NIC read from database: {e}" + )) + })?; + builder.add_omicron_zone_nic(zone_id, nic).map_err(|e| { + Error::internal_error(&format!( + "unexpectedly failed to add Omicron zone NIC \ + to planning input: {e}" + )) + })?; + } + Ok(builder.build()) } } diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/sync_service_zone_nat.rs index 59be7db5f2..d1bb9955d7 100644 --- a/nexus/src/app/background/sync_service_zone_nat.rs +++ b/nexus/src/app/background/sync_service_zone_nat.rs @@ -155,14 +155,16 @@ impl BackgroundTask for ServiceZoneNatTracker { ipnetwork::Ipv4Network::new(external_ip, 32) .unwrap(); + let (snat_first_port, snat_last_port) = + snat_cfg.port_range_raw(); let nat_value = Ipv4NatValues { external_address: nexus_db_model::Ipv4Net( omicron_common::api::external::Ipv4Net( external_address, ), ), - first_port: snat_cfg.first_port.into(), - last_port: snat_cfg.last_port.into(), + first_port: snat_first_port.into(), + last_port: snat_last_port.into(), sled_address: sled_address.into(), vni: nexus_db_model::Vni(nic.vni), mac: nexus_db_model::MacAddr(nic.mac), diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 4008d33736..5d9c05331a 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -1159,7 +1159,12 @@ impl super::Nexus { )); } let source_nat = - SourceNatConfig::from(snat_ip.into_iter().next().unwrap()); + SourceNatConfig::try_from(snat_ip.into_iter().next().unwrap()) + .map_err(|err| { + Error::internal_error(&format!( + "read invalid SNAT config from db: {err}" + )) + })?; // Gather the firewall rules for the VPC this instance is in. 
// The NIC info we gathered above doesn't have VPC information diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index cb7cc29ffc..ba8477c125 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -45,6 +45,7 @@ mod zone_type; pub use planning_input::DiskFilter; pub use planning_input::OmicronZoneExternalIp; +pub use planning_input::OmicronZoneExternalIpKind; pub use planning_input::OmicronZoneNic; pub use planning_input::PlanningInput; pub use planning_input::PlanningInputBuildError; diff --git a/nexus/types/src/deployment/planning_input.rs b/nexus/types/src/deployment/planning_input.rs index 9c0714ffab..2503ff81f3 100644 --- a/nexus/types/src/deployment/planning_input.rs +++ b/nexus/types/src/deployment/planning_input.rs @@ -17,6 +17,8 @@ use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::MacAddr; +use omicron_common::api::internal::shared::SourceNatConfig; +use omicron_common::api::internal::shared::SourceNatConfigError; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::OmicronZoneUuid; @@ -97,15 +99,33 @@ impl SledResources { } } +/// External IP variants possible for Omicron-managed zones. +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] +pub enum OmicronZoneExternalIpKind { + Floating(IpAddr), + Snat(SourceNatConfig), + // We should probably have `Ephemeral(IpAddr)` too (for Nexus), but + // currently we record Nexus as Floating. +} + +impl OmicronZoneExternalIpKind { + pub fn ip(&self) -> IpAddr { + match self { + OmicronZoneExternalIpKind::Floating(ip) => *ip, + OmicronZoneExternalIpKind::Snat(snat) => snat.ip, + } + } +} + /// External IP allocated to an Omicron-managed zone. /// /// This is a slimmer `nexus_db_model::ExternalIp` that only stores the fields /// necessary for blueprint planning, and requires that the zone have a single /// IP. -#[derive(Debug, Clone, Serialize, Deserialize)] +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] pub struct OmicronZoneExternalIp { pub id: ExternalIpUuid, - pub ip: IpAddr, + pub kind: OmicronZoneExternalIpKind, } /// Network interface allocated to an Omicron-managed zone. 
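To make the new planning-input shape concrete, a small sketch (illustrative addresses only) of constructing both kinds and recovering the bare address with the ip() helper defined above:

use std::net::IpAddr;

use nexus_types::deployment::{OmicronZoneExternalIp, OmicronZoneExternalIpKind};
use omicron_common::api::internal::shared::SourceNatConfig;
use omicron_uuid_kinds::ExternalIpUuid;

let floating =
    OmicronZoneExternalIpKind::Floating("192.0.2.10".parse().unwrap());
let snat = OmicronZoneExternalIpKind::Snat(
    SourceNatConfig::new("192.0.2.11".parse().unwrap(), 0, 16383).unwrap(),
);
assert_eq!(floating.ip(), "192.0.2.10".parse::<IpAddr>().unwrap());
assert_eq!(snat.ip(), "192.0.2.11".parse::<IpAddr>().unwrap());

// The blueprint-planning record pairs a kind with the external IP's ID.
let _zone_ip =
    OmicronZoneExternalIp { id: ExternalIpUuid::new_v4(), kind: snat };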
@@ -116,7 +136,7 @@ pub struct OmicronZoneExternalIp { pub struct OmicronZoneNic { pub id: Uuid, pub mac: MacAddr, - pub ip: IpNetwork, + pub ip: IpAddr, pub slot: u8, pub primary: bool, } @@ -404,13 +424,21 @@ impl PlanningInput { pub enum PlanningInputBuildError { #[error("duplicate sled ID: {0}")] DuplicateSledId(SledUuid), - #[error("Omicron zone {zone_id} has a range of IPs ({ip:?}), only a single IP is supported")] + #[error("Omicron zone {zone_id} has a range of IPs ({ip}); only a single IP is supported")] NotSingleIp { zone_id: OmicronZoneUuid, ip: IpNetwork }, #[error("Omicron zone {zone_id} already has an external IP ({ip:?})")] DuplicateOmicronZoneExternalIp { zone_id: OmicronZoneUuid, ip: OmicronZoneExternalIp, }, + #[error("Omicron zone {0} has an ephemeral IP (unsupported)")] + EphemeralIpUnsupported(OmicronZoneUuid), + #[error("Omicron zone {zone_id} has a bad SNAT config")] + BadSnatConfig { + zone_id: OmicronZoneUuid, + #[source] + err: SourceNatConfigError, + }, #[error("Omicron zone {zone_id} already has a NIC ({nic:?})")] DuplicateOmicronZoneNic { zone_id: OmicronZoneUuid, nic: OmicronZoneNic }, } @@ -474,12 +502,19 @@ impl PlanningInputBuilder { /// Like `add_omicron_zone_external_ip`, but can accept an [`IpNetwork`], /// validating that the IP is a single address. - pub fn add_omicron_zone_external_ip_network( + pub fn add_omicron_zone_external_ip_network( &mut self, zone_id: OmicronZoneUuid, ip_id: ExternalIpUuid, ip: IpNetwork, - ) -> Result<(), PlanningInputBuildError> { + to_kind: F, + ) -> Result<(), PlanningInputBuildError> + where + F: FnOnce( + IpAddr, + ) + -> Result, + { let size = match ip.size() { NetworkSize::V4(n) => u128::from(n), NetworkSize::V6(n) => n, @@ -487,10 +522,11 @@ impl PlanningInputBuilder { if size != 1 { return Err(PlanningInputBuildError::NotSingleIp { zone_id, ip }); } + let kind = to_kind(ip.ip())?; self.add_omicron_zone_external_ip( zone_id, - OmicronZoneExternalIp { id: ip_id, ip: ip.ip() }, + OmicronZoneExternalIp { id: ip_id, kind }, ) } @@ -507,7 +543,7 @@ impl PlanningInputBuilder { Entry::Occupied(prev) => { Err(PlanningInputBuildError::DuplicateOmicronZoneExternalIp { zone_id, - ip: prev.get().clone(), + ip: *prev.get(), }) } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index d016715591..94ad8522c7 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -1753,11 +1753,12 @@ mod tests { hostname: Hostname::from_str("bert").unwrap(), }, nics: vec![], - source_nat: SourceNatConfig { - ip: IpAddr::V6(Ipv6Addr::UNSPECIFIED), - first_port: 0, - last_port: 0, - }, + source_nat: SourceNatConfig::new( + IpAddr::V6(Ipv6Addr::UNSPECIFIED), + 0, + 16383, + ) + .unwrap(), ephemeral_ip: None, floating_ips: vec![], firewall_rules: vec![], diff --git a/sled-agent/src/rack_setup/plan/service.rs b/sled-agent/src/rack_setup/plan/service.rs index d868448bed..6e3ce4a6ac 100644 --- a/sled-agent/src/rack_setup/plan/service.rs +++ b/sled-agent/src/rack_setup/plan/service.rs @@ -24,6 +24,7 @@ use omicron_common::address::{ use omicron_common::api::external::{Generation, MacAddr, Vni}; use omicron_common::api::internal::shared::{ NetworkInterface, NetworkInterfaceKind, SourceNatConfig, + SourceNatConfigError, }; use omicron_common::backoff::{ retry_notify_ext, retry_policy_internal_service_aggressive, BackoffError, @@ -1118,7 +1119,14 @@ impl ServicePortBuilder { self.next_snat_ip = None; } - let snat_cfg = SourceNatConfig { ip: snat_ip, first_port, last_port }; + let snat_cfg = + match 
SourceNatConfig::new(snat_ip, first_port, last_port) { + Ok(cfg) => cfg, + // We know our port pair is aligned, making this unreachable. + Err(err @ SourceNatConfigError::UnalignedPortPair { .. }) => { + unreachable!("{err}"); + } + }; let (ip, subnet) = match snat_ip { IpAddr::V4(_) => ( diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 3584e8f139..1ddb3f9b0a 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -1274,7 +1274,10 @@ impl ServiceManager { // XXX: need to revisit iff. any services get more than one // address. let (target_ip, first_port, last_port) = match snat { - Some(s) => (s.ip, s.first_port, s.last_port), + Some(s) => { + let (first_port, last_port) = s.port_range_raw(); + (s.ip, first_port, last_port) + } None => (floating_ips[0], 0, u16::MAX), }; From 77fffe5e3492c471f9fd176d54e9cdce51c6e0ff Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 19 Apr 2024 10:18:13 -0400 Subject: [PATCH 170/334] Automatic bump of permslip manifest to sidecar-v1.0.15 (#5563) (not quite) Automated bump still working out some issues with newlines in generating the PR --- tools/permslip_staging | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/permslip_staging b/tools/permslip_staging index 7b4e5f161a..ae5c7890ed 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,4 +1,4 @@ b1b0d63a179652fcc80fabbb49307c0fe28cf52744f58f7b8a768f14d6721a3f manifest-gimlet-v1.0.15.toml 686f5fff41ed3b33ba0be38d2becdeb67847705fd590f05f6d8f7c600db87fb7 manifest-oxide-rot-1-v1.0.9.toml 7d26b9f719a7f2c22e091d7d80de66933c11bdb9ae174ae59552b376400d63db manifest-psc-v1.0.14.toml -cd8c1bb64990573b9d29dcc2312d9c8cb4b08bc59873196ac50ce2b506037594 manifest-sidecar-v1.0.14.toml +267c8953c26f91614a59015719162f6f8f55d31d795a458387191dd1d874f9f0 manifest-sidecar-v1.0.15.toml From b826b24a6ba1db99cffcef8a5a2d723e91d2009c Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 19 Apr 2024 13:04:14 -0700 Subject: [PATCH 171/334] [reconfigurator-planning] track more state while building zones (#5555) In #5493 we'd like to track invariants like: a zone should not be added and expunged in the same blueprint. In order to do that, we need to track this state. (There are probably other ways to do it, but this is the most explicit method and I really like that.) This lives in a submodule because I don't want the rest of the blueprint builder to reach into the internals here. I split this from #5493 because it became somewhat complex in its own right, with its own tests. --- .../planning/src/blueprint_builder.rs | 387 +++++++++++++++--- 1 file changed, 336 insertions(+), 51 deletions(-) diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index f024652332..abac687020 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -519,7 +519,7 @@ impl<'a> BlueprintBuilder<'a> { let has_ntp = self .zones .current_sled_zones(sled_id) - .any(|z| z.zone_type.is_ntp()); + .any(|(z, _)| z.zone_type.is_ntp()); if has_ntp { return Ok(Ensure::NotNeeded); } @@ -583,7 +583,7 @@ impl<'a> BlueprintBuilder<'a> { // If this sled already has a Crucible zone on this pool, do nothing. 
let has_crucible_on_this_pool = - self.zones.current_sled_zones(sled_id).any(|z| { + self.zones.current_sled_zones(sled_id).any(|(z, _)| { matches!( &z.zone_type, BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { @@ -634,7 +634,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_num_nexus_zones(&self, sled_id: SledUuid) -> usize { self.zones .current_sled_zones(sled_id) - .filter(|z| z.zone_type.is_nexus()) + .filter(|(z, _)| z.zone_type.is_nexus()) .count() } @@ -770,15 +770,8 @@ impl<'a> BlueprintBuilder<'a> { let _ = self.sled_resources(sled_id)?; let sled_zones = self.zones.change_sled_zones(sled_id); - // A sled should have a small number (< 20) of zones so a linear search - // should be very fast. - if sled_zones.zones.iter().any(|z| z.id == zone.id) { - return Err(Error::Planner(anyhow!( - "attempted to add zone that already exists: {}", - zone.id - ))); - } - sled_zones.zones.push(zone); + sled_zones.add_zone(zone)?; + Ok(()) } @@ -811,7 +804,7 @@ impl<'a> BlueprintBuilder<'a> { // Record each of the sled's zones' underlay addresses as // allocated. - for z in self.zones.current_sled_zones(sled_id) { + for (z, _) in self.zones.current_sled_zones(sled_id) { allocator.reserve(z.underlay_address); } @@ -878,7 +871,7 @@ impl BlueprintBuilderRng { /// that we've changed and a _reference_ to the parent blueprint's zones. This /// struct makes it easy for callers iterate over the right set of zones. struct BlueprintZonesBuilder<'a> { - changed_zones: BTreeMap, + changed_zones: BTreeMap, // Temporarily make a clone of the parent blueprint's zones so we can use // typed UUIDs everywhere. Once we're done migrating, this `Cow` can be // removed. @@ -900,37 +893,33 @@ impl<'a> BlueprintZonesBuilder<'a> { pub fn change_sled_zones( &mut self, sled_id: SledUuid, - ) -> &mut BlueprintZonesConfig { + ) -> &mut BuilderZonesConfig { self.changed_zones.entry(sled_id).or_insert_with(|| { if let Some(old_sled_zones) = self.parent_zones.get(&sled_id) { - BlueprintZonesConfig { - generation: old_sled_zones.generation.next(), - zones: old_sled_zones.zones.clone(), - } + BuilderZonesConfig::from_parent(old_sled_zones) } else { - // The first generation is reserved to mean the one containing - // no zones. See OmicronZonesConfig::INITIAL_GENERATION. So - // we start with the next one. - BlueprintZonesConfig { - generation: Generation::new().next(), - zones: vec![], - } + BuilderZonesConfig::new() } }) } /// Iterates over the list of Omicron zones currently configured for this - /// sled in the blueprint that's being built + /// sled in the blueprint that's being built, along with each zone's state + /// in the builder. pub fn current_sled_zones( &self, sled_id: SledUuid, - ) -> Box + '_> { - if let Some(sled_zones) = self - .changed_zones - .get(&sled_id) - .or_else(|| self.parent_zones.get(&sled_id)) - { - Box::new(sled_zones.zones.iter()) + ) -> Box + '_> + { + if let Some(sled_zones) = self.changed_zones.get(&sled_id) { + Box::new(sled_zones.iter_zones().map(|z| (z.zone(), z.state()))) + } else if let Some(parent_zones) = self.parent_zones.get(&sled_id) { + Box::new( + parent_zones + .zones + .iter() + .map(|z| (z, BuilderZoneState::Unchanged)), + ) } else { Box::new(std::iter::empty()) } @@ -945,29 +934,144 @@ impl<'a> BlueprintZonesBuilder<'a> { .map(|sled_id| { // Start with self.changed_zones, which contains entries for any // sled whose zones config is changing in this blueprint. 
- let mut zones = self - .changed_zones - .remove(&sled_id) - // If it's not there, use the config from the parent - // blueprint. - .or_else(|| self.parent_zones.get(&sled_id).cloned()) - // If it's not there either, then this must be a new sled - // and we haven't added any zones to it yet. Use the + if let Some(zones) = self.changed_zones.remove(&sled_id) { + (sled_id.into_untyped_uuid(), zones.build()) + } + // Next, check self.parent_zones, to represent an unchanged sled. + else if let Some(parent_zones) = + self.parent_zones.get(&sled_id) + { + (sled_id.into_untyped_uuid(), parent_zones.clone()) + } else { + // If the sled is not in self.parent_zones, then it must be a + // new sled and we haven't added any zones to it yet. Use the // standard initial config. - .unwrap_or_else(|| BlueprintZonesConfig { - generation: Generation::new(), - zones: vec![], - }); - - zones.sort(); - - // TODO-cleanup use `TypedUuid` everywhere - (sled_id.into_untyped_uuid(), zones) + ( + sled_id.into_untyped_uuid(), + BlueprintZonesConfig { + generation: Generation::new(), + zones: vec![], + }, + ) + } }) .collect() } } +// This is a sub-module to hide implementation details from the rest of +// blueprint_builder. +mod builder_zones { + use super::*; + + #[derive(Debug)] + #[must_use] + pub(crate) struct BuilderZonesConfig { + // The current generation -- this is bumped at blueprint build time and is + // otherwise not exposed to callers. + generation: Generation, + + // The list of zones, along with their state. + zones: Vec, + } + + impl BuilderZonesConfig { + pub(super) fn new() -> Self { + Self { + // Note that the first generation is reserved to mean the one + // containing no zones. See + // OmicronZonesConfig::INITIAL_GENERATION. + // + // Since we're currently assuming that creating a new + // `BuilderZonesConfig` means that we're going to add new zones + // shortly, we start with Generation::new() here. It'll get + // bumped up to the next one in `Self::build`. + generation: Generation::new(), + zones: vec![], + } + } + + pub(super) fn from_parent(parent: &BlueprintZonesConfig) -> Self { + Self { + // We'll bump this up at build time. + generation: parent.generation, + + zones: parent + .zones + .iter() + .map(|zone| BuilderZoneConfig { + zone: zone.clone(), + state: BuilderZoneState::Unchanged, + }) + .collect(), + } + } + + pub(super) fn add_zone( + &mut self, + zone: BlueprintZoneConfig, + ) -> Result<(), Error> { + if self.zones.iter().any(|z| z.zone.id == zone.id) { + return Err(Error::Planner(anyhow!( + "attempted to add zone that already exists: {}", + zone.id + ))); + }; + + self.zones.push(BuilderZoneConfig { + zone, + state: BuilderZoneState::Added, + }); + Ok(()) + } + + pub(super) fn iter_zones( + &self, + ) -> impl Iterator { + self.zones.iter() + } + + pub(super) fn build(self) -> BlueprintZonesConfig { + let mut ret = BlueprintZonesConfig { + // Something we could do here is to check if any zones have + // actually been modified, and if not, return the parent's + // generation. For now, we depend on callers to only call + // `BlueprintZonesBuilder::change_sled_zones` when they really + // mean it. 
+ generation: self.generation.next(), + zones: self.zones.into_iter().map(|z| z.zone).collect(), + }; + ret.sort(); + ret + } + } + + #[derive(Debug)] + pub(crate) struct BuilderZoneConfig { + zone: BlueprintZoneConfig, + state: BuilderZoneState, + } + + impl BuilderZoneConfig { + pub(super) fn zone(&self) -> &BlueprintZoneConfig { + &self.zone + } + + pub(super) fn state(&self) -> BuilderZoneState { + self.state + } + } + + #[derive(Copy, Clone, Debug, PartialEq, Eq)] + pub(crate) enum BuilderZoneState { + Unchanged, + // Currently unused: Modified + Added, + } +} + +use builder_zones::*; + /// Helper for working with sets of disks on each sled /// /// Tracking the set of disks is slightly non-trivial because we need to bump @@ -1069,8 +1173,13 @@ pub mod test { use crate::system::SledBuilder; use expectorate::assert_contents; use nexus_types::deployment::BlueprintZoneFilter; + use nexus_types::deployment::SledDetails; + use nexus_types::external_api::views::SledPolicy; + use nexus_types::external_api::views::SledState; use omicron_common::address::IpRange; + use omicron_common::address::Ipv6Subnet; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::OmicronZoneUuid; use sled_agent_client::types::OmicronZoneType; use std::collections::BTreeSet; @@ -1285,6 +1394,182 @@ pub mod test { logctx.cleanup_successful(); } + /// A test focusing on `BlueprintZonesBuilder` and its internal logic. + #[test] + fn test_builder_zones() { + static TEST_NAME: &str = "blueprint_test_builder_zones"; + let logctx = test_setup_log(TEST_NAME); + let mut example = + ExampleSystem::new(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + let blueprint_initial = + BlueprintBuilder::build_initial_from_collection_seeded( + &example.collection, + Generation::new(), + Generation::new(), + example.input.all_sled_ids(SledFilter::All), + "the_test", + TEST_NAME, + ) + .expect("creating initial blueprint"); + + // Add a completely bare sled to the input. + let (new_sled_id, input2) = { + let mut input = example.input.clone().into_builder(); + let new_sled_id = example.sled_rng.next(); + input + .add_sled( + new_sled_id, + SledDetails { + policy: SledPolicy::provisionable(), + state: SledState::Active, + resources: SledResources { + subnet: Ipv6Subnet::new( + "fd00:1::".parse().unwrap(), + ), + zpools: BTreeMap::new(), + }, + }, + ) + .expect("adding new sled"); + + (new_sled_id, input.build()) + }; + + let mut builder = BlueprintBuilder::new_based_on( + &logctx.log, + &blueprint_initial, + &input2, + "the_test", + ) + .expect("creating blueprint builder"); + builder.set_rng_seed((TEST_NAME, "bp2")); + + // Test adding a new sled with an NTP zone. + assert_eq!( + builder.sled_ensure_zone_ntp(new_sled_id).unwrap(), + Ensure::Added + ); + + // Iterate over the zones for the sled and ensure that the NTP zone is + // present. + { + let mut zones = builder.zones.current_sled_zones(new_sled_id); + let (_, state) = zones.next().expect("exactly one zone for sled"); + assert!(zones.next().is_none(), "exactly one zone for sled"); + assert_eq!( + state, + BuilderZoneState::Added, + "NTP zone should have been added" + ); + } + + // Now, test adding a new zone (Oximeter, picked arbitrarily) to an + // existing sled. 
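Before the Oximeter portion of this test continues below, one hedged sketch of what the per-zone state buys crate-internal callers: because current_sled_zones now yields (zone, state) pairs, a consumer can, for instance, count only the zones the current builder has added to a sled (builder and sled_id assumed in scope, as in the surrounding test). Note also the comment in build() above: merely calling change_sled_zones bumps that sled's generation even if nothing else changed, which the test below leans on for its control sled.

// Zones newly added to this sled by the builder under construction.
let added_count = builder
    .zones
    .current_sled_zones(sled_id)
    .filter(|(_, state)| *state == BuilderZoneState::Added)
    .count();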
+        let existing_sled_id = example
+            .input
+            .all_sled_ids(SledFilter::All)
+            .next()
+            .expect("at least one sled present");
+        let change = builder.zones.change_sled_zones(existing_sled_id);
+
+        let new_zone_id = OmicronZoneUuid::new_v4();
+        change
+            .add_zone(BlueprintZoneConfig {
+                disposition: BlueprintZoneDisposition::InService,
+                id: new_zone_id,
+                underlay_address: Ipv6Addr::UNSPECIFIED,
+                zone_type: BlueprintZoneType::Oximeter(
+                    blueprint_zone_type::Oximeter {
+                        address: SocketAddrV6::new(
+                            Ipv6Addr::UNSPECIFIED,
+                            0,
+                            0,
+                            0,
+                        ),
+                    },
+                ),
+            })
+            .expect("adding new zone");
+
+        {
+            // Iterate over the zones and ensure that the Oximeter zone is
+            // present, and marked added.
+            let mut zones = builder.zones.current_sled_zones(existing_sled_id);
+            zones
+                .find_map(|(z, state)| {
+                    if z.id == new_zone_id {
+                        assert_eq!(
+                            state,
+                            BuilderZoneState::Added,
+                            "new zone ID {new_zone_id} should be marked added"
+                        );
+                        Some(())
+                    } else {
+                        None
+                    }
+                })
+                .expect("new zone ID should be present");
+        }
+
+        // Also call change_sled_zones without making any changes. This
+        // currently bumps the generation number, but in the future might
+        // become smarter.
+        let control_sled_id = example
+            .input
+            .all_sled_ids(SledFilter::All)
+            .nth(2)
+            .expect("at least 2 sleds present");
+        _ = builder.zones.change_sled_zones(control_sled_id);
+
+        // Now build the blueprint and ensure that all the changes we described
+        // above are present.
+        let blueprint = builder.build();
+        verify_blueprint(&blueprint);
+        let diff = blueprint.diff_since_blueprint(&blueprint_initial).unwrap();
+        println!("expecting new NTP and Oximeter zones:\n{}", diff.display());
+
+        // No sleds were removed.
+        assert_eq!(diff.sleds_removed().len(), 0);
+
+        // One sled was added.
+        let sleds: Vec<_> = diff.sleds_added().collect();
+        assert_eq!(sleds.len(), 1);
+        let (sled_id, new_sled_zones) = sleds[0];
+        assert_eq!(sled_id, new_sled_id);
+        // The generation number should be newer than the initial default.
+        assert_eq!(new_sled_zones.generation, Generation::new().next());
+        assert_eq!(new_sled_zones.zones.len(), 1);
+
+        // Two sleds were modified: existing_sled_id and control_sled_id.
+        let sleds = diff.sleds_modified();
+        assert_eq!(sleds.len(), 2, "2 sleds modified");
+        for (sled_id, sled_modified) in sleds {
+            if sled_id == existing_sled_id {
+                assert_eq!(
+                    sled_modified.generation_after,
+                    sled_modified.generation_before.next()
+                );
+                assert_eq!(sled_modified.zones_added().len(), 1);
+                let added_zone = sled_modified.zones_added().next().unwrap();
+                assert_eq!(added_zone.id, new_zone_id);
+            } else {
+                assert_eq!(sled_id, control_sled_id);
+
+                // The generation number is bumped, but nothing else.
+                assert_eq!(
+                    sled_modified.generation_after,
+                    sled_modified.generation_before.next(),
+                    "control sled has generation number bumped"
+                );
+                assert_eq!(sled_modified.zones_added().len(), 0);
+                assert_eq!(sled_modified.zones_removed().len(), 0);
+                assert_eq!(sled_modified.zones_modified().count(), 0);
+            }
+        }
+
+        logctx.cleanup_successful();
+    }
+
     #[test]
     fn test_add_physical_disks() {
         static TEST_NAME: &str = "blueprint_builder_test_add_physical_disks";

From ae287d44cec22fabcac38c742a27a89075d7249e Mon Sep 17 00:00:00 2001
From: "Andrew J. Stone"
Date: Fri, 19 Apr 2024 16:51:26 -0400
Subject: [PATCH 172/334] Fix swapping of rack_id and nexus id params (#5578)

This lets adding a sled work again. I had broken it with a fix to another
bug in https://github.com/oxidecomputer/omicron/pull/5526.
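As a minimal standalone sketch (not part of this patch): the bug fixed in the diff
below is easy to miss because the rack ID and the Nexus deployment ID are both plain
Uuids, so swapping the two positional arguments still type-checks. Newtype ID
wrappers, in the spirit of the typed UUIDs used elsewhere in this series
(SledUuid, OmicronZoneUuid), turn that swap into a compile error. The names
RackUuid, NexusUuid, and the start_background_tasks_* functions here are
hypothetical, invented only for this illustration; they are not part of the
omicron codebase.

    // Hypothetical illustration only; assumes just the `uuid` crate with the
    // "v4" feature enabled.
    use uuid::Uuid;

    /// Hypothetical newtype for a rack ID.
    #[derive(Clone, Copy, Debug)]
    struct RackUuid(Uuid);

    /// Hypothetical newtype for a Nexus instance ID.
    #[derive(Clone, Copy, Debug)]
    struct NexusUuid(Uuid);

    // With bare Uuids, a call site with swapped arguments still compiles and
    // only misbehaves at runtime.
    fn start_background_tasks_untyped(_rack_id: Uuid, _nexus_id: Uuid) {}

    // With newtypes, the same swap is rejected by the compiler.
    fn start_background_tasks_typed(_rack_id: RackUuid, _nexus_id: NexusUuid) {}

    fn main() {
        let rack_id = RackUuid(Uuid::new_v4());
        let nexus_id = NexusUuid(Uuid::new_v4());

        // Compiles despite the arguments being in the wrong order.
        start_background_tasks_untyped(nexus_id.0, rack_id.0);

        // Correct order; swapping `rack_id` and `nexus_id` here would be a
        // type error.
        start_background_tasks_typed(rack_id, nexus_id);
    }

The actual one-line fix in the diff below simply restores the intended argument
order.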
--- nexus/src/app/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 1bb42b20b2..1c7fadea05 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -378,8 +378,8 @@ impl Nexus { &background_ctx, Arc::clone(&db_datastore), &config.pkg.background_tasks, + rack_id, config.deployment.id, - config.deployment.rack_id, resolver.clone(), saga_request, ); From 8ad0840745519ba5daf256668c69876be8b3d130 Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Fri, 19 Apr 2024 14:05:12 -0700 Subject: [PATCH 173/334] Update Propolis and Crucible (#5579) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Propolis changes: Update h2 dependency Add NPT ops API definitions from illumos#15639 server: return better HTTP errors when not ensured (#649) Crucible changes: Make Region test suite generic across backends (#1263) Remove async from now-synchronous functions (#1264) Agent update to support cloning. (#1262) Remove the Active → Faulted transition (#1260) Avoid race condition in crutest rand-read/write (#1261) Add Active -> Offline -> Faulted tests (#1257) Reorganize dummy downstairs tests (#1253) Switch to unbounded queues (#1256) Add Upstairs session ID to dtrace stat probe, cleanup closure (#1254) Panic instead of returning errors in unit tests (#1251) Add a clone option to downstairs create (#1249) Co-authored-by: Alan Hanson --- Cargo.lock | 26 +++++++++++++------------- Cargo.toml | 12 ++++++------ nexus/src/app/sagas/common_storage.rs | 1 + package-manifest.toml | 12 ++++++------ sled-agent/src/sim/storage.rs | 2 ++ 5 files changed, 28 insertions(+), 25 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 07d94add14..501beecb7d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -482,9 +482,9 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" +source = "git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7#dd788a311a382b09ce1d3e35f7777b378e09fdf7" dependencies = [ - "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", + "bhyve_api_sys 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7)", "libc", "strum", ] @@ -501,7 +501,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" +source = "git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7#dd788a311a382b09ce1d3e35f7777b378e09fdf7" dependencies = [ "libc", "strum", @@ -1415,7 +1415,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=5677c7be81b60d9ba9c30991d10376f279a1d3b7#5677c7be81b60d9ba9c30991d10376f279a1d3b7" +source = "git+https://github.com/oxidecomputer/crucible?rev=1ef72f3c935e7dc936bf43310c04668fb60d7a20#1ef72f3c935e7dc936bf43310c04668fb60d7a20" dependencies = [ "anyhow", "chrono", @@ -1431,7 +1431,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=5677c7be81b60d9ba9c30991d10376f279a1d3b7#5677c7be81b60d9ba9c30991d10376f279a1d3b7" +source = 
"git+https://github.com/oxidecomputer/crucible?rev=1ef72f3c935e7dc936bf43310c04668fb60d7a20#1ef72f3c935e7dc936bf43310c04668fb60d7a20" dependencies = [ "anyhow", "chrono", @@ -1448,7 +1448,7 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=5677c7be81b60d9ba9c30991d10376f279a1d3b7#5677c7be81b60d9ba9c30991d10376f279a1d3b7" +source = "git+https://github.com/oxidecomputer/crucible?rev=1ef72f3c935e7dc936bf43310c04668fb60d7a20#1ef72f3c935e7dc936bf43310c04668fb60d7a20" dependencies = [ "crucible-workspace-hack", "libc", @@ -3499,7 +3499,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", + "bhyve_api 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7)", "byteorder", "camino", "camino-tempfile", @@ -5436,7 +5436,7 @@ dependencies = [ "pq-sys", "pretty_assertions", "progenitor-client", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7)", "rand 0.8.5", "rcgen", "ref-cast", @@ -5650,7 +5650,7 @@ dependencies = [ "oximeter-instruments", "oximeter-producer", "pretty_assertions", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7)", "propolis-mock-server", "rand 0.8.5", "rcgen", @@ -7094,7 +7094,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" +source = "git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7#dd788a311a382b09ce1d3e35f7777b378e09fdf7" dependencies = [ "async-trait", "base64 0.21.7", @@ -7115,7 +7115,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" +source = "git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7#dd788a311a382b09ce1d3e35f7777b378e09fdf7" dependencies = [ "anyhow", "atty", @@ -7125,7 +7125,7 @@ dependencies = [ "futures", "hyper 0.14.28", "progenitor", - "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361)", + "propolis_types 0.0.0 (git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7)", "rand 0.8.5", "reqwest", "schemars", @@ -7166,7 +7166,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361#8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" +source = "git+https://github.com/oxidecomputer/propolis?rev=dd788a311a382b09ce1d3e35f7777b378e09fdf7#dd788a311a382b09ce1d3e35f7777b378e09fdf7" dependencies = [ "schemars", "serde", diff --git a/Cargo.toml b/Cargo.toml index a22d0a0827..4eb0161781 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -197,9 +197,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } 
crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "1ef72f3c935e7dc936bf43310c04668fb60d7a20" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "1ef72f3c935e7dc936bf43310c04668fb60d7a20" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "1ef72f3c935e7dc936bf43310c04668fb60d7a20" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.6" @@ -339,9 +339,9 @@ prettyplease = { version = "0.2.19", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "dd788a311a382b09ce1d3e35f7777b378e09fdf7" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "dd788a311a382b09ce1d3e35f7777b378e09fdf7" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "dd788a311a382b09ce1d3e35f7777b378e09fdf7" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/nexus/src/app/sagas/common_storage.rs b/nexus/src/app/sagas/common_storage.rs index bf530ef858..0fe14f6d2a 100644 --- a/nexus/src/app/sagas/common_storage.rs +++ b/nexus/src/app/sagas/common_storage.rs @@ -49,6 +49,7 @@ pub(crate) async fn ensure_region_in_dataset( cert_pem: None, key_pem: None, root_pem: None, + source: None, }; let create_region = || async { diff --git a/package-manifest.toml b/package-manifest.toml index 2819010335..7fed672271 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -492,10 +492,10 @@ only_for_targets.image = "standard" # 3. 
Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" +source.commit = "1ef72f3c935e7dc936bf43310c04668fb60d7a20" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "5341c5572f80b8d1763f6563412dc03d9604d8c7af4022fc5da55338ee60d35c" +source.sha256 = "f4b9189d82729f851bab25ee7991134db2732f82657a15e88889500ed8a6e6c2" output.type = "zone" output.intermediate_only = true @@ -504,10 +504,10 @@ service_name = "crucible_pantry_prebuilt" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "5677c7be81b60d9ba9c30991d10376f279a1d3b7" +source.commit = "1ef72f3c935e7dc936bf43310c04668fb60d7a20" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "bf281bae1331279109dac23328ff86756331d7776e69396b02c77a4d08a225c7" +source.sha256 = "e7bf9cf165c3191c899c1f019df4edb6a34c0fe83d61cce861ae0aefc649882d" output.type = "zone" output.intermediate_only = true @@ -519,10 +519,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "8ff3ab62246fa1f8b8a5bfab0a7b8e1000926361" +source.commit = "dd788a311a382b09ce1d3e35f7777b378e09fdf7" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "35c5956b14d3b0a843351ce8ea7e8cb52e631a96a89041810fe0f91cc4072638" +source.sha256 = "f9ebee502fdaa115563ac84e855805c0bf5582437820445dd1734423216dfc5b" output.type = "zone" [package.mg-ddm-gz] diff --git a/sled-agent/src/sim/storage.rs b/sled-agent/src/sim/storage.rs index b21edf0915..6a688f6101 100644 --- a/sled-agent/src/sim/storage.rs +++ b/sled-agent/src/sim/storage.rs @@ -97,6 +97,8 @@ impl CrucibleDataInner { cert_pem: None, key_pem: None, root_pem: None, + source: None, + read_only: false, }; let old = self.regions.insert(id, region.clone()); From 090d26959bcab3fb55f965531a602294e99070be Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 21:34:20 +0000 Subject: [PATCH 174/334] chore(deps): update rust crate rustls to v0.22.4 [security] (#5582) --- Cargo.lock | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 501beecb7d..7cea45f732 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2089,7 +2089,7 @@ dependencies = [ "paste", "percent-encoding", "proc-macro2", - "rustls 0.22.2", + "rustls 0.22.4", "rustls-pemfile 2.1.2", "schemars", "serde", @@ -3358,7 +3358,7 @@ dependencies = [ "hyper 1.1.0", "hyper-util", "log", - "rustls 0.22.2", + "rustls 0.22.4", "rustls-native-certs", "rustls-pki-types", "tokio", @@ -4624,7 +4624,7 @@ dependencies = [ "rcgen", "ref-cast", "regex", - "rustls 0.22.2", + "rustls 0.22.4", "samael", "schemars", "semver 1.0.22", @@ -5443,7 +5443,7 @@ dependencies = [ "regex", "reqwest", "ring 0.17.8", - "rustls 0.22.2", + "rustls 0.22.4", "rustls-pemfile 2.1.2", "samael", "schemars", @@ -5713,7 +5713,7 @@ dependencies = [ "regex", "reqwest", "ring 0.17.8", - "rustls 0.22.2", + "rustls 0.22.4", "slog", "subprocess", "tar", @@ -7952,9 +7952,9 @@ dependencies = [ [[package]] name = "rustls" 
-version = "0.22.2" +version = "0.22.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e87c9956bd9807afa1f77e0f7594af32566e830e088a5576d27c5b6f30f49d41" +checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", "ring 0.17.8", @@ -9801,7 +9801,7 @@ version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" dependencies = [ - "rustls 0.22.2", + "rustls 0.22.4", "rustls-pki-types", "tokio", ] From 184e4cb05b0097b41ac471c78dd9e68efce84c70 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Fri, 19 Apr 2024 18:21:12 -0400 Subject: [PATCH 175/334] Automatic bump of permslip manifest to psc-v1.0.15 (#5577) Automated bump --- tools/permslip_staging | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/permslip_staging b/tools/permslip_staging index ae5c7890ed..ff5c866a4b 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,4 +1,4 @@ b1b0d63a179652fcc80fabbb49307c0fe28cf52744f58f7b8a768f14d6721a3f manifest-gimlet-v1.0.15.toml 686f5fff41ed3b33ba0be38d2becdeb67847705fd590f05f6d8f7c600db87fb7 manifest-oxide-rot-1-v1.0.9.toml -7d26b9f719a7f2c22e091d7d80de66933c11bdb9ae174ae59552b376400d63db manifest-psc-v1.0.14.toml +8c7a57a733df2cbff4963bf32073066871aae26a7f9eca878490e8f125bd2688 manifest-psc-v1.0.15.toml 267c8953c26f91614a59015719162f6f8f55d31d795a458387191dd1d874f9f0 manifest-sidecar-v1.0.15.toml From 0db7d991bed1b944f2b83f14e994d7bf7fba3e37 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 19 Apr 2024 17:10:21 -0700 Subject: [PATCH 176/334] chore(deps): update rust crate diesel to 2.1.6 (#5570) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7cea45f732..36b65b62ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1805,9 +1805,9 @@ checksum = "a7993efb860416547839c115490d4951c6d0f8ec04a3594d9dd99d50ed7ec170" [[package]] name = "diesel" -version = "2.1.5" +version = "2.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03fc05c17098f21b89bc7d98fe1dd3cce2c11c2ad8e145f2a44fe08ed28eb559" +checksum = "ff236accb9a5069572099f0b350a92e9560e8e63a9b8d546162f4a5e03026bb2" dependencies = [ "bitflags 2.4.2", "byteorder", diff --git a/Cargo.toml b/Cargo.toml index 4eb0161781..fa1f548b56 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -209,7 +209,7 @@ db-macros = { path = "nexus/db-macros" } debug-ignore = "1.0.5" derive_more = "0.99.17" derive-where = "1.2.7" -diesel = { version = "2.1.5", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } +diesel = { version = "2.1.6", features = ["postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } diesel-dtrace = { git = "https://github.com/oxidecomputer/diesel-dtrace", branch = "main" } dns-server = { path = "dns-server" } dns-service-client = { path = "clients/dns-service-client" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 654d92869d..c6e17df884 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -38,7 +38,7 @@ crossbeam-utils = { version = "0.8.19" } crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.8", 
default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } -diesel = { version = "2.1.5", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } +diesel = { version = "2.1.6", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.11.0" } elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } @@ -145,7 +145,7 @@ crossbeam-utils = { version = "0.8.19" } crossterm = { version = "0.27.0", features = ["event-stream", "serde"] } crypto-common = { version = "0.1.6", default-features = false, features = ["getrandom", "std"] } der = { version = "0.7.8", default-features = false, features = ["derive", "flagset", "oid", "pem", "std"] } -diesel = { version = "2.1.5", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } +diesel = { version = "2.1.6", features = ["chrono", "i-implement-a-third-party-backend-and-opt-into-breaking-changes", "network-address", "postgres", "r2d2", "serde_json", "uuid"] } digest = { version = "0.10.7", features = ["mac", "oid", "std"] } either = { version = "1.11.0" } elliptic-curve = { version = "0.13.8", features = ["ecdh", "hazmat", "pem", "std"] } From 24bc7e8a87b83aa7068e2cc200a7d4b148015681 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 20 Apr 2024 04:39:59 +0000 Subject: [PATCH 177/334] chore(deps): update taiki-e/install-action digest to 93eca7e (#5587) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`4820827` -> `93eca7e`](https://togithub.com/taiki-e/install-action/compare/4820827...93eca7e) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). 
Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 6e8b323e1f..d3bb8daa24 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@4820827bd312afaf667a328f1d0fe0fb4f6751b1 # v2 + uses: taiki-e/install-action@93eca7e3866e3af0ad7ae0a6f85da14894612ca8 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From ffd72fde0e5dde36bd142811d1c5b494e3b2707e Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 19 Apr 2024 23:19:08 -0700 Subject: [PATCH 178/334] [reconfigurator] turn expunged sleds into expunged zones (#5493) It actually looks like this bit falls out nicely. Turns out we were already disregarding Nexus zones on expunged sleds, so more of it works than I thought it did! --- Cargo.lock | 4 + dev-tools/omdb/tests/successes.out | 10 +- nexus/reconfigurator/planning/Cargo.toml | 4 + .../builder.rs} | 536 +++++++----------- .../planning/src/blueprint_builder/mod.rs | 10 + .../planning/src/blueprint_builder/zones.rs | 438 ++++++++++++++ nexus/reconfigurator/planning/src/planner.rs | 136 ++++- .../output/blueprint_builder_initial_diff.txt | 6 +- .../output/planner_basic_add_sled_2_3.txt | 8 +- .../output/planner_basic_add_sled_3_5.txt | 8 +- .../output/planner_nonprovisionable_1_2.txt | 214 ++++--- .../output/planner_nonprovisionable_2_2a.txt | 58 +- .../output/planner_nonprovisionable_bp2.txt | 68 +-- nexus/types/src/deployment.rs | 57 +- 14 files changed, 1038 insertions(+), 519 deletions(-) rename nexus/reconfigurator/planning/src/{blueprint_builder.rs => blueprint_builder/builder.rs} (83%) create mode 100644 nexus/reconfigurator/planning/src/blueprint_builder/mod.rs create mode 100644 nexus/reconfigurator/planning/src/blueprint_builder/zones.rs diff --git a/Cargo.lock b/Cargo.lock index 36b65b62ff..5e5783320f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4783,6 +4783,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", + "debug-ignore", "expectorate", "gateway-client", "illumos-utils", @@ -4790,6 +4791,7 @@ dependencies = [ "internal-dns", "ipnet", "ipnetwork", + "maplit", "nexus-config", "nexus-inventory", "nexus-types", @@ -4797,9 +4799,11 @@ dependencies = [ "omicron-test-utils", "omicron-uuid-kinds", "omicron-workspace-hack", + "proptest", "rand 0.8.5", "sled-agent-client", "slog", + "test-strategy", "thiserror", "typed-rng", "uuid 1.8.0", diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index f09a2715a9..17668d002f 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -457,10 +457,10 @@ parent: zone type zone ID disposition underlay IP ----------------------------------------------------------------------------------------- - sled .....................: zones at generation 2 + sled .....................: blueprint zones at generation 2 (no zones) - sled .....................: zones at generation 2 + sled .....................: blueprint zones at generation 2 clickhouse ..................... in service ::1 cockroach_db ..................... in service ::1 crucible_pantry ..................... 
in service ::1 @@ -490,10 +490,10 @@ parent: zone type zone ID disposition underlay IP ----------------------------------------------------------------------------------------- - sled .....................: zones at generation 2 + sled .....................: blueprint zones at generation 2 (no zones) - sled .....................: zones at generation 2 + sled .....................: blueprint zones at generation 2 clickhouse ..................... in service ::1 cockroach_db ..................... in service ::1 crucible_pantry ..................... in service ::1 @@ -525,7 +525,7 @@ to: blueprint ............. UNCHANGED SLEDS: - sled .....................: zones at generation 2 + sled .....................: blueprint zones at generation 2 clickhouse ..................... in service ::1 cockroach_db ..................... in service ::1 crucible_pantry ..................... in service ::1 diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 06d1c460ca..9c1d462a3b 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -6,6 +6,7 @@ edition = "2021" [dependencies] anyhow.workspace = true chrono.workspace = true +debug-ignore.workspace = true gateway-client.workspace = true illumos-utils.workspace = true indexmap.workspace = true @@ -28,4 +29,7 @@ omicron-workspace-hack.workspace = true [dev-dependencies] expectorate.workspace = true +maplit.workspace = true omicron-test-utils.workspace = true +proptest.workspace = true +test-strategy.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs similarity index 83% rename from nexus/reconfigurator/planning/src/blueprint_builder.rs rename to nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index abac687020..e1621a11c8 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -5,8 +5,10 @@ //! Low-level facility for generating Blueprints use crate::ip_allocator::IpAllocator; +use crate::planner::ZoneExpungeReason; use anyhow::anyhow; use anyhow::bail; +use debug_ignore::DebugIgnore; use internal_dns::config::Host; use internal_dns::config::Zone; use ipnet::IpAdd; @@ -45,15 +47,20 @@ use omicron_common::api::internal::shared::NetworkInterface; use omicron_common::api::internal::shared::NetworkInterfaceKind; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; use omicron_uuid_kinds::SledUuid; use omicron_uuid_kinds::ZpoolUuid; use rand::rngs::StdRng; use rand::SeedableRng; +use slog::debug; +use slog::error; +use slog::info; use slog::o; use slog::Logger; use std::borrow::Cow; use std::collections::BTreeMap; +use std::collections::BTreeSet; use std::collections::HashSet; use std::hash::Hash; use std::net::IpAddr; @@ -66,6 +73,10 @@ use typed_rng::TypedUuidRng; use typed_rng::UuidRng; use uuid::Uuid; +use super::zones::is_already_expunged; +use super::zones::BuilderZoneState; +use super::zones::BuilderZonesConfig; + /// Errors encountered while assembling blueprints #[derive(Debug, Error)] pub enum Error { @@ -142,21 +153,21 @@ pub struct BlueprintBuilder<'a> { // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. 
- zones: BlueprintZonesBuilder<'a>, + pub(super) zones: BlueprintZonesBuilder<'a>, disks: BlueprintDisksBuilder<'a>, creator: String, comments: Vec, // These fields mirror how RSS chooses addresses for zone NICs. - nexus_v4_ips: Box + Send>, - nexus_v6_ips: Box + Send>, + nexus_v4_ips: AvailableIterator<'static, Ipv4Addr>, + nexus_v6_ips: AvailableIterator<'static, Ipv6Addr>, // Iterator of available external IPs for service zones - available_external_ips: Box + Send + 'a>, + available_external_ips: AvailableIterator<'a, IpAddr>, // Iterator of available MAC addresses in the system address range - available_system_macs: Box>, + available_system_macs: AvailableIterator<'a, MacAddr>, // Random number generator for new UUIDs rng: BlueprintBuilderRng, @@ -279,17 +290,17 @@ impl<'a> BlueprintBuilder<'a> { // need to allocate new resources to that zone. However, allocation at // this point is entirely optimistic and theoretical: our caller may // discard the blueprint we create without ever making it the new - // target, or it might be an arbitrarily long time before it becomes the - // target. We need to be able to make allocation decisions that we + // target, or it might be an arbitrarily long time before it becomes + // the target. We need to be able to make allocation decisions that we // expect the blueprint executor to be able to realize successfully if // and when we become the target, but we cannot _actually_ perform // resource allocation. // // To do this, we look at our parent blueprint's used resources, and - // then choose new resources that aren't already in use (if possible; if - // we need to allocate a new resource and the parent blueprint appears - // to be using all the resources of that kind, our blueprint generation - // will fail). + // then choose new resources that aren't already in use (if possible; + // if we need to allocate a new resource and the parent blueprint + // appears to be using all the resources of that kind, our blueprint + // generation will fail). // // For example, RSS assigns Nexus NIC IPs by stepping through a list of // addresses based on `NEXUS_OPTE_IPVx_SUBNET` (as in the iterators @@ -300,12 +311,19 @@ impl<'a> BlueprintBuilder<'a> { // Note that by building these iterators up front based on // `parent_blueprint`, we cannot reuse resources in a case where we // remove a zone that used a resource and then add another zone that - // wants the same kind of resource. We don't support zone removal yet, - // but expect this to be okay: we don't anticipate removal and addition - // to frequently be combined into the exact same blueprint, particularly - // in a way that expects the addition to reuse resources from the - // removal; we won't want to attempt to reuse resources from a zone - // until we know it's been fully removed. + // wants the same kind of resource. That is mostly okay, but there are + // some cases in which we may have to do that -- particularly external + // DNS zones, which tend to have a small number of fixed IPs. Solving + // that is a TODO. + // + // Also note that currently, we don't perform any kind of garbage + // collection on sleds and zones that no longer have any attached + // resources. Once a sled or zone is marked expunged, it will always + // stay in that state. + // https://github.com/oxidecomputer/omicron/issues/5552 tracks + // implementing this kind of garbage collection, and we should do it + // very soon. 
+ let mut existing_nexus_v4_ips: HashSet = HashSet::new(); let mut existing_nexus_v6_ips: HashSet = HashSet::new(); let mut used_external_ips: HashSet = HashSet::new(); @@ -340,6 +358,7 @@ impl<'a> BlueprintBuilder<'a> { bail!("duplicate external IP: {external_ip}"); } } + if let Some(nic) = zone_type.opte_vnic() { if !used_macs.insert(nic.mac) { bail!("duplicate service vNIC MAC: {}", nic.mac); @@ -353,30 +372,26 @@ impl<'a> BlueprintBuilder<'a> { // of Nexus instances), but wouldn't be ideal if we have many resources // we need to skip. We could do something smarter here based on the sets // of used resources we built above if needed. - let nexus_v4_ips = Box::new( + let nexus_v4_ips = AvailableIterator::new( NEXUS_OPTE_IPV4_SUBNET .0 .iter() - .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) - .filter(move |ip| !existing_nexus_v4_ips.contains(ip)), + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), + existing_nexus_v4_ips, ); - let nexus_v6_ips = Box::new( + let nexus_v6_ips = AvailableIterator::new( NEXUS_OPTE_IPV6_SUBNET .0 .iter() - .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) - .filter(move |ip| !existing_nexus_v6_ips.contains(ip)), - ); - let available_external_ips = Box::new( - input - .service_ip_pool_ranges() - .iter() - .flat_map(|r| r.iter()) - .filter(move |ip| !used_external_ips.contains(ip)), + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES), + existing_nexus_v6_ips, ); - let available_system_macs = Box::new( - MacAddr::iter_system().filter(move |mac| !used_macs.contains(mac)), + let available_external_ips = AvailableIterator::new( + input.service_ip_pool_ranges().iter().flat_map(|r| r.iter()), + used_external_ips, ); + let available_system_macs = + AvailableIterator::new(MacAddr::iter_system(), used_macs); Ok(BlueprintBuilder { log, @@ -436,6 +451,93 @@ impl<'a> BlueprintBuilder<'a> { self.comments.push(String::from(comment)); } + /// Expunges all zones from a sled. + /// + /// Returns a list of zone IDs expunged (excluding zones that were already + /// expunged). If the list is empty, then the operation was a no-op. + pub(crate) fn expunge_all_zones_for_sled( + &mut self, + sled_id: SledUuid, + reason: ZoneExpungeReason, + ) -> Result, Error> { + let log = self.log.new(o!( + "sled_id" => sled_id.to_string(), + )); + + // Do any zones need to be marked expunged? + let mut zones_to_expunge = BTreeSet::new(); + + let sled_zones = self.zones.current_sled_zones(sled_id); + for (z, state) in sled_zones { + let is_expunged = + is_already_expunged(z, state).map_err(|error| { + Error::Planner(anyhow!(error).context(format!( + "for sled {sled_id}, error computing zones to expunge" + ))) + })?; + + if !is_expunged { + zones_to_expunge.insert(z.id); + } + } + + if zones_to_expunge.is_empty() { + debug!( + log, + "sled has no zones that need expungement; skipping"; + ); + return Ok(zones_to_expunge); + } + + match reason { + ZoneExpungeReason::SledDecommissioned { policy } => { + // A sled marked as decommissioned should have no resources + // allocated to it. If it does, it's an illegal state, possibly + // introduced by a bug elsewhere in the system -- we need to + // produce a loud warning (i.e. an ERROR-level log message) on + // this, while still removing the zones. + error!( + &log, + "sled has state Decommissioned, yet has zones \ + allocated to it; will expunge them \ + (sled policy is \"{policy}\")" + ); + } + ZoneExpungeReason::SledExpunged => { + // This is the expected situation. 
+ info!( + &log, + "expunged sled with {} non-expunged zones found \ + (will expunge all zones)", + zones_to_expunge.len() + ); + } + } + + // Now expunge all the zones that need it. + let change = self.zones.change_sled_zones(sled_id); + change.expunge_zones(zones_to_expunge.clone()).map_err(|error| { + anyhow!(error) + .context(format!("for sled {sled_id}, error expunging zones")) + })?; + + // Finally, add a comment describing what happened. + let reason = match reason { + ZoneExpungeReason::SledDecommissioned { .. } => { + "sled state is decommissioned" + } + ZoneExpungeReason::SledExpunged => "sled policy is expunged", + }; + + self.comment(format!( + "sled {} ({reason}): {} zones expunged", + sled_id, + zones_to_expunge.len(), + )); + + Ok(zones_to_expunge) + } + /// Ensures that the blueprint contains disks for a sled which already /// exists in the database. /// @@ -770,7 +872,10 @@ impl<'a> BlueprintBuilder<'a> { let _ = self.sled_resources(sled_id)?; let sled_zones = self.zones.change_sled_zones(sled_id); - sled_zones.add_zone(zone)?; + sled_zones.add_zone(zone).map_err(|error| { + anyhow!(error) + .context(format!("error adding zone to sled {sled_id}")) + })?; Ok(()) } @@ -820,13 +925,52 @@ impl<'a> BlueprintBuilder<'a> { ) -> Result<&SledResources, Error> { self.input.sled_resources(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( - "attempted to use sled that is not in service: {}", + "attempted to use sled that is not currently known: {}", sled_id )) }) } } +/// Combines a base iterator with an `in_use` set, filtering out any elements +/// that are in the "in_use" set. +/// +/// This can be done with a chained `.filter` on the iterator, but +/// `AvailableIterator` also allows for inspection of the `in_use` set. +/// +/// Note that this is a stateful iterator -- i.e. it implements `Iterator`, not +/// `IntoIterator`. That's what we currently need in the planner. +#[derive(Debug)] +pub struct AvailableIterator<'a, T> { + base: DebugIgnore + Send + 'a>>, + in_use: HashSet, +} + +impl<'a, T: Hash + Eq> AvailableIterator<'a, T> { + /// Creates a new `AvailableIterator` from a base iterator and a set of + /// elements that are in use. + pub fn new(base: I, in_use: impl IntoIterator) -> Self + where + I: Iterator + Send + 'a, + { + let in_use = in_use.into_iter().collect(); + AvailableIterator { base: DebugIgnore(Box::new(base)), in_use } + } + + /// Returns the in-use set. + pub fn in_use(&self) -> &HashSet { + &self.in_use + } +} + +impl Iterator for AvailableIterator<'_, T> { + type Item = T; + + fn next(&mut self) -> Option { + self.base.find(|item| !self.in_use.contains(item)) + } +} + #[derive(Debug)] struct BlueprintBuilderRng { // Have separate RNGs for the different kinds of UUIDs we might add, @@ -870,7 +1014,7 @@ impl BlueprintBuilderRng { /// blueprint. We do this by keeping a copy of any [`BlueprintZonesConfig`] /// that we've changed and a _reference_ to the parent blueprint's zones. This /// struct makes it easy for callers iterate over the right set of zones. -struct BlueprintZonesBuilder<'a> { +pub(super) struct BlueprintZonesBuilder<'a> { changed_zones: BTreeMap, // Temporarily make a clone of the parent blueprint's zones so we can use // typed UUIDs everywhere. Once we're done migrating, this `Cow` can be @@ -959,119 +1103,6 @@ impl<'a> BlueprintZonesBuilder<'a> { } } -// This is a sub-module to hide implementation details from the rest of -// blueprint_builder. 
-mod builder_zones { - use super::*; - - #[derive(Debug)] - #[must_use] - pub(crate) struct BuilderZonesConfig { - // The current generation -- this is bumped at blueprint build time and is - // otherwise not exposed to callers. - generation: Generation, - - // The list of zones, along with their state. - zones: Vec, - } - - impl BuilderZonesConfig { - pub(super) fn new() -> Self { - Self { - // Note that the first generation is reserved to mean the one - // containing no zones. See - // OmicronZonesConfig::INITIAL_GENERATION. - // - // Since we're currently assuming that creating a new - // `BuilderZonesConfig` means that we're going to add new zones - // shortly, we start with Generation::new() here. It'll get - // bumped up to the next one in `Self::build`. - generation: Generation::new(), - zones: vec![], - } - } - - pub(super) fn from_parent(parent: &BlueprintZonesConfig) -> Self { - Self { - // We'll bump this up at build time. - generation: parent.generation, - - zones: parent - .zones - .iter() - .map(|zone| BuilderZoneConfig { - zone: zone.clone(), - state: BuilderZoneState::Unchanged, - }) - .collect(), - } - } - - pub(super) fn add_zone( - &mut self, - zone: BlueprintZoneConfig, - ) -> Result<(), Error> { - if self.zones.iter().any(|z| z.zone.id == zone.id) { - return Err(Error::Planner(anyhow!( - "attempted to add zone that already exists: {}", - zone.id - ))); - }; - - self.zones.push(BuilderZoneConfig { - zone, - state: BuilderZoneState::Added, - }); - Ok(()) - } - - pub(super) fn iter_zones( - &self, - ) -> impl Iterator { - self.zones.iter() - } - - pub(super) fn build(self) -> BlueprintZonesConfig { - let mut ret = BlueprintZonesConfig { - // Something we could do here is to check if any zones have - // actually been modified, and if not, return the parent's - // generation. For now, we depend on callers to only call - // `BlueprintZonesBuilder::change_sled_zones` when they really - // mean it. - generation: self.generation.next(), - zones: self.zones.into_iter().map(|z| z.zone).collect(), - }; - ret.sort(); - ret - } - } - - #[derive(Debug)] - pub(crate) struct BuilderZoneConfig { - zone: BlueprintZoneConfig, - state: BuilderZoneState, - } - - impl BuilderZoneConfig { - pub(super) fn zone(&self) -> &BlueprintZoneConfig { - &self.zone - } - - pub(super) fn state(&self) -> BuilderZoneState { - self.state - } - } - - #[derive(Copy, Clone, Debug, PartialEq, Eq)] - pub(crate) enum BuilderZoneState { - Unchanged, - // Currently unused: Modified - Added, - } -} - -use builder_zones::*; - /// Helper for working with sets of disks on each sled /// /// Tracking the set of disks is slightly non-trivial because we need to bump @@ -1173,15 +1204,11 @@ pub mod test { use crate::system::SledBuilder; use expectorate::assert_contents; use nexus_types::deployment::BlueprintZoneFilter; - use nexus_types::deployment::SledDetails; - use nexus_types::external_api::views::SledPolicy; - use nexus_types::external_api::views::SledState; use omicron_common::address::IpRange; - use omicron_common::address::Ipv6Subnet; use omicron_test_utils::dev::test_setup_log; - use omicron_uuid_kinds::OmicronZoneUuid; use sled_agent_client::types::OmicronZoneType; use std::collections::BTreeSet; + use test_strategy::proptest; pub const DEFAULT_N_SLEDS: usize = 3; @@ -1394,182 +1421,6 @@ pub mod test { logctx.cleanup_successful(); } - /// A test focusing on `BlueprintZonesBuilder` and its internal logic. 
- #[test] - fn test_builder_zones() { - static TEST_NAME: &str = "blueprint_test_builder_zones"; - let logctx = test_setup_log(TEST_NAME); - let mut example = - ExampleSystem::new(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - let blueprint_initial = - BlueprintBuilder::build_initial_from_collection_seeded( - &example.collection, - Generation::new(), - Generation::new(), - example.input.all_sled_ids(SledFilter::All), - "the_test", - TEST_NAME, - ) - .expect("creating initial blueprint"); - - // Add a completely bare sled to the input. - let (new_sled_id, input2) = { - let mut input = example.input.clone().into_builder(); - let new_sled_id = example.sled_rng.next(); - input - .add_sled( - new_sled_id, - SledDetails { - policy: SledPolicy::provisionable(), - state: SledState::Active, - resources: SledResources { - subnet: Ipv6Subnet::new( - "fd00:1::".parse().unwrap(), - ), - zpools: BTreeMap::new(), - }, - }, - ) - .expect("adding new sled"); - - (new_sled_id, input.build()) - }; - - let mut builder = BlueprintBuilder::new_based_on( - &logctx.log, - &blueprint_initial, - &input2, - "the_test", - ) - .expect("creating blueprint builder"); - builder.set_rng_seed((TEST_NAME, "bp2")); - - // Test adding a new sled with an NTP zone. - assert_eq!( - builder.sled_ensure_zone_ntp(new_sled_id).unwrap(), - Ensure::Added - ); - - // Iterate over the zones for the sled and ensure that the NTP zone is - // present. - { - let mut zones = builder.zones.current_sled_zones(new_sled_id); - let (_, state) = zones.next().expect("exactly one zone for sled"); - assert!(zones.next().is_none(), "exactly one zone for sled"); - assert_eq!( - state, - BuilderZoneState::Added, - "NTP zone should have been added" - ); - } - - // Now, test adding a new zone (Oximeter, picked arbitrarily) to an - // existing sled. - let existing_sled_id = example - .input - .all_sled_ids(SledFilter::All) - .next() - .expect("at least one sled present"); - let change = builder.zones.change_sled_zones(existing_sled_id); - - let new_zone_id = OmicronZoneUuid::new_v4(); - change - .add_zone(BlueprintZoneConfig { - disposition: BlueprintZoneDisposition::InService, - id: new_zone_id, - underlay_address: Ipv6Addr::UNSPECIFIED, - zone_type: BlueprintZoneType::Oximeter( - blueprint_zone_type::Oximeter { - address: SocketAddrV6::new( - Ipv6Addr::UNSPECIFIED, - 0, - 0, - 0, - ), - }, - ), - }) - .expect("adding new zone"); - - { - // Iterate over the zones and ensure that the Oximeter zone is - // present, and marked added. - let mut zones = builder.zones.current_sled_zones(existing_sled_id); - zones - .find_map(|(z, state)| { - if z.id == new_zone_id { - assert_eq!( - state, - BuilderZoneState::Added, - "new zone ID {new_zone_id} should be marked added" - ); - Some(()) - } else { - None - } - }) - .expect("new zone ID should be present"); - } - - // Also call change_sled_zones without making any changes. This - // currently bumps the generation number, but in the future might - // become smarter. - let control_sled_id = example - .input - .all_sled_ids(SledFilter::All) - .nth(2) - .expect("at least 2 sleds present"); - _ = builder.zones.change_sled_zones(control_sled_id); - - // Now build the blueprint and ensure that all the changes we described - // above are present. - let blueprint = builder.build(); - verify_blueprint(&blueprint); - let diff = blueprint.diff_since_blueprint(&blueprint_initial).unwrap(); - println!("expecting new NTP and Oximeter zones:\n{}", diff.display()); - - // No sleds were removed. 
- assert_eq!(diff.sleds_removed().len(), 0); - - // One sled was added. - let sleds: Vec<_> = diff.sleds_added().collect(); - assert_eq!(sleds.len(), 1); - let (sled_id, new_sled_zones) = sleds[0]; - assert_eq!(sled_id, new_sled_id); - // The generation number should be newer than the initial default. - assert_eq!(new_sled_zones.generation, Generation::new().next()); - assert_eq!(new_sled_zones.zones.len(), 1); - - // Two sleds were modified: existing_sled_id and control_sled_id. - let sleds = diff.sleds_modified(); - assert_eq!(sleds.len(), 2, "2 sleds modified"); - for (sled_id, sled_modified) in sleds { - if sled_id == existing_sled_id { - assert_eq!( - sled_modified.generation_after, - sled_modified.generation_before.next() - ); - assert_eq!(sled_modified.zones_added().len(), 1); - let added_zone = sled_modified.zones_added().next().unwrap(); - assert_eq!(added_zone.id, new_zone_id); - } else { - assert_eq!(sled_id, control_sled_id); - - // The generation number is bumped, but nothing else. - assert_eq!( - sled_modified.generation_after, - sled_modified.generation_before.next(), - "control sled has generation number bumped" - ); - assert_eq!(sled_modified.zones_added().len(), 0); - assert_eq!(sled_modified.zones_removed().len(), 0); - assert_eq!(sled_modified.zones_modified().count(), 0); - } - } - - logctx.cleanup_successful(); - } - #[test] fn test_add_physical_disks() { static TEST_NAME: &str = "blueprint_builder_test_add_physical_disks"; @@ -1975,4 +1826,31 @@ pub mod test { logctx.cleanup_successful(); } + + /// Test that `AvailableIterator` correctly filters out items that are in + /// use. + #[proptest] + fn test_available_iterator(items: HashSet<(i32, bool)>) { + let mut in_use_map = HashSet::new(); + let mut expected_available = Vec::new(); + let items: Vec<_> = items + .into_iter() + .map(|(item, in_use)| { + if in_use { + in_use_map.insert(item); + } else { + expected_available.push(item); + } + item + }) + .collect(); + + let available = AvailableIterator::new(items.into_iter(), in_use_map); + let actual_available = available.collect::>(); + + assert_eq!( + expected_available, actual_available, + "available items match" + ); + } } diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs new file mode 100644 index 0000000000..e3afa2cdad --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs @@ -0,0 +1,10 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Low-level facility for generating Blueprints + +mod builder; +mod zones; + +pub use builder::*; diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs new file mode 100644 index 0000000000..5f8c0625a7 --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs @@ -0,0 +1,438 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +use std::collections::BTreeSet; + +use nexus_types::deployment::{ + BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZonesConfig, +}; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::OmicronZoneUuid; +use thiserror::Error; + +#[derive(Debug)] +#[must_use] +pub(super) struct BuilderZonesConfig { + // The current generation -- this is bumped at blueprint build time and is + // otherwise not exposed to callers. + generation: Generation, + + // The list of zones, along with their state. + zones: Vec, +} + +impl BuilderZonesConfig { + pub(super) fn new() -> Self { + Self { + // Note that the first generation is reserved to mean the one + // containing no zones. See + // OmicronZonesConfig::INITIAL_GENERATION. + // + // Since we're currently assuming that creating a new + // `BuilderZonesConfig` means that we're going to add new zones + // shortly, we start with Generation::new() here. It'll get + // bumped up to the next one in `Self::build`. + generation: Generation::new(), + zones: vec![], + } + } + + pub(super) fn from_parent(parent: &BlueprintZonesConfig) -> Self { + Self { + // We'll bump this up at build time. + generation: parent.generation, + + zones: parent + .zones + .iter() + .map(|zone| BuilderZoneConfig { + zone: zone.clone(), + state: BuilderZoneState::Unchanged, + }) + .collect(), + } + } + + pub(super) fn add_zone( + &mut self, + zone: BlueprintZoneConfig, + ) -> Result<(), BuilderZonesConfigError> { + if self.zones.iter().any(|z| z.zone.id == zone.id) { + // We shouldn't be trying to add zones that already exist -- + // something went wrong in the planner logic. + return Err(BuilderZonesConfigError::AddExistingZone { + zone_id: zone.id, + }); + }; + + self.zones + .push(BuilderZoneConfig { zone, state: BuilderZoneState::Added }); + Ok(()) + } + + pub(super) fn expunge_zones( + &mut self, + mut zones: BTreeSet, + ) -> Result<(), BuilderZonesConfigError> { + for zone in &mut self.zones { + if zones.remove(&zone.zone.id) { + // Check that the zone is expungeable. Typically, zones passed + // in here should have had this check done to them already, but + // in case they're not, or in case something else about those + // zones changed in between, check again. + is_already_expunged(&zone.zone, zone.state)?; + zone.zone.disposition = BlueprintZoneDisposition::Expunged; + zone.state = BuilderZoneState::Modified; + } + } + + // All zones passed in should have been found -- are there any left + // over? + if !zones.is_empty() { + return Err(BuilderZonesConfigError::ExpungeUnmatchedZones { + unmatched: zones, + }); + } + + Ok(()) + } + + pub(super) fn iter_zones( + &self, + ) -> impl Iterator { + self.zones.iter() + } + + pub(super) fn build(self) -> BlueprintZonesConfig { + let mut ret = BlueprintZonesConfig { + // Something we could do here is to check if any zones have + // actually been modified, and if not, return the parent's + // generation. For now, we depend on callers to only call + // `BlueprintZonesBuilder::change_sled_zones` when they really + // mean it. 
+ generation: self.generation.next(), + zones: self.zones.into_iter().map(|z| z.zone).collect(), + }; + ret.sort(); + ret + } +} + +pub(super) fn is_already_expunged( + zone: &BlueprintZoneConfig, + state: BuilderZoneState, +) -> Result { + match zone.disposition { + BlueprintZoneDisposition::InService + | BlueprintZoneDisposition::Quiesced => { + if state != BuilderZoneState::Unchanged { + // We shouldn't be trying to expunge zones that have also been + // changed in this blueprint -- something went wrong in the planner + // logic. + return Err(BuilderZonesConfigError::ExpungeModifiedZone { + zone_id: zone.id, + state, + }); + } + Ok(false) + } + BlueprintZoneDisposition::Expunged => { + // Treat expungement as idempotent. + Ok(true) + } + } +} + +#[derive(Debug)] +pub(super) struct BuilderZoneConfig { + zone: BlueprintZoneConfig, + state: BuilderZoneState, +} + +impl BuilderZoneConfig { + pub(super) fn zone(&self) -> &BlueprintZoneConfig { + &self.zone + } + + pub(super) fn state(&self) -> BuilderZoneState { + self.state + } +} + +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub(super) enum BuilderZoneState { + Unchanged, + Modified, + Added, +} + +#[derive(Clone, Debug, PartialEq, Eq, Error)] +pub(super) enum BuilderZonesConfigError { + #[error("attempted to add zone that already exists: {zone_id}")] + AddExistingZone { zone_id: OmicronZoneUuid }, + #[error( + "attempted to expunge zone {zone_id} that was in state {state:?} \ + (can only expunge unchanged zones)" + )] + ExpungeModifiedZone { zone_id: OmicronZoneUuid, state: BuilderZoneState }, + #[error( + "while expunging zones, not all zones provided were found: {unmatched:?}" + )] + ExpungeUnmatchedZones { unmatched: BTreeSet }, +} + +#[cfg(test)] +mod tests { + use std::{ + collections::BTreeMap, + net::{Ipv6Addr, SocketAddrV6}, + }; + + use maplit::btreeset; + use nexus_types::{ + deployment::{ + blueprint_zone_type, BlueprintZoneType, SledDetails, SledFilter, + SledResources, + }, + external_api::views::{SledPolicy, SledState}, + }; + use omicron_common::address::Ipv6Subnet; + use omicron_test_utils::dev::test_setup_log; + + use crate::{ + blueprint_builder::{ + test::{verify_blueprint, DEFAULT_N_SLEDS}, + BlueprintBuilder, Ensure, + }, + example::ExampleSystem, + }; + + use super::*; + + /// A test focusing on `BlueprintZonesBuilder` and its internal logic. + #[test] + fn test_builder_zones() { + static TEST_NAME: &str = "blueprint_test_builder_zones"; + let logctx = test_setup_log(TEST_NAME); + let mut example = + ExampleSystem::new(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + let blueprint_initial = + BlueprintBuilder::build_initial_from_collection_seeded( + &example.collection, + Generation::new(), + Generation::new(), + example.input.all_sled_ids(SledFilter::All), + "the_test", + TEST_NAME, + ) + .expect("creating initial blueprint"); + + // Add a completely bare sled to the input. 
+ let (new_sled_id, input2) = { + let mut input = example.input.clone().into_builder(); + let new_sled_id = example.sled_rng.next(); + input + .add_sled( + new_sled_id, + SledDetails { + policy: SledPolicy::provisionable(), + state: SledState::Active, + resources: SledResources { + subnet: Ipv6Subnet::new( + "fd00:1::".parse().unwrap(), + ), + zpools: BTreeMap::new(), + }, + }, + ) + .expect("adding new sled"); + + (new_sled_id, input.build()) + }; + + let mut builder = BlueprintBuilder::new_based_on( + &logctx.log, + &blueprint_initial, + &input2, + "the_test", + ) + .expect("creating blueprint builder"); + builder.set_rng_seed((TEST_NAME, "bp2")); + + // Test adding a new sled with an NTP zone. + assert_eq!( + builder.sled_ensure_zone_ntp(new_sled_id).unwrap(), + Ensure::Added + ); + + // Iterate over the zones for the sled and ensure that the NTP zone is + // present. + { + let mut zones = builder.zones.current_sled_zones(new_sled_id); + let (_, state) = zones.next().expect("exactly one zone for sled"); + assert!(zones.next().is_none(), "exactly one zone for sled"); + assert_eq!( + state, + BuilderZoneState::Added, + "NTP zone should have been added" + ); + } + + // Now, test adding a new zone (Oximeter, picked arbitrarily) to an + // existing sled. + let existing_sled_id = example + .input + .all_sled_ids(SledFilter::All) + .next() + .expect("at least one sled present"); + let change = builder.zones.change_sled_zones(existing_sled_id); + + let new_zone_id = OmicronZoneUuid::new_v4(); + change + .add_zone(BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: new_zone_id, + underlay_address: Ipv6Addr::UNSPECIFIED, + zone_type: BlueprintZoneType::Oximeter( + blueprint_zone_type::Oximeter { + address: SocketAddrV6::new( + Ipv6Addr::UNSPECIFIED, + 0, + 0, + 0, + ), + }, + ), + }) + .expect("adding new zone"); + + // Attempt to expunge one of the other zones on the sled. + let existing_zone_id = change + .iter_zones() + .find(|z| z.zone.id != new_zone_id) + .expect("at least one existing zone") + .zone + .id; + change + .expunge_zones(btreeset! { existing_zone_id }) + .expect("expunging existing zone"); + // Do it again to ensure that expunging an already-expunged zone is + // idempotent, even within the same blueprint. + change + .expunge_zones(btreeset! { existing_zone_id }) + .expect("expunging already-expunged zone"); + // But expunging a zone that doesn't exist should fail. + let non_existent_zone_id = OmicronZoneUuid::new_v4(); + let non_existent_set = btreeset! { non_existent_zone_id }; + let error = change + .expunge_zones(non_existent_set.clone()) + .expect_err("expunging non-existent zone"); + assert_eq!( + error, + BuilderZonesConfigError::ExpungeUnmatchedZones { + unmatched: non_existent_set + } + ); + + { + // Iterate over the zones and ensure that the Oximeter zone is + // present, and marked added. + let mut zones = builder.zones.current_sled_zones(existing_sled_id); + zones + .find_map(|(z, state)| { + if z.id == new_zone_id { + assert_eq!( + state, + BuilderZoneState::Added, + "new zone ID {new_zone_id} should be marked added" + ); + Some(()) + } else { + None + } + }) + .expect("new zone ID should be present"); + } + + // Also call change_sled_zones without making any changes. This + // currently bumps the generation number, but in the future might + // become smarter and not do so (in which case this test will break). 
+ let control_sled_id = example + .input + .all_sled_ids(SledFilter::All) + .nth(2) + .expect("at least 2 sleds present"); + _ = builder.zones.change_sled_zones(control_sled_id); + + // Attempt to expunge the newly added Oximeter zone. This should fail + // because we only support expunging zones that are unchanged from the + // parent blueprint. + let error = builder + .zones + .change_sled_zones(existing_sled_id) + .expunge_zones(btreeset! { new_zone_id }) + .expect_err("expunging a new zone should fail"); + assert_eq!( + error, + BuilderZonesConfigError::ExpungeModifiedZone { + zone_id: new_zone_id, + state: BuilderZoneState::Added + } + ); + + // Now build the blueprint and ensure that all the changes we described + // above are present. + let blueprint = builder.build(); + verify_blueprint(&blueprint); + let diff = blueprint.diff_since_blueprint(&blueprint_initial).unwrap(); + println!("expecting new NTP and Oximeter zones:\n{}", diff.display()); + + // No sleds were removed. + assert_eq!(diff.sleds_removed().len(), 0); + + // One sled was added. + let sleds: Vec<_> = diff.sleds_added().collect(); + assert_eq!(sleds.len(), 1); + let (sled_id, new_sled_zones) = sleds[0]; + assert_eq!(sled_id, new_sled_id); + // The generation number should be newer than the initial default. + assert_eq!(new_sled_zones.generation, Generation::new().next()); + assert_eq!(new_sled_zones.zones.len(), 1); + + // Two sleds were modified: existing_sled_id and control_sled_id. + let sleds = diff.sleds_modified(); + assert_eq!(sleds.len(), 2, "2 sleds modified"); + for (sled_id, sled_modified) in sleds { + if sled_id == existing_sled_id { + assert_eq!( + sled_modified.generation_after, + sled_modified.generation_before.next() + ); + assert_eq!(sled_modified.zones_added().len(), 1); + let added_zone = sled_modified.zones_added().next().unwrap(); + assert_eq!(added_zone.id, new_zone_id); + + assert_eq!(sled_modified.zones_removed().len(), 0); + assert_eq!(sled_modified.zones_modified().count(), 1); + let modified_zone = + sled_modified.zones_modified().next().unwrap(); + assert_eq!(modified_zone.zone_before.id, existing_zone_id); + } else { + assert_eq!(sled_id, control_sled_id); + + // The generation number is bumped, but nothing else. + assert_eq!( + sled_modified.generation_after, + sled_modified.generation_before.next(), + "control sled has generation number bumped" + ); + assert_eq!(sled_modified.zones_added().len(), 0); + assert_eq!(sled_modified.zones_removed().len(), 0); + assert_eq!(sled_modified.zones_modified().count(), 0); + } + } + + logctx.cleanup_successful(); + } +} diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 1c054de646..46716754a1 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -13,6 +13,8 @@ use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; +use nexus_types::external_api::views::SledPolicy; +use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_uuid_kinds::SledUuid; use slog::{info, warn, Logger}; @@ -72,9 +74,35 @@ impl<'a> Planner<'a> { } fn do_plan(&mut self) -> Result<(), Error> { - // The only thing this planner currently knows how to do is add services - // to a sled that's missing them. So let's see if we're in that case. 
+ // We perform planning in two loops: the first one turns expunged sleds + // into expunged zones, and the second one adds services. + self.do_plan_expunge()?; + self.do_plan_add()?; + + Ok(()) + } + + fn do_plan_expunge(&mut self) -> Result<(), Error> { + // Remove services from sleds marked expunged. We use `SledFilter::All` + // and have a custom `needs_zone_expungement` function that allows us + // to produce better errors. + for (sled_id, sled_details) in self.input.all_sleds(SledFilter::All) { + // Does this sled need zone expungement based on the details? + let Some(reason) = + needs_zone_expungement(sled_details.state, sled_details.policy) + else { + continue; + }; + + // Perform the expungement. + self.blueprint.expunge_all_zones_for_sled(sled_id, reason)?; + } + + Ok(()) + } + + fn do_plan_add(&mut self) -> Result<(), Error> { // Internal DNS is a prerequisite for bringing up all other zones. At // this point, we assume that internal DNS (as a service) is already // functioning. At some point, this function will have to grow the @@ -330,6 +358,39 @@ impl<'a> Planner<'a> { } } +/// Returns `Some(reason)` if the sled needs its zones to be expunged, +/// based on the policy and state. +fn needs_zone_expungement( + state: SledState, + policy: SledPolicy, +) -> Option { + match state { + SledState::Active => {} + SledState::Decommissioned => { + // A decommissioned sled that still has resources attached to it is + // an illegal state, but representable. If we see a sled in this + // state, we should still expunge all zones in it, but parent code + // should warn on it. + return Some(ZoneExpungeReason::SledDecommissioned { policy }); + } + } + + match policy { + SledPolicy::InService { .. } => None, + SledPolicy::Expunged => Some(ZoneExpungeReason::SledExpunged), + } +} + +/// The reason a sled's zones need to be expunged. +/// +/// This is used only for introspection and logging -- it's not part of the +/// logical flow. +#[derive(Copy, Clone, Debug)] +pub(crate) enum ZoneExpungeReason { + SledDecommissioned { policy: SledPolicy }, + SledExpunged, +} + #[cfg(test)] mod test { use super::Planner; @@ -348,6 +409,7 @@ mod test { use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; + use nexus_types::deployment::DiffSledModified; use nexus_types::deployment::SledFilter; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; @@ -356,6 +418,7 @@ mod test { use omicron_common::api::external::Generation; use omicron_test_utils::dev::test_setup_log; use omicron_uuid_kinds::GenericUuid; + use std::collections::HashMap; /// Runs through a basic sequence of blueprints for adding a sled #[test] @@ -840,18 +903,41 @@ mod test { ); let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); - println!("1 -> 2 (added additional Nexus zones):\n{}", diff.display()); + println!( + "1 -> 2 (added additional Nexus zones, take 2 sleds out of service):\n{}", + diff.display() + ); assert_contents( "tests/output/planner_nonprovisionable_1_2.txt", &diff.display().to_string(), ); + + // The expunged and decommissioned sleds should have had all zones be + // marked as expunged. (Not removed! Just marked as expunged.) + // + // Note that at this point we're neither removing zones from the + // blueprint nor marking sleds as decommissioned -- we still need to do + // cleanup, and we aren't performing garbage collection on zones or + // sleds at the moment. 
+ assert_eq!(diff.sleds_added().len(), 0); assert_eq!(diff.sleds_removed().len(), 0); - let sleds = diff.sleds_modified().collect::>(); + let mut sleds = diff.sleds_modified().collect::>(); + + let expunged_modified = sleds.remove(&expunged_sled_id).unwrap(); + assert_all_zones_expunged(&expunged_modified, "expunged sled"); - // Only 2 of the 3 sleds should get additional Nexus zones. We expect a - // total of 6 new Nexus zones, which should be split evenly between the - // two sleds, while the non-provisionable sled should be unchanged. + let decommissioned_modified = + sleds.remove(&decommissioned_sled_id).unwrap(); + assert_all_zones_expunged( + &decommissioned_modified, + "decommissioned sled", + ); + + // Only 2 of the 3 remaining sleds (not the non-provisionable sled) + // should get additional Nexus zones. We expect a total of 6 new Nexus + // zones, which should be split evenly between the two sleds, while the + // non-provisionable sled should be unchanged. assert_eq!(sleds.len(), 2); let mut total_new_nexus_zones = 0; for (sled_id, sled_changes) in sleds { @@ -971,4 +1057,40 @@ mod test { logctx.cleanup_successful(); } + + fn assert_all_zones_expunged(modified: &DiffSledModified, desc: &str) { + assert_eq!( + modified.generation_before.next(), + modified.generation_after, + "for {desc}, generation should have been bumped" + ); + + assert_eq!( + modified.zones_added().count(), + 0, + "for {desc}, no zones should have been added to blueprint" + ); + + // A zone disposition going to expunged *does not* mean that the + // zone is actually removed, i.e. `zones_removed` is still 0. Any + // zone removal will be part of some future garbage collection + // process that isn't currently defined. + + assert_eq!( + modified.zones_removed().len(), + 0, + "for {desc}, no zones should have been removed from blueprint" + ); + + // Run through all the common zones and ensure that all of them + // have been marked expunged. 
+ for zone in modified.zones_modified() { + assert_eq!( + zone.zone_after.disposition, + BlueprintZoneDisposition::Expunged, + "for {desc}, zone {} should have been marked expunged", + zone.zone_after.id + ); + } + } } diff --git a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt index 7323008ad1..fe56567f65 100644 --- a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt +++ b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt @@ -7,7 +7,7 @@ to: blueprint 9d2c007b-46f1-4ff2-8b4c-8a5767030f76 UNCHANGED SLEDS: - sled 08c7046b-c9c4-4368-881f-19a72df22143: zones at generation 2 + sled 08c7046b-c9c4-4368-881f-19a72df22143: blueprint zones at generation 2 crucible 44afce85-3377-4b20-a398-517c1579df4d in service fd00:1122:3344:103::23 crucible 4644ea0c-0ec3-41be-a356-660308e1c3fc in service fd00:1122:3344:103::2c crucible 55f4d117-0b9d-4256-a2c0-f46d3ed5fff9 in service fd00:1122:3344:103::25 @@ -21,7 +21,7 @@ to: blueprint 9d2c007b-46f1-4ff2-8b4c-8a5767030f76 internal_ntp c81c9d4a-36d7-4796-9151-f564d3735152 in service fd00:1122:3344:103::21 nexus b2573120-9c91-4ed7-8b4f-a7bfe8dbc807 in service fd00:1122:3344:103::22 - sled 84ac367e-9b03-4e9d-a846-df1a08deee6c: zones at generation 2 + sled 84ac367e-9b03-4e9d-a846-df1a08deee6c: blueprint zones at generation 2 crucible 0faa9350-2c02-47c7-a0a6-9f4afd69152c in service fd00:1122:3344:101::2c crucible 5b44003e-1a3d-4152-b606-872c72efce0e in service fd00:1122:3344:101::25 crucible 943fea7a-9458-4935-9dc7-01ee5cfe5a02 in service fd00:1122:3344:101::29 @@ -35,7 +35,7 @@ to: blueprint 9d2c007b-46f1-4ff2-8b4c-8a5767030f76 internal_ntp 38b047ea-e3de-4859-b8e0-70cac5871446 in service fd00:1122:3344:101::21 nexus fb36b9dc-273a-4bc3-aaa9-19ee4d0ef552 in service fd00:1122:3344:101::22 - sled be7f4375-2a6b-457f-b1a4-3074a715e5fe: zones at generation 2 + sled be7f4375-2a6b-457f-b1a4-3074a715e5fe: blueprint zones at generation 2 crucible 248db330-56e6-4c7e-b5ff-9cd6cbcb210a in service fd00:1122:3344:102::2c crucible 353b0aff-4c71-4fae-a6bd-adcb1d2a1a1d in service fd00:1122:3344:102::29 crucible 4330134c-41b9-4097-aa0b-3eaefa06d473 in service fd00:1122:3344:102::24 diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt index 3aad697aa0..b135303ead 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_2_3.txt @@ -7,7 +7,7 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 UNCHANGED SLEDS: - sled 41f45d9f-766e-4ca6-a881-61ee45c80f57: zones at generation 2 + sled 41f45d9f-766e-4ca6-a881-61ee45c80f57: blueprint zones at generation 2 crucible 322ee9f1-8903-4542-a0a8-a54cefabdeca in service fd00:1122:3344:103::24 crucible 4ab1650f-32c5-447f-939d-64b8103a7645 in service fd00:1122:3344:103::2a crucible 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service fd00:1122:3344:103::27 @@ -21,7 +21,7 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 internal_ntp 267ed614-92af-4b9d-bdba-c2881c2e43a2 in service fd00:1122:3344:103::21 nexus cc816cfe-3869-4dde-b596-397d41198628 in service fd00:1122:3344:103::22 - sled 43677374-8d2f-4deb-8a41-eeea506db8e0: zones at generation 2 + sled 43677374-8d2f-4deb-8a41-eeea506db8e0: blueprint zones at generation 2 crucible 02acbe6a-1c88-47e3-94c3-94084cbde098 in service 
fd00:1122:3344:101::27 crucible 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service fd00:1122:3344:101::26 crucible 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service fd00:1122:3344:101::24 @@ -35,7 +35,7 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 internal_ntp 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service fd00:1122:3344:101::21 nexus c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service fd00:1122:3344:101::22 - sled 590e3034-d946-4166-b0e5-2d0034197a07: zones at generation 2 + sled 590e3034-d946-4166-b0e5-2d0034197a07: blueprint zones at generation 2 crucible 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service fd00:1122:3344:102::2a crucible 56d5d7cf-db2c-40a3-a775-003241ad4820 in service fd00:1122:3344:102::29 crucible 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service fd00:1122:3344:102::2b @@ -51,7 +51,7 @@ to: blueprint 4171ad05-89dd-474b-846b-b007e4346366 ADDED SLEDS: -+ sled b59ec570-2abb-4017-80ce-129d94e7a025: zones at generation 2 ++ sled b59ec570-2abb-4017-80ce-129d94e7a025: blueprint zones at generation 2 + internal_ntp 2d73d30e-ca47-46a8-9c12-917d4ab824b6 in service fd00:1122:3344:104::21 added METADATA: diff --git a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt index 233821412f..89120cf377 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_basic_add_sled_3_5.txt @@ -7,7 +7,7 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 UNCHANGED SLEDS: - sled 41f45d9f-766e-4ca6-a881-61ee45c80f57: zones at generation 2 + sled 41f45d9f-766e-4ca6-a881-61ee45c80f57: blueprint zones at generation 2 crucible 322ee9f1-8903-4542-a0a8-a54cefabdeca in service fd00:1122:3344:103::24 crucible 4ab1650f-32c5-447f-939d-64b8103a7645 in service fd00:1122:3344:103::2a crucible 64aa65f8-1ccb-4cd6-9953-027aebdac8ff in service fd00:1122:3344:103::27 @@ -21,7 +21,7 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 internal_ntp 267ed614-92af-4b9d-bdba-c2881c2e43a2 in service fd00:1122:3344:103::21 nexus cc816cfe-3869-4dde-b596-397d41198628 in service fd00:1122:3344:103::22 - sled 43677374-8d2f-4deb-8a41-eeea506db8e0: zones at generation 2 + sled 43677374-8d2f-4deb-8a41-eeea506db8e0: blueprint zones at generation 2 crucible 02acbe6a-1c88-47e3-94c3-94084cbde098 in service fd00:1122:3344:101::27 crucible 07c3c805-8888-4fe5-9543-3d2479dbe6f3 in service fd00:1122:3344:101::26 crucible 10d98a73-ec88-4aff-a7e8-7db6a87880e6 in service fd00:1122:3344:101::24 @@ -35,7 +35,7 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 internal_ntp 08c7f8aa-1ea9-469b-8cac-2fdbfc11ebcb in service fd00:1122:3344:101::21 nexus c66ab6d5-ff7a-46d1-9fd0-70cefa352d25 in service fd00:1122:3344:101::22 - sled 590e3034-d946-4166-b0e5-2d0034197a07: zones at generation 2 + sled 590e3034-d946-4166-b0e5-2d0034197a07: blueprint zones at generation 2 crucible 18f8fe40-646e-4962-b17a-20e201f3a6e5 in service fd00:1122:3344:102::2a crucible 56d5d7cf-db2c-40a3-a775-003241ad4820 in service fd00:1122:3344:102::29 crucible 6af7f4d6-33b6-4eb3-a146-d8e9e4ae9d66 in service fd00:1122:3344:102::2b @@ -51,7 +51,7 @@ to: blueprint f432fcd5-1284-4058-8b4a-9286a3de6163 MODIFIED SLEDS: -* sled b59ec570-2abb-4017-80ce-129d94e7a025: zones at generation: 2 -> 3 +* sled b59ec570-2abb-4017-80ce-129d94e7a025: blueprint zones at generation: 2 -> 3 internal_ntp 2d73d30e-ca47-46a8-9c12-917d4ab824b6 in service fd00:1122:3344:104::21 + crucible 
1a20ee3c-f66e-4fca-ab85-2a248aa3d79d in service fd00:1122:3344:104::2b added + crucible 28852beb-d0e5-4cba-9adb-e7f0cd4bb864 in service fd00:1122:3344:104::29 added diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index c19403906e..005d963475 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -1,89 +1,137 @@ from: blueprint 55502b1b-e255-438b-a16a-2680a4b5f962 to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 - ------------------------------------------------------------------------------------------------------ - zone type zone ID disposition underlay IP status - ------------------------------------------------------------------------------------------------------ - - UNCHANGED SLEDS: - - sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: zones at generation 2 - crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::26 - crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c - crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 - crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:105::23 - crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::25 - crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::28 - crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::24 - crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::2a - crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2b - crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::29 - internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 - nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 - - sled 48d95fef-bc9f-4f50-9a53-1e075836291d: zones at generation 2 - crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 in service fd00:1122:3344:103::2c - crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea in service fd00:1122:3344:103::25 - crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f in service fd00:1122:3344:103::27 - crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 in service fd00:1122:3344:103::28 - crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb in service fd00:1122:3344:103::24 - crucible 67622d61-2df4-414d-aa0e-d1277265f405 in service fd00:1122:3344:103::23 - crucible b91b271d-8d80-4f49-99a0-34006ae86063 in service fd00:1122:3344:103::2a - crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 in service fd00:1122:3344:103::26 - crucible e39d7c9e-182b-48af-af87-58079d723583 in service fd00:1122:3344:103::29 - crucible f69f92a1-5007-4bb0-a85b-604dc217154b in service fd00:1122:3344:103::2b - internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:103::21 - nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb in service fd00:1122:3344:103::22 - - sled 68d24ac5-f341-49ea-a92a-0381b52ab387: zones at generation 2 - crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 in service fd00:1122:3344:102::2c - crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in service fd00:1122:3344:102::23 - crucible 6464d025-4652-4948-919e-740bec5699b1 in service fd00:1122:3344:102::24 - crucible 6939ce48-b17c-4616-b176-8a419a7697be in service fd00:1122:3344:102::29 - crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service fd00:1122:3344:102::25 - crucible 
8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service fd00:1122:3344:102::2b - crucible 9fd52961-426f-4e62-a644-b70871103fca in service fd00:1122:3344:102::26 - crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service fd00:1122:3344:102::27 - crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service fd00:1122:3344:102::28 - crucible c407795c-6c8b-428e-8ab8-b962913c447f in service fd00:1122:3344:102::2a - internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d in service fd00:1122:3344:102::21 - nexus 01d58626-e1b0-480f-96be-ac784863c7dc in service fd00:1122:3344:102::22 - - MODIFIED SLEDS: - -* sled 75bc286f-2b4b-482c-9431-59272af529da: zones at generation: 2 -> 3 - crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 - crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c - crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 - crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::2a - crucible 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:104::26 - crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::29 - crucible b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::24 - crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::27 - crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service fd00:1122:3344:104::23 - crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b - internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 - nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 -+ nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d added -+ nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e added -+ nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f added - -* sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation: 2 -> 3 - crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 - crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 - crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 - crucible 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::26 - crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:101::23 - crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::2a - crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::2c - crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::28 - crucible c60379ba-4e30-4628-a79a-0ae509aef4c5 in service fd00:1122:3344:101::25 - crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b - internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 - nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 -+ nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:101::2e added -+ nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:101::2d added -+ nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f added + -------------------------------------------------------------------------------------------------------- + zone type zone ID disposition underlay IP status + -------------------------------------------------------------------------------------------------------- + + UNCHANGED SLEDS: + + sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: 
blueprint zones at generation 2 + crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::26 + crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c + crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 + crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:105::23 + crucible 9f0abbad-dbd3-4d43-9675-78092217ffd9 in service fd00:1122:3344:105::25 + crucible b0c63f48-01ea-4aae-bb26-fb0dd59d1662 in service fd00:1122:3344:105::28 + crucible c406da50-34b9-4bb4-a460-8f49875d2a6a in service fd00:1122:3344:105::24 + crucible d660d7ed-28c0-45ae-9ace-dc3ecf7e8786 in service fd00:1122:3344:105::2a + crucible e98cc0de-abf6-4da4-a20d-d05c7a9bb1d7 in service fd00:1122:3344:105::2b + crucible f55e6aaf-e8fc-4913-9e3c-8cd1bd4bdad3 in service fd00:1122:3344:105::29 + internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 + nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 + + MODIFIED SLEDS: + +* sled 48d95fef-bc9f-4f50-9a53-1e075836291d: blueprint zones at generation: 2 -> 3 +- crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 in service fd00:1122:3344:103::2c modified ++ ├─ expunged fd00:1122:3344:103::2c +* └─ changed: disposition +- crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea in service fd00:1122:3344:103::25 modified ++ ├─ expunged fd00:1122:3344:103::25 +* └─ changed: disposition +- crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f in service fd00:1122:3344:103::27 modified ++ ├─ expunged fd00:1122:3344:103::27 +* └─ changed: disposition +- crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 in service fd00:1122:3344:103::28 modified ++ ├─ expunged fd00:1122:3344:103::28 +* └─ changed: disposition +- crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb in service fd00:1122:3344:103::24 modified ++ ├─ expunged fd00:1122:3344:103::24 +* └─ changed: disposition +- crucible 67622d61-2df4-414d-aa0e-d1277265f405 in service fd00:1122:3344:103::23 modified ++ ├─ expunged fd00:1122:3344:103::23 +* └─ changed: disposition +- crucible b91b271d-8d80-4f49-99a0-34006ae86063 in service fd00:1122:3344:103::2a modified ++ ├─ expunged fd00:1122:3344:103::2a +* └─ changed: disposition +- crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 in service fd00:1122:3344:103::26 modified ++ ├─ expunged fd00:1122:3344:103::26 +* └─ changed: disposition +- crucible e39d7c9e-182b-48af-af87-58079d723583 in service fd00:1122:3344:103::29 modified ++ ├─ expunged fd00:1122:3344:103::29 +* └─ changed: disposition +- crucible f69f92a1-5007-4bb0-a85b-604dc217154b in service fd00:1122:3344:103::2b modified ++ ├─ expunged fd00:1122:3344:103::2b +* └─ changed: disposition +- internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:103::21 modified ++ ├─ expunged fd00:1122:3344:103::21 +* └─ changed: disposition +- nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb in service fd00:1122:3344:103::22 modified ++ ├─ expunged fd00:1122:3344:103::22 +* └─ changed: disposition + +* sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation: 2 -> 3 +- crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 in service fd00:1122:3344:102::2c modified ++ ├─ expunged fd00:1122:3344:102::2c +* └─ changed: disposition +- crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in service fd00:1122:3344:102::23 modified ++ ├─ expunged fd00:1122:3344:102::23 +* └─ changed: disposition +- crucible 6464d025-4652-4948-919e-740bec5699b1 in service fd00:1122:3344:102::24 modified ++ ├─ expunged fd00:1122:3344:102::24 +* └─ changed: 
disposition +- crucible 6939ce48-b17c-4616-b176-8a419a7697be in service fd00:1122:3344:102::29 modified ++ ├─ expunged fd00:1122:3344:102::29 +* └─ changed: disposition +- crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service fd00:1122:3344:102::25 modified ++ ├─ expunged fd00:1122:3344:102::25 +* └─ changed: disposition +- crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service fd00:1122:3344:102::2b modified ++ ├─ expunged fd00:1122:3344:102::2b +* └─ changed: disposition +- crucible 9fd52961-426f-4e62-a644-b70871103fca in service fd00:1122:3344:102::26 modified ++ ├─ expunged fd00:1122:3344:102::26 +* └─ changed: disposition +- crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service fd00:1122:3344:102::27 modified ++ ├─ expunged fd00:1122:3344:102::27 +* └─ changed: disposition +- crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service fd00:1122:3344:102::28 modified ++ ├─ expunged fd00:1122:3344:102::28 +* └─ changed: disposition +- crucible c407795c-6c8b-428e-8ab8-b962913c447f in service fd00:1122:3344:102::2a modified ++ ├─ expunged fd00:1122:3344:102::2a +* └─ changed: disposition +- internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d in service fd00:1122:3344:102::21 modified ++ ├─ expunged fd00:1122:3344:102::21 +* └─ changed: disposition +- nexus 01d58626-e1b0-480f-96be-ac784863c7dc in service fd00:1122:3344:102::22 modified ++ ├─ expunged fd00:1122:3344:102::22 +* └─ changed: disposition + +* sled 75bc286f-2b4b-482c-9431-59272af529da: blueprint zones at generation: 2 -> 3 + crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 + crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c + crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 + crucible 85b8c68a-160d-461d-94dd-1baf175fa75c in service fd00:1122:3344:104::2a + crucible 996d7570-b0df-46d5-aaa4-0c97697cf484 in service fd00:1122:3344:104::26 + crucible a732c489-d29a-4f75-b900-5966385943af in service fd00:1122:3344:104::29 + crucible b1783e95-9598-451d-b6ba-c50b52b428c3 in service fd00:1122:3344:104::24 + crucible c6dd531e-2d1d-423b-acc8-358533dab78c in service fd00:1122:3344:104::27 + crucible e4b3e159-3dbe-48cb-8497-e3da92a90e5a in service fd00:1122:3344:104::23 + crucible f0ff59e8-4105-4980-a4bb-a1f4c58de1e3 in service fd00:1122:3344:104::2b + internal_ntp 57b96d5c-b71e-43e4-8869-7d514003d00d in service fd00:1122:3344:104::21 + nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 ++ nexus 2ec75441-3d7d-4b4b-9614-af03de5a3666 in service fd00:1122:3344:104::2d added ++ nexus 508abd03-cbfe-4654-9a6d-7f15a1ad32e5 in service fd00:1122:3344:104::2e added ++ nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f added + +* sled affab35f-600a-4109-8ea0-34a067a4e0bc: blueprint zones at generation: 2 -> 3 + crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 + crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 + crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 + crucible 72c5a909-077d-4ec1-a9d5-ae64ef9d716e in service fd00:1122:3344:101::26 + crucible 95482c25-1e7f-43e8-adf1-e3548a1b3ae0 in service fd00:1122:3344:101::23 + crucible a1c03689-fc62-4ea5-bb72-4d01f5138614 in service fd00:1122:3344:101::2a + crucible a568e92e-4fbd-4b69-acd8-f16277073031 in service fd00:1122:3344:101::2c + crucible bf79a56a-97af-4cc4-94a5-8b20d64c2cda in service fd00:1122:3344:101::28 + crucible c60379ba-4e30-4628-a79a-0ae509aef4c5 in 
service fd00:1122:3344:101::25 + crucible d47f4996-fac0-4657-bcea-01b1fee6404d in service fd00:1122:3344:101::2b + internal_ntp f1a7b9a7-fc6a-4b23-b829-045ff33117ff in service fd00:1122:3344:101::21 + nexus 15c103f0-ac63-423b-ba5d-1b5fcd563ba3 in service fd00:1122:3344:101::22 ++ nexus 3ca5292f-8a59-4475-bb72-0f43714d0fff in service fd00:1122:3344:101::2e added ++ nexus 99f6d544-8599-4e2b-a55a-82d9e0034662 in service fd00:1122:3344:101::2d added ++ nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f added METADATA: internal DNS version: 1 (unchanged) diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt index 74dd0fbbaf..00ca05b4b8 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_2_2a.txt @@ -7,7 +7,7 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 UNCHANGED SLEDS: - sled 75bc286f-2b4b-482c-9431-59272af529da: zones at generation 3 + sled 75bc286f-2b4b-482c-9431-59272af529da: blueprint zones at generation 3 crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 @@ -24,7 +24,7 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 - sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation 3 + sled affab35f-600a-4109-8ea0-34a067a4e0bc: blueprint zones at generation 3 crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 @@ -43,23 +43,23 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 REMOVED SLEDS: -- sled 68d24ac5-f341-49ea-a92a-0381b52ab387: zones at generation 2 -- crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 in service fd00:1122:3344:102::2c removed -- crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in service fd00:1122:3344:102::23 removed -- crucible 6464d025-4652-4948-919e-740bec5699b1 in service fd00:1122:3344:102::24 removed -- crucible 6939ce48-b17c-4616-b176-8a419a7697be in service fd00:1122:3344:102::29 removed -- crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service fd00:1122:3344:102::25 removed -- crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service fd00:1122:3344:102::2b removed -- crucible 9fd52961-426f-4e62-a644-b70871103fca in service fd00:1122:3344:102::26 removed -- crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service fd00:1122:3344:102::27 removed -- crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service fd00:1122:3344:102::28 removed -- crucible c407795c-6c8b-428e-8ab8-b962913c447f in service fd00:1122:3344:102::2a removed -- internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d in service fd00:1122:3344:102::21 removed -- nexus 01d58626-e1b0-480f-96be-ac784863c7dc in service fd00:1122:3344:102::22 removed +- sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation 3 +- crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::2c removed +- crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::23 removed +- crucible 
6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::24 removed +- crucible 6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::29 removed +- crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:102::25 removed +- crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged fd00:1122:3344:102::2b removed +- crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::26 removed +- crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::27 removed +- crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::28 removed +- crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::2a removed +- internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d expunged fd00:1122:3344:102::21 removed +- nexus 01d58626-e1b0-480f-96be-ac784863c7dc expunged fd00:1122:3344:102::22 removed MODIFIED SLEDS: -* sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: zones at generation: 2 +* sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: blueprint zones at generation: 2 ! warning: generation should have changed crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 crucible 93b137a1-a1d6-4b5b-b2cb-21a9f11e2883 in service fd00:1122:3344:105::23 @@ -80,19 +80,19 @@ to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 + ├─ in service fd00:1122:3344:105::22 * └─ changed: zone type config -* sled 48d95fef-bc9f-4f50-9a53-1e075836291d: zones at generation: 2 -> 3 -- crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 in service fd00:1122:3344:103::2c removed -- crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea in service fd00:1122:3344:103::25 removed -- crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f in service fd00:1122:3344:103::27 removed -- crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 in service fd00:1122:3344:103::28 removed -- crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb in service fd00:1122:3344:103::24 removed -- crucible 67622d61-2df4-414d-aa0e-d1277265f405 in service fd00:1122:3344:103::23 removed -- crucible b91b271d-8d80-4f49-99a0-34006ae86063 in service fd00:1122:3344:103::2a removed -- crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 in service fd00:1122:3344:103::26 removed -- crucible e39d7c9e-182b-48af-af87-58079d723583 in service fd00:1122:3344:103::29 removed -- crucible f69f92a1-5007-4bb0-a85b-604dc217154b in service fd00:1122:3344:103::2b removed -- internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:103::21 removed -- nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb in service fd00:1122:3344:103::22 removed +* sled 48d95fef-bc9f-4f50-9a53-1e075836291d: blueprint zones at generation: 3 -> 4 +- crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 expunged fd00:1122:3344:103::2c removed +- crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea expunged fd00:1122:3344:103::25 removed +- crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f expunged fd00:1122:3344:103::27 removed +- crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 expunged fd00:1122:3344:103::28 removed +- crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb expunged fd00:1122:3344:103::24 removed +- crucible 67622d61-2df4-414d-aa0e-d1277265f405 expunged fd00:1122:3344:103::23 removed +- crucible b91b271d-8d80-4f49-99a0-34006ae86063 expunged fd00:1122:3344:103::2a removed +- crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 expunged fd00:1122:3344:103::26 removed +- crucible e39d7c9e-182b-48af-af87-58079d723583 expunged fd00:1122:3344:103::29 removed +- crucible f69f92a1-5007-4bb0-a85b-604dc217154b expunged fd00:1122:3344:103::2b removed +- internal_ntp 
67d913e0-0005-4599-9b28-0abbf6cc2916 expunged fd00:1122:3344:103::21 removed +- nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb expunged fd00:1122:3344:103::22 removed METADATA: internal DNS version: 1 (unchanged) diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt index 92cfd1f651..aa4da01852 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt @@ -5,7 +5,7 @@ parent: 55502b1b-e255-438b-a16a-2680a4b5f962 zone type zone ID disposition underlay IP -------------------------------------------------------------------------------------------- - sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: zones at generation 2 + sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: blueprint zones at generation 2 crucible 19fbc4f8-a683-4f22-8f5a-e74782b935be in service fd00:1122:3344:105::26 crucible 4f1ce8a2-d3a5-4a38-be4c-9817de52db37 in service fd00:1122:3344:105::2c crucible 6b53ab2e-d98c-485f-87a3-4d5df595390f in service fd00:1122:3344:105::27 @@ -19,35 +19,35 @@ parent: 55502b1b-e255-438b-a16a-2680a4b5f962 internal_ntp 7f4e9f9f-08f8-4d14-885d-e977c05525ad in service fd00:1122:3344:105::21 nexus 6dff7633-66bb-4924-a6ff-2c896e66964b in service fd00:1122:3344:105::22 - sled 48d95fef-bc9f-4f50-9a53-1e075836291d: zones at generation 2 - crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 in service fd00:1122:3344:103::2c - crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea in service fd00:1122:3344:103::25 - crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f in service fd00:1122:3344:103::27 - crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 in service fd00:1122:3344:103::28 - crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb in service fd00:1122:3344:103::24 - crucible 67622d61-2df4-414d-aa0e-d1277265f405 in service fd00:1122:3344:103::23 - crucible b91b271d-8d80-4f49-99a0-34006ae86063 in service fd00:1122:3344:103::2a - crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 in service fd00:1122:3344:103::26 - crucible e39d7c9e-182b-48af-af87-58079d723583 in service fd00:1122:3344:103::29 - crucible f69f92a1-5007-4bb0-a85b-604dc217154b in service fd00:1122:3344:103::2b - internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 in service fd00:1122:3344:103::21 - nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb in service fd00:1122:3344:103::22 + sled 48d95fef-bc9f-4f50-9a53-1e075836291d: blueprint zones at generation 3 + crucible 094f27af-1acb-4d1e-ba97-1fc1377d4bf2 expunged fd00:1122:3344:103::2c + crucible 0dcfdfc5-481e-4153-b97c-11cf02b648ea expunged fd00:1122:3344:103::25 + crucible 2f5e8010-a94d-43a4-9c5c-3f52832f5f7f expunged fd00:1122:3344:103::27 + crucible 4a9a0a9d-87f0-4f1d-9181-27f6b435e637 expunged fd00:1122:3344:103::28 + crucible 56ac1706-9e2a-49ba-bd6f-a99c44cb2ccb expunged fd00:1122:3344:103::24 + crucible 67622d61-2df4-414d-aa0e-d1277265f405 expunged fd00:1122:3344:103::23 + crucible b91b271d-8d80-4f49-99a0-34006ae86063 expunged fd00:1122:3344:103::2a + crucible d6ee1338-3127-43ec-9aaa-b973ccf05496 expunged fd00:1122:3344:103::26 + crucible e39d7c9e-182b-48af-af87-58079d723583 expunged fd00:1122:3344:103::29 + crucible f69f92a1-5007-4bb0-a85b-604dc217154b expunged fd00:1122:3344:103::2b + internal_ntp 67d913e0-0005-4599-9b28-0abbf6cc2916 expunged fd00:1122:3344:103::21 + nexus 2aa0ea4f-3561-4989-a98c-9ab7d9a240fb expunged fd00:1122:3344:103::22 - sled 68d24ac5-f341-49ea-a92a-0381b52ab387: zones at generation 2 - crucible 
3b3c14b6-a8e2-4054-a577-8d96cb576230 in service fd00:1122:3344:102::2c - crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc in service fd00:1122:3344:102::23 - crucible 6464d025-4652-4948-919e-740bec5699b1 in service fd00:1122:3344:102::24 - crucible 6939ce48-b17c-4616-b176-8a419a7697be in service fd00:1122:3344:102::29 - crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 in service fd00:1122:3344:102::25 - crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 in service fd00:1122:3344:102::2b - crucible 9fd52961-426f-4e62-a644-b70871103fca in service fd00:1122:3344:102::26 - crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 in service fd00:1122:3344:102::27 - crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 in service fd00:1122:3344:102::28 - crucible c407795c-6c8b-428e-8ab8-b962913c447f in service fd00:1122:3344:102::2a - internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d in service fd00:1122:3344:102::21 - nexus 01d58626-e1b0-480f-96be-ac784863c7dc in service fd00:1122:3344:102::22 + sled 68d24ac5-f341-49ea-a92a-0381b52ab387: blueprint zones at generation 3 + crucible 3b3c14b6-a8e2-4054-a577-8d96cb576230 expunged fd00:1122:3344:102::2c + crucible 47a87c6e-ef45-4d52-9a3e-69cdd96737cc expunged fd00:1122:3344:102::23 + crucible 6464d025-4652-4948-919e-740bec5699b1 expunged fd00:1122:3344:102::24 + crucible 6939ce48-b17c-4616-b176-8a419a7697be expunged fd00:1122:3344:102::29 + crucible 878dfddd-3113-4197-a3ea-e0d4dbe9b476 expunged fd00:1122:3344:102::25 + crucible 8d4d2b28-82bb-4e36-80da-1408d8c35d82 expunged fd00:1122:3344:102::2b + crucible 9fd52961-426f-4e62-a644-b70871103fca expunged fd00:1122:3344:102::26 + crucible b44cdbc0-0ce0-46eb-8b21-a09e113aa1d0 expunged fd00:1122:3344:102::27 + crucible b6b759d0-f60d-42b7-bbbc-9d61c9e895a9 expunged fd00:1122:3344:102::28 + crucible c407795c-6c8b-428e-8ab8-b962913c447f expunged fd00:1122:3344:102::2a + internal_ntp f3f2e4f3-0985-4ef6-8336-ce479382d05d expunged fd00:1122:3344:102::21 + nexus 01d58626-e1b0-480f-96be-ac784863c7dc expunged fd00:1122:3344:102::22 - sled 75bc286f-2b4b-482c-9431-59272af529da: zones at generation 3 + sled 75bc286f-2b4b-482c-9431-59272af529da: blueprint zones at generation 3 crucible 15bb9def-69b8-4d2e-b04f-9fee1143387c in service fd00:1122:3344:104::25 crucible 23a8fa2b-ef3e-4017-a43f-f7a83953bd7c in service fd00:1122:3344:104::2c crucible 621509d6-3772-4009-aca1-35eefd1098fb in service fd00:1122:3344:104::28 @@ -64,7 +64,7 @@ parent: 55502b1b-e255-438b-a16a-2680a4b5f962 nexus 59950bc8-1497-44dd-8cbf-b6502ba921b2 in service fd00:1122:3344:104::2f nexus b4947d31-f70e-4ee0-8817-0ca6cea9b16b in service fd00:1122:3344:104::22 - sled affab35f-600a-4109-8ea0-34a067a4e0bc: zones at generation 3 + sled affab35f-600a-4109-8ea0-34a067a4e0bc: blueprint zones at generation 3 crucible 0dfbf374-9ef9-430f-b06d-f271bf7f84c4 in service fd00:1122:3344:101::27 crucible 3aa07966-5899-4789-ace5-f8eeb375c6c3 in service fd00:1122:3344:101::24 crucible 4ad0e9da-08f8-4d40-b4d3-d17e711b5bbf in service fd00:1122:3344:101::29 @@ -82,8 +82,8 @@ parent: 55502b1b-e255-438b-a16a-2680a4b5f962 nexus c26b3bda-5561-44a1-a69f-22103fe209a1 in service fd00:1122:3344:101::2f METADATA: - created by: test_blueprint2 - created at: 1970-01-01T00:00:00.000Z - comment: sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: altered disks, sled 75bc286f-2b4b-482c-9431-59272af529da: altered disks, sled affab35f-600a-4109-8ea0-34a067a4e0bc: altered disks - internal DNS version: 1 - external DNS version: 1 + created by: test_blueprint2 + created at: 1970-01-01T00:00:00.000Z + comment: sled 
48d95fef-bc9f-4f50-9a53-1e075836291d (sled policy is expunged): 12 zones expunged, sled 68d24ac5-f341-49ea-a92a-0381b52ab387 (sled state is decommissioned): 12 zones expunged, sled 2d1cb4f2-cf44-40fc-b118-85036eb732a9: altered disks, sled 75bc286f-2b4b-482c-9431-59272af529da: altered disks, sled affab35f-600a-4109-8ea0-34a067a4e0bc: altered disks + internal DNS version: 1 + external DNS version: 1 diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index ba8477c125..7dbaf9aa79 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -159,6 +159,19 @@ impl Blueprint { }) } + // Temporary method that provides the list of Omicron zones using + // `TypedUuid`. + // + // In the future, `all_omicron_zones` will return `SledUuid`, + // and this method will go away. + pub fn all_omicron_zones_typed( + &self, + filter: BlueprintZoneFilter, + ) -> impl Iterator { + self.all_omicron_zones(filter) + .map(|(sled_id, z)| (SledUuid::from_untyped_uuid(sled_id), z)) + } + /// Iterate over the ids of all sleds in the blueprint pub fn sleds(&self) -> impl Iterator + '_ { self.blueprint_zones.keys().copied().map(SledUuid::from_untyped_uuid) @@ -1264,7 +1277,7 @@ mod table_display { for (sled_id, sled_zones) in blueprint_zones { let heading = format!( - "{SLED_INDENT}sled {sled_id}: zones at generation {}", + "{SLED_INDENT}sled {sled_id}: blueprint zones at generation {}", sled_zones.generation ); builder.make_section( @@ -1515,7 +1528,7 @@ mod table_display { section: &mut StSectionBuilder, ) { let heading = format!( - "{}{SLED_INDENT}sled {sled_id}: zones at generation {}", + "{}{SLED_INDENT}sled {sled_id}: blueprint zones at generation {}", kind.prefix(), sled_zones.generation, ); @@ -1550,26 +1563,28 @@ mod table_display { modified: &DiffSledModified, section: &mut StSectionBuilder, ) { - let (generation_heading, warning) = if modified.generation_before - != modified.generation_after - { - ( - format!( - "zones at generation: {} -> {}", - modified.generation_before, modified.generation_after, - ), - None, - ) - } else { - // Modified sleds should always see a generation bump. - ( - format!("zones at generation: {}", modified.generation_before), - Some(format!( - "{WARNING_PREFIX}{ZONE_HEAD_INDENT}\ + let (generation_heading, warning) = + if modified.generation_before != modified.generation_after { + ( + format!( + "blueprint zones at generation: {} -> {}", + modified.generation_before, modified.generation_after, + ), + None, + ) + } else { + // Modified sleds should always see a generation bump. 
+ ( + format!( + "blueprint zones at generation: {}", + modified.generation_before + ), + Some(format!( + "{WARNING_PREFIX}{ZONE_HEAD_INDENT}\ warning: generation should have changed" - )), - ) - }; + )), + ) + }; let sled_heading = format!("{MODIFIED_PREFIX}{SLED_INDENT}sled {sled_id}: {generation_heading}"); From 11c2b656e15fc2eda326f7b027f2a178f6d248bf Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 20 Apr 2024 07:49:45 +0000 Subject: [PATCH 179/334] chore(deps): update taiki-e/install-action digest to a94d7ba (#5590) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`93eca7e` -> `a94d7ba`](https://togithub.com/taiki-e/install-action/compare/93eca7e...a94d7ba) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index d3bb8daa24..2cdfa158ad 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@93eca7e3866e3af0ad7ae0a6f85da14894612ca8 # v2 + uses: taiki-e/install-action@a94d7ba8955e0861119ed8d3fddb8823ef7a97a8 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 0b95ee8e478dcd9695e4f9c6026c6eef460ee1c4 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Sat, 20 Apr 2024 06:32:13 -0400 Subject: [PATCH 180/334] RoT V1.0.10 (#5585) --- tools/permslip_production | 2 +- tools/permslip_staging | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/permslip_production b/tools/permslip_production index 331209b1f0..2cf844d9d3 100644 --- a/tools/permslip_production +++ b/tools/permslip_production @@ -1 +1 @@ -394b0bb7c759eead2e41cec98c2376e5e558d6b401418b56ca0db50d55d434ad manifest-oxide-rot-1-v1.0.9.toml +75bf4467effc6077958c926c19fe83c05a09b02795d4b0b6ad9191ed93a6d5b9 manifest-oxide-rot-1-v1.0.10.toml diff --git a/tools/permslip_staging b/tools/permslip_staging index ff5c866a4b..4e3a32c785 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,4 +1,4 @@ b1b0d63a179652fcc80fabbb49307c0fe28cf52744f58f7b8a768f14d6721a3f manifest-gimlet-v1.0.15.toml -686f5fff41ed3b33ba0be38d2becdeb67847705fd590f05f6d8f7c600db87fb7 manifest-oxide-rot-1-v1.0.9.toml +e34b2f363ed0e1399e175bfae9e5e50217255c7984154697180d8a2d4611f65d manifest-oxide-rot-1-v1.0.10.toml 8c7a57a733df2cbff4963bf32073066871aae26a7f9eca878490e8f125bd2688 manifest-psc-v1.0.15.toml 267c8953c26f91614a59015719162f6f8f55d31d795a458387191dd1d874f9f0 manifest-sidecar-v1.0.15.toml From a080dff0da81dcaae6c172736ffd71a9f24b184a Mon Sep 17 00:00:00 
2001 From: Rain Date: Sat, 20 Apr 2024 09:47:30 -0700 Subject: [PATCH 181/334] [deps] update oxidecomputer/tofino (#5475) Pull in the newer, lighter-weight crate which avoids a clap dependency. --- Cargo.lock | 141 +++++++++++++---------------------------------------- 1 file changed, 35 insertions(+), 106 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5e5783320f..9eeb22632e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -101,15 +101,6 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4b46cbb362ab8752921c97e041f5e366ee6297bd428a31275b9fcf1e380f7299" -[[package]] -name = "ansi_term" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2" -dependencies = [ - "winapi", -] - [[package]] name = "anstream" version = "0.6.11" @@ -1025,21 +1016,6 @@ dependencies = [ "libloading", ] -[[package]] -name = "clap" -version = "2.34.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0610544180c38b88101fecf2dd634b174a62eef6946f84dfc6a7127512b381c" -dependencies = [ - "ansi_term", - "atty", - "bitflags 1.3.2", - "strsim 0.8.0", - "textwrap 0.11.0", - "unicode-width", - "vec_map", -] - [[package]] name = "clap" version = "4.5.4" @@ -1314,7 +1290,7 @@ dependencies = [ "anes", "cast", "ciborium", - "clap 4.5.4", + "clap", "criterion-plot", "futures", "is-terminal", @@ -1957,7 +1933,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "clap 4.5.4", + "clap", "dns-service-client", "dropshot", "expectorate", @@ -2264,7 +2240,7 @@ dependencies = [ "async-trait", "base64 0.22.0", "chrono", - "clap 4.5.4", + "clap", "colored", "dhcproto", "http 0.2.12", @@ -2686,7 +2662,7 @@ name = "gateway-cli" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.4", + "clap", "futures", "gateway-client", "gateway-messages", @@ -3609,7 +3585,7 @@ dependencies = [ "bytes", "camino", "cancel-safe-futures", - "clap 4.5.4", + "clap", "display-error-chain", "futures", "hex", @@ -3670,7 +3646,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "clap 4.5.4", + "clap", "dropshot", "expectorate", "hyper 0.14.28", @@ -3752,7 +3728,7 @@ name = "internal-dns-cli" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.4", + "clap", "dropshot", "internal-dns", "omicron-common", @@ -3993,7 +3969,7 @@ dependencies = [ "anstyle", "anyhow", "camino", - "clap 4.5.4", + "clap", "colored", "futures", "libc", @@ -4103,7 +4079,7 @@ version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fefdf21230d6143476a28adbee3d930e2b68a3d56443c777cae3fe9340eebff9" dependencies = [ - "clap 4.5.4", + "clap", "escape8259", "termcolor", "threadpool", @@ -4160,7 +4136,7 @@ version = "0.2.4" source = "git+https://github.com/oxidecomputer/lpc55_support#96f064eaae5e95930efaab6c29fd1b2e22225dac" dependencies = [ "bitfield", - "clap 4.5.4", + "clap", "packed_struct", "serde", ] @@ -5287,7 +5263,7 @@ dependencies = [ "anyhow", "camino", "camino-tempfile", - "clap 4.5.4", + "clap", "dropshot", "expectorate", "futures", @@ -5321,7 +5297,7 @@ dependencies = [ "anyhow", "base64 0.22.0", "camino", - "clap 4.5.4", + "clap", "dropshot", "expectorate", "futures", @@ -5372,7 +5348,7 @@ dependencies = [ "camino-tempfile", "cancel-safe-futures", "chrono", - "clap 4.5.4", + "clap", "criterion", "crucible-agent-client", "crucible-pantry-client", @@ -5489,7 +5465,7 @@ dependencies = [ "camino", "camino-tempfile", "chrono", - "clap 
4.5.4", + "clap", "crossterm", "crucible-agent-client", "csv", @@ -5531,7 +5507,7 @@ dependencies = [ "strum", "subprocess", "tabled", - "textwrap 0.16.1", + "textwrap", "tokio", "unicode-width", "uuid 1.8.0", @@ -5543,7 +5519,7 @@ version = "0.1.0" dependencies = [ "anyhow", "camino", - "clap 4.5.4", + "clap", "expectorate", "futures", "hex", @@ -5610,7 +5586,7 @@ dependencies = [ "cancel-safe-futures", "cfg-if", "chrono", - "clap 4.5.4", + "clap", "crucible-agent-client", "derive_more", "display-error-chain", @@ -5758,7 +5734,7 @@ dependencies = [ "bytes", "chrono", "cipher", - "clap 4.5.4", + "clap", "clap_builder", "console", "const-oid", @@ -6137,7 +6113,7 @@ dependencies = [ "anyhow", "camino", "chrono", - "clap 4.5.4", + "clap", "dropshot", "expectorate", "futures", @@ -6181,7 +6157,7 @@ dependencies = [ "bytes", "camino", "chrono", - "clap 4.5.4", + "clap", "crossterm", "dropshot", "expectorate", @@ -6254,7 +6230,7 @@ version = "0.1.0" dependencies = [ "anyhow", "chrono", - "clap 4.5.4", + "clap", "dropshot", "nexus-client", "omicron-common", @@ -6276,7 +6252,7 @@ dependencies = [ "anyhow", "camino", "chrono", - "clap 4.5.4", + "clap", "omicron-workspace-hack", "sigpipe", "uuid 1.8.0", @@ -7124,7 +7100,7 @@ dependencies = [ "anyhow", "atty", "base64 0.21.7", - "clap 4.5.4", + "clap", "dropshot", "futures", "hyper 0.14.28", @@ -7413,7 +7389,7 @@ dependencies = [ "assert_matches", "camino", "camino-tempfile", - "clap 4.5.4", + "clap", "dns-service-client", "dropshot", "expectorate", @@ -8974,7 +8950,7 @@ version = "0.1.0" dependencies = [ "anyhow", "async-trait", - "clap 4.5.4", + "clap", "dropshot", "futures", "gateway-messages", @@ -9152,12 +9128,6 @@ dependencies = [ "vte", ] -[[package]] -name = "strsim" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ea5119cdb4c55b55d432abb513a0429384878c15dde60cc77b1c99de1a95a6a" - [[package]] name = "strsim" version = "0.10.0" @@ -9216,30 +9186,6 @@ dependencies = [ "syn 2.0.59", ] -[[package]] -name = "structopt" -version = "0.3.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c6b5c64445ba8094a6ab0c3cd2ad323e07171012d9c98b0b15651daf1787a10" -dependencies = [ - "clap 2.34.0", - "lazy_static", - "structopt-derive", -] - -[[package]] -name = "structopt-derive" -version = "0.4.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcb5ae327f9cc13b68763b5749770cb9e048a99bd9dfdfa58d0cf05d5f64afe0" -dependencies = [ - "heck 0.3.3", - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "strum" version = "0.26.2" @@ -9523,15 +9469,6 @@ dependencies = [ "syn 2.0.59", ] -[[package]] -name = "textwrap" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d326610f408c7a4eb6f51c37c330e496b08506c9457c9d34287ecc38809fb060" -dependencies = [ - "unicode-width", -] - [[package]] name = "textwrap" version = "0.16.1" @@ -9714,13 +9651,11 @@ dependencies = [ [[package]] name = "tofino" version = "0.1.0" -source = "git+http://github.com/oxidecomputer/tofino?branch=main#8283f8021068f055484b653f0cc6b4d5c0979dc1" +source = "git+http://github.com/oxidecomputer/tofino?branch=main#1b66b89c3727d2191082df057b068ec52560e334" dependencies = [ "anyhow", "cc", - "chrono", "illumos-devinfo", - "structopt", ] [[package]] @@ -10156,7 +10091,7 @@ dependencies = [ "assert_cmd", "camino", "chrono", - "clap 4.5.4", + "clap", "console", "datatest-stable", "fs-err", @@ -10433,7 +10368,7 @@ 
dependencies = [ "camino", "camino-tempfile", "chrono", - "clap 4.5.4", + "clap", "debug-ignore", "display-error-chain", "dropshot", @@ -10464,7 +10399,7 @@ dependencies = [ "camino", "camino-tempfile", "cancel-safe-futures", - "clap 4.5.4", + "clap", "debug-ignore", "derive-where", "either", @@ -10660,12 +10595,6 @@ version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "vec_map" -version = "0.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bddf1187be692e79c5ffeab891132dfb0f236ed36a43c7ed39f1165ee20191" - [[package]] name = "version_check" version = "0.9.4" @@ -10908,7 +10837,7 @@ dependencies = [ "buf-list", "camino", "ciborium", - "clap 4.5.4", + "clap", "crossterm", "futures", "humantime", @@ -10933,7 +10862,7 @@ dependencies = [ "slog-term", "supports-color", "tempfile", - "textwrap 0.16.1", + "textwrap", "tokio", "tokio-util", "toml 0.8.12", @@ -10969,7 +10898,7 @@ dependencies = [ "bytes", "camino", "ciborium", - "clap 4.5.4", + "clap", "crossterm", "omicron-workspace-hack", "reedline", @@ -10994,7 +10923,7 @@ dependencies = [ "bytes", "camino", "camino-tempfile", - "clap 4.5.4", + "clap", "debug-ignore", "display-error-chain", "dpd-client", @@ -11320,7 +11249,7 @@ dependencies = [ "camino", "cargo_metadata", "cargo_toml", - "clap 4.5.4", + "clap", "fs-err", "macaddr", "serde", @@ -11468,7 +11397,7 @@ name = "zone-network-setup" version = "0.1.0" dependencies = [ "anyhow", - "clap 4.5.4", + "clap", "dropshot", "illumos-utils", "omicron-common", From 84e9c27e5f1af5d7de17384baea3b3639096a68e Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Sat, 20 Apr 2024 12:47:39 -0400 Subject: [PATCH 182/334] Remove ability to generate a Blueprint from an inventory collection (#5583) We want to add information in blueprints, particularly `BlueprintZoneConfig` and `BlueprintZoneType`, that is not present in the sled-agent types `OmicronZoneConfig` and `OmicronZoneType`. Today on main conversion between those types is bidirectional. As we add to blueprints, we will continue to be able to convert a `BlueprintZoneConfig` into an `OmicronZoneConfig`, but the opposite direction will become more and more difficult as callers need to provide the additional information required for blueprints. We have enough users of the inventory -> blueprint direction that it's quite painful to try to add to blueprints, so this PR attempts to knock out one of the more common uses: converting an inventory collection into a blueprint via `BlueprintBuilder::build_initial_from_collection()`. This method is removed, and all the remaining changes are fallout from that. Most uses of this have been replaced by either the blueprint produced by `ExampleSystem` or the new `BlueprintBuilder::build_empty_with_sleds()` helper for constructing an empty blueprint. One test needed the full blueprint-from-inventory, so that one actually gained a new use of converting OmicronZoneConfig -> BlueprintZoneConfig. (Not ideal, but fine for now!) 
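For reference, a rough sketch of how callers move to the new helper. This is
illustrative only: it assumes a `PlanningInput` value named `input` is in scope
(for example, the one produced by the planning crate's `example()` test helper),
and it uses the names and paths visible in the diff below.

    use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder;
    use nexus_types::deployment::SledFilter;

    // Before this change, tests called
    // `BlueprintBuilder::build_initial_from_collection(&collection, ...)`,
    // which required an inventory collection and returned a Result.
    //
    // After this change, an initial blueprint is built directly from a set of
    // sled IDs; each sled gets an empty zone config and the call cannot fail.
    let parent = BlueprintBuilder::build_empty_with_sleds(
        input.all_sled_ids(SledFilter::All),
        "test suite",
    );

    // Tests that want reproducible blueprint IDs can use the seeded variant,
    // passing a seed value (the test name is the usual choice).
    let seeded = BlueprintBuilder::build_empty_with_sleds_seeded(
        input.all_sled_ids(SledFilter::All),
        "test suite",
        "my-test-name",
    );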
--- dev-tools/omdb/src/bin/omdb/nexus.rs | 31 -- dev-tools/reconfigurator-cli/src/main.rs | 37 -- .../db-queries/src/db/datastore/deployment.rs | 136 ++---- nexus/db-queries/src/db/datastore/rack.rs | 451 +++++++++--------- nexus/reconfigurator/execution/src/dns.rs | 135 +++--- .../planning/src/blueprint_builder/builder.rs | 266 +++-------- .../planning/src/blueprint_builder/zones.rs | 11 +- nexus/reconfigurator/planning/src/example.rs | 44 +- nexus/reconfigurator/planning/src/planner.rs | 90 +--- .../output/blueprint_builder_initial_diff.txt | 2 +- .../output/planner_nonprovisionable_1_2.txt | 2 +- .../output/planner_nonprovisionable_bp2.txt | 2 +- nexus/src/app/deployment.rs | 32 -- nexus/src/internal_api/http_entrypoints.rs | 29 -- nexus/types/src/deployment.rs | 37 +- openapi/nexus-internal.json | 49 -- 16 files changed, 448 insertions(+), 906 deletions(-) diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index a7fcc6badc..67b91e0280 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -97,8 +97,6 @@ enum BlueprintsCommands { Delete(BlueprintIdArgs), /// Interact with the current target blueprint Target(BlueprintsTargetArgs), - /// Generate an initial blueprint from a specific inventory collection - GenerateFromCollection(CollectionIdArgs), /// Generate a new blueprint Regenerate, /// Import a blueprint @@ -361,15 +359,6 @@ impl NexusArgs { let token = omdb.check_allow_destructive()?; cmd_nexus_blueprints_regenerate(&client, token).await } - NexusCommands::Blueprints(BlueprintsArgs { - command: BlueprintsCommands::GenerateFromCollection(args), - }) => { - let token = omdb.check_allow_destructive()?; - cmd_nexus_blueprints_generate_from_collection( - &client, args, token, - ) - .await - } NexusCommands::Blueprints(BlueprintsArgs { command: BlueprintsCommands::Import(args), }) => { @@ -1134,26 +1123,6 @@ async fn cmd_nexus_blueprints_target_set_enabled( Ok(()) } -async fn cmd_nexus_blueprints_generate_from_collection( - client: &nexus_client::Client, - args: &CollectionIdArgs, - _destruction_token: DestructiveOperationToken, -) -> Result<(), anyhow::Error> { - let blueprint = client - .blueprint_generate_from_collection( - &nexus_client::types::CollectionId { - collection_id: args.collection_id, - }, - ) - .await - .context("creating blueprint from collection id")?; - eprintln!( - "created blueprint {} from collection id {}", - blueprint.id, args.collection_id - ); - Ok(()) -} - async fn cmd_nexus_blueprints_regenerate( client: &nexus_client::Client, _destruction_token: DestructiveOperationToken, diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index 58d310f56e..ae4a6bd648 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -314,9 +314,6 @@ fn process_entry(sim: &mut ReconfiguratorSim, entry: String) -> LoopResult { Commands::InventoryList => cmd_inventory_list(sim), Commands::InventoryGenerate => cmd_inventory_generate(sim), Commands::BlueprintList => cmd_blueprint_list(sim), - Commands::BlueprintFromInventory(args) => { - cmd_blueprint_from_inventory(sim, args) - } Commands::BlueprintEdit(args) => cmd_blueprint_edit(sim, args), Commands::BlueprintPlan(args) => cmd_blueprint_plan(sim, args), Commands::BlueprintShow(args) => cmd_blueprint_show(sim, args), @@ -374,8 +371,6 @@ enum Commands { /// list all blueprints BlueprintList, - /// generate a blueprint that represents the contents of an inventory - 
BlueprintFromInventory(InventoryArgs), /// run planner to generate a new blueprint BlueprintPlan(BlueprintPlanArgs), /// edit contents of a blueprint directly @@ -718,38 +713,6 @@ fn cmd_blueprint_list( Ok(Some(table)) } -fn cmd_blueprint_from_inventory( - sim: &mut ReconfiguratorSim, - args: InventoryArgs, -) -> anyhow::Result> { - let collection_id = args.collection_id; - let collection = sim - .collections - .get(&collection_id) - .ok_or_else(|| anyhow!("no such collection: {}", collection_id))?; - let dns_version = Generation::new(); - let planning_input = sim - .system - .to_planning_input_builder() - .context("generating planning_input builder")? - .build(); - let creator = "reconfigurator-sim"; - let blueprint = BlueprintBuilder::build_initial_from_collection( - collection, - dns_version, - dns_version, - planning_input.all_sled_ids(SledFilter::All), - creator, - ) - .context("building collection")?; - let rv = format!( - "generated blueprint {} from inventory collection {}", - blueprint.id, collection_id - ); - sim.blueprint_insert_new(blueprint); - Ok(Some(rv)) -} - fn cmd_blueprint_plan( sim: &mut ReconfiguratorSim, args: BlueprintPlanArgs, diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 4d5b753c7f..5a17b39fdd 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1263,12 +1263,12 @@ mod tests { use nexus_inventory::now_db_precision; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::blueprint_builder::Ensure; + use nexus_reconfigurator_planning::example::example; use nexus_test_utils::db::test_setup_database; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::PlanningInputBuilder; - use nexus_types::deployment::Policy; use nexus_types::deployment::SledDetails; use nexus_types::deployment::SledDisk; use nexus_types::deployment::SledFilter; @@ -1279,7 +1279,6 @@ mod tests { use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_common::address::Ipv6Subnet; - use omicron_common::api::external::Generation; use omicron_common::disk::DiskIdentity; use omicron_test_utils::dev; use omicron_uuid_kinds::PhysicalDiskUuid; @@ -1288,6 +1287,7 @@ mod tests { use pretty_assertions::assert_eq; use rand::thread_rng; use rand::Rng; + use slog::Logger; use std::mem; use std::net::Ipv6Addr; @@ -1359,65 +1359,32 @@ mod tests { } } - // Create a `Policy` that contains all the sleds found in `collection` - fn policy_from_collection(collection: &Collection) -> Policy { - Policy { - service_ip_pool_ranges: Vec::new(), - target_nexus_zone_count: collection - .all_omicron_zones() - .filter(|z| z.zone_type.is_nexus()) - .count(), - } - } + fn representative( + log: &Logger, + test_name: &str, + ) -> (Collection, PlanningInput, Blueprint) { + // We'll start with an example system. + let (mut base_collection, planning_input, mut blueprint) = + example(log, test_name, 3); - fn representative() -> (Collection, PlanningInput, Blueprint) { - // We'll start with a representative collection... + // Take a more thorough collection representative (includes SPs, + // etc.)... let mut collection = nexus_inventory::examples::representative().builder.build(); - // ...and then mutate it such that the omicron zones it reports match - // the sled agent IDs it reports. 
Steal the sled agent info and drop the - // fake sled-agent IDs: - let mut empty_map = BTreeMap::new(); - mem::swap(&mut empty_map, &mut collection.sled_agents); - let mut sled_agents = empty_map.into_values().collect::>(); - - // Now reinsert them with IDs pulled from the omicron zones. This - // assumes we have more fake sled agents than omicron zones, which is - // currently true for the representative collection. - for &sled_id in collection.omicron_zones.keys() { - let some_sled_agent = sled_agents.pop().expect( - "fewer representative sled agents than \ - representative omicron zones sleds", - ); - collection.sled_agents.insert(sled_id, some_sled_agent); - } + // ... and replace its sled agent and Omicron zones with those from our + // example system. + mem::swap( + &mut collection.sled_agents, + &mut base_collection.sled_agents, + ); + mem::swap( + &mut collection.omicron_zones, + &mut base_collection.omicron_zones, + ); - let policy = policy_from_collection(&collection); - let planning_input = { - let mut builder = PlanningInputBuilder::new( - policy, - Generation::new(), - Generation::new(), - ); - for (sled_id, agent) in &collection.sled_agents { - builder - .add_sled( - *sled_id, - fake_sled_details(Some(*agent.sled_agent_address.ip())), - ) - .expect("failed to add sled to representative"); - } - builder.build() - }; - let blueprint = BlueprintBuilder::build_initial_from_collection( - &collection, - Generation::new(), - Generation::new(), - planning_input.all_sled_ids(SledFilter::All), - "test", - ) - .unwrap(); + // Treat this blueprint as the initial blueprint for the system. + blueprint.parent_blueprint_id = None; (collection, planning_input, blueprint) } @@ -1442,17 +1409,11 @@ mod tests { let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; - // Create an empty collection and a blueprint from it - let collection = - nexus_inventory::CollectionBuilder::new("test").build(); - let blueprint1 = BlueprintBuilder::build_initial_from_collection( - &collection, - Generation::new(), - Generation::new(), + // Create an empty blueprint from it + let blueprint1 = BlueprintBuilder::build_empty_with_sleds( std::iter::empty(), "test", - ) - .unwrap(); + ); let authz_blueprint = authz_blueprint_from_id(blueprint1.id); // Trying to read it from the database should fail with the relevant @@ -1471,7 +1432,7 @@ mod tests { let blueprint_read = datastore .blueprint_read(&opctx, &authz_blueprint) .await - .expect("failed to read collection back"); + .expect("failed to read blueprint back"); assert_eq!(blueprint1, blueprint_read); assert_eq!( blueprint_list_all_ids(&opctx, &datastore).await, @@ -1501,13 +1462,15 @@ mod tests { #[tokio::test] async fn test_representative_blueprint() { + const TEST_NAME: &str = "test_representative_blueprint"; // Setup - let logctx = dev::test_setup_log("test_representative_blueprint"); + let logctx = dev::test_setup_log(TEST_NAME); let mut db = test_setup_database(&logctx.log).await; let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create a cohesive representative collection/policy/blueprint - let (collection, planning_input, blueprint1) = representative(); + let (collection, planning_input, blueprint1) = + representative(&logctx.log, TEST_NAME); let authz_blueprint1 = authz_blueprint_from_id(blueprint1.id); // Write it to the database and read it back. 
@@ -1632,10 +1595,23 @@ mod tests { let blueprint2 = builder.build(); let authz_blueprint2 = authz_blueprint_from_id(blueprint2.id); + let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); + println!("b1 -> b2: {}", diff.display()); + println!("b1 disks: {:?}", blueprint1.blueprint_disks); + println!("b2 disks: {:?}", blueprint2.blueprint_disks); // Check that we added the new sled, as well as its disks and zones. assert_eq!( - blueprint1.blueprint_disks.len() + new_sled_zpools.len(), - blueprint2.blueprint_disks.len(), + blueprint1 + .blueprint_disks + .values() + .map(|c| c.disks.len()) + .sum::() + + new_sled_zpools.len(), + blueprint2 + .blueprint_disks + .values() + .map(|c| c.disks.len()) + .sum::() ); assert_eq!( blueprint1.blueprint_zones.len() + 1, @@ -1757,16 +1733,10 @@ mod tests { // Create three blueprints: // * `blueprint1` has no parent // * `blueprint2` and `blueprint3` both have `blueprint1` as parent - let collection = - nexus_inventory::CollectionBuilder::new("test").build(); - let blueprint1 = BlueprintBuilder::build_initial_from_collection( - &collection, - Generation::new(), - Generation::new(), + let blueprint1 = BlueprintBuilder::build_empty_with_sleds( std::iter::empty(), "test1", - ) - .unwrap(); + ); let blueprint2 = BlueprintBuilder::new_based_on( &logctx.log, &blueprint1, @@ -1911,16 +1881,10 @@ mod tests { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create an initial blueprint and a child. - let collection = - nexus_inventory::CollectionBuilder::new("test").build(); - let blueprint1 = BlueprintBuilder::build_initial_from_collection( - &collection, - Generation::new(), - Generation::new(), + let blueprint1 = BlueprintBuilder::build_empty_with_sleds( std::iter::empty(), "test1", - ) - .unwrap(); + ); let blueprint2 = BlueprintBuilder::new_based_on( &logctx.log, &blueprint1, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 45793d26f7..0f4b1b245e 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -959,20 +959,19 @@ mod test { use async_bb8_diesel::AsyncSimpleConnection; use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; use nexus_db_model::{DnsGroup, Generation, InitialDnsGroup, SledUpdate}; - use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; + use nexus_inventory::now_db_precision; use nexus_reconfigurator_planning::system::{ SledBuilder, SystemDescription, }; use nexus_test_utils::db::test_setup_database; - use nexus_types::deployment::OmicronZoneConfig; - use nexus_types::deployment::OmicronZonesConfig; - use nexus_types::deployment::SledFilter; + use nexus_types::deployment::BlueprintZoneConfig; + use nexus_types::deployment::BlueprintZoneDisposition; + use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::external_api::shared::SiloIdentityMode; use nexus_types::identity::Asset; use nexus_types::internal_api::params::DnsRecord; use nexus_types::inventory::NetworkInterface; use nexus_types::inventory::NetworkInterfaceKind; - use nexus_types::inventory::OmicronZoneType; use omicron_common::address::{ DNS_OPTE_IPV4_SUBNET, NEXUS_OPTE_IPV4_SUBNET, NTP_OPTE_IPV4_SUBNET, }; @@ -982,8 +981,9 @@ mod test { }; use omicron_common::api::internal::shared::SourceNatConfig; use omicron_test_utils::dev; + use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::TypedUuid; - use omicron_uuid_kinds::{GenericUuid, SledUuid, ZpoolUuid}; + use omicron_uuid_kinds::{GenericUuid, ZpoolUuid}; use 
sled_agent_client::types::OmicronZoneDataset; use std::collections::{BTreeMap, HashMap}; use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddr, SocketAddrV6}; @@ -1270,61 +1270,52 @@ mod test { SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled3.id())), ) .expect("failed to add sled3"); - let planning_input = system - .to_planning_input_builder() - .expect("failed to make planning input") - .build(); - let mut inventory_builder = system - .to_collection_builder() - .expect("failed to make collection builder"); let external_dns_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)); let external_dns_pip = DNS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) .unwrap(); - let external_dns_id = Uuid::new_v4(); + let external_dns_id = OmicronZoneUuid::new_v4(); let nexus_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 6)); let nexus_pip = NEXUS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) .unwrap(); - let nexus_id = Uuid::new_v4(); + let nexus_id = OmicronZoneUuid::new_v4(); let ntp1_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 5)); let ntp1_pip = NTP_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) .unwrap(); - let ntp1_id = Uuid::new_v4(); + let ntp1_id = OmicronZoneUuid::new_v4(); let ntp2_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 5)); let ntp2_pip = NTP_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 2) .unwrap(); - let ntp2_id = Uuid::new_v4(); - let ntp3_id = Uuid::new_v4(); + let ntp2_id = OmicronZoneUuid::new_v4(); + let ntp3_id = OmicronZoneUuid::new_v4(); let mut macs = MacAddr::iter_system(); - // Add services for our sleds to the inventory (which will cause them to - // be present in the blueprint we'll generate from it). - inventory_builder - .found_sled_omicron_zones( - "sled1", - SledUuid::from_untyped_uuid(sled1.id()), - OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![ - OmicronZoneConfig { - id: external_dns_id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::ExternalDns { + let mut blueprint_zones = BTreeMap::new(); + blueprint_zones.insert( + sled1.id(), + BlueprintZonesConfig { + generation: Generation::new().next(), + zones: vec![ + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: external_dns_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { dataset: random_dataset(), - http_address: "[::1]:80".to_string(), + http_address: "[::1]:80".parse().unwrap(), dns_address: SocketAddr::new( external_dns_ip, 53, - ) - .to_string(), + ), nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: external_dns_id, + id: external_dns_id.into_untyped_uuid(), }, name: "external-dns".parse().unwrap(), ip: external_dns_pip.into(), @@ -1338,19 +1329,22 @@ mod test { slot: 0, }, }, - }, - OmicronZoneConfig { - id: ntp1_id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::BoundaryNtp { - address: "[::1]:80".to_string(), + ), + }, + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: ntp1_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address: "[::1]:80".parse().unwrap(), ntp_servers: vec![], dns_servers: vec![], domain: None, nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: ntp1_id, + id: ntp1_id.into_untyped_uuid(), }, name: "ntp1".parse().unwrap(), ip: ntp1_pip.into(), @@ -1368,30 +1362,30 
@@ mod test { ) .unwrap(), }, - }, - ], - }, - ) - .expect("recording Omicron zones"); - inventory_builder - .found_sled_omicron_zones( - "sled2", - SledUuid::from_untyped_uuid(sled2.id()), - OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![ - OmicronZoneConfig { - id: nexus_id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Nexus { - internal_address: "[::1]:80".to_string(), + ), + }, + ], + }, + ); + blueprint_zones.insert( + sled2.id(), + BlueprintZonesConfig { + generation: Generation::new().next(), + zones: vec![ + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: nexus_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: "[::1]:80".parse().unwrap(), external_ip: nexus_ip, external_tls: false, external_dns_servers: vec![], nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: nexus_id, + id: nexus_id.into_untyped_uuid(), }, name: "nexus".parse().unwrap(), ip: nexus_pip.into(), @@ -1405,19 +1399,22 @@ mod test { slot: 0, }, }, - }, - OmicronZoneConfig { - id: ntp2_id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::BoundaryNtp { - address: "[::1]:80".to_string(), + ), + }, + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: ntp2_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::BoundaryNtp( + blueprint_zone_type::BoundaryNtp { + address: "[::1]:80".parse().unwrap(), ntp_servers: vec![], dns_servers: vec![], domain: None, nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: ntp2_id, + id: ntp2_id.into_untyped_uuid(), }, name: "ntp2".parse().unwrap(), ip: ntp2_pip.into(), @@ -1435,39 +1432,44 @@ mod test { ) .unwrap(), }, - }, - ], - }, - ) - .expect("recording Omicron zones"); - inventory_builder - .found_sled_omicron_zones( - "sled3", - SledUuid::from_untyped_uuid(sled3.id()), - OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![OmicronZoneConfig { - id: ntp3_id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::InternalNtp { - address: "[::1]:80".to_string(), + ), + }, + ], + }, + ); + blueprint_zones.insert( + sled3.id(), + BlueprintZonesConfig { + generation: Generation::new().next(), + zones: vec![BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: ntp3_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::InternalNtp( + blueprint_zone_type::InternalNtp { + address: "[::1]:80".parse().unwrap(), ntp_servers: vec![], dns_servers: vec![], domain: None, }, - }], - }, - ) - .expect("recording Omicron zones"); - let blueprint = BlueprintBuilder::build_initial_from_collection_seeded( - &inventory_builder.build(), - *Generation::new(), - *Generation::new(), - planning_input.all_sled_ids(SledFilter::All), - "test suite", - (test_name, "initial blueprint"), - ) - .expect("failed to build blueprint"); + ), + }], + }, + ); + for zone_config in blueprint_zones.values_mut() { + zone_config.sort(); + } + let blueprint = Blueprint { + id: Uuid::new_v4(), + blueprint_zones, + blueprint_disks: BTreeMap::new(), + parent_blueprint_id: None, + internal_dns_version: *Generation::new(), + external_dns_version: *Generation::new(), + time_created: now_db_precision(), + creator: "test suite".to_string(), + comment: "test blueprint".to_string(), + }; let rack = datastore .rack_set_initialized( @@ -1497,23 
+1499,23 @@ mod test { assert_eq!(observed_external_ips.len(), 4); let dns_external_ip = observed_external_ips .iter() - .find(|e| e.parent_id == Some(external_dns_id)) + .find(|e| e.parent_id == Some(external_dns_id.into_untyped_uuid())) .unwrap(); let nexus_external_ip = observed_external_ips .iter() - .find(|e| e.parent_id == Some(nexus_id)) + .find(|e| e.parent_id == Some(nexus_id.into_untyped_uuid())) .unwrap(); let ntp1_external_ip = observed_external_ips .iter() - .find(|e| e.parent_id == Some(ntp1_id)) + .find(|e| e.parent_id == Some(ntp1_id.into_untyped_uuid())) .unwrap(); let ntp2_external_ip = observed_external_ips .iter() - .find(|e| e.parent_id == Some(ntp2_id)) + .find(|e| e.parent_id == Some(ntp2_id.into_untyped_uuid())) .unwrap(); assert!(!observed_external_ips .iter() - .any(|e| e.parent_id == Some(ntp3_id))); + .any(|e| e.parent_id == Some(ntp3_id.into_untyped_uuid()))); assert!(dns_external_ip.is_service); assert_eq!(dns_external_ip.kind, IpKind::Floating); @@ -1600,16 +1602,9 @@ mod test { SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled.id())), ) .expect("failed to add sled"); - let planning_input = system - .to_planning_input_builder() - .expect("failed to make planning input") - .build(); - let mut inventory_builder = system - .to_collection_builder() - .expect("failed to make collection builder"); - - let nexus_id1 = Uuid::new_v4(); - let nexus_id2 = Uuid::new_v4(); + + let nexus_id1 = OmicronZoneUuid::new_v4(); + let nexus_id2 = OmicronZoneUuid::new_v4(); let nexus_pip1 = NEXUS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) .unwrap(); @@ -1618,25 +1613,26 @@ mod test { .unwrap(); let mut macs = MacAddr::iter_system(); - inventory_builder - .found_sled_omicron_zones( - "sled", - SledUuid::from_untyped_uuid(sled.id()), - OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![ - OmicronZoneConfig { - id: nexus_id1, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Nexus { - internal_address: "[::1]:80".to_string(), + let mut blueprint_zones = BTreeMap::new(); + blueprint_zones.insert( + sled.id(), + BlueprintZonesConfig { + generation: Generation::new().next(), + zones: vec![ + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: nexus_id1, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: "[::1]:80".parse().unwrap(), external_ip: nexus_ip_start.into(), external_tls: false, external_dns_servers: vec![], nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: nexus_id1, + id: nexus_id1.into_untyped_uuid(), }, name: "nexus1".parse().unwrap(), ip: nexus_pip1.into(), @@ -1650,19 +1646,22 @@ mod test { slot: 0, }, }, - }, - OmicronZoneConfig { - id: nexus_id2, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Nexus { - internal_address: "[::1]:80".to_string(), + ), + }, + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: nexus_id2, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: "[::1]:80".parse().unwrap(), external_ip: nexus_ip_end.into(), external_tls: false, external_dns_servers: vec![], nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: nexus_id2, + id: nexus_id2.into_untyped_uuid(), }, name: "nexus2".parse().unwrap(), ip: nexus_pip2.into(), @@ -1676,11 +1675,11 @@ mod test { slot: 0, }, }, - }, 
- ], - }, - ) - .expect("recording Omicron zones"); + ), + }, + ], + }, + ); let datasets = vec![]; @@ -1706,15 +1705,20 @@ mod test { HashMap::from([("api.sys".to_string(), external_records.clone())]), ); - let blueprint = BlueprintBuilder::build_initial_from_collection_seeded( - &inventory_builder.build(), - *Generation::new(), - *Generation::new(), - planning_input.all_sled_ids(SledFilter::All), - "test suite", - (test_name, "initial blueprint"), - ) - .expect("failed to build blueprint"); + for zone_config in blueprint_zones.values_mut() { + zone_config.sort(); + } + let blueprint = Blueprint { + id: Uuid::new_v4(), + blueprint_zones, + blueprint_disks: BTreeMap::new(), + parent_blueprint_id: None, + internal_dns_version: *Generation::new(), + external_dns_version: *Generation::new(), + time_created: now_db_precision(), + creator: "test suite".to_string(), + comment: "test blueprint".to_string(), + }; let rack = datastore .rack_set_initialized( @@ -1866,38 +1870,32 @@ mod test { SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled.id())), ) .expect("failed to add sled"); - let planning_input = system - .to_planning_input_builder() - .expect("failed to make planning input") - .build(); - let mut inventory_builder = system - .to_collection_builder() - .expect("failed to make collection builder"); let nexus_ip = IpAddr::V4(Ipv4Addr::new(1, 2, 3, 4)); let nexus_pip = NEXUS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) .unwrap(); - let nexus_id = Uuid::new_v4(); + let nexus_id = OmicronZoneUuid::new_v4(); let mut macs = MacAddr::iter_system(); - inventory_builder - .found_sled_omicron_zones( - "sled", - SledUuid::from_untyped_uuid(sled.id()), - OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![OmicronZoneConfig { - id: nexus_id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Nexus { - internal_address: "[::1]:80".to_string(), + let mut blueprint_zones = BTreeMap::new(); + blueprint_zones.insert( + sled.id(), + BlueprintZonesConfig { + generation: Generation::new().next(), + zones: vec![BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: nexus_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: "[::1]:80".parse().unwrap(), external_ip: nexus_ip, external_tls: false, external_dns_servers: vec![], nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: nexus_id, + id: nexus_id.into_untyped_uuid(), }, name: "nexus".parse().unwrap(), ip: nexus_pip.into(), @@ -1911,20 +1909,24 @@ mod test { slot: 0, }, }, - }], - }, - ) - .expect("recording Omicron zones"); - - let blueprint = BlueprintBuilder::build_initial_from_collection_seeded( - &inventory_builder.build(), - *Generation::new(), - *Generation::new(), - planning_input.all_sled_ids(SledFilter::All), - "test suite", - (test_name, "initial blueprint"), - ) - .expect("failed to build blueprint"); + ), + }], + }, + ); + for zone_config in blueprint_zones.values_mut() { + zone_config.sort(); + } + let blueprint = Blueprint { + id: Uuid::new_v4(), + blueprint_zones, + blueprint_disks: BTreeMap::new(), + parent_blueprint_id: None, + internal_dns_version: *Generation::new(), + external_dns_version: *Generation::new(), + time_created: now_db_precision(), + creator: "test suite".to_string(), + comment: "test blueprint".to_string(), + }; let result = datastore .rack_set_initialized( @@ -1964,44 +1966,37 @@ mod test { 
SledBuilder::new().id(TypedUuid::from_untyped_uuid(sled.id())), ) .expect("failed to add sled"); - let planning_input = system - .to_planning_input_builder() - .expect("failed to make planning input") - .build(); - let mut inventory_builder = system - .to_collection_builder() - .expect("failed to make collection builder"); // Request two services which happen to be using the same IP address. - let external_dns_id = Uuid::new_v4(); + let external_dns_id = OmicronZoneUuid::new_v4(); let external_dns_pip = DNS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) .unwrap(); - let nexus_id = Uuid::new_v4(); + let nexus_id = OmicronZoneUuid::new_v4(); let nexus_pip = NEXUS_OPTE_IPV4_SUBNET .nth(NUM_INITIAL_RESERVED_IP_ADDRESSES as u32 + 1) .unwrap(); let mut macs = MacAddr::iter_system(); - inventory_builder - .found_sled_omicron_zones( - "sled", - SledUuid::from_untyped_uuid(sled.id()), - OmicronZonesConfig { - generation: Generation::new().next(), - zones: vec![ - OmicronZoneConfig { - id: external_dns_id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::ExternalDns { + let mut blueprint_zones = BTreeMap::new(); + blueprint_zones.insert( + sled.id(), + BlueprintZonesConfig { + generation: Generation::new().next(), + zones: vec![ + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: external_dns_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::ExternalDns( + blueprint_zone_type::ExternalDns { dataset: random_dataset(), - http_address: "[::1]:80".to_string(), - dns_address: SocketAddr::new(ip, 53) - .to_string(), + http_address: "[::1]:80".parse().unwrap(), + dns_address: SocketAddr::new(ip, 53), nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: external_dns_id, + id: external_dns_id.into_untyped_uuid(), }, name: "external-dns".parse().unwrap(), ip: external_dns_pip.into(), @@ -2015,19 +2010,22 @@ mod test { slot: 0, }, }, - }, - OmicronZoneConfig { - id: nexus_id, - underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Nexus { - internal_address: "[::1]:80".to_string(), + ), + }, + BlueprintZoneConfig { + disposition: BlueprintZoneDisposition::InService, + id: nexus_id, + underlay_address: Ipv6Addr::LOCALHOST, + zone_type: BlueprintZoneType::Nexus( + blueprint_zone_type::Nexus { + internal_address: "[::1]:80".parse().unwrap(), external_ip: ip, external_tls: false, external_dns_servers: vec![], nic: NetworkInterface { id: Uuid::new_v4(), kind: NetworkInterfaceKind::Service { - id: nexus_id, + id: nexus_id.into_untyped_uuid(), }, name: "nexus".parse().unwrap(), ip: nexus_pip.into(), @@ -2041,21 +2039,26 @@ mod test { slot: 0, }, }, - }, - ], - }, - ) - .expect("recording Omicron zones"); + ), + }, + ], + }, + ); - let blueprint = BlueprintBuilder::build_initial_from_collection_seeded( - &inventory_builder.build(), - *Generation::new(), - *Generation::new(), - planning_input.all_sled_ids(SledFilter::All), - "test suite", - (test_name, "initial blueprint"), - ) - .expect("failed to build blueprint"); + for zone_config in blueprint_zones.values_mut() { + zone_config.sort(); + } + let blueprint = Blueprint { + id: Uuid::new_v4(), + blueprint_zones, + blueprint_disks: BTreeMap::new(), + parent_blueprint_id: None, + internal_dns_version: *Generation::new(), + external_dns_version: *Generation::new(), + time_created: now_db_precision(), + creator: "test suite".to_string(), + comment: "test blueprint".to_string(), + }; let result = datastore 
.rack_set_initialized( diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index 79eb86fe09..c93ac94408 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -465,7 +465,7 @@ mod test { use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; - use nexus_inventory::CollectionBuilder; + use nexus_inventory::now_db_precision; use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::blueprint_builder::EnsureMultiple; use nexus_reconfigurator_planning::example::example; @@ -476,13 +476,9 @@ mod test { use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; - use nexus_types::deployment::SledDisk; - use nexus_types::deployment::SledFilter; - use nexus_types::deployment::SledResources; + use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::external_api::params; use nexus_types::external_api::shared; - use nexus_types::external_api::views::PhysicalDiskPolicy; - use nexus_types::external_api::views::PhysicalDiskState; use nexus_types::identity::Resource; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::internal_api::params::DnsConfigZone; @@ -497,11 +493,9 @@ mod test { use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; use omicron_common::api::external::IdentityMetadataCreateParams; - use omicron_common::disk::DiskIdentity; use omicron_test_utils::dev::test_setup_log; + use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::OmicronZoneUuid; - use omicron_uuid_kinds::PhysicalDiskUuid; - use omicron_uuid_kinds::ZpoolUuid; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashMap; @@ -510,23 +504,11 @@ mod test { use std::net::Ipv6Addr; use std::net::SocketAddrV6; use std::sync::Arc; + use uuid::Uuid; type ControlPlaneTestContext = nexus_test_utils::ControlPlaneTestContext; - fn blueprint_empty() -> Blueprint { - let builder = CollectionBuilder::new("test-suite"); - let collection = builder.build(); - BlueprintBuilder::build_initial_from_collection( - &collection, - Generation::new(), - Generation::new(), - std::iter::empty(), - "test-suite", - ) - .expect("failed to generate empty blueprint") - } - fn dns_config_empty() -> DnsConfigParams { DnsConfigParams { generation: 1, @@ -541,7 +523,10 @@ mod test { /// test blueprint_internal_dns_config(): trivial case of an empty blueprint #[test] fn test_blueprint_internal_dns_empty() { - let blueprint = blueprint_empty(); + let blueprint = BlueprintBuilder::build_empty_with_sleds( + std::iter::empty(), + "test-suite", + ); let blueprint_dns = blueprint_internal_dns_config( &blueprint, &BTreeMap::new(), @@ -566,45 +551,46 @@ mod test { let rack_subnet = ipnet::Ipv6Net::new(rack_subnet_base, RACK_PREFIX).unwrap(); let possible_sled_subnets = rack_subnet.subnets(SLED_PREFIX).unwrap(); - // Ignore sleds with no associated zones in the inventory. - // This is included in the "representative" collection, but it's - // not allowed by BlueprintBuilder::build_initial_from_collection(). 
- let policy_sleds = collection - .omicron_zones - .keys() - .zip(possible_sled_subnets) - .map(|(sled_id, subnet)| { - let sled_resources = SledResources { - zpools: BTreeMap::from([( - ZpoolUuid::new_v4(), - SledDisk { - disk_identity: DiskIdentity { - vendor: String::from("v"), - serial: format!("s-{sled_id}"), - model: String::from("m"), - }, - disk_id: PhysicalDiskUuid::new_v4(), - policy: PhysicalDiskPolicy::InService, - state: PhysicalDiskState::Active, - }, - )]), - subnet: Ipv6Subnet::new(subnet.network()), - }; - (*sled_id, sled_resources) - }) - .collect::>(); + + // Convert the inventory `OmicronZonesConfig`s into + // `BlueprintZonesConfig`. This is going to get more painful over time + // as we add to blueprints, but for now we can make this work. + let mut blueprint_zones = BTreeMap::new(); + for (sled_id, zones_config) in collection.omicron_zones { + blueprint_zones.insert( + sled_id.into_untyped_uuid(), + BlueprintZonesConfig { + generation: zones_config.zones.generation, + zones: zones_config + .zones + .zones + .into_iter() + .map(|config| { + BlueprintZoneConfig::from_omicron_zone_config( + config, + BlueprintZoneDisposition::InService, + ) + .expect("failed to convert zone config") + }) + .collect(), + }, + ); + } let dns_empty = dns_config_empty(); let initial_dns_generation = Generation::from(u32::try_from(dns_empty.generation).unwrap()); - let mut blueprint = BlueprintBuilder::build_initial_from_collection( - &collection, - initial_dns_generation, - Generation::new(), - policy_sleds.keys().copied(), - "test-suite", - ) - .expect("failed to build initial blueprint"); + let mut blueprint = Blueprint { + id: Uuid::new_v4(), + blueprint_zones, + blueprint_disks: BTreeMap::new(), + parent_blueprint_id: None, + internal_dns_version: initial_dns_generation, + external_dns_version: Generation::new(), + time_created: now_db_precision(), + creator: "test-suite".to_string(), + comment: "test blueprint".to_string(), + }; // To make things slightly more interesting, let's add a zone that's // not currently in service. @@ -630,18 +616,23 @@ mod test { // To generate the blueprint's DNS config, we need to make up a // different set of information about the Quiesced fake system. - let sleds_by_id = policy_sleds - .iter() + let sleds_by_id = blueprint + .blueprint_zones + .keys() + .zip(possible_sled_subnets) .enumerate() - .map(|(i, (sled_id, sled_resources))| { + .map(|(i, (sled_id, subnet))| { + let sled_id = SledUuid::from_untyped_uuid(*sled_id); let sled_info = Sled { - id: *sled_id, - sled_agent_address: get_sled_address(sled_resources.subnet), + id: sled_id, + sled_agent_address: get_sled_address(Ipv6Subnet::new( + subnet.network(), + )), // The first two of these (arbitrarily) will be marked // Scrimlets. 
is_scrimlet: i < 2, }; - (*sled_id, sled_info) + (sled_id, sled_info) }) .collect(); @@ -693,7 +684,8 @@ mod test { .iter() .filter_map(|(sled_id, sled)| { if sled.is_scrimlet { - let sled_subnet = policy_sleds.get(sled_id).unwrap().subnet; + let sled_subnet = + sleds_by_id.get(sled_id).unwrap().subnet(); let switch_zone_ip = get_switch_zone_address(sled_subnet); Some((switch_zone_ip, *sled_id)) } else { @@ -829,16 +821,9 @@ mod test { async fn test_blueprint_external_dns_basic() { static TEST_NAME: &str = "test_blueprint_external_dns_basic"; let logctx = test_setup_log(TEST_NAME); - let (collection, input) = example(&logctx.log, TEST_NAME, 5); - let initial_external_dns_generation = Generation::new(); - let mut blueprint = BlueprintBuilder::build_initial_from_collection( - &collection, - Generation::new(), - initial_external_dns_generation, - input.all_sled_ids(SledFilter::All), - "test suite", - ) - .expect("failed to generate initial blueprint"); + let (_, _, mut blueprint) = example(&logctx.log, TEST_NAME, 5); + blueprint.internal_dns_version = Generation::new(); + blueprint.external_dns_version = Generation::new(); let my_silo = Silo::new(params::SiloCreate { identity: IdentityMetadataCreateParams { diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index e1621a11c8..a58b96162b 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -24,13 +24,11 @@ use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::DiskFilter; -use nexus_types::deployment::InvalidOmicronZoneType; use nexus_types::deployment::OmicronZoneDataset; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; -use nexus_types::inventory::Collection; use omicron_common::address::get_internal_dns_server_addresses; use omicron_common::address::get_sled_address; use omicron_common::address::get_switch_zone_address; @@ -92,8 +90,6 @@ pub enum Error { ExhaustedNexusIps, #[error("programming error in planner")] Planner(#[from] anyhow::Error), - #[error("invalid OmicronZoneType in collection")] - InvalidOmicronZoneType(#[from] InvalidOmicronZoneType), } /// Describes whether an idempotent "ensure" operation resulted in action taken @@ -129,11 +125,11 @@ fn zpool_id_to_external_name(zpool_id: ZpoolUuid) -> anyhow::Result { /// /// There are two basic ways to assemble a new blueprint: /// -/// 1. Build one directly from a collection. Such blueprints have no parent -/// blueprint. They are not customizable. Use -/// [`BlueprintBuilder::build_initial_from_collection`] for this. This would -/// generally only be used once in the lifetime of a rack, to assemble the -/// first blueprint. +/// 1. Build one directly. This would generally only be used once in the +/// lifetime of a rack, to assemble the first blueprint during rack setup. +/// It is also common in tests. To start with a blueprint that contains an +/// empty zone config for some number of sleds, use +/// [`BlueprintBuilder::build_empty_with_sleds`]. /// /// 2. Build one _from_ another blueprint, called the "parent", making changes /// as desired. Use [`BlueprintBuilder::new_based_on`] for this. 
Once the @@ -174,102 +170,57 @@ pub struct BlueprintBuilder<'a> { } impl<'a> BlueprintBuilder<'a> { - /// Directly construct a `Blueprint` from the contents of a particular - /// collection (representing no changes from the collection state) - pub fn build_initial_from_collection( - collection: &Collection, - internal_dns_version: Generation, - external_dns_version: Generation, - all_sleds: impl Iterator, + /// Directly construct a `Blueprint` that contains an empty zone config for + /// the given sleds. + pub fn build_empty_with_sleds( + sled_ids: impl Iterator, creator: &str, - ) -> Result { - Self::build_initial_impl( - collection, - internal_dns_version, - external_dns_version, - all_sleds, + ) -> Blueprint { + Self::build_empty_with_sleds_impl( + sled_ids, creator, BlueprintBuilderRng::new(), ) } - /// A version of [`Self::build_initial_from_collection`] that allows the + /// A version of [`Self::build_empty_with_sleds`] that allows the /// blueprint ID to be generated from a random seed. - pub fn build_initial_from_collection_seeded( - collection: &Collection, - internal_dns_version: Generation, - external_dns_version: Generation, - all_sleds: impl Iterator, + pub fn build_empty_with_sleds_seeded( + sled_ids: impl Iterator, creator: &str, seed: H, - ) -> Result { + ) -> Blueprint { let mut rng = BlueprintBuilderRng::new(); rng.set_seed(seed); - Self::build_initial_impl( - collection, - internal_dns_version, - external_dns_version, - all_sleds, - creator, - rng, - ) + Self::build_empty_with_sleds_impl(sled_ids, creator, rng) } - fn build_initial_impl( - collection: &Collection, - internal_dns_version: Generation, - external_dns_version: Generation, - all_sleds: impl Iterator, + fn build_empty_with_sleds_impl( + sled_ids: impl Iterator, creator: &str, mut rng: BlueprintBuilderRng, - ) -> Result { - let blueprint_zones = all_sleds + ) -> Blueprint { + let blueprint_zones = sled_ids .map(|sled_id| { - let zones = collection - .omicron_zones - .get(&sled_id) - .map(|z| &z.zones) - .ok_or_else(|| { - // We should not find a sled that's supposed to be - // in-service but is not part of the inventory. It's - // not that that can't ever happen. This could happen - // when a sled is first being added to the system. Of - // course it could also happen if this sled agent failed - // our inventory request. But this is the initial - // blueprint (so this shouldn't be the "add sled" case) - // and we want to get it right (so we don't want to - // leave out sleds whose sled agent happened to be down - // when we tried to do this). The operator (or, more - // likely, a support person) will have to sort out - // what's going on if this happens. 
- Error::Planner(anyhow!( - "building initial blueprint: sled {:?} is \ - supposed to be in service but has no zones \ - in inventory", - sled_id - )) - })?; - let config = - BlueprintZonesConfig::initial_from_collection(&zones)?; - - Ok(( - // TODO-cleanup use `TypedUuid` everywhere - sled_id.into_untyped_uuid(), - config, - )) + let config = BlueprintZonesConfig { + generation: Generation::new(), + zones: Vec::new(), + }; + (sled_id.into_untyped_uuid(), config) }) - .collect::>()?; - Ok(Blueprint { + .collect::>(); + let num_sleds = blueprint_zones.len(); + Blueprint { id: rng.blueprint_rng.next(), blueprint_zones, blueprint_disks: BTreeMap::new(), parent_blueprint_id: None, - internal_dns_version, - external_dns_version, + internal_dns_version: Generation::new(), + external_dns_version: Generation::new(), time_created: now_db_precision(), creator: creator.to_owned(), - comment: format!("from collection {}", collection.id), - }) + comment: format!("starting blueprint with {num_sleds} empty sleds"), + } } /// Construct a new `BlueprintBuilder` based on a previous blueprint, @@ -1206,7 +1157,6 @@ pub mod test { use nexus_types::deployment::BlueprintZoneFilter; use omicron_common::address::IpRange; use omicron_test_utils::dev::test_setup_log; - use sled_agent_client::types::OmicronZoneType; use std::collections::BTreeSet; use test_strategy::proptest; @@ -1239,18 +1189,8 @@ pub mod test { // describes no changes. static TEST_NAME: &str = "blueprint_builder_test_initial"; let logctx = test_setup_log(TEST_NAME); - let (collection, input) = + let (collection, input, blueprint_initial) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - let blueprint_initial = - BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - Generation::new(), - Generation::new(), - input.all_sled_ids(SledFilter::All), - "the_test", - TEST_NAME, - ) - .expect("failed to create initial blueprint"); verify_blueprint(&blueprint_initial); let diff = @@ -1425,21 +1365,14 @@ pub mod test { fn test_add_physical_disks() { static TEST_NAME: &str = "blueprint_builder_test_add_physical_disks"; let logctx = test_setup_log(TEST_NAME); - let (collection, input) = - example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + let (_, input, _) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - // We don't care about the DNS versions here. - let internal_dns_version = Generation::new(); - let external_dns_version = Generation::new(); - let parent = BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - internal_dns_version, - external_dns_version, + // Start with an empty blueprint (sleds with no zones). + let parent = BlueprintBuilder::build_empty_with_sleds_seeded( input.all_sled_ids(SledFilter::All), "test", TEST_NAME, - ) - .expect("failed to create initial blueprint"); + ); { // We start empty, and can add a disk @@ -1477,33 +1410,19 @@ pub mod test { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_with_no_existing_nexus_zones"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, input) = - example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - - // We don't care about the DNS versions here. - let internal_dns_version = Generation::new(); - let external_dns_version = Generation::new(); - - // Adding a new Nexus zone currently requires copying settings from an - // existing Nexus zone. If we remove all Nexus zones from the - // collection, create a blueprint, then try to add a Nexus zone, it - // should fail. 
- for zones in collection.omicron_zones.values_mut() { - zones.zones.zones.retain(|z| { - !matches!(z.zone_type, OmicronZoneType::Nexus { .. }) - }); - } - let parent = BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - internal_dns_version, - external_dns_version, + // Discard the example blueprint and start with an empty one. + let (collection, input, _) = + example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); + let parent = BlueprintBuilder::build_empty_with_sleds_seeded( input.all_sled_ids(SledFilter::All), "test", TEST_NAME, - ) - .expect("failed to create initial blueprint"); + ); + // Adding a new Nexus zone currently requires copying settings from an + // existing Nexus zone. `parent` has no zones, so we should fail if we + // try to add a Nexus zone. let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &parent, @@ -1536,13 +1455,9 @@ pub mod test { fn test_add_nexus_error_cases() { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_error_cases"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, input) = + let (mut collection, input, mut parent) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - // We don't care about the DNS versions here. - let internal_dns_version = Generation::new(); - let external_dns_version = Generation::new(); - // Remove the Nexus zone from one of the sleds so that // `sled_ensure_zone_nexus` can attempt to add a Nexus zone to // `sled_id`. @@ -1550,27 +1465,22 @@ pub mod test { let mut selected_sled_id = None; for (sled_id, zones) in &mut collection.omicron_zones { let nzones_before_retain = zones.zones.zones.len(); - zones.zones.zones.retain(|z| { - !matches!(z.zone_type, OmicronZoneType::Nexus { .. }) - }); + zones.zones.zones.retain(|z| !z.zone_type.is_nexus()); if zones.zones.zones.len() < nzones_before_retain { selected_sled_id = Some(*sled_id); + // Also remove this zone from the blueprint. + parent + .blueprint_zones + .get_mut(sled_id.as_untyped_uuid()) + .expect("missing sled") + .zones + .retain(|z| !z.zone_type.is_nexus()); break; } } selected_sled_id.expect("found no sleds with Nexus zone") }; - let parent = BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - internal_dns_version, - external_dns_version, - input.all_sled_ids(SledFilter::All), - "test", - TEST_NAME, - ) - .expect("failed to create initial blueprint"); - { // Attempting to add Nexus to the sled we removed it from (with no // other changes to the environment) should succeed. @@ -1657,7 +1567,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_zones_with_same_external_ip"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, input) = + let (_, input, mut parent) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1667,10 +1577,12 @@ pub mod test { let mut found_second_nexus_zone = false; let mut nexus_external_ip = None; - 'outer: for zones in collection.omicron_zones.values_mut() { - for z in zones.zones.zones.iter_mut() { - if let OmicronZoneType::Nexus { external_ip, .. } = - &mut z.zone_type + 'outer: for zones in parent.blueprint_zones.values_mut() { + for z in zones.zones.iter_mut() { + if let BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + external_ip, + .. 
+ }) = &mut z.zone_type { if let Some(ip) = nexus_external_ip { *external_ip = ip; @@ -1685,16 +1597,6 @@ pub mod test { } assert!(found_second_nexus_zone, "only one Nexus zone present?"); - let parent = BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - Generation::new(), - Generation::new(), - input.all_sled_ids(SledFilter::All), - "test", - TEST_NAME, - ) - .unwrap(); - match BlueprintBuilder::new_based_on( &logctx.log, &parent, @@ -1717,7 +1619,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_nexus_zones_with_same_nic_ip"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, input) = + let (_, input, mut parent) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1727,9 +1629,13 @@ pub mod test { let mut found_second_nexus_zone = false; let mut nexus_nic_ip = None; - 'outer: for zones in collection.omicron_zones.values_mut() { - for z in zones.zones.zones.iter_mut() { - if let OmicronZoneType::Nexus { nic, .. } = &mut z.zone_type { + 'outer: for zones in parent.blueprint_zones.values_mut() { + for z in zones.zones.iter_mut() { + if let BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + .. + }) = &mut z.zone_type + { if let Some(ip) = nexus_nic_ip { nic.ip = ip; found_second_nexus_zone = true; @@ -1743,16 +1649,6 @@ pub mod test { } assert!(found_second_nexus_zone, "only one Nexus zone present?"); - let parent = BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - Generation::new(), - Generation::new(), - input.all_sled_ids(SledFilter::All), - "test", - TEST_NAME, - ) - .unwrap(); - match BlueprintBuilder::new_based_on( &logctx.log, &parent, @@ -1775,7 +1671,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_zones_with_same_vnic_mac"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, input) = + let (_, input, mut parent) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1785,9 +1681,13 @@ pub mod test { let mut found_second_nexus_zone = false; let mut nexus_nic_mac = None; - 'outer: for zones in collection.omicron_zones.values_mut() { - for z in zones.zones.zones.iter_mut() { - if let OmicronZoneType::Nexus { nic, .. } = &mut z.zone_type { + 'outer: for zones in parent.blueprint_zones.values_mut() { + for z in zones.zones.iter_mut() { + if let BlueprintZoneType::Nexus(blueprint_zone_type::Nexus { + nic, + .. 
+ }) = &mut z.zone_type + { if let Some(mac) = nexus_nic_mac { nic.mac = mac; found_second_nexus_zone = true; @@ -1801,16 +1701,6 @@ pub mod test { } assert!(found_second_nexus_zone, "only one Nexus zone present?"); - let parent = BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - Generation::new(), - Generation::new(), - input.all_sled_ids(SledFilter::All), - "test", - TEST_NAME, - ) - .unwrap(); - match BlueprintBuilder::new_based_on( &logctx.log, &parent, diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs index 5f8c0625a7..c0e0918503 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs @@ -217,16 +217,7 @@ mod tests { let logctx = test_setup_log(TEST_NAME); let mut example = ExampleSystem::new(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - let blueprint_initial = - BlueprintBuilder::build_initial_from_collection_seeded( - &example.collection, - Generation::new(), - Generation::new(), - example.input.all_sled_ids(SledFilter::All), - "the_test", - TEST_NAME, - ) - .expect("creating initial blueprint"); + let blueprint_initial = example.blueprint; // Add a completely bare sled to the input. let (new_sled_id, input2) = { diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 2c96e1e5a8..760e880b8d 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -15,11 +15,9 @@ use nexus_types::deployment::OmicronZoneNic; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; -use omicron_common::api::external::Generation; use omicron_uuid_kinds::ExternalIpUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::SledKind; -use sled_agent_client::types::OmicronZonesConfig; use typed_rng::TypedUuidRng; pub struct ExampleSystem { @@ -53,37 +51,14 @@ impl ExampleSystem { let mut input_builder = system .to_planning_input_builder() .expect("failed to make planning input builder"); - let mut inventory_builder = - system.to_collection_builder().expect("failed to build collection"); let base_input = input_builder.clone().build(); - // For each sled, have it report 0 zones in the initial inventory. - // This will enable us to build a blueprint from the initial - // inventory, which we can then use to build new blueprints. - for &sled_id in &sled_ids { - inventory_builder - .found_sled_omicron_zones( - "fake sled agent", - sled_id, - OmicronZonesConfig { - generation: Generation::new(), - zones: vec![], - }, - ) - .expect("recording Omicron zones"); - } - - let empty_zone_inventory = inventory_builder.build(); - let initial_blueprint = - BlueprintBuilder::build_initial_from_collection_seeded( - &empty_zone_inventory, - Generation::new(), - Generation::new(), - base_input.all_sled_ids(SledFilter::All), - "test suite", - (test_name, "ExampleSystem initial"), - ) - .unwrap(); + // Start with an empty blueprint containing only our sleds, no zones. + let initial_blueprint = BlueprintBuilder::build_empty_with_sleds_seeded( + base_input.all_sled_ids(SledFilter::All), + "test suite", + (test_name, "ExampleSystem initial"), + ); // Now make a blueprint and collection with some zones on each sled. 
let mut builder = BlueprintBuilder::new_based_on( @@ -176,7 +151,8 @@ impl ExampleSystem { } } -/// Returns a collection and planning input describing a pretty simple system. +/// Returns a collection, planning input, and blueprint describing a pretty +/// simple system. /// /// The test name is used as the RNG seed. /// @@ -187,7 +163,7 @@ pub fn example( log: &slog::Logger, test_name: &str, nsleds: usize, -) -> (Collection, PlanningInput) { +) -> (Collection, PlanningInput, Blueprint) { let example = ExampleSystem::new(log, test_name, nsleds); - (example.collection, example.input) + (example.collection, example.input, example.blueprint) } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 46716754a1..a252f9b821 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -396,7 +396,6 @@ mod test { use super::Planner; use crate::blueprint_builder::test::verify_blueprint; use crate::blueprint_builder::test::DEFAULT_N_SLEDS; - use crate::blueprint_builder::BlueprintBuilder; use crate::example::example; use crate::example::ExampleSystem; use crate::system::SledBuilder; @@ -410,7 +409,6 @@ mod test { use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::DiffSledModified; - use nexus_types::deployment::SledFilter; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; @@ -426,34 +424,18 @@ mod test { static TEST_NAME: &str = "planner_basic_add_sled"; let logctx = test_setup_log(TEST_NAME); - // For our purposes, we don't care about the DNS generations. - let internal_dns_version = Generation::new(); - let external_dns_version = Generation::new(); - - // Use our example inventory collection. + // Use our example system. let mut example = ExampleSystem::new(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - - // Build the initial blueprint. We don't bother verifying it here - // because there's a separate test for that. - let blueprint1 = - BlueprintBuilder::build_initial_from_collection_seeded( - &example.collection, - internal_dns_version, - external_dns_version, - example.input.all_sled_ids(SledFilter::All), - "the_test", - (TEST_NAME, "bp1"), - ) - .expect("failed to create initial blueprint"); - verify_blueprint(&blueprint1); + let blueprint1 = &example.blueprint; + verify_blueprint(blueprint1); // Now run the planner. It should do nothing because our initial // system didn't have any issues that the planner currently knows how to // fix. let blueprint2 = Planner::new_based_on( logctx.log.clone(), - &blueprint1, + blueprint1, &example.input, "no-op?", &example.collection, @@ -463,7 +445,7 @@ mod test { .plan() .expect("failed to plan"); - let diff = blueprint2.diff_since_blueprint(&blueprint1).unwrap(); + let diff = blueprint2.diff_since_blueprint(blueprint1).unwrap(); println!("1 -> 2 (expected no changes):\n{}", diff.display()); assert_eq!(diff.sleds_added().len(), 0); assert_eq!(diff.sleds_removed().len(), 0); @@ -626,14 +608,10 @@ mod test { static TEST_NAME: &str = "planner_add_multiple_nexus_to_one_sled"; let logctx = test_setup_log(TEST_NAME); - // For our purposes, we don't care about the DNS generations. - let internal_dns_version = Generation::new(); - let external_dns_version = Generation::new(); - - // Use our example inventory collection as a starting point, but strip - // it down to just one sled. 
- let (sled_id, collection, input) = { - let (mut collection, input) = + // Use our example system as a starting point, but strip it down to just + // one sled. + let (sled_id, blueprint1, collection, input) = { + let (mut collection, input, mut blueprint) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // Pick one sled ID to keep and remove the rest. @@ -646,22 +624,13 @@ mod test { assert_eq!(collection.sled_agents.len(), 1); assert_eq!(collection.omicron_zones.len(), 1); + blueprint + .blueprint_zones + .retain(|k, _v| keep_sled_id.as_untyped_uuid() == k); - (keep_sled_id, collection, builder.build()) + (keep_sled_id, blueprint, collection, builder.build()) }; - // Build the initial blueprint. - let blueprint1 = - BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - internal_dns_version, - external_dns_version, - input.all_sled_ids(SledFilter::All), - "the_test", - (TEST_NAME, "bp1"), - ) - .expect("failed to create initial blueprint"); - // This blueprint should only have 1 Nexus instance on the one sled we // kept. assert_eq!(blueprint1.blueprint_zones.len(), 1); @@ -724,22 +693,10 @@ mod test { "planner_spread_additional_nexus_zones_across_sleds"; let logctx = test_setup_log(TEST_NAME); - // Use our example inventory collection as a starting point. - let (collection, input) = + // Use our example system as a starting point. + let (collection, input, blueprint1) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); - // Build the initial blueprint. - let blueprint1 = - BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - Generation::new(), - Generation::new(), - input.all_sled_ids(SledFilter::All), - "the_test", - (TEST_NAME, "bp1"), - ) - .expect("failed to create initial blueprint"); - // This blueprint should only have 3 Nexus zones: one on each sled. assert_eq!(blueprint1.blueprint_zones.len(), 3); for sled_config in blueprint1.blueprint_zones.values() { @@ -811,25 +768,14 @@ mod test { "planner_nexus_allocation_skips_nonprovisionable_sleds"; let logctx = test_setup_log(TEST_NAME); - // Use our example inventory collection as a starting point. + // Use our example system as a starting point. // // Request two extra sleds here so we test non-provisionable, expunged, // and decommissioned sleds. (When we add more kinds of // non-provisionable states in the future, we'll have to add more // sleds.) - let (collection, input) = example(&logctx.log, TEST_NAME, 5); - - // Build the initial blueprint. - let blueprint1 = - BlueprintBuilder::build_initial_from_collection_seeded( - &collection, - Generation::new(), - Generation::new(), - input.all_sled_ids(SledFilter::All), - "the_test", - (TEST_NAME, "bp1"), - ) - .expect("failed to create initial blueprint"); + let (collection, input, blueprint1) = + example(&logctx.log, TEST_NAME, 5); // This blueprint should only have 5 Nexus zones: one on each sled. 
assert_eq!(blueprint1.blueprint_zones.len(), 5); diff --git a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt index fe56567f65..b421b8f383 100644 --- a/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt +++ b/nexus/reconfigurator/planning/tests/output/blueprint_builder_initial_diff.txt @@ -1,5 +1,5 @@ from: collection 094d362b-7d79-49e7-a244-134276cca8fe -to: blueprint 9d2c007b-46f1-4ff2-8b4c-8a5767030f76 +to: blueprint e4aeb3b3-272f-4967-be34-2d34daa46aa1 ------------------------------------------------------------------------------------------------------ zone type zone ID disposition underlay IP status diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt index 005d963475..ecc5b125d9 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_1_2.txt @@ -1,4 +1,4 @@ -from: blueprint 55502b1b-e255-438b-a16a-2680a4b5f962 +from: blueprint 4d4e6c38-cd95-4c4e-8f45-6af4d686964b to: blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 -------------------------------------------------------------------------------------------------------- diff --git a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt index aa4da01852..623bf0a756 100644 --- a/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt +++ b/nexus/reconfigurator/planning/tests/output/planner_nonprovisionable_bp2.txt @@ -1,5 +1,5 @@ blueprint 9f71f5d3-a272-4382-9154-6ea2e171a6c6 -parent: 55502b1b-e255-438b-a16a-2680a4b5f962 +parent: 4d4e6c38-cd95-4c4e-8f45-6af4d686964b -------------------------------------------------------------------------------------------- zone type zone ID disposition underlay IP diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 5f2d316efd..4d17cf43b0 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -7,7 +7,6 @@ use nexus_db_model::DnsGroup; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; -use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::planner::Planner; use nexus_reconfigurator_preparation::PlanningInputFromDb; use nexus_types::deployment::Blueprint; @@ -15,7 +14,6 @@ use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; use nexus_types::deployment::PlanningInput; -use nexus_types::deployment::SledFilter; use nexus_types::inventory::Collection; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; @@ -26,7 +24,6 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; -use omicron_uuid_kinds::CollectionUuid; use slog_error_chain::InlineErrorChain; use uuid::Uuid; @@ -205,35 +202,6 @@ impl super::Nexus { self.db_datastore.blueprint_insert(opctx, blueprint).await } - pub async fn blueprint_generate_from_collection( - &self, - opctx: &OpContext, - collection_id: CollectionUuid, - ) -> CreateResult { - let collection = self - .datastore() - 
.inventory_collection_read(opctx, collection_id) - .await?; - let planning_context = self.blueprint_planning_context(opctx).await?; - let blueprint = BlueprintBuilder::build_initial_from_collection( - &collection, - planning_context.planning_input.internal_dns_version(), - planning_context.planning_input.external_dns_version(), - planning_context.planning_input.all_sled_ids(SledFilter::All), - &planning_context.creator, - ) - .map_err(|error| { - Error::internal_error(&format!( - "error generating initial blueprint from collection {}: {}", - collection_id, - InlineErrorChain::new(&error) - )) - })?; - - self.blueprint_add(&opctx, &blueprint).await?; - Ok(blueprint) - } - pub async fn blueprint_create_regenerate( &self, opctx: &OpContext, diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 35ec5167f9..c2582daaf4 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -49,7 +49,6 @@ use omicron_common::api::internal::nexus::RepairProgress; use omicron_common::api::internal::nexus::RepairStartInfo; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::update::ArtifactId; -use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::DownstairsKind; use omicron_uuid_kinds::TypedUuid; use omicron_uuid_kinds::UpstairsKind; @@ -102,7 +101,6 @@ pub(crate) fn internal_api() -> NexusApiDescription { api.register(blueprint_target_view)?; api.register(blueprint_target_set)?; api.register(blueprint_target_set_enabled)?; - api.register(blueprint_generate_from_collection)?; api.register(blueprint_regenerate)?; api.register(blueprint_import)?; @@ -956,33 +954,6 @@ async fn blueprint_target_set_enabled( // Generating blueprints -#[derive(Debug, Deserialize, JsonSchema)] -struct CollectionId { - collection_id: CollectionUuid, -} - -/// Generates a new blueprint matching the specified inventory collection -#[endpoint { - method = POST, - path = "/deployment/blueprints/generate-from-collection", -}] -async fn blueprint_generate_from_collection( - rqctx: RequestContext>, - params: TypedBody, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let handler = async { - let opctx = crate::context::op_context_for_internal_api(&rqctx).await; - let nexus = &apictx.nexus; - let collection_id = params.into_inner().collection_id; - let result = nexus - .blueprint_generate_from_collection(&opctx, collection_id) - .await?; - Ok(HttpResponseOk(result)) - }; - apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - /// Generates a new blueprint for the current system, re-evaluating anything /// that's changed since the last one was generated #[endpoint { diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 7dbaf9aa79..3a8e6e4066 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -203,11 +203,7 @@ impl Blueprint { /// /// Note that collections do not include information about zone /// disposition, so it is assumed that all zones in the collection have the - /// [`InService`](BlueprintZoneDisposition::InService) disposition. (This - /// is the same assumption made by - /// [`BlueprintZonesConfig::initial_from_collection`]. The logic here may - /// also be expanded to handle cases where not all zones in the collection - /// are in-service.) + /// [`InService`](BlueprintZoneDisposition::InService) disposition. 
pub fn diff_since_collection( &self, before: &Collection, @@ -324,37 +320,6 @@ pub struct BlueprintZonesConfig { } impl BlueprintZonesConfig { - /// Constructs a new [`BlueprintZonesConfig`] from a collection's zones. - /// - /// For the initial blueprint, all zones within a collection are assumed to - /// have the [`InService`](BlueprintZoneDisposition::InService) - /// disposition. - pub fn initial_from_collection( - collection: &OmicronZonesConfig, - ) -> Result { - let zones = collection - .zones - .iter() - .map(|z| { - BlueprintZoneConfig::from_omicron_zone_config( - z.clone(), - BlueprintZoneDisposition::InService, - ) - }) - .collect::>()?; - - let mut ret = Self { - // An initial `BlueprintZonesConfig` reuses the generation from - // `OmicronZonesConfig`. - generation: collection.generation, - zones, - }; - // For testing, it's helpful for zones to be in sorted order. - ret.sort(); - - Ok(ret) - } - /// Sorts the list of zones stored in this configuration. /// /// This is not strictly necessary. But for testing (particularly snapshot diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 0383c9cbd2..593a841bfc 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -455,40 +455,6 @@ } } }, - "/deployment/blueprints/generate-from-collection": { - "post": { - "summary": "Generates a new blueprint matching the specified inventory collection", - "operationId": "blueprint_generate_from_collection", - "requestBody": { - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/CollectionId" - } - } - }, - "required": true - }, - "responses": { - "200": { - "description": "successful operation", - "content": { - "application/json": { - "schema": { - "$ref": "#/components/schemas/Blueprint" - } - } - } - }, - "4XX": { - "$ref": "#/components/responses/Error" - }, - "5XX": { - "$ref": "#/components/responses/Error" - } - } - } - }, "/deployment/blueprints/import": { "post": { "summary": "Imports a client-provided blueprint", @@ -3199,17 +3165,6 @@ "key" ] }, - "CollectionId": { - "type": "object", - "properties": { - "collection_id": { - "$ref": "#/components/schemas/TypedUuidForCollectionKind" - } - }, - "required": [ - "collection_id" - ] - }, "Cumulativedouble": { "description": "A cumulative or counter data type.", "type": "object", @@ -7178,10 +7133,6 @@ "SwitchPutResponse": { "type": "object" }, - "TypedUuidForCollectionKind": { - "type": "string", - "format": "uuid" - }, "TypedUuidForDownstairsRegionKind": { "type": "string", "format": "uuid" From f541cab3176c035702df4ad3f89b3eb87c396587 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Mon, 22 Apr 2024 11:31:09 +1200 Subject: [PATCH 183/334] [sled-agent] NTP zone config set up via zone-setup CLI (#5440) ## Overview This PR repurposes the zone-network CLI into a zone-setup CLI, in order to remove as many zone start-up scripts as possible. This is also in preparation to use this zone-setup CLI with the self assembling switch zone. 
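For readers skimming the diff, here is a rough sketch of the shape this change implies: a single `zone-setup` binary exposing each zone start-up task as a subcommand, instead of one binary plus per-zone shell method scripts. The subcommand and flag names below are illustrative only and are not taken from this patch; the real definitions live in `zone-setup/src/bin/zone-setup.rs`.

```rust
// Illustrative sketch only: one binary, several zone start-up tasks as
// subcommands. Names and flags here are hypothetical.
use clap::{Parser, Subcommand};
use std::net::Ipv6Addr;

#[derive(Parser)]
#[command(name = "zone-setup")]
struct Cli {
    #[command(subcommand)]
    command: Command,
}

#[derive(Subcommand)]
enum Command {
    /// Roughly the job of the old `zone-networking` binary.
    CommonNetworking {
        #[arg(long)]
        datalink: String,
        #[arg(long)]
        static_addr: Ipv6Addr,
        #[arg(long)]
        gateway: Ipv6Addr,
    },
    /// Write a chrony config for a boundary or internal NTP zone.
    ChronySetup {
        /// Act as a boundary NTP server (upstreams outside the rack).
        #[arg(long)]
        boundary: bool,
        /// Upstream servers (boundary) or rack-internal boundary servers.
        #[arg(long)]
        server: Vec<String>,
    },
}

fn main() {
    match Cli::parse().command {
        Command::CommonNetworking { datalink, static_addr, gateway } => {
            println!("set up {datalink}: {static_addr} via {gateway}");
        }
        Command::ChronySetup { boundary, server } => {
            println!("boundary={boundary}, servers={server:?}");
        }
    }
}
```

The point of the consolidation, as the overview says, is that a zone's SMF manifest can invoke one binary with a task-specific subcommand rather than carrying its own start-up shell script.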
Related: https://github.com/oxidecomputer/omicron/issues/1898 --------- Co-authored-by: Andy Fiddaman --- Cargo.lock | 13 +- Cargo.toml | 5 +- illumos-utils/src/lib.rs | 1 + illumos-utils/src/svcadm.rs | 21 + package-manifest.toml | 62 ++- sled-agent/src/services.rs | 12 +- .../etc/logadm.d/chrony.logadm.conf | 0 smf/chrony-setup/manifest.xml | 46 ++ smf/ntp/etc/inet/chrony.conf.boundary | 32 -- smf/ntp/etc/inet/chrony.conf.internal | 31 -- smf/ntp/manifest/manifest.xml | 22 +- smf/ntp/method/svc-site-ntp | 128 ----- smf/opte-interface-setup/manifest.xml | 2 +- smf/zone-network-setup/manifest.xml | 2 +- zone-network-setup/src/bin/zone-networking.rs | 202 -------- {zone-network-setup => zone-setup}/Cargo.toml | 3 +- zone-setup/src/bin/zone-setup.rs | 484 ++++++++++++++++++ 17 files changed, 618 insertions(+), 448 deletions(-) create mode 100644 illumos-utils/src/svcadm.rs rename smf/{ntp => chrony-setup}/etc/logadm.d/chrony.logadm.conf (100%) create mode 100644 smf/chrony-setup/manifest.xml delete mode 100644 smf/ntp/etc/inet/chrony.conf.boundary delete mode 100644 smf/ntp/etc/inet/chrony.conf.internal delete mode 100755 smf/ntp/method/svc-site-ntp delete mode 100644 zone-network-setup/src/bin/zone-networking.rs rename {zone-network-setup => zone-setup}/Cargo.toml (88%) create mode 100644 zone-setup/src/bin/zone-setup.rs diff --git a/Cargo.lock b/Cargo.lock index 9eeb22632e..0f5477f077 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10589,6 +10589,16 @@ dependencies = [ "serde", ] +[[package]] +name = "uzers" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76d283dc7e8c901e79e32d077866eaf599156cbf427fffa8289aecc52c5c3f63" +dependencies = [ + "libc", + "log", +] + [[package]] name = "vcpkg" version = "0.2.15" @@ -11393,7 +11403,7 @@ dependencies = [ ] [[package]] -name = "zone-network-setup" +name = "zone-setup" version = "0.1.0" dependencies = [ "anyhow", @@ -11404,6 +11414,7 @@ dependencies = [ "omicron-workspace-hack", "slog", "tokio", + "uzers", "zone 0.3.0", ] diff --git a/Cargo.toml b/Cargo.toml index fa1f548b56..b6b937614c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -79,7 +79,7 @@ members = [ "wicket", "wicketd", "workspace-hack", - "zone-network-setup", + "zone-setup", ] default-members = [ @@ -158,7 +158,7 @@ default-members = [ "wicket-dbg", "wicket", "wicketd", - "zone-network-setup", + "zone-setup", ] resolver = "2" @@ -442,6 +442,7 @@ update-common = { path = "update-common" } update-engine = { path = "update-engine" } usdt = "0.5.0" uuid = { version = "1.8.0", features = ["serde", "v4"] } +uzers = "0.11" walkdir = "2.5" whoami = "1.5" wicket = { path = "wicket" } diff --git a/illumos-utils/src/lib.rs b/illumos-utils/src/lib.rs index 550170b0f2..d041c866b0 100644 --- a/illumos-utils/src/lib.rs +++ b/illumos-utils/src/lib.rs @@ -24,6 +24,7 @@ pub mod route; pub mod running_zone; pub mod scf; pub mod svc; +pub mod svcadm; pub mod vmm_reservoir; pub mod zfs; pub mod zone; diff --git a/illumos-utils/src/svcadm.rs b/illumos-utils/src/svcadm.rs new file mode 100644 index 0000000000..0d472187df --- /dev/null +++ b/illumos-utils/src/svcadm.rs @@ -0,0 +1,21 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Utilities for manipulating SMF services. + +use crate::zone::SVCADM; +use crate::{execute, ExecutionError, PFEXEC}; + +/// Wraps commands for interacting with svcadm. 
+pub struct Svcadm {} + +#[cfg_attr(any(test, feature = "testing"), mockall::automock)] +impl Svcadm { + pub fn refresh_logadm_upgrade() -> Result<(), ExecutionError> { + let mut cmd = std::process::Command::new(PFEXEC); + let cmd = cmd.args(&[SVCADM, "refresh", "logadm-upgrade"]); + execute(cmd)?; + Ok(()) + } +} diff --git a/package-manifest.toml b/package-manifest.toml index 7fed672271..8633255c7e 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -100,7 +100,7 @@ only_for_targets.image = "standard" source.type = "composite" source.packages = [ "omicron-nexus.tar.gz", - "zone-network-setup.tar.gz", + "zone-setup.tar.gz", "zone-network-install.tar.gz", "opte-interface-setup.tar.gz", ] @@ -130,11 +130,7 @@ output.intermediate_only = true service_name = "oximeter" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ - "oximeter-collector.tar.gz", - "zone-network-setup.tar.gz", - "zone-network-install.tar.gz", -] +source.packages = [ "oximeter-collector.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] output.type = "zone" [package.oximeter-collector] @@ -157,8 +153,8 @@ source.type = "composite" source.packages = [ "clickhouse_svc.tar.gz", "internal-dns-cli.tar.gz", - "zone-network-setup.tar.gz", - "zone-network-install.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" ] output.type = "zone" @@ -183,8 +179,8 @@ source.type = "composite" source.packages = [ "clickhouse_keeper_svc.tar.gz", "internal-dns-cli.tar.gz", - "zone-network-setup.tar.gz", - "zone-network-install.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" ] output.type = "zone" @@ -209,8 +205,8 @@ source.type = "composite" source.packages = [ "cockroachdb-service.tar.gz", "internal-dns-cli.tar.gz", - "zone-network-setup.tar.gz", - "zone-network-install.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" ] output.type = "zone" @@ -245,8 +241,8 @@ source.type = "composite" source.packages = [ "dns-server.tar.gz", "internal-dns-customizations.tar.gz", - "zone-network-setup.tar.gz", - "zone-network-install.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" ] output.type = "zone" @@ -257,7 +253,7 @@ source.type = "composite" source.packages = [ "dns-server.tar.gz", "external-dns-customizations.tar.gz", - "zone-network-setup.tar.gz", + "zone-setup.tar.gz", "zone-network-install.tar.gz", "opte-interface-setup.tar.gz", ] @@ -298,10 +294,11 @@ service_name = "ntp" only_for_targets.image = "standard" source.type = "composite" source.packages = [ + "chrony-setup.tar.gz", "ntp-svc.tar.gz", "opte-interface-setup.tar.gz", - "zone-network-setup.tar.gz", - "zone-network-install.tar.gz", + "zone-setup.tar.gz", + "zone-network-install.tar.gz" ] output.type = "zone" @@ -311,8 +308,17 @@ only_for_targets.image = "standard" source.type = "local" source.paths = [ { from = "smf/ntp/manifest", to = "/var/svc/manifest/site/ntp" }, - { from = "smf/ntp/method", to = "/var/svc/method" }, - { from = "smf/ntp/etc", to = "/etc" }, +] +output.intermediate_only = true +output.type = "zone" + +[package.chrony-setup] +service_name = "chrony-setup" +only_for_targets.image = "standard" +source.type = "local" +source.paths = [ + { from = "smf/chrony-setup/manifest.xml", to = "/var/svc/manifest/site/chrony-setup/manifest.xml" }, + { from = "smf/chrony-setup/etc", to = "/etc" }, ] output.intermediate_only = true output.type = "zone" @@ -457,11 +463,7 @@ output.intermediate_only = true service_name = "crucible" only_for_targets.image = "standard" source.type = 
"composite" -source.packages = [ - "crucible.tar.gz", - "zone-network-setup.tar.gz", - "zone-network-install.tar.gz", -] +source.packages = [ "crucible.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] output.type = "zone" @@ -469,11 +471,7 @@ output.type = "zone" service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "composite" -source.packages = [ - "crucible-pantry.tar.gz", - "zone-network-setup.tar.gz", - "zone-network-install.tar.gz", -] +source.packages = [ "crucible-pantry.tar.gz", "zone-setup.tar.gz", "zone-network-install.tar.gz" ] output.type = "zone" # Packages not built within Omicron, but which must be imported. @@ -746,11 +744,11 @@ source.paths = [ output.type = "zone" output.intermediate_only = true -[package.zone-network-setup] -service_name = "zone-network-cli" +[package.zone-setup] +service_name = "zone-setup-cli" only_for_targets.image = "standard" source.type = "local" -source.rust.binary_names = ["zone-networking"] +source.rust.binary_names = ["zone-setup"] source.rust.release = true output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 1ddb3f9b0a..e2e86e327e 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -2006,7 +2006,7 @@ impl ServiceManager { Self::dns_install(info, Some(dns_servers.to_vec()), domain) .await?; - let mut ntp_config = PropertyGroupBuilder::new("config") + let mut chrony_config = PropertyGroupBuilder::new("config") .add_property("allow", "astring", &rack_net) .add_property( "boundary", @@ -2015,7 +2015,7 @@ impl ServiceManager { ); for s in ntp_servers { - ntp_config = ntp_config.add_property( + chrony_config = chrony_config.add_property( "server", "astring", &s.to_string(), @@ -2030,13 +2030,17 @@ impl ServiceManager { } let ntp_service = ServiceBuilder::new("oxide/ntp") - .add_instance( + .add_instance(ServiceInstanceBuilder::new("default")); + + let chrony_setup_service = + ServiceBuilder::new("oxide/chrony-setup").add_instance( ServiceInstanceBuilder::new("default") - .add_property_group(ntp_config), + .add_property_group(chrony_config), ); let mut profile = ProfileBuilder::new("omicron") .add_service(nw_setup_service) + .add_service(chrony_setup_service) .add_service(disabled_ssh_service) .add_service(dns_install_service) .add_service(dns_client_service) diff --git a/smf/ntp/etc/logadm.d/chrony.logadm.conf b/smf/chrony-setup/etc/logadm.d/chrony.logadm.conf similarity index 100% rename from smf/ntp/etc/logadm.d/chrony.logadm.conf rename to smf/chrony-setup/etc/logadm.d/chrony.logadm.conf diff --git a/smf/chrony-setup/manifest.xml b/smf/chrony-setup/manifest.xml new file mode 100644 index 0000000000..f31f13a2ea --- /dev/null +++ b/smf/chrony-setup/manifest.xml @@ -0,0 +1,46 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/smf/ntp/etc/inet/chrony.conf.boundary b/smf/ntp/etc/inet/chrony.conf.boundary deleted file mode 100644 index d13bc9c815..0000000000 --- a/smf/ntp/etc/inet/chrony.conf.boundary +++ /dev/null @@ -1,32 +0,0 @@ -# -# Configuration file for a boundary NTP server - one which communicates with -# NTP servers outside the rack. 
-# - -pool @SERVER@ iburst maxdelay 0.1 maxsources 16 - -driftfile /var/lib/chrony/drift -ntsdumpdir /var/lib/chrony -dumpdir /var/lib/chrony -pidfile /var/run/chrony/chronyd.pid -logdir /var/log/chrony - -log measurements statistics tracking - -allow fe80::/10 -allow @ALLOW@ - -# Enable local reference mode, which keeps us operating as an NTP server that -# appears synchronised even if there are currently no active upstreams. When -# in this mode, we report as stratum 10 to clients. -local stratum 10 - -# makestep -# We allow chrony to step the system clock during the first three time updates -# if we are more than 0.1 seconds out. -makestep 0.1 3 - -# When a leap second occurs we slew the clock over approximately 37 seconds. -leapsecmode slew -maxslewrate 2708.333 - diff --git a/smf/ntp/etc/inet/chrony.conf.internal b/smf/ntp/etc/inet/chrony.conf.internal deleted file mode 100644 index 9e9ff3ddea..0000000000 --- a/smf/ntp/etc/inet/chrony.conf.internal +++ /dev/null @@ -1,31 +0,0 @@ -# -# Configuration file for an internal NTP server - one which communicates with -# boundary NTP servers within the rack. -# - -server @SERVER@ iburst minpoll 0 maxpoll 4 - -driftfile /var/lib/chrony/drift -ntsdumpdir /var/lib/chrony -dumpdir /var/lib/chrony -pidfile /var/run/chrony/chronyd.pid -logdir /var/log/chrony - -log measurements statistics tracking - -# makestep -# We allow chrony to step the system clock if we are more than a day out, -# regardless of how many clock updates have occurred since boot. -# The boundary NTP servers are configured with local reference mode, which -# means that if they start up without external connectivity, they will appear -# as authoritative servers even if they are advertising January 1987 -# (which is the default system clock on a gimlet after boot). -# This configuration allows a one-off adjustment once RSS begins and the -# boundary servers are synchronised, after which the clock will advance -# monotonically forwards. -makestep 86400 -1 - -# When a leap second occurs we slew the clock over approximately 37 seconds. -leapsecmode slew -maxslewrate 2708.333 - diff --git a/smf/ntp/manifest/manifest.xml b/smf/ntp/manifest/manifest.xml index 7783bbe76c..df427a16a5 100644 --- a/smf/ntp/manifest/manifest.xml +++ b/smf/ntp/manifest/manifest.xml @@ -39,6 +39,11 @@ + + + + @@ -57,7 +62,9 @@ The service also always starts the binary with ASLR enabled, regardless of whether it was linked with -zaslr --> - - - - - - - - - - -