From 4646d9ae8e9998a35327ec5411b5136e26c0e8f5 Mon Sep 17 00:00:00 2001
From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com>
Date: Sat, 20 Jan 2024 01:22:57 +0000
Subject: [PATCH 01/49] Bump h2 from 0.3.21 to 0.3.24 (#4850)

---
 Cargo.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 8c85d4c5cb..5211ecab78 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2707,9 +2707,9 @@ checksum = "92620684d99f750bae383ecb3be3748142d6095760afd5cbcf2261e9a279d780"
 
 [[package]]
 name = "h2"
-version = "0.3.21"
+version = "0.3.24"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "91fc23aa11be92976ef4729127f1a74adf36d8436f7816b185d18df956790833"
+checksum = "bb2c4422095b67ee78da96fbb51a4cc413b3b25883c7717ff7ca1ab31022c9c9"
 dependencies = [
  "bytes",
  "fnv",
@@ -2717,7 +2717,7 @@ dependencies = [
  "futures-sink",
  "futures-util",
  "http 0.2.11",
- "indexmap 1.9.3",
+ "indexmap 2.1.0",
  "slab",
  "tokio",
  "tokio-util",

From d8bbf6d20e3d2472f7f1e60b5b0fb591c7a61572 Mon Sep 17 00:00:00 2001
From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com>
Date: Sat, 20 Jan 2024 05:25:37 +0000
Subject: [PATCH 02/49] Update taiki-e/install-action digest to bd4f144 (#4859)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`242f1c0` -> `bd4f144`](https://togithub.com/taiki-e/install-action/compare/242f1c0...bd4f144) |

---

### Configuration

📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles.

🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index 3dade2e190..a4ebc74843 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
       with:
         toolchain: stable
     - name: Install cargo-hakari
-      uses: taiki-e/install-action@242f1c0c1a882c44e7d32b89af9f2a0bced36540 # v2
+      uses: taiki-e/install-action@bd4f14420660e33ca2929e5c0306a8367173c1ee # v2
      with:
        tool: cargo-hakari
    - name: Check workspace-hack Cargo.toml is up-to-date

From 205382f7ee151f09a5c6c11ed4ae73b14f0d64b3 Mon Sep 17 00:00:00 2001
From: David Crespo
Date: Sat, 20 Jan 2024 01:30:16 -0600
Subject: [PATCH 03/49] [nexus] Endpoint to list IP pools for silo, add
 `is_default` to silo-scoped IP pools list (#4843)

Fixes #4752
Fixes #4763

The main trick here is introducing `views::SiloIpPool`, which is the same as
`views::IpPool` except it also has `is_default` on it. It only makes sense in
the context of a particular silo because `is_default` is only defined for a
(pool, silo) pair, not for a pool alone.
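To make the new view concrete, here is a minimal sketch (stand-in types only;
the real `SiloIpPool` lives in `nexus/types/src/external_api/views.rs`, and
the flattening happens in the `http_entrypoints` handlers in this patch) of
how each `(pool, link)` pair coming back from the datastore is folded into the
silo-scoped view:

```rust
// Sketch with stand-in types, not the actual Nexus definitions.
struct IdentityMetadata; // stand-in for the usual id/name/description fields
struct IpPool { identity: IdentityMetadata }
struct IpPoolResource { is_default: bool } // one row of the (pool, silo) link table
struct SiloIpPool { identity: IdentityMetadata, is_default: bool }

fn to_silo_view(pool: IpPool, link: IpPoolResource) -> SiloIpPool {
    // `is_default` is carried by the link, not the pool itself, because it
    // is only defined for a (pool, silo) pair.
    SiloIpPool { identity: pool.identity, is_default: link.is_default }
}
```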
- [x] Add `GET /v1/system/silos/{silo}/ip-pools`
- [x] `/v1/ip-pools` and `/v1/ip-pools/{pool}` should return `SiloIpPool` too
- [x] Tests for `/v1/system/silos/{silo}/ip-pools`
- [x] We can't have both `SiloIpPool` and `IpPoolSilo`, cleaned up by changing:
   - `views::IpPoolSilo` -> `views::IpPoolSiloLink`
   - `params::IpPoolSiloLink` -> `params::IpPoolLinkSilo`
---
 end-to-end-tests/src/bin/bootstrap.rs        |   4 +-
 nexus/db-model/src/ip_pool.rs                |   2 +-
 nexus/db-queries/src/db/datastore/ip_pool.rs |  86 ++++-----
 nexus/src/app/ip_pool.rs                     |  47 ++++-
 nexus/src/external_api/http_entrypoints.rs   |  81 ++++++--
 nexus/test-utils/src/resource_helpers.rs     |   4 +-
 nexus/tests/integration_tests/endpoints.rs   |  14 +-
 nexus/tests/integration_tests/instances.rs   |   6 +-
 nexus/tests/integration_tests/ip_pools.rs    | 121 ++++++++----
 nexus/tests/output/nexus_tags.txt            |   1 +
 nexus/types/src/external_api/params.rs       |   2 +-
 nexus/types/src/external_api/views.rs        |  14 +-
 openapi/nexus.json                           | 186 ++++++++++++++++---
 13 files changed, 431 insertions(+), 137 deletions(-)

diff --git a/end-to-end-tests/src/bin/bootstrap.rs b/end-to-end-tests/src/bin/bootstrap.rs
index 21e59647ae..b02bed4265 100644
--- a/end-to-end-tests/src/bin/bootstrap.rs
+++ b/end-to-end-tests/src/bin/bootstrap.rs
@@ -4,7 +4,7 @@ use end_to_end_tests::helpers::{generate_name, get_system_ip_pool};
 use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError};
 use oxide_client::types::{
     ByteCount, DeviceAccessTokenRequest, DeviceAuthRequest, DeviceAuthVerify,
-    DiskCreate, DiskSource, IpPoolCreate, IpPoolSiloLink, IpRange, Ipv4Range,
+    DiskCreate, DiskSource, IpPoolCreate, IpPoolLinkSilo, IpRange, Ipv4Range,
     NameOrId, SiloQuotasUpdate,
 };
 use oxide_client::{
@@ -51,7 +51,7 @@ async fn main() -> Result<()> {
     client
         .ip_pool_silo_link()
         .pool(pool_name)
-        .body(IpPoolSiloLink {
+        .body(IpPoolLinkSilo {
             silo: NameOrId::Name(params.silo_name().parse().unwrap()),
             is_default: true,
         })
diff --git a/nexus/db-model/src/ip_pool.rs b/nexus/db-model/src/ip_pool.rs
index bec1113151..030d052c22 100644
--- a/nexus/db-model/src/ip_pool.rs
+++ b/nexus/db-model/src/ip_pool.rs
@@ -97,7 +97,7 @@ pub struct IpPoolResource {
     pub is_default: bool,
 }
 
-impl From<IpPoolResource> for views::IpPoolSilo {
+impl From<IpPoolResource> for views::IpPoolSiloLink {
     fn from(assoc: IpPoolResource) -> Self {
         Self {
             ip_pool_id: assoc.ip_pool_id,
diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs
index c9fdb5f0ee..331126ef97 100644
--- a/nexus/db-queries/src/db/datastore/ip_pool.rs
+++ b/nexus/db-queries/src/db/datastore/ip_pool.rs
@@ -79,47 +79,6 @@ impl DataStore {
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }
 
-    /// List IP pools linked to the current silo
-    pub async fn silo_ip_pools_list(
-        &self,
-        opctx: &OpContext,
-        pagparams: &PaginatedBy<'_>,
-    ) -> ListResultVec<IpPool> {
-        use db::schema::ip_pool;
-        use db::schema::ip_pool_resource;
-
-        // From the developer user's point of view, we treat IP pools linked to
-        // their silo as silo resources, so they can list them if they can list
-        // silo children
-        let authz_silo =
-            opctx.authn.silo_required().internal_context("listing IP pools")?;
-        opctx.authorize(authz::Action::ListChildren, &authz_silo).await?;
-
-        let silo_id = authz_silo.id();
-
-        match pagparams {
-            PaginatedBy::Id(pagparams) => {
-                paginated(ip_pool::table, ip_pool::id, pagparams)
-            }
-            PaginatedBy::Name(pagparams) => paginated(
-                ip_pool::table,
-                ip_pool::name,
-                &pagparams.map_name(|n| Name::ref_cast(n)),
-            ),
-        }
-        
.inner_join(ip_pool_resource::table) - .filter( - ip_pool_resource::resource_type - .eq(IpPoolResourceType::Silo) - .and(ip_pool_resource::resource_id.eq(silo_id)), - ) - .filter(ip_pool::time_deleted.is_null()) - .select(db::model::IpPool::as_select()) - .get_results_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - /// Look up whether the given pool is available to users in the current /// silo, i.e., whether there is an entry in the association table linking /// the pool with that silo @@ -400,6 +359,37 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// Returns (IpPool, IpPoolResource) so we can know in the calling code + /// whether the pool is default for the silo + pub async fn silo_ip_pool_list( + &self, + opctx: &OpContext, + authz_silo: &authz::Silo, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec<(IpPool, IpPoolResource)> { + use db::schema::ip_pool; + use db::schema::ip_pool_resource; + + match pagparams { + PaginatedBy::Id(pagparams) => { + paginated(ip_pool::table, ip_pool::id, pagparams) + } + PaginatedBy::Name(pagparams) => paginated( + ip_pool::table, + ip_pool::name, + &pagparams.map_name(|n| Name::ref_cast(n)), + ), + } + .inner_join(ip_pool_resource::table) + .filter(ip_pool_resource::resource_id.eq(authz_silo.id())) + .filter(ip_pool_resource::resource_type.eq(IpPoolResourceType::Silo)) + .filter(ip_pool::time_deleted.is_null()) + .select(<(IpPool, IpPoolResource)>::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + pub async fn ip_pool_link_silo( &self, opctx: &OpContext, @@ -867,8 +857,11 @@ mod test { .await .expect("Should list IP pools"); assert_eq!(all_pools.len(), 0); + + let authz_silo = opctx.authn.silo_required().unwrap(); + let silo_pools = datastore - .silo_ip_pools_list(&opctx, &pagbyid) + .silo_ip_pool_list(&opctx, &authz_silo, &pagbyid) .await .expect("Should list silo IP pools"); assert_eq!(silo_pools.len(), 0); @@ -893,7 +886,7 @@ mod test { .expect("Should list IP pools"); assert_eq!(all_pools.len(), 1); let silo_pools = datastore - .silo_ip_pools_list(&opctx, &pagbyid) + .silo_ip_pool_list(&opctx, &authz_silo, &pagbyid) .await .expect("Should list silo IP pools"); assert_eq!(silo_pools.len(), 0); @@ -929,11 +922,12 @@ mod test { // now it shows up in the silo list let silo_pools = datastore - .silo_ip_pools_list(&opctx, &pagbyid) + .silo_ip_pool_list(&opctx, &authz_silo, &pagbyid) .await .expect("Should list silo IP pools"); assert_eq!(silo_pools.len(), 1); - assert_eq!(silo_pools[0].id(), pool1_for_silo.id()); + assert_eq!(silo_pools[0].0.id(), pool1_for_silo.id()); + assert_eq!(silo_pools[0].1.is_default, false); // linking an already linked silo errors due to PK conflict let err = datastore @@ -998,7 +992,7 @@ mod test { // and silo pools list is empty again let silo_pools = datastore - .silo_ip_pools_list(&opctx, &pagbyid) + .silo_ip_pool_list(&opctx, &authz_silo, &pagbyid) .await .expect("Should list silo IP pools"); assert_eq!(silo_pools.len(), 0); diff --git a/nexus/src/app/ip_pool.rs b/nexus/src/app/ip_pool.rs index 1d9b3e515e..d8d36fff4b 100644 --- a/nexus/src/app/ip_pool.rs +++ b/nexus/src/app/ip_pool.rs @@ -20,6 +20,7 @@ use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; 
+use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; @@ -74,12 +75,20 @@ impl super::Nexus { } /// List IP pools in current silo - pub(crate) async fn silo_ip_pools_list( + pub(crate) async fn current_silo_ip_pool_list( &self, opctx: &OpContext, pagparams: &PaginatedBy<'_>, - ) -> ListResultVec { - self.db_datastore.silo_ip_pools_list(opctx, pagparams).await + ) -> ListResultVec<(db::model::IpPool, db::model::IpPoolResource)> { + let authz_silo = + opctx.authn.silo_required().internal_context("listing IP pools")?; + + // From the developer user's point of view, we treat IP pools linked to + // their silo as silo resources, so they can list them if they can list + // silo children + opctx.authorize(authz::Action::ListChildren, &authz_silo).await?; + + self.db_datastore.silo_ip_pool_list(opctx, &authz_silo, pagparams).await } // Look up pool by name or ID, but only return it if it's linked to the @@ -88,19 +97,19 @@ impl super::Nexus { &'a self, opctx: &'a OpContext, pool: &'a NameOrId, - ) -> LookupResult { + ) -> LookupResult<(db::model::IpPool, db::model::IpPoolResource)> { let (authz_pool, pool) = self.ip_pool_lookup(opctx, pool)?.fetch().await?; // 404 if no link is found in the current silo let link = self.db_datastore.ip_pool_fetch_link(opctx, pool.id()).await; - if link.is_err() { - return Err(authz_pool.not_found()); + match link { + Ok(link) => Ok((pool, link)), + Err(_) => Err(authz_pool.not_found()), } - - Ok(pool) } + /// List silos for a given pool pub(crate) async fn ip_pool_silo_list( &self, opctx: &OpContext, @@ -109,14 +118,34 @@ impl super::Nexus { ) -> ListResultVec { let (.., authz_pool) = pool_lookup.lookup_for(authz::Action::ListChildren).await?; + + // check ability to list silos in general + opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + self.db_datastore.ip_pool_silo_list(opctx, &authz_pool, pagparams).await } + // List pools for a given silo + pub(crate) async fn silo_ip_pool_list( + &self, + opctx: &OpContext, + silo_lookup: &lookup::Silo<'_>, + pagparams: &PaginatedBy<'_>, + ) -> ListResultVec<(db::model::IpPool, db::model::IpPoolResource)> { + let (.., authz_silo) = + silo_lookup.lookup_for(authz::Action::Read).await?; + // check ability to list pools in general + opctx + .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST) + .await?; + self.db_datastore.silo_ip_pool_list(opctx, &authz_silo, pagparams).await + } + pub(crate) async fn ip_pool_link_silo( &self, opctx: &OpContext, pool_lookup: &lookup::IpPool<'_>, - silo_link: ¶ms::IpPoolSiloLink, + silo_link: ¶ms::IpPoolLinkSilo, ) -> CreateResult { let (authz_pool,) = pool_lookup.lookup_for(authz::Action::Modify).await?; diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 21acb45ed3..65b03a9fdf 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -279,6 +279,7 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(silo_delete)?; api.register(silo_policy_view)?; api.register(silo_policy_update)?; + api.register(silo_ip_pool_list)?; api.register(silo_utilization_view)?; api.register(silo_utilization_list)?; @@ -741,7 +742,7 @@ async fn silo_create( /// Fetch a silo /// -/// Fetch a silo by name. +/// Fetch a silo by name or ID. 
#[endpoint { method = GET, path = "/v1/system/silos/{silo}", @@ -763,6 +764,48 @@ async fn silo_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// List IP pools available within silo +#[endpoint { + method = GET, + path = "/v1/system/silos/{silo}/ip-pools", + tags = ["system/silos"], +}] +async fn silo_ip_pool_list( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result>, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + + let query = query_params.into_inner(); + let pag_params = data_page_params_for(&rqctx, &query)?; + let scan_params = ScanByNameOrId::from_query(&query)?; + let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; + + let silo_lookup = nexus.silo_lookup(&opctx, path.silo)?; + let pools = nexus + .silo_ip_pool_list(&opctx, &silo_lookup, &paginated_by) + .await? + .iter() + .map(|(pool, silo_link)| views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + }) + .collect(); + + Ok(HttpResponseOk(ScanByNameOrId::results_page( + &query, + pools, + &marker_for_name_or_id, + )?)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + /// Delete a silo /// /// Delete a silo by name. @@ -1302,7 +1345,7 @@ async fn project_policy_update( async fn project_ip_pool_list( rqctx: RequestContext>, query_params: Query, -) -> Result>, HttpError> { +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; @@ -1312,10 +1355,13 @@ async fn project_ip_pool_list( let paginated_by = name_or_id_pagination(&pag_params, scan_params)?; let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let pools = nexus - .silo_ip_pools_list(&opctx, &paginated_by) + .current_silo_ip_pool_list(&opctx, &paginated_by) .await? .into_iter() - .map(IpPool::from) + .map(|(pool, silo_link)| views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + }) .collect(); Ok(HttpResponseOk(ScanByNameOrId::results_page( &query, @@ -1335,14 +1381,18 @@ async fn project_ip_pool_list( async fn project_ip_pool_view( rqctx: RequestContext>, path_params: Path, -) -> Result, HttpError> { +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; let nexus = &apictx.nexus; let pool_selector = path_params.into_inner().pool; - let pool = nexus.silo_ip_pool_fetch(&opctx, &pool_selector).await?; - Ok(HttpResponseOk(IpPool::from(pool))) + let (pool, silo_link) = + nexus.silo_ip_pool_fetch(&opctx, &pool_selector).await?; + Ok(HttpResponseOk(views::SiloIpPool { + identity: pool.identity(), + is_default: silo_link.is_default, + })) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } @@ -1489,7 +1539,14 @@ async fn ip_pool_silo_list( // option would be to paginate by a composite key representing the (pool, // resource_type, resource) query_params: Query, -) -> Result>, HttpError> { + // TODO: this could just list views::Silo -- it's not like knowing silo_id + // and nothing else is particularly useful -- except we also want to say + // whether the pool is marked default on each silo. So one option would + // be to do the same as we did with SiloIpPool -- include is_default on + // whatever the thing is. Still... 
all we'd have to do to make this usable + // in both places would be to make it { ...IpPool, silo_id, silo_name, + // is_default } +) -> Result>, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; @@ -1511,7 +1568,7 @@ async fn ip_pool_silo_list( Ok(HttpResponseOk(ScanById::results_page( &query, assocs, - &|_, x: &views::IpPoolSilo| x.silo_id, + &|_, x: &views::IpPoolSiloLink| x.silo_id, )?)) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await @@ -1526,8 +1583,8 @@ async fn ip_pool_silo_list( async fn ip_pool_silo_link( rqctx: RequestContext>, path_params: Path, - resource_assoc: TypedBody, -) -> Result, HttpError> { + resource_assoc: TypedBody, +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; @@ -1581,7 +1638,7 @@ async fn ip_pool_silo_update( rqctx: RequestContext>, path_params: Path, update: TypedBody, -) -> Result, HttpError> { +) -> Result, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_external_api(&rqctx).await?; diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index c2516a1509..4fe03f204c 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -246,9 +246,9 @@ pub async fn link_ip_pool( is_default: bool, ) { let link = - params::IpPoolSiloLink { silo: NameOrId::Id(*silo_id), is_default }; + params::IpPoolLinkSilo { silo: NameOrId::Id(*silo_id), is_default }; let url = format!("/v1/system/ip-pools/{pool_name}/silos"); - object_create::( + object_create::( client, &url, &link, ) .await; diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 11bfa34c5f..8beffe43a5 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -80,6 +80,8 @@ pub static DEMO_SILO_NAME: Lazy = Lazy::new(|| "demo-silo".parse().unwrap()); pub static DEMO_SILO_URL: Lazy = Lazy::new(|| format!("/v1/system/silos/{}", *DEMO_SILO_NAME)); +pub static DEMO_SILO_IP_POOLS_URL: Lazy = + Lazy::new(|| format!("{}/ip-pools", *DEMO_SILO_URL)); pub static DEMO_SILO_POLICY_URL: Lazy = Lazy::new(|| format!("/v1/system/silos/{}/policy", *DEMO_SILO_NAME)); pub static DEMO_SILO_QUOTAS_URL: Lazy = @@ -627,8 +629,8 @@ pub static DEMO_IP_POOL_UPDATE: Lazy = }); pub static DEMO_IP_POOL_SILOS_URL: Lazy = Lazy::new(|| format!("{}/silos", *DEMO_IP_POOL_URL)); -pub static DEMO_IP_POOL_SILOS_BODY: Lazy = - Lazy::new(|| params::IpPoolSiloLink { +pub static DEMO_IP_POOL_SILOS_BODY: Lazy = + Lazy::new(|| params::IpPoolLinkSilo { silo: NameOrId::Id(DEFAULT_SILO.identity().id), is_default: true, // necessary for demo instance create to go through }); @@ -1110,6 +1112,14 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Delete, ], }, + VerifyEndpoint { + url: &DEMO_SILO_IP_POOLS_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Get, + ], + }, VerifyEndpoint { url: &DEMO_SILO_POLICY_URL, visibility: Visibility::Protected, diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 044f87f7c1..2f4e913185 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -3657,7 +3657,7 @@ async 
fn test_instance_ephemeral_ip_from_correct_pool( ); // make pool2 default and create instance with default pool. check that it now it comes from pool2 - let _: views::IpPoolSilo = object_put( + let _: views::IpPoolSiloLink = object_put( client, &format!("/v1/system/ip-pools/pool2/silos/{}", DEFAULT_SILO.id()), ¶ms::IpPoolSiloUpdate { is_default: true }, @@ -3788,11 +3788,11 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( // associate the pool with a different silo and we should get the same // error on instance create - let params = params::IpPoolSiloLink { + let params = params::IpPoolLinkSilo { silo: NameOrId::Name(cptestctx.silo_name.clone()), is_default: false, }; - let _: views::IpPoolSilo = + let _: views::IpPoolSiloLink = object_create(client, "/v1/system/ip-pools/orphan-pool/silos", ¶ms) .await; diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index d97eda9a0b..7843e816fd 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -31,7 +31,7 @@ use nexus_test_utils::resource_helpers::objects_list_page_authz; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::params::IpPoolCreate; -use nexus_types::external_api::params::IpPoolSiloLink; +use nexus_types::external_api::params::IpPoolLinkSilo; use nexus_types::external_api::params::IpPoolSiloUpdate; use nexus_types::external_api::params::IpPoolUpdate; use nexus_types::external_api::shared::IpRange; @@ -40,8 +40,9 @@ use nexus_types::external_api::shared::Ipv6Range; use nexus_types::external_api::shared::SiloIdentityMode; use nexus_types::external_api::views::IpPool; use nexus_types::external_api::views::IpPoolRange; -use nexus_types::external_api::views::IpPoolSilo; +use nexus_types::external_api::views::IpPoolSiloLink; use nexus_types::external_api::views::Silo; +use nexus_types::external_api::views::SiloIpPool; use nexus_types::identity::Resource; use omicron_common::api::external::IdentityMetadataUpdateParams; use omicron_common::api::external::NameOrId; @@ -261,6 +262,19 @@ async fn test_ip_pool_list_dedupe(cptestctx: &ControlPlaneTestContext) { assert_eq!(ip_pools.len(), 2); assert_eq!(ip_pools[0].identity.id, pool1.id()); assert_eq!(ip_pools[1].identity.id, pool2.id()); + + let silo1_pools = pools_for_silo(client, "silo1").await; + assert_eq!(silo1_pools.len(), 2); + assert_eq!(silo1_pools[0].id(), pool1.id()); + assert_eq!(silo1_pools[1].id(), pool2.id()); + + let silo2_pools = pools_for_silo(client, "silo2").await; + assert_eq!(silo2_pools.len(), 1); + assert_eq!(silo2_pools[0].identity.name, "pool1"); + + let silo3_pools = pools_for_silo(client, "silo3").await; + assert_eq!(silo3_pools.len(), 1); + assert_eq!(silo3_pools[0].identity.name, "pool1"); } /// The internal IP pool, defined by its association with the internal silo, @@ -332,7 +346,7 @@ async fn test_ip_pool_service_no_cud(cptestctx: &ControlPlaneTestContext) { // linking not allowed - // let link_body = params::IpPoolSiloLink { + // let link_body = params::IpPoolLinkSilo { // silo: NameOrId::Name(cptestctx.silo_name.clone()), // is_default: false, // }; @@ -360,9 +374,13 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { let assocs_p0 = silos_for_pool(client, "p0").await; assert_eq!(assocs_p0.items.len(), 0); + let silo_name = cptestctx.silo_name.as_str(); + let silo_pools = pools_for_silo(client, silo_name).await; + assert_eq!(silo_pools.len(), 0); + // expect 404 on association 
if the specified silo doesn't exist let nonexistent_silo_id = Uuid::new_v4(); - let params = params::IpPoolSiloLink { + let params = params::IpPoolLinkSilo { silo: NameOrId::Id(nonexistent_silo_id), is_default: false, }; @@ -374,17 +392,20 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { StatusCode::NOT_FOUND, ) .await; + let not_found = + format!("not found: silo with id \"{nonexistent_silo_id}\""); + assert_eq!(error.message, not_found); - assert_eq!( - error.message, - format!("not found: silo with id \"{nonexistent_silo_id}\"") - ); + // pools for silo also 404s on nonexistent silo + let url = format!("/v1/system/silos/{}/ip-pools", nonexistent_silo_id); + let error = object_get_error(client, &url, StatusCode::NOT_FOUND).await; + assert_eq!(error.message, not_found); // associate by name with silo that exists let silo = NameOrId::Name(cptestctx.silo_name.clone()); let params = - params::IpPoolSiloLink { silo: silo.clone(), is_default: false }; - let _: IpPoolSilo = + params::IpPoolLinkSilo { silo: silo.clone(), is_default: false }; + let _: IpPoolSiloLink = object_create(client, "/v1/system/ip-pools/p0/silos", ¶ms).await; // second attempt to create the same link errors due to conflict @@ -402,26 +423,45 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { let silo_id = object_get::(client, &silo_url).await.identity.id; let assocs_p0 = silos_for_pool(client, "p0").await; - let silo_link = - IpPoolSilo { ip_pool_id: p0.identity.id, silo_id, is_default: false }; + let silo_link = IpPoolSiloLink { + ip_pool_id: p0.identity.id, + silo_id, + is_default: false, + }; assert_eq!(assocs_p0.items.len(), 1); assert_eq!(assocs_p0.items[0], silo_link); + let silo_pools = pools_for_silo(client, silo_name).await; + assert_eq!(silo_pools.len(), 1); + assert_eq!(silo_pools[0].identity.id, p0.identity.id); + assert_eq!(silo_pools[0].is_default, false); + // associate same silo to other pool by ID instead of name - let link_params = params::IpPoolSiloLink { + let link_params = params::IpPoolLinkSilo { silo: NameOrId::Id(silo_id), is_default: true, }; let url = "/v1/system/ip-pools/p1/silos"; - let _: IpPoolSilo = object_create(client, &url, &link_params).await; + let _: IpPoolSiloLink = object_create(client, &url, &link_params).await; let silos_p1 = silos_for_pool(client, "p1").await; assert_eq!(silos_p1.items.len(), 1); assert_eq!( silos_p1.items[0], - IpPoolSilo { ip_pool_id: p1.identity.id, is_default: true, silo_id } + IpPoolSiloLink { + ip_pool_id: p1.identity.id, + is_default: true, + silo_id + } ); + let silo_pools = pools_for_silo(client, silo_name).await; + assert_eq!(silo_pools.len(), 2); + assert_eq!(silo_pools[0].id(), p0.id()); + assert_eq!(silo_pools[0].is_default, false); + assert_eq!(silo_pools[1].id(), p1.id()); + assert_eq!(silo_pools[1].is_default, true); + // creating a third pool and trying to link it as default: true should fail create_pool(client, "p2").await; let url = "/v1/system/ip-pools/p2/silos"; @@ -446,13 +486,19 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) { "IP Pool cannot be deleted while it is linked to a silo", ); - // unlink silo (doesn't matter that it's a default) + // unlink p1 from silo (doesn't matter that it's a default) let url = format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name); object_delete(client, &url).await; let silos_p1 = silos_for_pool(client, "p1").await; assert_eq!(silos_p1.items.len(), 0); + // after unlinking p1, only p0 is left + let silo_pools = 
pools_for_silo(client, silo_name).await; + assert_eq!(silo_pools.len(), 1); + assert_eq!(silo_pools[0].identity.id, p0.identity.id); + assert_eq!(silo_pools[0].is_default, false); + // now we can delete the pool too object_delete(client, "/v1/system/ip-pools/p1").await; } @@ -486,10 +532,10 @@ async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { // associate both pools with the test silo let silo = NameOrId::Name(cptestctx.silo_name.clone()); let params = - params::IpPoolSiloLink { silo: silo.clone(), is_default: false }; - let _: IpPoolSilo = + params::IpPoolLinkSilo { silo: silo.clone(), is_default: false }; + let _: IpPoolSiloLink = object_create(client, "/v1/system/ip-pools/p0/silos", ¶ms).await; - let _: IpPoolSilo = + let _: IpPoolSiloLink = object_create(client, "/v1/system/ip-pools/p1/silos", ¶ms).await; // now both are linked to the silo, neither is marked default @@ -503,10 +549,10 @@ async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { // make p0 default let params = IpPoolSiloUpdate { is_default: true }; - let _: IpPoolSilo = object_put(client, &p0_silo_url, ¶ms).await; + let _: IpPoolSiloLink = object_put(client, &p0_silo_url, ¶ms).await; // making the same one default again is not an error - let _: IpPoolSilo = object_put(client, &p0_silo_url, ¶ms).await; + let _: IpPoolSiloLink = object_put(client, &p0_silo_url, ¶ms).await; // now p0 is default let silos_p0 = silos_for_pool(client, "p0").await; @@ -524,7 +570,7 @@ async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { let params = IpPoolSiloUpdate { is_default: true }; let p1_silo_url = format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name); - let _: IpPoolSilo = object_put(client, &p1_silo_url, ¶ms).await; + let _: IpPoolSiloLink = object_put(client, &p1_silo_url, ¶ms).await; // p1 is now default let silos_p1 = silos_for_pool(client, "p1").await; @@ -538,7 +584,7 @@ async fn test_ip_pool_update_default(cptestctx: &ControlPlaneTestContext) { // we can also unset default let params = IpPoolSiloUpdate { is_default: false }; - let _: IpPoolSilo = object_put(client, &p1_silo_url, ¶ms).await; + let _: IpPoolSiloLink = object_put(client, &p1_silo_url, ¶ms).await; let silos_p1 = silos_for_pool(client, "p1").await; assert_eq!(silos_p1.items.len(), 1); @@ -589,10 +635,18 @@ fn get_names(pools: Vec) -> Vec { async fn silos_for_pool( client: &ClientTestContext, - id: &str, -) -> ResultsPage { - let url = format!("/v1/system/ip-pools/{}/silos", id); - objects_list_page_authz::(client, &url).await + pool: &str, +) -> ResultsPage { + let url = format!("/v1/system/ip-pools/{}/silos", pool); + objects_list_page_authz::(client, &url).await +} + +async fn pools_for_silo( + client: &ClientTestContext, + silo: &str, +) -> Vec { + let url = format!("/v1/system/silos/{}/ip-pools", silo); + objects_list_page_authz::(client, &url).await.items } async fn create_pool(client: &ClientTestContext, name: &str) -> IpPool { @@ -933,17 +987,20 @@ async fn test_ip_pool_list_in_silo(cptestctx: &ControlPlaneTestContext) { ); create_ip_pool(client, otherpool_name, Some(otherpool_range)).await; - let list = - objects_list_page_authz::(client, "/v1/ip-pools").await.items; + let list = objects_list_page_authz::(client, "/v1/ip-pools") + .await + .items; // only mypool shows up because it's linked to my silo assert_eq!(list.len(), 1); assert_eq!(list[0].identity.name.to_string(), mypool_name); + assert!(list[0].is_default); // fetch the pool directly too let url = format!("/v1/ip-pools/{}", 
mypool_name); - let pool: IpPool = object_get(client, &url).await; + let pool = object_get::(client, &url).await; assert_eq!(pool.identity.name.as_str(), mypool_name); + assert!(pool.is_default); // fetching the other pool directly 404s let url = format!("/v1/ip-pools/{}", otherpool_name); @@ -978,13 +1035,13 @@ async fn test_ip_range_delete_with_allocated_external_ip_fails( .await; // associate pool with default silo, which is the privileged user's silo - let params = IpPoolSiloLink { + let params = IpPoolLinkSilo { silo: NameOrId::Id(DEFAULT_SILO.id()), is_default: true, }; NexusRequest::objects_post(client, &ip_pool_silos_url, ¶ms) .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() + .execute_and_parse_unwrap::() .await; // Add an IP range to the pool diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index 2d842dd930..bd79a9c3e9 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -185,6 +185,7 @@ saml_identity_provider_view GET /v1/system/identity-providers/ silo_create POST /v1/system/silos silo_delete DELETE /v1/system/silos/{silo} silo_identity_provider_list GET /v1/system/identity-providers +silo_ip_pool_list GET /v1/system/silos/{silo}/ip-pools silo_list GET /v1/system/silos silo_policy_update PUT /v1/system/silos/{silo}/policy silo_policy_view GET /v1/system/silos/{silo}/policy diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index a33bc0b8bb..750e83c2a2 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -855,7 +855,7 @@ pub struct IpPoolSiloPath { } #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct IpPoolSiloLink { +pub struct IpPoolLinkSilo { pub silo: NameOrId, /// When a pool is the default for a silo, floating IPs and instance /// ephemeral IPs will come from that pool when no other pool is specified. diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index cf312d3b82..314dd4ed00 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -303,10 +303,22 @@ pub struct IpPool { pub identity: IdentityMetadata, } +/// An IP pool in the context of a silo +#[derive(ObjectIdentity, Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct SiloIpPool { + #[serde(flatten)] + pub identity: IdentityMetadata, + + /// When a pool is the default for a silo, floating IPs and instance + /// ephemeral IPs will come from that pool when no other pool is specified. + /// There can be at most one default for a given silo. 
+ pub is_default: bool, +} + /// A link between an IP pool and a silo that allows one to allocate IPs from /// the pool within the silo #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq)] -pub struct IpPoolSilo { +pub struct IpPoolSiloLink { pub ip_pool_id: Uuid, pub silo_id: Uuid, /// When a pool is the default for a silo, floating IPs and instance diff --git a/openapi/nexus.json b/openapi/nexus.json index a4ba6cbb86..2dd4037430 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -2191,7 +2191,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolResultsPage" + "$ref": "#/components/schemas/SiloIpPoolResultsPage" } } } @@ -2232,7 +2232,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPool" + "$ref": "#/components/schemas/SiloIpPool" } } } @@ -5039,7 +5039,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolSiloResultsPage" + "$ref": "#/components/schemas/IpPoolSiloLinkResultsPage" } } } @@ -5076,7 +5076,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolSiloLink" + "$ref": "#/components/schemas/IpPoolLinkSilo" } } }, @@ -5088,7 +5088,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolSilo" + "$ref": "#/components/schemas/IpPoolSiloLink" } } } @@ -5144,7 +5144,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/IpPoolSilo" + "$ref": "#/components/schemas/IpPoolSiloLink" } } } @@ -6580,7 +6580,7 @@ "system/silos" ], "summary": "Fetch a silo", - "description": "Fetch a silo by name.", + "description": "Fetch a silo by name or ID.", "operationId": "silo_view", "parameters": [ { @@ -6643,6 +6643,74 @@ } } }, + "/v1/system/silos/{silo}/ip-pools": { + "get": { + "tags": [ + "system/silos" + ], + "summary": "List IP pools available within silo", + "operationId": "silo_ip_pool_list", + "parameters": [ + { + "in": "path", + "name": "silo", + "description": "Name or ID of the silo", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "limit", + "description": "Maximum number of items returned by a single call", + "schema": { + "nullable": true, + "type": "integer", + "format": "uint32", + "minimum": 1 + } + }, + { + "in": "query", + "name": "page_token", + "description": "Token returned by previous call to retrieve the subsequent page", + "schema": { + "nullable": true, + "type": "string" + } + }, + { + "in": "query", + "name": "sort_by", + "schema": { + "$ref": "#/components/schemas/NameOrIdSortMode" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/SiloIpPoolResultsPage" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + }, + "x-dropshot-pagination": { + "required": [] + } + } + }, "/v1/system/silos/{silo}/policy": { "get": { "tags": [ @@ -12497,6 +12565,22 @@ "name" ] }, + "IpPoolLinkSilo": { + "type": "object", + "properties": { + "is_default": { + "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. 
There can be at most one default for a given silo.", + "type": "boolean" + }, + "silo": { + "$ref": "#/components/schemas/NameOrId" + } + }, + "required": [ + "is_default", + "silo" + ] + }, "IpPoolRange": { "type": "object", "properties": { @@ -12565,7 +12649,7 @@ "items" ] }, - "IpPoolSilo": { + "IpPoolSiloLink": { "description": "A link between an IP pool and a silo that allows one to allocate IPs from the pool within the silo", "type": "object", "properties": { @@ -12588,23 +12672,7 @@ "silo_id" ] }, - "IpPoolSiloLink": { - "type": "object", - "properties": { - "is_default": { - "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. There can be at most one default for a given silo.", - "type": "boolean" - }, - "silo": { - "$ref": "#/components/schemas/NameOrId" - } - }, - "required": [ - "is_default", - "silo" - ] - }, - "IpPoolSiloResultsPage": { + "IpPoolSiloLinkResultsPage": { "description": "A single page of results", "type": "object", "properties": { @@ -12612,7 +12680,7 @@ "description": "list of items on this page of results", "type": "array", "items": { - "$ref": "#/components/schemas/IpPoolSilo" + "$ref": "#/components/schemas/IpPoolSiloLink" } }, "next_page": { @@ -13802,6 +13870,72 @@ } ] }, + "SiloIpPool": { + "description": "An IP pool in the context of a silo", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "is_default": { + "description": "When a pool is the default for a silo, floating IPs and instance ephemeral IPs will come from that pool when no other pool is specified. There can be at most one default for a given silo.", + "type": "boolean" + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "time_created": { + "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "is_default", + "name", + "time_created", + "time_modified" + ] + }, + "SiloIpPoolResultsPage": { + "description": "A single page of results", + "type": "object", + "properties": { + "items": { + "description": "list of items on this page of results", + "type": "array", + "items": { + "$ref": "#/components/schemas/SiloIpPool" + } + }, + "next_page": { + "nullable": true, + "description": "token used to fetch the next page of results (if any)", + "type": "string" + } + }, + "required": [ + "items" + ] + }, "SiloQuotas": { "description": "A collection of resource counts used to set the virtual capacity of a silo", "type": "object", From e5a3caa09ba43eff5f8b6ef7016fbea0856f0911 Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Mon, 22 Jan 2024 09:23:56 -0800 Subject: [PATCH 04/49] [sled-agent] Encrypt a specific set of U.2 datasets (#4853) This PR does the following: - As a part of processing U.2s during initialization, `ensure_zpool_datasets_are_encrypted` is invoked. 
This identifies all datasets which should be encrypted (`cockroachdb`, `clickhouse`, `internal_dns`, `external_dns`, `clickhouse_keeper`) and performs a one-way migration from unencrypted to encrypted dataset. - Additionally, during zone launching, the sled agent verifies properties about datasets that it expects should be encrypted. This helps prevent these encrypted dataset from being used before their transfer has finished, and also prevents these zones from ever using unencrypted datasets in the future. - Furthermore, for all new deployments, this PR uses encryption on these datasets by default. --- illumos-utils/src/zfs.rs | 63 +++-- sled-agent/src/services.rs | 51 ++++- sled-agent/src/sled_agent.rs | 2 +- sled-storage/src/dataset.rs | 431 ++++++++++++++++++++++++++++++++++- sled-storage/src/disk.rs | 10 + sled-storage/src/manager.rs | 5 +- 6 files changed, 532 insertions(+), 30 deletions(-) diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index e9554100af..c111955761 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -108,12 +108,13 @@ enum GetValueErrorRaw { MissingValue, } -/// Error returned by [`Zfs::get_oxide_value`]. +/// Error returned by [`Zfs::get_oxide_value`] or [`Zfs::get_value`]. #[derive(thiserror::Error, Debug)] -#[error("Failed to get value '{name}' from filesystem {filesystem}: {err}")] +#[error("Failed to get value '{name}' from filesystem {filesystem}")] pub struct GetValueError { filesystem: String, name: String, + #[source] err: GetValueErrorRaw, } @@ -464,28 +465,13 @@ impl Zfs { Zfs::get_value(filesystem_name, &format!("oxide:{}", name)) } + /// Calls "zfs get" with a single value pub fn get_value( filesystem_name: &str, name: &str, ) -> Result { - let mut command = std::process::Command::new(PFEXEC); - let cmd = - command.args(&[ZFS, "get", "-Ho", "value", &name, filesystem_name]); - let output = execute(cmd).map_err(|err| GetValueError { - filesystem: filesystem_name.to_string(), - name: name.to_string(), - err: err.into(), - })?; - let stdout = String::from_utf8_lossy(&output.stdout); - let value = stdout.trim(); - if value == "-" { - return Err(GetValueError { - filesystem: filesystem_name.to_string(), - name: name.to_string(), - err: GetValueErrorRaw::MissingValue, - }); - } - Ok(value.to_string()) + let [value] = Self::get_values(filesystem_name, &[name])?; + Ok(value) } /// List all extant snapshots. 
@@ -549,6 +535,43 @@ impl Zfs { } } +// These methods don't work with mockall, so they exist in a separate impl block +impl Zfs { + /// Calls "zfs get" to acquire multiple values + pub fn get_values( + filesystem_name: &str, + names: &[&str; N], + ) -> Result<[String; N], GetValueError> { + let mut cmd = std::process::Command::new(PFEXEC); + let all_names = + names.into_iter().map(|n| *n).collect::>().join(","); + cmd.args(&[ZFS, "get", "-Ho", "value", &all_names, filesystem_name]); + let output = execute(&mut cmd).map_err(|err| GetValueError { + filesystem: filesystem_name.to_string(), + name: format!("{:?}", names), + err: err.into(), + })?; + let stdout = String::from_utf8_lossy(&output.stdout); + let values = stdout.trim(); + + const EMPTY_STRING: String = String::new(); + let mut result: [String; N] = [EMPTY_STRING; N]; + + for (i, value) in values.lines().enumerate() { + let value = value.trim(); + if value == "-" { + return Err(GetValueError { + filesystem: filesystem_name.to_string(), + name: names[i].to_string(), + err: GetValueErrorRaw::MissingValue, + }); + } + result[i] = value.to_string(); + } + Ok(result) + } +} + /// A read-only snapshot of a ZFS filesystem. #[derive(Clone, Debug)] pub struct Snapshot { diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index c068515d14..211e602bbf 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -199,6 +199,24 @@ pub enum Error { #[error("Failed to get address: {0}")] GetAddressFailure(#[from] illumos_utils::zone::GetAddressError), + #[error( + "Failed to launch zone {zone} because ZFS value cannot be accessed" + )] + GetZfsValue { + zone: String, + #[source] + source: illumos_utils::zfs::GetValueError, + }, + + #[error("Cannot launch {zone} with {dataset} (saw {prop_name} = {prop_value}, expected {prop_value_expected})")] + DatasetNotReady { + zone: String, + dataset: String, + prop_name: String, + prop_value: String, + prop_value_expected: String, + }, + #[error("NTP zone not ready")] NtpZoneNotReady, @@ -1474,7 +1492,7 @@ impl ServiceManager { ZoneArgs::Omicron(zone_config) => zone_config .zone .dataset_name() - .map(|n| zone::Dataset { name: n.full() }) + .map(|n| zone::Dataset { name: n.full_name() }) .into_iter() .collect(), ZoneArgs::Switch(_) => vec![], @@ -1711,7 +1729,7 @@ impl ServiceManager { dataset.pool_name.clone(), DatasetKind::Crucible, ) - .full(); + .full_name(); let uuid = &Uuid::new_v4().to_string(); let config = PropertyGroupBuilder::new("config") .add_property("datalink", "astring", datalink) @@ -2930,6 +2948,35 @@ impl ServiceManager { // Currently, the zone filesystem should be destroyed between // reboots, so it's fine to make this decision locally. let root = if let Some(dataset) = zone.dataset_name() { + // Check that the dataset is actually ready to be used. 
+ let [zoned, canmount, encryption] = + illumos_utils::zfs::Zfs::get_values( + &dataset.full_name(), + &["zoned", "canmount", "encryption"], + ) + .map_err(|err| Error::GetZfsValue { + zone: zone.zone_name(), + source: err, + })?; + + let check_property = |name, actual, expected| { + if actual != expected { + return Err(Error::DatasetNotReady { + zone: zone.zone_name(), + dataset: dataset.full_name(), + prop_name: String::from(name), + prop_value: actual, + prop_value_expected: String::from(expected), + }); + } + return Ok(()); + }; + check_property("zoned", zoned, "on")?; + check_property("canmount", canmount, "on")?; + if dataset.dataset().dataset_should_be_encrypted() { + check_property("encryption", encryption, "aes-256-gcm")?; + } + // If the zone happens to already manage a dataset, then // we co-locate the zone dataset on the same zpool. // diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index d094643cf9..71fe3584f0 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -611,7 +611,7 @@ impl SledAgent { warn!( self.log, "Failed to load services, will retry in {:?}", delay; - "error" => %err, + "error" => ?err, ); }, ) diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index a2878af7f6..41b77ea38b 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -18,7 +18,9 @@ use rand::distributions::{Alphanumeric, DistString}; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; use sled_hardware::DiskVariant; -use slog::{info, Logger}; +use slog::{debug, info, Logger}; +use std::process::Stdio; +use std::str::FromStr; use std::sync::OnceLock; pub const INSTALL_DATASET: &'static str = "install"; @@ -138,16 +140,57 @@ pub enum DatasetKind { InternalDns, } +impl DatasetKind { + pub fn dataset_should_be_encrypted(&self) -> bool { + match self { + // We encrypt all datasets except Crucible. + // + // Crucible already performs encryption internally, and we + // avoid double-encryption. + DatasetKind::Crucible => false, + _ => true, + } + } +} + +#[derive(Debug, thiserror::Error)] +pub enum DatasetKindParseError { + #[error("Dataset unknown: {0}")] + UnknownDataset(String), +} + +impl FromStr for DatasetKind { + type Err = DatasetKindParseError; + + fn from_str(s: &str) -> Result { + use DatasetKind::*; + let kind = match s { + "crucible" => Crucible, + "cockroachdb" => CockroachDb, + "clickhouse" => Clickhouse, + "clickhouse_keeper" => ClickhouseKeeper, + "external_dns" => ExternalDns, + "internal_dns" => InternalDns, + _ => { + return Err(DatasetKindParseError::UnknownDataset( + s.to_string(), + )) + } + }; + Ok(kind) + } +} + impl std::fmt::Display for DatasetKind { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { use DatasetKind::*; let s = match self { Crucible => "crucible", - CockroachDb { .. } => "cockroachdb", + CockroachDb => "cockroachdb", Clickhouse => "clickhouse", ClickhouseKeeper => "clickhouse_keeper", - ExternalDns { .. } => "external_dns", - InternalDns { .. } => "internal_dns", + ExternalDns => "external_dns", + InternalDns => "internal_dns", }; write!(f, "{}", s) } @@ -176,7 +219,28 @@ impl DatasetName { &self.kind } - pub fn full(&self) -> String { + /// Returns the full name of the dataset, as would be returned from + /// "zfs get" or "zfs list". + /// + /// If this dataset should be encrypted, this automatically adds the + /// "crypt" dataset component. + pub fn full_name(&self) -> String { + // Currently, we encrypt all datasets except Crucible. 
+ // + // Crucible already performs encryption internally, and we + // avoid double-encryption. + if self.kind.dataset_should_be_encrypted() { + self.full_encrypted_name() + } else { + self.full_unencrypted_name() + } + } + + fn full_encrypted_name(&self) -> String { + format!("{}/crypt/{}", self.pool_name, self.kind) + } + + fn full_unencrypted_name(&self) -> String { format!("{}/{}", self.pool_name, self.kind) } } @@ -201,6 +265,8 @@ pub enum DatasetError { #[source] err: Box, }, + #[error("Failed to make datasets encrypted")] + EncryptionMigration(#[from] DatasetEncryptionMigrationError), } /// Ensure that the zpool contains all the datasets we would like it to @@ -364,6 +430,361 @@ pub(crate) async fn ensure_zpool_has_datasets( Ok(()) } +#[derive(Debug, thiserror::Error)] +pub enum DatasetEncryptionMigrationError { + #[error(transparent)] + IoError(#[from] std::io::Error), + + #[error("Failed to run command")] + FailedCommand { command: String, stderr: Option }, + + #[error("Cannot create new encrypted dataset")] + DatasetCreation(#[from] illumos_utils::zfs::EnsureFilesystemError), + + #[error("Missing stdout stream during 'zfs send' command")] + MissingStdoutForZfsSend, +} + +fn status_ok_or_get_stderr( + command: &tokio::process::Command, + output: &std::process::Output, +) -> Result<(), DatasetEncryptionMigrationError> { + if !output.status.success() { + let stdcmd = command.as_std(); + return Err(DatasetEncryptionMigrationError::FailedCommand { + command: format!( + "{:?} {:?}", + stdcmd.get_program(), + stdcmd + .get_args() + .collect::>() + .join(std::ffi::OsStr::new(" ")) + ), + stderr: Some(String::from_utf8_lossy(&output.stderr).to_string()), + }); + } + Ok(()) +} + +/// Migrates unencrypted datasets to their encrypted formats. +pub(crate) async fn ensure_zpool_datasets_are_encrypted( + log: &Logger, + zpool_name: &ZpoolName, +) -> Result<(), DatasetEncryptionMigrationError> { + info!(log, "Looking for unencrypted datasets in {zpool_name}"); + let unencrypted_datasets = + find_all_unencrypted_datasets_directly_within_pool(&log, &zpool_name) + .await?; + + // TODO: Could do this in parallel? + for dataset in unencrypted_datasets { + let log = &log.new(slog::o!("dataset" => dataset.clone())); + info!(log, "Found unencrypted dataset"); + + ensure_zpool_dataset_is_encrypted(&log, &zpool_name, &dataset).await?; + } + Ok(()) +} + +async fn find_all_unencrypted_datasets_directly_within_pool( + log: &Logger, + zpool_name: &ZpoolName, +) -> Result, DatasetEncryptionMigrationError> { + let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS); + let pool_name = zpool_name.to_string(); + let cmd = command.args(&[ + "list", + "-rHo", + "name,encryption", + "-d", + "1", + &pool_name, + ]); + let output = cmd.output().await?; + status_ok_or_get_stderr(&cmd, &output)?; + + let stdout = String::from_utf8_lossy(&output.stdout); + let lines = stdout.trim().split('\n'); + + let mut unencrypted_datasets = vec![]; + for line in lines { + let mut iter = line.split_whitespace(); + let Some(dataset) = iter.next() else { + continue; + }; + let log = log.new(slog::o!("dataset" => dataset.to_string())); + + let Some(encryption) = iter.next() else { + continue; + }; + + // We don't bother checking HOW the dataset is encrypted, just that it + // IS encrypted somehow. The sled agent is slightly more opinionated, as + // it looks for "aes-256-gcm" explicitly, but we currently don't plan on + // providing support for migrating between encryption schemes + // automatically. 
+ let encrypted = match encryption { + "off" | "-" => false, + _ => true, + }; + if encrypted { + debug!(log, "Found dataset, but it is already encrypted"); + continue; + } + debug!(log, "Found dataset, and it isn't encrypted"); + if let Some(dataset) = + dataset.strip_prefix(&format!("{pool_name}/")).map(String::from) + { + unencrypted_datasets.push(dataset); + } + } + Ok(unencrypted_datasets) +} + +// Precondition: +// - We found the dataset as a direct descendant of "zpool_name", which +// has encryption set to "off". +// +// "dataset" does not include the zpool prefix; format!("{zpool_name}/dataset") +// would be the full name of the unencrypted dataset. +async fn ensure_zpool_dataset_is_encrypted( + log: &Logger, + zpool_name: &ZpoolName, + unencrypted_dataset: &str, +) -> Result<(), DatasetEncryptionMigrationError> { + let Ok(kind) = DatasetKind::from_str(&unencrypted_dataset) else { + info!(log, "Unrecognized dataset kind"); + return Ok(()); + }; + info!(log, "Dataset recognized"); + let unencrypted_dataset = format!("{zpool_name}/{unencrypted_dataset}"); + + if !kind.dataset_should_be_encrypted() { + info!(log, "Dataset should not be encrypted"); + return Ok(()); + } + info!(log, "Dataset should be encrypted"); + + let encrypted_dataset = DatasetName::new(zpool_name.clone(), kind); + let encrypted_dataset = encrypted_dataset.full_name(); + + let (unencrypted_dataset_exists, encrypted_dataset_exists) = ( + dataset_exists(&unencrypted_dataset).await?, + dataset_exists(&encrypted_dataset).await?, + ); + + match (unencrypted_dataset_exists, encrypted_dataset_exists) { + (false, _) => { + // In this case, there is no unencrypted dataset! Bail out, there is + // nothing to transfer. + return Ok(()); + } + (true, true) => { + // In this case, the following is true: + // - An unencrypted dataset exists + // - An encrypted dataset exists + // + // This is indicative of an incomplete transfer from "old" -> "new". + // If we managed to create the encrypted dataset, and got far enough to + // rename to it's "non-tmp" location, then pick up where we left off: + // - Mark the encrypted dataset as usable + // - Remove the unencrypted dataset + info!( + log, + "Dataset already has encrypted variant, resuming migration" + ); + return finalize_encryption_migration( + &log, + &encrypted_dataset, + &unencrypted_dataset, + ) + .await; + } + (true, false) => { + // This is the "normal" transfer case, initially: We have an + // unencrypted dataset that should become encrypted. + info!(log, "Dataset has not yet been encrypted"); + } + } + + let snapshot_name = |dataset: &str| format!("{dataset}@migration"); + + // A snapshot taken to provide a point-in-time view of the dataset for + // copying. + let unencrypted_dataset_snapshot = snapshot_name(&unencrypted_dataset); + // A "temporary" name for the encrypted target dataset. + let encrypted_dataset_tmp = format!("{}-tmp", encrypted_dataset); + // A snapshot that is automatically generated by "zfs receive". + let encrypted_dataset_tmp_snapshot = snapshot_name(&encrypted_dataset_tmp); + + // Get rid of snapshots and temporary datasets. + // + // This removes work of any prior sled agents that might have failed halfway + // through this operation. + let _ = zfs_destroy(&unencrypted_dataset_snapshot).await; + let _ = zfs_destroy(&encrypted_dataset_tmp).await; + + zfs_create_snapshot(&unencrypted_dataset_snapshot).await?; + info!(log, "Encrypted dataset snapshotted"); + + // Transfer to a "tmp" dataset that's encrypted, but not mountable. 
+    //
+    // This makes it clear it's a "work-in-progress" dataset until the transfer
+    // has fully completed.
+    zfs_transfer_to_unmountable_dataset(
+        &unencrypted_dataset_snapshot,
+        &encrypted_dataset_tmp,
+    )
+    .await?;
+    info!(log, "Dataset transferred to encrypted (temporary) location");
+
+    zfs_destroy(&unencrypted_dataset_snapshot).await?;
+    zfs_destroy(&encrypted_dataset_tmp_snapshot).await?;
+    info!(log, "Removed snapshots");
+
+    // We tragically cannot "zfs rename" any datasets with "zoned=on".
+    //
+    // We perform the rename first, then set "zoned=on" with "canmount=on".
+    // This prevents the dataset from being used by zones until these properties
+    // have finally been set.
+    zfs_rename(&encrypted_dataset_tmp, &encrypted_dataset).await?;
+
+    return finalize_encryption_migration(
+        &log,
+        &encrypted_dataset,
+        &unencrypted_dataset,
+    )
+    .await;
+}
+
+// Returns true if the dataset exists.
+async fn dataset_exists(
+    dataset: &str,
+) -> Result<bool, DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let cmd = command.args(&["list", "-H", dataset]);
+    Ok(cmd.status().await?.success())
+}
+
+// Destroys the dataset and all children, recursively.
+async fn zfs_destroy(
+    dataset: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let cmd = command.args(&["destroy", "-r", dataset]);
+    let output = cmd.output().await?;
+    status_ok_or_get_stderr(&cmd, &output)?;
+    Ok(())
+}
+
+// Creates a snapshot named "dataset_snapshot".
+async fn zfs_create_snapshot(
+    dataset_snapshot: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let cmd = command.args(&["snapshot", dataset_snapshot]);
+    let output = cmd.output().await?;
+    status_ok_or_get_stderr(&cmd, &output)?;
+    Ok(())
+}
+
+// Uses "zfs send" and "zfs receive" to create an unmountable, unzoned dataset.
+//
+// These properties are set to allow the caller to continue manipulating the
+// dataset (via rename, setting other properties, etc) before it's used.
+async fn zfs_transfer_to_unmountable_dataset(
+    from: &str,
+    to: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let sender_cmd = command
+        .args(&["send", from])
+        .stdout(Stdio::piped())
+        .stderr(Stdio::piped());
+    let mut sender = sender_cmd.spawn()?;
+
+    let Some(sender_stdout) = sender.stdout.take() else {
+        return Err(DatasetEncryptionMigrationError::MissingStdoutForZfsSend);
+    };
+    let sender_stdout: Stdio = sender_stdout.try_into().map_err(|_| {
+        DatasetEncryptionMigrationError::MissingStdoutForZfsSend
+    })?;
+
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let receiver_cmd = command
+        .args(&[
+            "receive",
+            "-o",
+            "mountpoint=/data",
+            "-o",
+            "canmount=off",
+            "-o",
+            "zoned=off",
+            to,
+        ])
+        .stdin(sender_stdout)
+        .stderr(Stdio::piped());
+    let receiver = receiver_cmd.spawn()?;
+
+    let output = receiver.wait_with_output().await?;
+    status_ok_or_get_stderr(&receiver_cmd, &output)?;
+    let output = sender.wait_with_output().await?;
+    status_ok_or_get_stderr(&sender_cmd, &output)?;
+
+    Ok(())
+}
+
+// Sets "properties" on "dataset".
+//
+// Each member of "properties" should have the form "key=value".
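+//
+// A minimal usage sketch (hypothetical dataset name):
+//
+//   zfs_set("oxp_<uuid>/crypt/zone", &["canmount=on"]).await?;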
+async fn zfs_set(
+    dataset: &str,
+    properties: &[&str],
+) -> Result<(), DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let cmd = command.arg("set");
+    for property in properties {
+        cmd.arg(property);
+    }
+    cmd.arg(dataset);
+
+    let output = cmd.output().await?;
+    status_ok_or_get_stderr(&cmd, &output)?;
+    Ok(())
+}
+
+// Sets properties to make a dataset "ready to be used by zones".
+async fn zfs_set_zoned_and_mountable(
+    dataset: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    zfs_set(&dataset, &["zoned=on", "canmount=on"]).await
+}
+
+// Renames a dataset from "from" to "to".
+async fn zfs_rename(
+    from: &str,
+    to: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    let mut command = tokio::process::Command::new(illumos_utils::zfs::ZFS);
+    let cmd = command.args(&["rename", from, to]);
+    let output = cmd.output().await?;
+    status_ok_or_get_stderr(&cmd, &output)?;
+    Ok(())
+}
+
+async fn finalize_encryption_migration(
+    log: &Logger,
+    encrypted_dataset: &str,
+    unencrypted_dataset: &str,
+) -> Result<(), DatasetEncryptionMigrationError> {
+    zfs_set_zoned_and_mountable(&encrypted_dataset).await?;
+    info!(log, "Dataset is encrypted, zoned, and mountable"; "dataset" => encrypted_dataset);
+
+    zfs_destroy(&unencrypted_dataset).await?;
+    info!(log, "Destroyed unencrypted dataset"; "dataset" => unencrypted_dataset);
+    Ok(())
+}
+
 #[cfg(test)]
 mod test {
     use super::*;
diff --git a/sled-storage/src/disk.rs b/sled-storage/src/disk.rs
index f5209def77..cfe189a409 100644
--- a/sled-storage/src/disk.rs
+++ b/sled-storage/src/disk.rs
@@ -151,6 +151,16 @@ impl Disk {
             key_requester,
         )
         .await?;
+
+        if matches!(disk.variant(), DiskVariant::U2) {
+            dataset::ensure_zpool_datasets_are_encrypted(
+                log,
+                disk.zpool_name(),
+            )
+            .await
+            .map_err(|err| crate::dataset::DatasetError::from(err))?;
+        }
+
         Ok(disk)
     }

diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs
index 50b1c44148..547c4ec2d7 100644
--- a/sled-storage/src/manager.rs
+++ b/sled-storage/src/manager.rs
@@ -471,7 +471,8 @@ impl StorageManager {
             Err(err) => {
                 error!(
                     self.log,
-                    "Persistent error: {err}: not queueing disk";
+                    "Persistent error: not queueing disk";
+                    "err" => ?err,
                     "disk_id" => ?raw_disk.identity()
                 );
                 Err(err.into())
@@ -575,7 +576,7 @@ impl StorageManager {
         }

         let zoned = true;
-        let fs_name = &request.dataset_name.full();
+        let fs_name = &request.dataset_name.full_name();
         let do_format = true;
         let encryption_details = None;
         let size_details = None;

From e8b6dd1dc4e7abb39276ad347bdf1ac08171862d Mon Sep 17 00:00:00 2001
From: Kyle Simpson
Date: Mon, 22 Jan 2024 18:33:01 +0000
Subject: [PATCH 05/49] Add floating IP check to project delete (#4862)

The main floating IP PR missed the check in `project_delete` for any
existing child floating IP objects. This commit adds this check so that
a project cannot be deleted while any FIPs remain (matching other
project-scoped resources), as well as a matching integration test.

Closes #4854.
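For context when reading the diff below: the one-line
`generate_fn_to_ensure_none_in_project!` invocation expands to a
"select at most one live child" check. A rough sketch of the generated
helper follows; the names and exact diesel calls here are assumptions
inferred from the macro's arguments and from the error message asserted
by the new integration test, not a verbatim expansion:

```rust
// Hypothetical expansion of:
//     generate_fn_to_ensure_none_in_project!(floating_ip, name, String);
async fn ensure_no_floating_ips_in_project(
    &self,
    opctx: &OpContext,
    authz_project: &authz::Project,
) -> Result<(), Error> {
    use db::schema::floating_ip::dsl;

    // Find at most one not-yet-deleted floating IP still in this project.
    let found = dsl::floating_ip
        .filter(dsl::project_id.eq(authz_project.id()))
        .filter(dsl::time_deleted.is_null())
        .select(dsl::name)
        .limit(1)
        .first_async::<String>(
            &*self.pool_connection_authorized(opctx).await?,
        )
        .await
        .optional()
        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;

    if let Some(name) = found {
        // This is the message the new integration test asserts on.
        return Err(Error::invalid_request(&format!(
            "project to be deleted contains a floating ip: {name}"
        )));
    }
    Ok(())
}
```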
--- nexus/db-queries/src/db/datastore/project.rs | 2 ++ nexus/tests/integration_tests/projects.rs | 34 ++++++++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/nexus/db-queries/src/db/datastore/project.rs b/nexus/db-queries/src/db/datastore/project.rs index e3927fdfc1..08647b421e 100644 --- a/nexus/db-queries/src/db/datastore/project.rs +++ b/nexus/db-queries/src/db/datastore/project.rs @@ -221,6 +221,7 @@ impl DataStore { generate_fn_to_ensure_none_in_project!(instance, name, String); generate_fn_to_ensure_none_in_project!(disk, name, String); + generate_fn_to_ensure_none_in_project!(floating_ip, name, String); generate_fn_to_ensure_none_in_project!(project_image, name, String); generate_fn_to_ensure_none_in_project!(snapshot, name, String); generate_fn_to_ensure_none_in_project!(vpc, name, String); @@ -237,6 +238,7 @@ impl DataStore { // Verify that child resources do not exist. self.ensure_no_instances_in_project(opctx, authz_project).await?; self.ensure_no_disks_in_project(opctx, authz_project).await?; + self.ensure_no_floating_ips_in_project(opctx, authz_project).await?; self.ensure_no_project_images_in_project(opctx, authz_project).await?; self.ensure_no_snapshots_in_project(opctx, authz_project).await?; self.ensure_no_vpcs_in_project(opctx, authz_project).await?; diff --git a/nexus/tests/integration_tests/projects.rs b/nexus/tests/integration_tests/projects.rs index d9d6ceef5b..60195e5902 100644 --- a/nexus/tests/integration_tests/projects.rs +++ b/nexus/tests/integration_tests/projects.rs @@ -9,6 +9,7 @@ use http::StatusCode; use nexus_test_utils::http_testing::AuthnMode; use nexus_test_utils::http_testing::NexusRequest; use nexus_test_utils::http_testing::RequestBuilder; +use nexus_test_utils::resource_helpers::create_floating_ip; use nexus_test_utils::resource_helpers::{ create_default_ip_pool, create_disk, create_project, create_vpc, object_create, project_get, projects_list, DiskTest, @@ -209,6 +210,39 @@ async fn test_project_deletion_with_disk(cptestctx: &ControlPlaneTestContext) { delete_project(&url, &client).await; } +#[nexus_test] +async fn test_project_deletion_with_floating_ip( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + let _test = DiskTest::new(&cptestctx).await; + + // Create a project that we'll use for testing. 
+ let name = "springfield-squidport"; + let url = format!("/v1/projects/{}", name); + + create_default_ip_pool(&client).await; + + create_project(&client, &name).await; + delete_project_default_subnet(&name, &client).await; + delete_project_default_vpc(&name, &client).await; + let fip = create_floating_ip(&client, "my-fip", &name, None, None).await; + assert_eq!( + "project to be deleted contains a floating ip: my-fip", + delete_project_expect_fail(&url, &client).await, + ); + let disk_url = + super::external_ips::get_floating_ip_by_id_url(&fip.identity.id); + NexusRequest::object_delete(&client, &disk_url) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .expect("failed to delete floating IP"); + + delete_project(&url, &client).await; +} + #[nexus_test] async fn test_project_deletion_with_image(cptestctx: &ControlPlaneTestContext) { let client = &cptestctx.external_client; From 030adce411fe37c9e2d3c70ee5a6cdbdfd49f3f9 Mon Sep 17 00:00:00 2001 From: Rain Date: Mon, 22 Jan 2024 14:57:01 -0800 Subject: [PATCH 06/49] [update-engine] reuse parent_key_and_child_index from existing steps (#4858) During a dogfood mupdate on 2024-01-18, I saw some really weird output with wicket's `rack-update attach` that looked like: ``` [sled 8 00:20:48] Running .... 12a 5s 1/3) Writing host phase 2 to slot B ``` The "5s" is all wrong -- the letter there is supposed to indicate, for an engine that has one or more nested engines, the index of that nested engine. So for example, if a step 12 has two nested engines, they would be marked "12a" and "12b". "5s" indicates that that's the 19th nested engine for that step, and we definitely have nowhere near 19 nested engines for a step anywhere in wicketd. This turned out to be because we weren't reusing child indexes from earlier steps in the sequence. Fix that, and also add: * tests which catch this issue * some dev-only code to wicket which made it easy to debug this locally --- update-engine/src/buffer.rs | 185 +++++++++++++++++++++++++------- update-engine/src/test_utils.rs | 31 ++++-- wicket/src/cli/rack_update.rs | 179 +++++++++++++++++++++++++++++- 3 files changed, 350 insertions(+), 45 deletions(-) diff --git a/update-engine/src/buffer.rs b/update-engine/src/buffer.rs index 36a0626963..04363ffc26 100644 --- a/update-engine/src/buffer.rs +++ b/update-engine/src/buffer.rs @@ -262,45 +262,59 @@ impl EventStore { root_event_index, event.total_elapsed, ); + if let Some(new_execution) = actions.new_execution { if new_execution.nest_level == 0 { self.root_execution_id = Some(new_execution.execution_id); } - // If there's a parent key, then what's the child index? - let parent_key_and_child_index = - if let Some(parent_key) = new_execution.parent_key { - match self.map.get_mut(&parent_key) { - Some(parent_data) => { - let child_index = parent_data.child_executions_seen; - parent_data.child_executions_seen += 1; - Some((parent_key, child_index)) - } - None => { - // This should never happen -- it indicates that the - // parent key was unknown. This can happen if we - // didn't receive an event regarding a parent - // execution being started. + + if let Some((first_step_key, ..)) = + new_execution.steps_to_add.first() + { + // Do we already know about this execution? If so, grab the parent + // key and child index from the first step. 
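+                // (Reusing the stored value, instead of bumping the parent's
+                // `child_executions_seen` again on every report, is what keeps
+                // repeated reports from inflating the child index -- the
+                // "12a"-turning-into-"5s" symptom described in the commit
+                // message.)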
+                let parent_key_and_child_index =
+                    if let Some(data) = self.map.get(first_step_key) {
+                        data.parent_key_and_child_index
+                    } else {
+                        if let Some(parent_key) = new_execution.parent_key {
+                            match self.map.get_mut(&parent_key) {
+                                Some(parent_data) => {
+                                    let child_index =
+                                        parent_data.child_executions_seen;
+                                    parent_data.child_executions_seen += 1;
+                                    Some((parent_key, child_index))
+                                }
+                                None => {
+                                    // This should never happen -- it indicates that the
+                                    // parent key was unknown. This can happen if we
+                                    // didn't receive an event regarding a parent
+                                    // execution being started.
+                                    None
+                                }
+                            }
+                        } else {
+                            None
+                        }
-                            None
-                        }
-                    }
-                } else {
-                    None
-                };
-            let total_steps = new_execution.steps_to_add.len();
-            for (new_step_key, new_step, sort_key) in new_execution.steps_to_add
-            {
-                // These are brand new steps so their keys shouldn't exist in the
-                // map. But if they do, don't overwrite them.
-                self.map.entry(new_step_key).or_insert_with(|| {
-                    EventBufferStepData::new(
-                        new_step,
-                        parent_key_and_child_index,
-                        sort_key,
-                        new_execution.nest_level,
-                        total_steps,
-                        root_event_index,
-                    )
-                });
-            }
+                    };
+
+                let total_steps = new_execution.steps_to_add.len();
+                for (new_step_key, new_step, sort_key) in
+                    new_execution.steps_to_add
+                {
+                    // These are brand new steps so their keys shouldn't exist in the
+                    // map. But if they do, don't overwrite them.
+                    self.map.entry(new_step_key).or_insert_with(|| {
+                        EventBufferStepData::new(
+                            new_step,
+                            parent_key_and_child_index,
+                            sort_key,
+                            new_execution.nest_level,
+                            total_steps,
+                            root_event_index,
+                        )
+                    });
+                }
             }
         }

@@ -1808,6 +1822,7 @@ mod tests {
     struct BufferTestContext {
         root_execution_id: ExecutionId,
         generated_events: Vec<Event<TestSpec>>,
+        // Data derived from generated_events.
        generated_step_events: Vec<StepEvent<TestSpec>>,
     }

@@ -1885,9 +1900,95 @@ mod tests {
                     Event::Progress(_) => None,
                 })
                 .collect();
+
+            // Create two buffers and feed events.
+            // * The incremental buffer has each event fed into it one-by-one.
+            // * The "idempotent" buffer has events 0, 0..1, 0..2, 0..3, etc
+            //   fed into it one by one. The name is because this is really
+            //   testing the idempotency of the event buffer.
+
+            println!("** generating incremental and idempotent buffers **");
+            let mut incremental_buffer = EventBuffer::default();
+            let mut idempotent_buffer = EventBuffer::default();
+            for event in &generated_events {
+                incremental_buffer.add_event(event.clone());
+                let report = incremental_buffer.generate_report();
+                idempotent_buffer.add_event_report(report);
+            }
+
+            // Check that the two buffers above are similar.
+            Self::ensure_buffers_similar(
+                &incremental_buffer,
+                &idempotent_buffer,
+            )
+            .expect("idempotent buffer is similar to incremental buffer");
+
+            // Also generate a buffer with a single event report.
+            println!("** generating oneshot buffer **");
+            let mut oneshot_buffer = EventBuffer::default();
+            oneshot_buffer
+                .add_event_report(incremental_buffer.generate_report());
+
+            Self::ensure_buffers_similar(&incremental_buffer, &oneshot_buffer)
+                .expect("oneshot buffer is similar to incremental buffer");
+
             Self { root_execution_id, generated_events, generated_step_events }
         }

+        fn ensure_buffers_similar(
+            buf1: &EventBuffer<TestSpec>,
+            buf2: &EventBuffer<TestSpec>,
+        ) -> anyhow::Result<()> {
+            // The two should have the same step keys.
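+            // ("Similar" is deliberately structural: the checks below compare
+            // step keys, sort keys, parent key/child index, and nest levels,
+            // not the raw event payloads.)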
+ let buf1_steps = buf1.steps(); + let buf2_steps = buf2.steps(); + + ensure!( + buf1_steps.as_slice().len() == buf2_steps.as_slice().len(), + "buffers have same number of steps ({} vs {})", + buf1_steps.as_slice().len(), + buf2_steps.as_slice().len() + ); + + for (ix, ((k1, data1), (k2, data2))) in buf1_steps + .as_slice() + .iter() + .zip(buf2_steps.as_slice().iter()) + .enumerate() + { + ensure!( + k1 == k2, + "buffers have same step keys at index {} ({:?} vs {:?})", + ix, + k1, + k2 + ); + ensure!( + data1.sort_key() == data2.sort_key(), + "buffers have same sort key at index {} ({:?} vs {:?})", + ix, + data1.sort_key(), + data2.sort_key() + ); + ensure!( + data1.parent_key_and_child_index() == data2.parent_key_and_child_index(), + "buffers have same parent key and child index at index {} ({:?} vs {:?})", + ix, + data1.parent_key_and_child_index(), + data2.parent_key_and_child_index(), + ); + ensure!( + data1.nest_level() == data2.nest_level(), + "buffers have same nest level at index {} ({:?} vs {:?})", + ix, + data1.nest_level(), + data2.nest_level(), + ); + } + + Ok(()) + } + /// Runs a test in a scenario where all elements should be seen. /// /// Each event is added `times` times. @@ -2165,10 +2266,10 @@ mod tests { ), "this is the last event so ExecutionStatus must be completed" ); - // There are two nested engines. + // There are three nested engines. ensure!( - summary.len() == 3, - "two nested engines must be defined" + summary.len() == 4, + "three nested engines (plus one root engine) must be defined" ); let (_, nested_summary) = summary @@ -2186,6 +2287,18 @@ mod tests { let (_, nested_summary) = summary .get_index(2) .expect("this is the second nested engine"); + ensure!( + matches!( + &nested_summary.execution_status, + ExecutionStatus::Terminal(info) + if info.kind == TerminalKind::Failed + ), + "for this engine, the ExecutionStatus must be failed" + ); + + let (_, nested_summary) = summary + .get_index(3) + .expect("this is the third nested engine"); ensure!( matches!( &nested_summary.execution_status, diff --git a/update-engine/src/test_utils.rs b/update-engine/src/test_utils.rs index b943d1ddfe..539ef28864 100644 --- a/update-engine/src/test_utils.rs +++ b/update-engine/src/test_utils.rs @@ -141,7 +141,24 @@ fn define_test_steps( move |parent_cx| async move { parent_cx .with_nested_engine(|engine| { - define_nested_engine(&parent_cx, engine); + define_nested_engine(&parent_cx, engine, 3, "steps"); + Ok(()) + }) + .await + .expect_err("this is expected to fail"); + + // Define a second nested engine -- this verifies that internal + // buffer indexes match up. + parent_cx + .with_nested_engine(|engine| { + define_nested_engine( + &parent_cx, + engine, + 10, + // The tests in buffer.rs expect the units to be + // "steps" exactly once, so use a different name here. + "steps (again)", + ); Ok(()) }) .await @@ -214,18 +231,20 @@ fn define_test_steps( fn define_nested_engine<'a>( parent_cx: &'a StepContext, engine: &mut UpdateEngine<'a, TestSpec>, + start_id: usize, + step_units: &'static str, ) { engine .new_step( "nested-foo".to_owned(), - 4, + start_id + 1, "Nested step 1", move |cx| async move { parent_cx .send_progress(StepProgress::with_current_and_total( 1, 3, - "steps", + step_units, Default::default(), )) .await; @@ -239,7 +258,7 @@ fn define_nested_engine<'a>( engine .new_step::<_, _, ()>( "nested-bar".to_owned(), - 5, + start_id + 2, "Nested step 2 (fails)", move |cx| async move { // This is used by NestedProgressCheck below. 
@@ -247,7 +266,7 @@ fn define_nested_engine<'a>( .send_progress(StepProgress::with_current_and_total( 2, 3, - "steps", + step_units, Default::default(), )) .await; @@ -263,7 +282,7 @@ fn define_nested_engine<'a>( .send_progress(StepProgress::with_current_and_total( 3, 3, - "steps", + step_units, Default::default(), )) .await; diff --git a/wicket/src/cli/rack_update.rs b/wicket/src/cli/rack_update.rs index cac0f09ee5..ccacea0e38 100644 --- a/wicket/src/cli/rack_update.rs +++ b/wicket/src/cli/rack_update.rs @@ -8,23 +8,29 @@ use std::{ collections::{BTreeMap, BTreeSet}, + io::{BufReader, Write}, net::SocketAddrV6, time::Duration, }; use anyhow::{anyhow, bail, Context, Result}; +use camino::Utf8PathBuf; use clap::{Args, Subcommand, ValueEnum}; use slog::Logger; use tokio::{sync::watch, task::JoinHandle}; use update_engine::{ display::{GroupDisplay, LineDisplayStyles}, - NestedError, + EventBuffer, NestedError, }; use wicket_common::{ - rack_update::ClearUpdateStateResponse, update_events::EventReport, + rack_update::ClearUpdateStateResponse, + update_events::{EventReport, WicketdEngineSpec}, WICKETD_TIMEOUT, }; -use wicketd_client::types::{ClearUpdateStateParams, StartUpdateParams}; +use wicketd_client::types::{ + ClearUpdateStateParams, GetArtifactsAndEventReportsResponse, + StartUpdateParams, +}; use crate::{ cli::GlobalOpts, @@ -41,10 +47,22 @@ use super::command::CommandOutput; pub(crate) enum RackUpdateArgs { /// Start one or more updates. Start(StartRackUpdateArgs), + /// Attach to one or more running updates. Attach(AttachArgs), + /// Clear updates. Clear(ClearArgs), + + /// Dump artifacts and event reports from wicketd. + /// + /// Debug-only, intended for development. + DebugDump(DumpArgs), + + /// Replay update logs from a dump file. + /// + /// Debug-only, intended for development. + DebugReplay(ReplayArgs), } impl RackUpdateArgs { @@ -65,6 +83,12 @@ impl RackUpdateArgs { RackUpdateArgs::Clear(args) => { args.exec(log, wicketd_addr, global_opts, output).await } + RackUpdateArgs::DebugDump(args) => { + args.exec(log, wicketd_addr).await + } + RackUpdateArgs::DebugReplay(args) => { + args.exec(log, global_opts, output) + } } } } @@ -380,6 +404,155 @@ async fn do_clear_update_state( Ok(response) } +#[derive(Debug, Args)] +pub(crate) struct DumpArgs { + /// Pretty-print JSON output. + #[clap(long)] + pretty: bool, +} + +impl DumpArgs { + async fn exec(self, log: Logger, wicketd_addr: SocketAddrV6) -> Result<()> { + let client = create_wicketd_client(&log, wicketd_addr, WICKETD_TIMEOUT); + + let response = client + .get_artifacts_and_event_reports() + .await + .context("error calling get_artifacts_and_event_reports")?; + let response = response.into_inner(); + + // Return the response as a JSON object. + if self.pretty { + serde_json::to_writer_pretty(std::io::stdout(), &response) + .context("error writing to stdout")?; + } else { + serde_json::to_writer(std::io::stdout(), &response) + .context("error writing to stdout")?; + } + Ok(()) + } +} + +#[derive(Debug, Args)] +pub(crate) struct ReplayArgs { + /// The dump file to replay. + /// + /// This should be the output of `rack-update debug-dump`, or something + /// like . + file: Utf8PathBuf, + + /// How to feed events into the display. 
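+    ///
+    /// (One of `oneshot`, `incremental`, or `idempotent`; see
+    /// `ReplayStrategy` below.)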
+    #[clap(long, value_enum, default_value_t)]
+    strategy: ReplayStrategy,
+
+    #[clap(flatten)]
+    component_ids: ComponentIdSelector,
+}
+
+impl ReplayArgs {
+    fn exec(
+        self,
+        log: Logger,
+        global_opts: GlobalOpts,
+        output: CommandOutput<'_>,
+    ) -> Result<()> {
+        let update_ids = self.component_ids.to_component_ids()?;
+        let mut display = GroupDisplay::new_with_display(
+            &log,
+            update_ids.iter().copied(),
+            output.stderr,
+        );
+        if global_opts.use_color() {
+            display.set_styles(LineDisplayStyles::colorized());
+        }
+
+        let file = BufReader::new(
+            std::fs::File::open(&self.file)
+                .with_context(|| format!("error opening {}", self.file))?,
+        );
+        let response: GetArtifactsAndEventReportsResponse =
+            serde_json::from_reader(file)?;
+        let event_reports =
+            parse_event_report_map(&log, response.event_reports);
+
+        self.strategy.execute(display, event_reports)?;
+
+        Ok(())
+    }
+}
+
+#[derive(Clone, Copy, Default, Eq, PartialEq, Hash, Debug, ValueEnum)]
+enum ReplayStrategy {
+    /// Feed all events into the buffer immediately.
+    #[default]
+    Oneshot,
+
+    /// Feed events into the buffer one at a time.
+    Incremental,
+
+    /// Feed events into the buffer as 0, 0..1, 0..2, 0..3 etc.
+    Idempotent,
+}
+
+impl ReplayStrategy {
+    fn execute(
+        self,
+        mut display: GroupDisplay<
+            ComponentId,
+            &mut dyn Write,
+            WicketdEngineSpec,
+        >,
+        event_reports: BTreeMap<ComponentId, EventReport>,
+    ) -> Result<()> {
+        match self {
+            ReplayStrategy::Oneshot => {
+                // TODO: parallelize this computation?
+                for (id, event_report) in event_reports {
+                    // If display.add_event_report errors out, it's for a report for a
+                    // component we weren't interested in. Ignore it.
+                    _ = display.add_event_report(&id, event_report);
+                }
+
+                display.write_events()?;
+            }
+            ReplayStrategy::Incremental => {
+                for (id, event_report) in &event_reports {
+                    let mut buffer = EventBuffer::default();
+                    let mut last_seen = None;
+                    for event in &event_report.step_events {
+                        buffer.add_step_event(event.clone());
+                        let report =
+                            buffer.generate_report_since(&mut last_seen);
+
+                        // If display.add_event_report errors out, it's for a report for a
+                        // component we weren't interested in. Ignore it.
+                        _ = display.add_event_report(&id, report);
+
+                        display.write_events()?;
+                    }
+                }
+            }
+            ReplayStrategy::Idempotent => {
+                for (id, event_report) in &event_reports {
+                    let mut buffer = EventBuffer::default();
+                    for event in &event_report.step_events {
+                        buffer.add_step_event(event.clone());
+                        let report = buffer.generate_report();
+
+                        // If display.add_event_report errors out, it's for a report for a
+                        // component we weren't interested in. Ignore it.
+                        _ = display.add_event_report(&id, report);
+
+                        display.write_events()?;
+                    }
+                }
+            }
+        }
+
+        Ok(())
+    }
+}
+
 #[derive(Clone, Copy, Eq, PartialEq, Hash, Debug, ValueEnum)]
 enum MessageFormat {
     Human,

From 8726a5fa86e9c5d8423a97cf9c710af54914a687 Mon Sep 17 00:00:00 2001
From: iliana etaoin
Date: Tue, 23 Jan 2024 10:06:58 -0800
Subject: [PATCH 07/49] Set version to 6.0.0 (#4841)

---
 .github/buildomat/jobs/package.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh
index 350ab37233..f0bd764feb 100755
--- a/.github/buildomat/jobs/package.sh
+++ b/.github/buildomat/jobs/package.sh
@@ -37,7 +37,7 @@ rustc --version

 # trampoline global zone images.
# COMMIT=$(git rev-parse HEAD) -VERSION="5.0.0-0.ci+git${COMMIT:0:11}" +VERSION="6.0.0-0.ci+git${COMMIT:0:11}" echo "$VERSION" >/work/version.txt ptime -m ./tools/install_builder_prerequisites.sh -yp From 4cf2a69699764ec27c1c3715d0b33554b460f417 Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Tue, 23 Jan 2024 14:21:19 -0500 Subject: [PATCH 08/49] [sled-agent] Include error when panicking on start (#4869) We build with `panic=abort`, so even though we `error!` log this error immediately prior to panicking, it's very likely the log won't be flushed by the time we abort. Include the error in the panic message itself so we don't have to fish it out of core files. --- sled-agent/src/bootstrap/server.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 999e4cc0c8..1a9d36c86b 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -604,7 +604,7 @@ impl Inner { // This error is unrecoverable, and if returned we'd // end up in maintenance mode anyway. error!(log, "Failed to start sled agent: {err:#}"); - panic!("Failed to start sled agent"); + panic!("Failed to start sled agent: {err:#}"); } }; _ = response_tx.send(response); From 81831382091e8d9eebff1ae4628db1f3d0e241b2 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Tue, 23 Jan 2024 13:27:15 -0600 Subject: [PATCH 09/49] Delete IP pool links on silo delete and IP pool delete (#4867) Closes #4849 Before this change, IP pool delete is blocked if there are any outstanding silo links, which would mean the user would have to unlink every silo before deleting a pool. This is annoying. The main insight here, discussed in #4849, is that once we get past the other checks to ensure that the pool or silo is not in use (pool contains no IP ranges, silo contains no projects), there is no need to block on links, and it is fine for us to delete any associated links when we delete the thing. --- nexus/db-model/src/schema.rs | 2 +- nexus/db-queries/src/db/datastore/ip_pool.rs | 37 +++---- nexus/db-queries/src/db/datastore/silo.rs | 18 ++++ nexus/tests/integration_tests/ip_pools.rs | 38 ++++--- nexus/tests/integration_tests/silos.rs | 105 ++++++++++++++++++- schema/crdb/25.0.0/up.sql | 7 ++ schema/crdb/dbinit.sql | 10 +- 7 files changed, 180 insertions(+), 37 deletions(-) create mode 100644 schema/crdb/25.0.0/up.sql diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 68991f1d75..2e7493716e 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(24, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(25, 0, 0); table! 
{
    disk (id) {
diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs
index 331126ef97..6d3a95af7d 100644
--- a/nexus/db-queries/src/db/datastore/ip_pool.rs
+++ b/nexus/db-queries/src/db/datastore/ip_pool.rs
@@ -224,15 +224,15 @@ impl DataStore {
         use db::schema::ip_pool_resource;
         opctx.authorize(authz::Action::Delete, authz_pool).await?;

+        let conn = self.pool_connection_authorized(opctx).await?;
+
         // Verify there are no IP ranges still in this pool
         let range = ip_pool_range::dsl::ip_pool_range
             .filter(ip_pool_range::dsl::ip_pool_id.eq(authz_pool.id()))
             .filter(ip_pool_range::dsl::time_deleted.is_null())
             .select(ip_pool_range::dsl::id)
             .limit(1)
-            .first_async::<Uuid>(
-                &*self.pool_connection_authorized(opctx).await?,
-            )
+            .first_async::<Uuid>(&*conn)
             .await
             .optional()
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
@@ -242,23 +242,6 @@ impl DataStore {
             ));
         }

-        // Verify there are no linked silos
-        let silo_link = ip_pool_resource::table
-            .filter(ip_pool_resource::ip_pool_id.eq(authz_pool.id()))
-            .select(ip_pool_resource::resource_id)
-            .limit(1)
-            .first_async::<Uuid>(
-                &*self.pool_connection_authorized(opctx).await?,
-            )
-            .await
-            .optional()
-            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
-        if silo_link.is_some() {
-            return Err(Error::invalid_request(
-                "IP Pool cannot be deleted while it is linked to a silo",
-            ));
-        }
-
         // Delete the pool, conditional on the rcgen not having changed. This
         // protects the delete from occurring if clients created a new IP range
         // in between the above check for children and this query.
@@ -268,7 +251,7 @@
             .filter(dsl::id.eq(authz_pool.id()))
             .filter(dsl::rcgen.eq(db_pool.rcgen))
             .set(dsl::time_deleted.eq(now))
-            .execute_async(&*self.pool_connection_authorized(opctx).await?)
+            .execute_async(&*conn)
             .await
             .map_err(|e| {
                 public_error_from_diesel(
@@ -282,6 +265,18 @@
             "deletion failed due to concurrent modification",
             ));
         }
+
+        // Rather than treating outstanding links as a blocker for pool delete,
+        // just delete them. If we've gotten this far, we know there are no
+        // ranges in the pool, which means it can't be in use.
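+        // (No ranges also means no addresses could ever have been allocated
+        // from this pool, so nothing can still be using it.)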
+ + // delete any links from this pool to any other resources (silos) + diesel::delete(ip_pool_resource::table) + .filter(ip_pool_resource::ip_pool_id.eq(authz_pool.id())) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index 2c0c5f3c47..a88a27872f 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -17,6 +17,7 @@ use crate::db::error::TransactionError; use crate::db::fixed_data::silo::{DEFAULT_SILO, INTERNAL_SILO}; use crate::db::identity::Resource; use crate::db::model::CollectionTypeProvisioned; +use crate::db::model::IpPoolResourceType; use crate::db::model::Name; use crate::db::model::Silo; use crate::db::model::VirtualProvisioningCollection; @@ -547,6 +548,23 @@ impl DataStore { debug!(opctx.log, "deleted {} silo IdPs for silo {}", updated_rows, id); + // delete IP pool links (not IP pools, just the links) + use db::schema::ip_pool_resource; + + let updated_rows = diesel::delete(ip_pool_resource::table) + .filter(ip_pool_resource::resource_id.eq(id)) + .filter( + ip_pool_resource::resource_type.eq(IpPoolResourceType::Silo), + ) + .execute_async(&*conn) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + debug!( + opctx.log, + "deleted {} IP pool links for silo {}", updated_rows, id + ); + Ok(()) } } diff --git a/nexus/tests/integration_tests/ip_pools.rs b/nexus/tests/integration_tests/ip_pools.rs index 7843e816fd..77a5cd5c8a 100644 --- a/nexus/tests/integration_tests/ip_pools.rs +++ b/nexus/tests/integration_tests/ip_pools.rs @@ -275,6 +275,32 @@ async fn test_ip_pool_list_dedupe(cptestctx: &ControlPlaneTestContext) { let silo3_pools = pools_for_silo(client, "silo3").await; assert_eq!(silo3_pools.len(), 1); assert_eq!(silo3_pools[0].identity.name, "pool1"); + + // this is a great spot to check that deleting a pool cleans up the links! 
+
+    // first we have to delete the range, otherwise delete will fail
+    let url = "/v1/system/ip-pools/pool1/ranges/remove";
+    NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, url)
+            .body(Some(&range1))
+            .expect_status(Some(StatusCode::NO_CONTENT)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("Failed to delete IP range from a pool");
+
+    object_delete(client, "/v1/system/ip-pools/pool1").await;
+
+    let silo1_pools = pools_for_silo(client, "silo1").await;
+    assert_eq!(silo1_pools.len(), 1);
+    assert_eq!(silo1_pools[0].id(), pool2.id());
+
+    let silo2_pools = pools_for_silo(client, "silo2").await;
+    assert_eq!(silo2_pools.len(), 0);
+
+    let silo3_pools = pools_for_silo(client, "silo3").await;
+    assert_eq!(silo3_pools.len(), 0);
 }

 /// The internal IP pool, defined by its association with the internal silo,
@@ -474,18 +500,6 @@ async fn test_ip_pool_silo_link(cptestctx: &ControlPlaneTestContext) {
         .await;
     assert_eq!(error.error_code.unwrap(), "ObjectAlreadyExists");

-    // pool delete fails because it is linked to a silo
-    let error = object_delete_error(
-        client,
-        "/v1/system/ip-pools/p1",
-        StatusCode::BAD_REQUEST,
-    )
-    .await;
-    assert_eq!(
-        error.message,
-        "IP Pool cannot be deleted while it is linked to a silo",
-    );
-
     // unlink p1 from silo (doesn't matter that it's a default)
     let url = format!("/v1/system/ip-pools/p1/silos/{}", cptestctx.silo_name);
     object_delete(client, &url).await;
diff --git a/nexus/tests/integration_tests/silos.rs b/nexus/tests/integration_tests/silos.rs
index a5d4b47eaa..86bf01062f 100644
--- a/nexus/tests/integration_tests/silos.rs
+++ b/nexus/tests/integration_tests/silos.rs
@@ -16,8 +16,9 @@ use nexus_db_queries::db::identity::Asset;
 use nexus_db_queries::db::lookup::LookupPath;
 use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder};
 use nexus_test_utils::resource_helpers::{
-    create_local_user, create_project, create_silo, grant_iam, object_create,
-    objects_list_page_authz, projects_list,
+    create_ip_pool, create_local_user, create_project, create_silo, grant_iam,
+    link_ip_pool, object_create, object_delete, objects_list_page_authz,
+    projects_list,
 };
 use nexus_test_utils_macros::nexus_test;
 use nexus_types::external_api::views::Certificate;
@@ -25,6 +26,7 @@ use nexus_types::external_api::views::{
     self, IdentityProvider, Project, SamlIdentityProvider, Silo,
 };
 use nexus_types::external_api::{params, shared};
+use omicron_common::address::{IpRange, Ipv4Range};
 use omicron_common::api::external::ObjectIdentity;
 use omicron_common::api::external::{
     IdentityMetadataCreateParams, LookupType, Name,
@@ -2526,3 +2528,102 @@ async fn test_silo_admin_can_create_certs(cptestctx: &ControlPlaneTestContext) {
     assert_eq!(silo_certs.len(), 1);
     assert_eq!(silo_certs[0].identity.id, cert.identity.id);
 }
+
+// Test that silo delete cleans up associated IP pool links
+#[nexus_test]
+async fn test_silo_delete_cleans_up_ip_pool_links(
+    cptestctx: &ControlPlaneTestContext,
+) {
+    let client = &cptestctx.external_client;
+
+    // Create a silo
+    let silo1 =
+        create_silo(&client, "silo1", true, shared::SiloIdentityMode::SamlJit)
+            .await;
+    let silo2 =
+        create_silo(&client, "silo2", true, shared::SiloIdentityMode::SamlJit)
+            .await;
+
+    // link pool1 to both, link pool2 to silo1 only
+    let range1 = IpRange::V4(
+        Ipv4Range::new(
+            std::net::Ipv4Addr::new(10, 0, 0, 51),
+            std::net::Ipv4Addr::new(10, 0, 0, 52),
+        )
+        .unwrap(),
+    );
+    create_ip_pool(client, "pool1", Some(range1)).await;
+    link_ip_pool(client, "pool1", &silo1.identity.id, true).await;
+    link_ip_pool(client, "pool1", &silo2.identity.id, true).await;
+
+    let range2 = IpRange::V4(
+        Ipv4Range::new(
+            std::net::Ipv4Addr::new(10, 0, 0, 53),
+            std::net::Ipv4Addr::new(10, 0, 0, 54),
+        )
+        .unwrap(),
+    );
+    create_ip_pool(client, "pool2", Some(range2)).await;
+    link_ip_pool(client, "pool2", &silo1.identity.id, false).await;
+
+    // we want to make sure the links are there before we make sure they're gone
+    let url = "/v1/system/ip-pools/pool1/silos";
+    let links =
+        objects_list_page_authz::<views::IpPoolSiloLink>(client, &url).await;
+    assert_eq!(links.items.len(), 2);
+
+    let url = "/v1/system/ip-pools/pool2/silos";
+    let links =
+        objects_list_page_authz::<views::IpPoolSiloLink>(client, &url).await;
+    assert_eq!(links.items.len(), 1);
+
+    // Delete the silo
+    let url = format!("/v1/system/silos/{}", silo1.identity.id);
+    object_delete(client, &url).await;
+
+    // Now make sure the links are gone
+    let url = "/v1/system/ip-pools/pool1/silos";
+    let links =
+        objects_list_page_authz::<views::IpPoolSiloLink>(client, &url).await;
+    assert_eq!(links.items.len(), 1);
+
+    let url = "/v1/system/ip-pools/pool2/silos";
+    let links =
+        objects_list_page_authz::<views::IpPoolSiloLink>(client, &url).await;
+    assert_eq!(links.items.len(), 0);
+
+    // but the pools are of course still there
+    let url = "/v1/system/ip-pools";
+    let pools = objects_list_page_authz::<views::IpPool>(client, &url).await;
+    assert_eq!(pools.items.len(), 2);
+    assert_eq!(pools.items[0].identity.name, "pool1");
+    assert_eq!(pools.items[1].identity.name, "pool2");
+
+    // nothing prevents us from deleting the pools (except the child ranges --
+    // we do have to remove those)
+
+    let url = "/v1/system/ip-pools/pool1/ranges/remove";
+    NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, url)
+            .body(Some(&range1))
+            .expect_status(Some(StatusCode::NO_CONTENT)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("Failed to delete IP range from a pool");
+
+    let url = "/v1/system/ip-pools/pool2/ranges/remove";
+    NexusRequest::new(
+        RequestBuilder::new(client, Method::POST, url)
+            .body(Some(&range2))
+            .expect_status(Some(StatusCode::NO_CONTENT)),
+    )
+    .authn_as(AuthnMode::PrivilegedUser)
+    .execute()
+    .await
+    .expect("Failed to delete IP range from a pool");
+
+    object_delete(client, "/v1/system/ip-pools/pool1").await;
+    object_delete(client, "/v1/system/ip-pools/pool2").await;
+}
diff --git a/schema/crdb/25.0.0/up.sql b/schema/crdb/25.0.0/up.sql
new file mode 100644
index 0000000000..3c963b9bc6
--- /dev/null
+++ b/schema/crdb/25.0.0/up.sql
@@ -0,0 +1,7 @@
+-- created solely to prevent a table scan when we delete links on silo delete
+CREATE INDEX IF NOT EXISTS ip_pool_resource_id ON omicron.public.ip_pool_resource (
+    resource_id
+);
+CREATE INDEX IF NOT EXISTS ip_pool_resource_ip_pool_id ON omicron.public.ip_pool_resource (
+    ip_pool_id
+);
diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql
index 2105caabef..f3ca5c4b85 100644
--- a/schema/crdb/dbinit.sql
+++ b/schema/crdb/dbinit.sql
@@ -1604,6 +1604,14 @@ CREATE UNIQUE INDEX IF NOT EXISTS one_default_ip_pool_per_resource ON omicron.pu
 ) where
     is_default = true;

+-- created solely to prevent a table scan when we delete links on silo delete
+CREATE INDEX IF NOT EXISTS ip_pool_resource_id ON omicron.public.ip_pool_resource (
+    resource_id
+);
+CREATE INDEX IF NOT EXISTS ip_pool_resource_ip_pool_id ON omicron.public.ip_pool_resource (
+    ip_pool_id
+);
+
 /*
  * IP Pools are made up of a set of IP ranges, which are start/stop addresses.
* Note that these need not be CIDR blocks or well-behaved subnets with a @@ -3258,7 +3266,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '24.0.0', NULL) + ( TRUE, NOW(), NOW(), '25.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 624fbba20d7eb754e2c1a3ca7f1c5676fd2f1e0d Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Tue, 23 Jan 2024 19:32:39 +0000 Subject: [PATCH 10/49] Chore: Update OPTE to v0.27.214 (#4868) This PR introduces fixes to TCP state machine handling, which has been causing new TCP flows to hang whenever a source-port is reused under certain circumstances. Most of the relevant details are outlined on oxidecomputer/opte#444, but the gist is that OPTE is far more permissive about TCP packets which are allowed to be sent/received by guests. --- Cargo.lock | 47 ++++++++++++++++++++++++++++++++-------------- Cargo.toml | 4 ++-- tools/opte_version | 2 +- 3 files changed, 36 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5211ecab78..a2d9601a38 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2739,6 +2739,15 @@ dependencies = [ "byteorder", ] +[[package]] +name = "hash32" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47d60b12902ba28e2730cd37e95b8c9223af2808df9e902d4df49588d1470606" +dependencies = [ + "byteorder", +] + [[package]] name = "hashbrown" version = "0.12.3" @@ -2791,12 +2800,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "db04bc24a18b9ea980628ecf00e6c0264f3c1426dac36c00cb49b6fbad8b0743" dependencies = [ "atomic-polyfill", - "hash32", + "hash32 0.2.1", "rustc_version 0.4.0", "spin 0.9.8", "stable_deref_trait", ] +[[package]] +name = "heapless" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bfb9eb618601c89945a70e254898da93b13be0388091d42117462b265bb3fad" +dependencies = [ + "hash32 0.3.1", + "stable_deref_trait", +] + [[package]] name = "heck" version = "0.3.3" @@ -3174,7 +3193,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" [[package]] name = "illumos-utils" @@ -3582,7 +3601,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "quote", "syn 2.0.46", @@ -5323,7 +5342,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "cfg-if", "dyn-clone", @@ -5332,26 +5351,26 @@ dependencies = [ "opte-api", "postcard", "serde", - "smoltcp 0.10.0", + "smoltcp 0.11.0", "version_check", ] [[package]] name = "opte-api" version = "0.1.0" -source = 
"git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "illumos-sys-hdrs", "ipnetwork", "postcard", "serde", - "smoltcp 0.10.0", + "smoltcp 0.11.0", ] [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "libc", "libnet", @@ -5425,12 +5444,12 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4#4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" +source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" dependencies = [ "illumos-sys-hdrs", "opte", "serde", - "smoltcp 0.10.0", + "smoltcp 0.11.0", "zerocopy 0.7.31", ] @@ -8019,21 +8038,21 @@ dependencies = [ "bitflags 1.3.2", "byteorder", "cfg-if", - "heapless", + "heapless 0.7.16", "managed", ] [[package]] name = "smoltcp" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d2e3a36ac8fea7b94e666dfa3871063d6e0a5c9d5d4fec9a1a6b7b6760f0229" +checksum = "5a1a996951e50b5971a2c8c0fa05a381480d70a933064245c4a223ddc87ccc97" dependencies = [ "bitflags 1.3.2", "byteorder", "cfg-if", "defmt", - "heapless", + "heapless 0.8.0", "managed", ] diff --git a/Cargo.toml b/Cargo.toml index 54db531d06..d97236b632 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -267,7 +267,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.10.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" @@ -275,7 +275,7 @@ openapiv3 = "2.0.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.5" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "4e6e6ab6379fa4bc40f5d0c7340b9f35c45ad6e4" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244" } oso = "0.27" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/tools/opte_version b/tools/opte_version index 619a109b35..82d79dcf28 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.27.202 +0.27.214 From 8af3d844deb340bd0018ad6df60210ea957113f2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 23 Jan 2024 13:44:30 -0800 Subject: [PATCH 11/49] Bump shlex from 1.1.0 to 1.3.0 (#4865) --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a2d9601a38..1b04752e34 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7691,9 +7691,9 @@ checksum = 
"24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "shlex" -version = "1.1.0" +version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3" +checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook" From beb1d1149a0876a786e94bf24a60568a29911056 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Tue, 23 Jan 2024 13:46:07 -0800 Subject: [PATCH 12/49] Update Rust crate hyper-rustls to 0.26.0 (#4814) --- Cargo.lock | 127 ++++++++++++++++++++++++++++++-------- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 2 + 3 files changed, 103 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1b04752e34..d66fc3dadc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1920,7 +1920,7 @@ dependencies = [ "futures", "hostname", "http 0.2.11", - "hyper", + "hyper 0.14.27", "indexmap 2.1.0", "multer", "openapiv3", @@ -2091,7 +2091,7 @@ dependencies = [ "base64", "chrono", "http 0.2.11", - "hyper", + "hyper 0.14.27", "omicron-sled-agent", "omicron-test-utils", "omicron-workspace-hack", @@ -2938,6 +2938,16 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "http-body" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1cac85db508abc24a2e48553ba12a996e87244a0395ce011e62b37158745d643" +dependencies = [ + "bytes", + "http 1.0.0", +] + [[package]] name = "http-range" version = "0.1.5" @@ -2968,7 +2978,7 @@ dependencies = [ "form_urlencoded", "futures", "http 0.2.11", - "hyper", + "hyper 0.14.27", "log", "once_cell", "regex", @@ -3055,7 +3065,7 @@ dependencies = [ "futures-util", "h2", "http 0.2.11", - "http-body", + "http-body 0.4.5", "httparse", "httpdate", "itoa", @@ -3067,6 +3077,24 @@ dependencies = [ "want", ] +[[package]] +name = "hyper" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb5aa53871fc917b1a9ed87b683a5d86db645e23acb32c2e0785a353e522fb75" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.0.0", + "http-body 1.0.0", + "httparse", + "itoa", + "pin-project-lite", + "tokio", + "want", +] + [[package]] name = "hyper-rustls" version = "0.24.2" @@ -3075,7 +3103,7 @@ checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" dependencies = [ "futures-util", "http 0.2.11", - "hyper", + "hyper 0.14.27", "rustls 0.21.9", "tokio", "tokio-rustls 0.24.1", @@ -3083,19 +3111,21 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.25.0" +version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "399c78f9338483cb7e630c8474b07268983c6bd5acee012e4211f9f7bb21b070" +checksum = "a0bea761b46ae2b24eb4aef630d8d1c398157b6fc29e6350ecf090a0b70c952c" dependencies = [ "futures-util", - "http 0.2.11", - "hyper", + "http 1.0.0", + "hyper 1.1.0", + "hyper-util", "log", "rustls 0.22.2", "rustls-native-certs", "rustls-pki-types", "tokio", "tokio-rustls 0.25.0", + "tower-service", ] [[package]] @@ -3108,7 +3138,7 @@ dependencies = [ "http 0.2.11", "http-range", "httpdate", - "hyper", + "hyper 0.14.27", "mime_guess", "percent-encoding", "rand 0.8.5", @@ -3124,12 +3154,32 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" dependencies = [ "bytes", - "hyper", + "hyper 0.14.27", 
"native-tls", "tokio", "tokio-native-tls", ] +[[package]] +name = "hyper-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bdea9aac0dbe5a9240d68cfd9501e2db94222c6dc06843e06640b9e07f0fdc67" +dependencies = [ + "bytes", + "futures-channel", + "futures-util", + "http 1.0.0", + "http-body 1.0.0", + "hyper 1.1.0", + "pin-project-lite", + "socket2 0.5.5", + "tokio", + "tower", + "tower-service", + "tracing", +] + [[package]] name = "iana-time-zone" version = "0.1.57" @@ -3371,7 +3421,7 @@ dependencies = [ "clap 4.4.3", "dropshot", "expectorate", - "hyper", + "hyper 0.14.27", "installinator-common", "omicron-common", "omicron-test-utils", @@ -3427,7 +3477,7 @@ dependencies = [ "dropshot", "expectorate", "futures", - "hyper", + "hyper 0.14.27", "omicron-common", "omicron-test-utils", "omicron-workspace-hack", @@ -4165,8 +4215,8 @@ dependencies = [ "gateway-client", "headers", "http 0.2.11", - "hyper", - "hyper-rustls 0.25.0", + "hyper 0.14.27", + "hyper-rustls 0.26.0", "internal-dns", "ipnetwork", "itertools 0.12.0", @@ -4304,7 +4354,7 @@ dependencies = [ "gateway-test-utils", "headers", "http 0.2.11", - "hyper", + "hyper 0.14.27", "internal-dns", "nexus-db-queries", "nexus-test-interface", @@ -4745,7 +4795,7 @@ dependencies = [ "gateway-test-utils", "hex", "http 0.2.11", - "hyper", + "hyper 0.14.27", "illumos-utils", "ipcc", "omicron-common", @@ -4806,8 +4856,8 @@ dependencies = [ "http 0.2.11", "httptest", "hubtools", - "hyper", - "hyper-rustls 0.25.0", + "hyper 0.14.27", + "hyper-rustls 0.26.0", "illumos-utils", "internal-dns", "ipnetwork", @@ -5020,7 +5070,7 @@ dependencies = [ "guppy", "hex", "http 0.2.11", - "hyper", + "hyper 0.14.27", "hyper-staticfile", "illumos-utils", "installinator-common", @@ -5159,7 +5209,7 @@ dependencies = [ "hashbrown 0.13.2", "hex", "hmac", - "hyper", + "hyper 0.14.27", "indexmap 2.1.0", "inout", "ipnetwork", @@ -5200,6 +5250,7 @@ dependencies = [ "similar", "slog", "snafu", + "socket2 0.5.5", "spin 0.9.8", "string_cache", "subtle", @@ -5427,7 +5478,7 @@ dependencies = [ "chrono", "futures", "http 0.2.11", - "hyper", + "hyper 0.14.27", "omicron-workspace-hack", "progenitor", "rand 0.8.5", @@ -5501,7 +5552,7 @@ dependencies = [ "dropshot", "expectorate", "futures", - "hyper", + "hyper 0.14.27", "internal-dns", "nexus-client", "nexus-types", @@ -6352,7 +6403,7 @@ dependencies = [ "clap 4.4.3", "dropshot", "futures", - "hyper", + "hyper 0.14.27", "progenitor", "propolis_types", "rand 0.8.5", @@ -6787,8 +6838,8 @@ dependencies = [ "futures-util", "h2", "http 0.2.11", - "http-body", - "hyper", + "http-body 0.4.5", + "hyper 0.14.27", "hyper-rustls 0.24.2", "hyper-tls", "ipnet", @@ -9048,6 +9099,28 @@ dependencies = [ "walkdir", ] +[[package]] +name = "tower" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" +dependencies = [ + "futures-core", + "futures-util", + "pin-project", + "pin-project-lite", + "tokio", + "tower-layer", + "tower-service", + "tracing", +] + +[[package]] +name = "tower-layer" +version = "0.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c20c8dbed6283a09604c3e69b4b7eeb54e298b8a600d4d5ecb5ad39de609f1d0" + [[package]] name = "tower-service" version = "0.3.2" @@ -9933,7 +10006,7 @@ dependencies = [ "hex", "http 0.2.11", "hubtools", - "hyper", + "hyper 0.14.27", "illumos-utils", "installinator", "installinator-artifact-client", diff --git a/Cargo.toml 
b/Cargo.toml index d97236b632..43faed9360 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -219,7 +219,7 @@ httptest = "0.15.5" hubtools = { git = "https://github.com/oxidecomputer/hubtools.git", branch = "main" } humantime = "2.1.0" hyper = "0.14" -hyper-rustls = "0.25.0" +hyper-rustls = "0.26.0" hyper-staticfile = "0.9.5" illumos-utils = { path = "illumos-utils" } indexmap = "2.1.0" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b574a292d1..e4733992bc 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -94,6 +94,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } @@ -198,6 +199,7 @@ sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } snafu = { version = "0.7.5", features = ["futures"] } +socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } From 66afddbd174c02e3da3f47e2519fb82558fec01c Mon Sep 17 00:00:00 2001 From: "Andrew J. Stone" Date: Tue, 23 Jan 2024 17:10:55 -0500 Subject: [PATCH 13/49] Add oxlog tool and library (#4810) --- .github/buildomat/jobs/package.sh | 2 +- Cargo.lock | 14 + Cargo.toml | 3 + dev-tools/oxlog/Cargo.toml | 16 + dev-tools/oxlog/src/bin/oxlog.rs | 121 ++++++++ dev-tools/oxlog/src/lib.rs | 457 ++++++++++++++++++++++++++++ illumos-utils/Cargo.toml | 1 + illumos-utils/src/running_zone.rs | 28 +- package-manifest.toml | 8 + tools/build-global-zone-packages.sh | 7 + workspace-hack/Cargo.toml | 2 + 11 files changed, 631 insertions(+), 28 deletions(-) create mode 100644 dev-tools/oxlog/Cargo.toml create mode 100644 dev-tools/oxlog/src/bin/oxlog.rs create mode 100644 dev-tools/oxlog/src/lib.rs diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index f0bd764feb..b4d10891b9 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -91,7 +91,7 @@ ptime -m cargo run --locked --release --bin omicron-package -- \ -t host target create -i standard -m gimlet -s asic -r multi-sled ptime -m cargo run --locked --release --bin omicron-package -- \ -t host package -stamp_packages omicron-sled-agent mg-ddm-gz propolis-server overlay +stamp_packages omicron-sled-agent mg-ddm-gz propolis-server overlay oxlog # Create global zone package @ /work/global-zone-packages.tar.gz ptime -m ./tools/build-global-zone-packages.sh "$tarball_src_dir" /work diff --git a/Cargo.lock b/Cargo.lock index d66fc3dadc..ec594abb8f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3266,6 +3266,7 @@ dependencies = [ "omicron-workspace-hack", "opte-ioctl", "oxide-vpc", + "oxlog", "regress", "schemars", "serde", @@ -5177,6 +5178,7 @@ dependencies = [ "bstr 1.6.0", "byteorder", "bytes", + "camino", "chrono", "cipher", "clap 4.4.3", @@ -5676,6 +5678,18 @@ dependencies = [ "uuid", ] +[[package]] +name = "oxlog" +version = "0.1.0" +dependencies = [ + "anyhow", + "camino", + "chrono", 
+ "clap 4.4.3", + "omicron-workspace-hack", + "uuid", +] + [[package]] name = "p256" version = "0.13.2" diff --git a/Cargo.toml b/Cargo.toml index 43faed9360..3d0be64380 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -20,6 +20,7 @@ members = [ "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/oxlog", "dev-tools/thing-flinger", "dev-tools/xtask", "dns-server", @@ -93,6 +94,7 @@ default-members = [ "dev-tools/crdb-seed", "dev-tools/omdb", "dev-tools/omicron-dev", + "dev-tools/oxlog", "dev-tools/thing-flinger", # Do not include xtask in the list of default members, because this causes # hakari to not work as well and build times to be longer. @@ -252,6 +254,7 @@ nexus-inventory = { path = "nexus/inventory" } omicron-certificates = { path = "certificates" } omicron-passwords = { path = "passwords" } omicron-workspace-hack = "0.1.0" +oxlog = { path = "dev-tools/oxlog" } nexus-test-interface = { path = "nexus/test-interface" } nexus-test-utils-macros = { path = "nexus/test-utils-macros" } nexus-test-utils = { path = "nexus/test-utils" } diff --git a/dev-tools/oxlog/Cargo.toml b/dev-tools/oxlog/Cargo.toml new file mode 100644 index 0000000000..5d7cfaf5c1 --- /dev/null +++ b/dev-tools/oxlog/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "oxlog" +version = "0.1.0" +edition = "2021" +license = "MPL-2.0" + +[dependencies] +anyhow.workspace = true +camino.workspace = true +chrono.workspace = true +clap.workspace = true +uuid.workspace = true +omicron-workspace-hack.workspace = true + +[[bin]] +name = "oxlog" diff --git a/dev-tools/oxlog/src/bin/oxlog.rs b/dev-tools/oxlog/src/bin/oxlog.rs new file mode 100644 index 0000000000..ef79605dda --- /dev/null +++ b/dev-tools/oxlog/src/bin/oxlog.rs @@ -0,0 +1,121 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! 
Tool for discovering oxide related logfiles on sleds
+
+use clap::{Args, Parser, Subcommand};
+use oxlog::{Filter, LogFile, Zones};
+
+#[derive(Debug, Parser)]
+#[command(version)]
+struct Cli {
+    #[command(subcommand)]
+    command: Commands,
+}
+
+#[derive(Debug, Subcommand)]
+enum Commands {
+    /// List all zones found on the filesystem
+    Zones,
+
+    /// List logs for a given service
+    Logs {
+        // The name of the zone
+        zone: String,
+
+        /// The name of the service to list logs for
+        service: Option<String>,
+
+        /// Print available metadata
+        #[arg(short, long)]
+        metadata: bool,
+
+        #[command(flatten)]
+        filter: FilterArgs,
+    },
+}
+
+#[derive(Args, Debug)]
+#[group(required = true, multiple = true)]
+struct FilterArgs {
+    /// Print only the current log file
+    #[arg(short, long)]
+    current: bool,
+
+    /// Print only the archived log files
+    #[arg(short, long)]
+    archived: bool,
+
+    // Print only the extra log files
+    #[arg(short, long)]
+    extra: bool,
+}
+
+fn main() -> Result<(), anyhow::Error> {
+    let cli = Cli::parse();
+
+    match cli.command {
+        Commands::Zones => {
+            for zone in Zones::load()?.zones.keys() {
+                println!("{zone}");
+            }
+            Ok(())
+        }
+        Commands::Logs { zone, service, metadata, filter } => {
+            let zones = Zones::load()?;
+            let filter = Filter {
+                current: filter.current,
+                archived: filter.archived,
+                extra: filter.extra,
+            };
+            let print_metadata = |f: &LogFile| {
+                println!(
+                    "{}\t{}\t{}",
+                    f.path,
+                    f.size.map_or_else(|| "-".to_string(), |s| s.to_string()),
+                    f.modified
+                        .map_or_else(|| "-".to_string(), |s| s.to_rfc3339())
+                );
+            };
+
+            let logs = zones.zone_logs(&zone, filter);
+            for (svc_name, mut svc_logs) in logs {
+                if let Some(service) = &service {
+                    if svc_name != service.as_str() {
+                        continue;
+                    }
+                }
+                svc_logs.archived.sort();
+                if filter.current {
+                    if let Some(current) = &svc_logs.current {
+                        if metadata {
+                            print_metadata(current);
+                        } else {
+                            println!("{}", current.path);
+                        }
+                    }
+                }
+                if filter.archived {
+                    for f in &svc_logs.archived {
+                        if metadata {
+                            print_metadata(f);
+                        } else {
+                            println!("{}", f.path);
+                        }
+                    }
+                }
+                if filter.extra {
+                    for f in &svc_logs.extra {
+                        if metadata {
+                            print_metadata(f);
+                        } else {
+                            println!("{}", f.path);
+                        }
+                    }
+                }
+            }
+            Ok(())
+        }
+    }
+}
diff --git a/dev-tools/oxlog/src/lib.rs b/dev-tools/oxlog/src/lib.rs
new file mode 100644
index 0000000000..589b113928
--- /dev/null
+++ b/dev-tools/oxlog/src/lib.rs
@@ -0,0 +1,457 @@
+// This Source Code Form is subject to the terms of the Mozilla Public
+// License, v. 2.0. If a copy of the MPL was not distributed with this
+// file, You can obtain one at https://mozilla.org/MPL/2.0/.
+
+//! A tool to show oxide related log file paths
+//!
+//! All data is based off of reading the filesystem
+
+use anyhow::Context;
+use camino::{Utf8DirEntry, Utf8Path, Utf8PathBuf};
+use chrono::{DateTime, Utc};
+use std::collections::BTreeMap;
+use std::io;
+use uuid::Uuid;
+
+/// Return a UUID if the `DirEntry` contains a directory that parses into a UUID.
+fn get_uuid_dir(result: io::Result<Utf8DirEntry>) -> Option<Uuid> {
+    let Ok(entry) = result else {
+        return None;
+    };
+    let Ok(file_type) = entry.file_type() else {
+        return None;
+    };
+    if !file_type.is_dir() {
+        return None;
+    }
+    let file_name = entry.file_name();
+    if let Ok(uuid) = file_name.parse() {
+        Some(uuid)
+    } else {
+        None
+    }
+}
+
+#[derive(Debug)]
+pub struct Pools {
+    pub internal: Vec<Uuid>,
+    pub external: Vec<Uuid>,
+}
+
+impl Pools {
+    pub fn read() -> anyhow::Result<Pools> {
+        let internal = Utf8Path::new("/pool/int/")
+            .read_dir_utf8()
+            .context("Failed to read /pool/int")?
+            .filter_map(get_uuid_dir)
+            .collect();
+        let external = Utf8Path::new("/pool/ext/")
+            .read_dir_utf8()
+            .context("Failed to read /pool/ext")?
+            .filter_map(get_uuid_dir)
+            .collect();
+        Ok(Pools { internal, external })
+    }
+}
+
+/// Filter which logs to search for in a given zone
+///
+/// Each field in the filter is additive.
+///
+/// The filter was added to the library and not just the CLI because in some
+/// cases searching for archived logs is pretty expensive.
+#[derive(Clone, Copy, Debug)]
+pub struct Filter {
+    /// The current logfile for a service.
+    /// e.g. `/var/svc/log/oxide-sled-agent:default.log`
+    pub current: bool,
+
+    /// Any rotated log files in the default service directory or archived to
+    /// a debug directory. e.g. `/var/svc/log/oxide-sled-agent:default.log.0`
+    /// or `/pool/ext/021afd19-2f87-4def-9284-ab7add1dd6ae/crypt/debug/global/oxide-sled-agent:default.log.1697509861`
+    pub archived: bool,
+
+    /// Any files of special interest for a given service that don't reside in
+    /// standard paths or don't follow the naming conventions of SMF service
+    /// files. e.g. `/pool/ext/e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3/root/data/logs/cockroach.log`
+    pub extra: bool,
+}
+
+/// Path and metadata about a logfile
+/// We use options for metadata as retrieval is fallible
+#[derive(Debug, Clone, Eq)]
+pub struct LogFile {
+    pub path: Utf8PathBuf,
+    pub size: Option<u64>,
+    pub modified: Option<DateTime<Utc>>,
+}
+
+impl LogFile {
+    pub fn read_metadata(&mut self, entry: &Utf8DirEntry) {
+        if let Ok(metadata) = entry.metadata() {
+            self.size = Some(metadata.len());
+            if let Ok(modified) = metadata.modified() {
+                self.modified = Some(modified.into());
+            }
+        }
+    }
+}
+
+impl PartialEq for LogFile {
+    fn eq(&self, other: &Self) -> bool {
+        self.path == other.path
+    }
+}
+
+impl PartialOrd for LogFile {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for LogFile {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.path.cmp(&other.path)
+    }
+}
+
+impl LogFile {
+    fn new(path: Utf8PathBuf) -> LogFile {
+        LogFile { path, size: None, modified: None }
+    }
+}
+
+/// All oxide logs for a given service in a given zone
+#[derive(Debug, Clone, Default)]
+pub struct SvcLogs {
+    /// The current logfile for a service.
+    /// e.g. `/var/svc/log/oxide-sled-agent:default.log`
+    pub current: Option<LogFile>,
+
+    /// Any rotated log files in the default service directory or archived to
+    /// a debug directory. e.g. `/var/svc/log/oxide-sled-agent:default.log.0`
+    /// or `/pool/ext/021afd19-2f87-4def-9284-ab7add1dd6ae/crypt/debug/global/oxide-sled-agent:default.log.1697509861`
+    pub archived: Vec<LogFile>,
+
+    /// Any files of special interest for a given service that don't reside in
+    /// standard paths or don't follow the naming conventions of SMF service
+    /// files. e.g.
`/pool/ext/e12f29b8-1ab8-431e-bc96-1c1298947980/crypt/zone/oxz_cockroachdb_8bbea076-ff60-4330-8302-383e18140ef3/root/data/logs/cockroach.log`
+    pub extra: Vec<LogFile>,
+}
+
+// These probably don't warrant newtypes. They are just to make the
+// keys in maps a bit easier to read.
+type ZoneName = String;
+type ServiceName = String;
+
+pub struct Paths {
+    /// Links to the location of current and rotated log files for a given service
+    pub primary: Utf8PathBuf,
+
+    /// Links to debug directories containing archived log files
+    pub debug: Vec<Utf8PathBuf>,
+
+    /// Links to directories containing extra files such as cockroachdb logs
+    /// that reside outside our SMF log and debug service log paths.
+    pub extra: Vec<(&'static str, Utf8PathBuf)>,
+}
+
+pub struct Zones {
+    pub zones: BTreeMap<ZoneName, Paths>,
+}
+
+impl Zones {
+    pub fn load() -> Result<Zones, anyhow::Error> {
+        let mut zones = BTreeMap::new();
+
+        // Describe where to find logs for the global zone
+        zones.insert(
+            "global".to_string(),
+            Paths {
+                primary: Utf8PathBuf::from("/var/svc/log"),
+                debug: vec![],
+                extra: vec![],
+            },
+        );
+
+        // Describe where to find logs for the switch zone
+        zones.insert(
+            "oxz_switch".to_string(),
+            Paths {
+                primary: Utf8PathBuf::from("/zone/oxz_switch/root/var/svc/log"),
+                debug: vec![],
+                extra: vec![(
+                    "dendrite",
+                    "/zone/oxz_switch/root/var/dendrite".into(),
+                )],
+            },
+        );
+
+        // Find the directories containing the primary and extra log files
+        // for all zones on external storage pools.
+        let pools = Pools::read()?;
+        for uuid in &pools.external {
+            let zones_path: Utf8PathBuf =
+                ["/pool/ext", &uuid.to_string(), "crypt/zone"].iter().collect();
+            // Find the zones on the given pool
+            let Ok(entries) = zones_path.read_dir_utf8() else {
+                continue;
+            };
+            for entry in entries {
+                let Ok(zone_entry) = entry else {
+                    continue;
+                };
+                let zone = zone_entry.file_name();
+
+                // Add the path to the current logs for the zone
+                let mut dir = zones_path.clone();
+                dir.push(zone);
+                dir.push("root/var/svc/log");
+                let mut paths =
+                    Paths { primary: dir, debug: vec![], extra: vec![] };
+
+                // Add the path to the extra logs for the zone
+                if zone.starts_with("oxz_cockroachdb") {
+                    let mut dir = zones_path.clone();
+                    dir.push(zone);
+                    dir.push("root/data/logs");
+                    paths.extra.push(("cockroachdb", dir));
+                }
+
+                zones.insert(zone.to_string(), paths);
+            }
+        }
+
+        // Find the directories containing the debug log files
+        for uuid in &pools.external {
+            let zones_path: Utf8PathBuf =
+                ["/pool/ext", &uuid.to_string(), "crypt/debug"]
+                    .iter()
+                    .collect();
+            // Find the zones on the given pool
+            let Ok(entries) = zones_path.read_dir_utf8() else {
+                continue;
+            };
+            for entry in entries {
+                let Ok(zone_entry) = entry else {
+                    continue;
+                };
+                let zone = zone_entry.file_name();
+                let mut dir = zones_path.clone();
+                dir.push(zone);
+
+                // We only add debug paths if the zones have primary paths
+                if let Some(paths) = zones.get_mut(zone) {
+                    paths.debug.push(dir);
+                }
+            }
+        }
+
+        Ok(Zones { zones })
+    }
+
+    /// Return log files organized by service name
+    pub fn zone_logs(
+        &self,
+        zone: &str,
+        filter: Filter,
+    ) -> BTreeMap<ServiceName, SvcLogs> {
+        let mut output = BTreeMap::new();
+        let Some(paths) = self.zones.get(zone) else {
+            return BTreeMap::new();
+        };
+        // Some rotated files exist in `paths.primary` that we track as
+        // 'archived'. These files have not yet been migrated into the debug
+        // directory.
+        if filter.current || filter.archived {
+            load_svc_logs(paths.primary.clone(), &mut output);
+        }
+
+        if filter.archived {
+            for dir in paths.debug.clone() {
+                load_svc_logs(dir, &mut output);
+            }
+        }
+        if filter.extra {
+            for (svc_name, dir) in paths.extra.clone() {
+                load_extra_logs(dir, svc_name, &mut output);
+            }
+        }
+        output
+    }
+}
+
+const OX_SMF_PREFIXES: [&str; 2] = ["oxide-", "system-illumos-"];
+
+/// Return true if the provided file name appears to be a valid log file for an
+/// Oxide-managed SMF service.
+///
+/// Note that this operates on the _file name_. Any leading path components will
+/// cause this check to return `false`.
+pub fn is_oxide_smf_log_file(filename: impl AsRef<str>) -> bool {
+    // Log files are named by the SMF services, with the `/` in the FMRI
+    // translated to a `-`.
+    let filename = filename.as_ref();
+    OX_SMF_PREFIXES
+        .iter()
+        .any(|prefix| filename.starts_with(prefix) && filename.contains(".log"))
+}
+
+// Parse an oxide smf log file name and return the name of the underlying
+// service.
+//
+// If parsing fails for some reason, return `None`.
+pub fn oxide_smf_service_name_from_log_file_name(
+    filename: &str,
+) -> Option<&str> {
+    let Some((prefix, _suffix)) = filename.split_once(':') else {
+        // No ':' found
+        return None;
+    };
+
+    for ox_prefix in OX_SMF_PREFIXES {
+        if let Some(svc_name) = prefix.strip_prefix(ox_prefix) {
+            return Some(svc_name);
+        }
+    }
+
+    None
+}
+
+// Given a directory, find all oxide specific SMF service logs and return them
+// mapped to their inferred service name.
+fn load_svc_logs(dir: Utf8PathBuf, logs: &mut BTreeMap<ServiceName, SvcLogs>) {
+    let Ok(entries) = dir.read_dir_utf8() else {
+        return;
+    };
+    for entry in entries {
+        let Ok(entry) = entry else {
+            continue;
+        };
+        let filename = entry.file_name();
+
+        // Is this a log file we care about?
+        if is_oxide_smf_log_file(filename) {
+            let mut path = dir.clone();
+            path.push(filename);
+            let mut logfile = LogFile::new(path);
+
+            let Some(svc_name) =
+                oxide_smf_service_name_from_log_file_name(filename)
+            else {
+                // parsing failed
+                continue;
+            };
+
+            logfile.read_metadata(&entry);
+            if logfile.size == Some(0) {
+                // skip 0 size files
+                continue;
+            }
+
+            let is_current = filename.ends_with(".log");
+
+            let svc_logs =
+                logs.entry(svc_name.to_string()).or_insert(SvcLogs::default());
+
+            if is_current {
+                svc_logs.current = Some(logfile.clone());
+            } else {
+                svc_logs.archived.push(logfile.clone());
+            }
+        }
+    }
+}
+
+// Load any logs in non-standard paths. We grab all logs in `dir` and
+// don't filter based on filename prefix as in `load_svc_logs`.
+fn load_extra_logs(
+    dir: Utf8PathBuf,
+    svc_name: &str,
+    logs: &mut BTreeMap<ServiceName, SvcLogs>,
+) {
+    let Ok(entries) = dir.read_dir_utf8() else {
+        return;
+    };
+
+    let svc_logs =
+        logs.entry(svc_name.to_string()).or_insert(SvcLogs::default());
+
+    for entry in entries {
+        let Ok(entry) = entry else {
+            continue;
+        };
+        let filename = entry.file_name();
+        let mut path = dir.clone();
+        path.push(filename);
+        let mut logfile = LogFile::new(path);
+        logfile.read_metadata(&entry);
+        if logfile.size == Some(0) {
+            // skip 0 size files
+            continue;
+        }
+        svc_logs.extra.push(logfile);
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    pub use super::is_oxide_smf_log_file;
+    pub use super::oxide_smf_service_name_from_log_file_name;
+
+    #[test]
+    fn test_is_oxide_smf_log_file() {
+        assert!(is_oxide_smf_log_file("oxide-blah:default.log"));
+        assert!(is_oxide_smf_log_file("oxide-blah:default.log.0"));
+        assert!(is_oxide_smf_log_file("oxide-blah:default.log.1111"));
+        assert!(is_oxide_smf_log_file("system-illumos-blah:default.log"));
+        assert!(is_oxide_smf_log_file("system-illumos-blah:default.log.0"));
+        assert!(!is_oxide_smf_log_file("not-oxide-blah:default.log"));
+        assert!(!is_oxide_smf_log_file("not-system-illumos-blah:default.log"));
+        assert!(!is_oxide_smf_log_file("system-blah:default.log"));
+    }
+
+    #[test]
+    fn test_oxide_smf_service_name_from_log_file_name() {
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name("oxide-blah:default.log")
+        );
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name(
+                "oxide-blah:default.log.0"
+            )
+        );
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name(
+                "oxide-blah:default.log.1111"
+            )
+        );
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name(
+                "system-illumos-blah:default.log"
+            )
+        );
+        assert_eq!(
+            Some("blah"),
+            oxide_smf_service_name_from_log_file_name(
+                "system-illumos-blah:default.log.0"
+            )
+        );
+        assert!(oxide_smf_service_name_from_log_file_name(
+            "not-oxide-blah:default.log"
+        )
+        .is_none());
+        assert!(oxide_smf_service_name_from_log_file_name(
+            "not-system-illumos-blah:default.log"
+        )
+        .is_none());
+        assert!(oxide_smf_service_name_from_log_file_name(
+            "system-blah:default.log"
+        )
+        .is_none());
+    }
+}
diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml
index 8296eace5c..e4a99095fd 100644
--- a/illumos-utils/Cargo.toml
+++ b/illumos-utils/Cargo.toml
@@ -20,6 +20,7 @@ libc.workspace = true
 macaddr.workspace = true
 omicron-common.workspace = true
 oxide-vpc.workspace = true
+oxlog.workspace = true
 schemars.workspace = true
 serde.workspace = true
 slog.workspace = true
diff --git a/illumos-utils/src/running_zone.rs b/illumos-utils/src/running_zone.rs
index ea80a6d34b..4b4107f529 100644
--- a/illumos-utils/src/running_zone.rs
+++ b/illumos-utils/src/running_zone.rs
@@ -14,6 +14,7 @@ use camino::{Utf8Path, Utf8PathBuf};
 use camino_tempfile::Utf8TempDir;
 use ipnetwork::IpNetwork;
 use omicron_common::backoff;
+pub use oxlog::is_oxide_smf_log_file;
 use slog::{error, info, o, warn, Logger};
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
 use std::sync::Arc;
@@ -1411,24 +1412,8 @@ pub fn is_oxide_smf_service(fmri: impl AsRef<str>) -> bool {
     SMF_SERVICE_PREFIXES.iter().any(|prefix| fmri.starts_with(prefix))
 }
 
-/// Return true if the provided file name appears to be a valid log file for an
-/// Oxide-managed SMF service.
-///
-/// Note that this operates on the _file name_. Any leading path components will
-/// cause this check to return `false`.
-pub fn is_oxide_smf_log_file(filename: impl AsRef<str>) -> bool {
-    // Log files are named by the SMF services, with the `/` in the FMRI
-    // translated to a `-`.
-    const PREFIXES: [&str; 2] = ["oxide-", "system-illumos-"];
-    let filename = filename.as_ref();
-    PREFIXES
-        .iter()
-        .any(|prefix| filename.starts_with(prefix) && filename.contains(".log"))
-}
-
 #[cfg(test)]
 mod tests {
-    use super::is_oxide_smf_log_file;
     use super::is_oxide_smf_service;
 
     #[test]
@@ -1438,15 +1423,4 @@ mod tests {
         assert!(!is_oxide_smf_service("svc:/system/blah:default"));
         assert!(!is_oxide_smf_service("svc:/not/oxide/blah:default"));
     }
-
-    #[test]
-    fn test_is_oxide_smf_log_file() {
-        assert!(is_oxide_smf_log_file("oxide-blah:default.log"));
-        assert!(is_oxide_smf_log_file("oxide-blah:default.log.0"));
-        assert!(is_oxide_smf_log_file("oxide-blah:default.log.1111"));
-        assert!(is_oxide_smf_log_file("system-illumos-blah:default.log"));
-        assert!(is_oxide_smf_log_file("system-illumos-blah:default.log.0"));
-        assert!(!is_oxide_smf_log_file("not-oxide-blah:default.log"));
-        assert!(!is_oxide_smf_log_file("not-system-illumos-blah:default.log"));
-    }
 }
diff --git a/package-manifest.toml b/package-manifest.toml
index 7b12583437..fa6bba7a96 100644
--- a/package-manifest.toml
+++ b/package-manifest.toml
@@ -638,3 +638,11 @@ source.rust.binary_names = ["omdb"]
 source.rust.release = true
 output.type = "zone"
 output.intermediate_only = true
+
+[package.oxlog]
+service_name = "oxlog"
+only_for_targets.image = "standard"
+source.type = "local"
+source.rust.binary_names = ["oxlog"]
+source.rust.release = true
+output.type = "tarball"
diff --git a/tools/build-global-zone-packages.sh b/tools/build-global-zone-packages.sh
index fc1ab42ade..b989e6a543 100755
--- a/tools/build-global-zone-packages.sh
+++ b/tools/build-global-zone-packages.sh
@@ -15,6 +15,7 @@ deps=(
   "$tarball_src_dir/mg-ddm-gz.tar"
   "$tarball_src_dir/propolis-server.tar.gz"
   "$tarball_src_dir/overlay.tar.gz"
+  "$tarball_src_dir/oxlog.tar"
 )
 for dep in "${deps[@]}"; do
   if [[ ! -e $dep ]]; then
@@ -48,6 +49,12 @@ mkdir -p "$pkg_dir"
 cd "$pkg_dir"
 tar -xvfz "$tarball_src_dir/mg-ddm-gz.tar"
 cd -
+# Extract the oxlog tarball for re-packaging into the layered GZ archive.
+pkg_dir="$tmp_gz/root/opt/oxide/oxlog"
+mkdir -p "$pkg_dir"
+cd "$pkg_dir"
+tar -xvfz "$tarball_src_dir/oxlog.tar"
+cd -
 
 # propolis should be bundled with this OS: Put the propolis-server zone image
 # under /opt/oxide in the gz.
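Taken together, the library surface added in this patch is small. A minimal sketch of how a consumer might drive it (this is an illustrative example, not code from the patch; `main` here is a hypothetical caller, and `oxz_switch` is one of the zone names `Zones::load` always registers):

```rust
use oxlog::{Filter, Zones};

fn main() -> Result<(), anyhow::Error> {
    // Build the zone -> log-path map by reading the filesystem.
    let zones = Zones::load()?;

    // Only look at current log files; per the Filter docs, searching
    // for archived logs can be expensive.
    let filter = Filter { current: true, archived: false, extra: false };

    // zone_logs returns a BTreeMap<ServiceName, SvcLogs> for the zone.
    for (service, logs) in zones.zone_logs("oxz_switch", filter) {
        if let Some(current) = logs.current {
            println!("{service}: {}", current.path);
        }
    }
    Ok(())
}
```
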
diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index e4733992bc..b6d61d9ea5 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -25,6 +25,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } +camino = { version = "1.1.6", default-features = false, features = ["serde1"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] } @@ -130,6 +131,7 @@ bstr-6f8ce4dd05d13bba = { package = "bstr", version = "0.2.17" } bstr-dff4ba8e3ae991db = { package = "bstr", version = "1.6.0" } byteorder = { version = "1.5.0" } bytes = { version = "1.5.0", features = ["serde"] } +camino = { version = "1.1.6", default-features = false, features = ["serde1"] } chrono = { version = "0.4.31", features = ["alloc", "serde"] } cipher = { version = "0.4.4", default-features = false, features = ["block-padding", "zeroize"] } clap = { version = "4.4.3", features = ["cargo", "derive", "env", "wrap_help"] } From 7bb6cd3d9b5b597424e3929f5f5dd8485fe8d454 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Tue, 23 Jan 2024 16:15:19 -0600 Subject: [PATCH 14/49] Bump web console (#4873) https://github.com/oxidecomputer/console/compare/644a45b8...b9013a33 * [b9013a33](https://github.com/oxidecomputer/console/commit/b9013a33) oxidecomputer/console#1898 * [fb9e9ca5](https://github.com/oxidecomputer/console/commit/fb9e9ca5) oxidecomputer/console#1904 * [9ae29498](https://github.com/oxidecomputer/console/commit/9ae29498) oxidecomputer/console#1897 * [6b894ceb](https://github.com/oxidecomputer/console/commit/6b894ceb) oxidecomputer/console#1901 * [d80d2e7c](https://github.com/oxidecomputer/console/commit/d80d2e7c) oxidecomputer/console#1886 * [2a7da0fa](https://github.com/oxidecomputer/console/commit/2a7da0fa) bump vite for security fix * [700e2700](https://github.com/oxidecomputer/console/commit/700e2700) oxidecomputer/console#1893 --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index e76c29c9a0..0dc0024f2c 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="644a45b8e4ab673ad51754e372277abc3ddfd036" -SHA2="a059917d826081df04efd44186f6dfeef0099fc53f1e8618796ea990a510f4b0" +COMMIT="b9013a33eaa3f5efdcd5c7d244e36a54e7222295" +SHA2="bebb9800ff94c42897d54faac8c2a3f89b2b0e927ebf75ec74223b6163e4209d" From 0de612278713efe3e48c990dd516125402eedb31 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 05:59:51 +0000 Subject: [PATCH 15/49] Update taiki-e/install-action digest to cf2d7f1 (#4879) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`bd4f144` -> `cf2d7f1`](https://togithub.com/taiki-e/install-action/compare/bd4f144...cf2d7f1) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 
🚦 **Automerge**: Enabled.

♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate).

Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com>
---
 .github/workflows/hakari.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml
index a4ebc74843..06da0395a1 100644
--- a/.github/workflows/hakari.yml
+++ b/.github/workflows/hakari.yml
@@ -24,7 +24,7 @@ jobs:
         with:
           toolchain: stable
       - name: Install cargo-hakari
-        uses: taiki-e/install-action@bd4f14420660e33ca2929e5c0306a8367173c1ee # v2
+        uses: taiki-e/install-action@cf2d7f1118304815479579570ad3ec572fe94523 # v2
         with:
           tool: cargo-hakari
       - name: Check workspace-hack Cargo.toml is up-to-date

From a6a74d3efcbb9db3b00d2f1ae03dd1b8f86cbfba Mon Sep 17 00:00:00 2001
From: Rain <rain@sunshowers.io>
Date: Wed, 24 Jan 2024 00:42:54 -0800
Subject: [PATCH 16/49] [nexus] simplify BackgroundTask trait (#4877)

Just using `'a` everywhere is semantically identical to the current
pattern because all the lifetime parameters are covariant.
---
 nexus/src/app/background/common.rs            | 29 ++++++-------------
 nexus/src/app/background/dns_config.rs        | 10 ++-----
 nexus/src/app/background/dns_propagation.rs   | 10 ++-----
 nexus/src/app/background/dns_servers.rs       | 10 ++-----
 .../src/app/background/external_endpoints.rs  | 10 ++-----
 .../app/background/inventory_collection.rs    | 10 ++-----
 nexus/src/app/background/nat_cleanup.rs       | 10 ++-----
 nexus/src/app/background/phantom_disks.rs     | 10 ++-----
 8 files changed, 30 insertions(+), 69 deletions(-)

diff --git a/nexus/src/app/background/common.rs b/nexus/src/app/background/common.rs
index 7b05eab61b..4fcce74714 100644
--- a/nexus/src/app/background/common.rs
+++ b/nexus/src/app/background/common.rs
@@ -153,13 +153,10 @@ use tokio::time::MissedTickBehavior;
 ///
 /// See module-level documentation for details.
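// Why one lifetime is enough here: `BoxFuture<'c, T>` is covariant in `'c`,
// so a future that borrows from both `&'a mut self` and a `&'b OpContext`
// already coerces to their common (shorter) lifetime. Tying everything to a
// single `'a` lets the compiler infer that common lifetime directly, which
// is what makes the explicit `'a: 'c, 'b: 'c` bounds removed below redundant.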
pub trait BackgroundTask: Send + Sync { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c; + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value>; } /// Drives the execution of background tasks @@ -499,14 +496,10 @@ mod test { } impl BackgroundTask for ReportingTask { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - _: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + _: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let count = self.counter; self.counter += 1; @@ -684,14 +677,10 @@ mod test { } impl BackgroundTask for PausingTask { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - _: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + _: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let count = self.counter; self.counter += 1; diff --git a/nexus/src/app/background/dns_config.rs b/nexus/src/app/background/dns_config.rs index 805ae813fe..959cf1843e 100644 --- a/nexus/src/app/background/dns_config.rs +++ b/nexus/src/app/background/dns_config.rs @@ -43,14 +43,10 @@ impl DnsConfigWatcher { } impl BackgroundTask for DnsConfigWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Set up a logger for this activation that includes metadata about // the current generation. diff --git a/nexus/src/app/background/dns_propagation.rs b/nexus/src/app/background/dns_propagation.rs index 45776df21b..cf7a399999 100644 --- a/nexus/src/app/background/dns_propagation.rs +++ b/nexus/src/app/background/dns_propagation.rs @@ -36,14 +36,10 @@ impl DnsPropagator { } impl BackgroundTask for DnsPropagator { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Read the DNS configuration and server list from the other // background tasks that assemble these. Clone them because diff --git a/nexus/src/app/background/dns_servers.rs b/nexus/src/app/background/dns_servers.rs index 3a75c09302..97fb3510b7 100644 --- a/nexus/src/app/background/dns_servers.rs +++ b/nexus/src/app/background/dns_servers.rs @@ -57,14 +57,10 @@ impl DnsServersWatcher { } impl BackgroundTask for DnsServersWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { // Set up a logger for this activation that includes metadata about // the current generation. 
diff --git a/nexus/src/app/background/external_endpoints.rs b/nexus/src/app/background/external_endpoints.rs index 53401c16de..ed530e0775 100644 --- a/nexus/src/app/background/external_endpoints.rs +++ b/nexus/src/app/background/external_endpoints.rs @@ -42,14 +42,10 @@ impl ExternalEndpointsWatcher { } impl BackgroundTask for ExternalEndpointsWatcher { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; diff --git a/nexus/src/app/background/inventory_collection.rs b/nexus/src/app/background/inventory_collection.rs index 5c52fa519b..044e5a2234 100644 --- a/nexus/src/app/background/inventory_collection.rs +++ b/nexus/src/app/background/inventory_collection.rs @@ -51,14 +51,10 @@ impl InventoryCollector { } impl BackgroundTask for InventoryCollector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { match inventory_activate( opctx, diff --git a/nexus/src/app/background/nat_cleanup.rs b/nexus/src/app/background/nat_cleanup.rs index 1691d96a4b..5014dc0553 100644 --- a/nexus/src/app/background/nat_cleanup.rs +++ b/nexus/src/app/background/nat_cleanup.rs @@ -32,14 +32,10 @@ impl Ipv4NatGarbageCollector { } impl BackgroundTask for Ipv4NatGarbageCollector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; diff --git a/nexus/src/app/background/phantom_disks.rs b/nexus/src/app/background/phantom_disks.rs index b038d70ac6..48688838e5 100644 --- a/nexus/src/app/background/phantom_disks.rs +++ b/nexus/src/app/background/phantom_disks.rs @@ -37,14 +37,10 @@ impl PhantomDiskDetector { } impl BackgroundTask for PhantomDiskDetector { - fn activate<'a, 'b, 'c>( + fn activate<'a>( &'a mut self, - opctx: &'b OpContext, - ) -> BoxFuture<'c, serde_json::Value> - where - 'a: 'c, - 'b: 'c, - { + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { async { let log = &opctx.log; warn!(&log, "phantom disk task started"); From 95778ebb270929469e208ad4d7052101e372aad2 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:50:52 +0000 Subject: [PATCH 17/49] Update Rust crate argon2 to 0.5.3 (#4881) --- Cargo.lock | 8 ++++---- passwords/Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ec594abb8f..d58b5f6cd9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,9 +188,9 @@ checksum = "bddcadddf5e9015d310179a59bb28c4d4b9920ad0f11e8e14dbadf654890c9a6" [[package]] name = "argon2" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17ba4cac0a46bc1d2912652a751c47f2a9f3a7fe89bcae2275d418f5270402f9" +checksum = "3c3610892ee6e0cbce8ae2700349fcf8f98adb0dbfbee85aec3c9179d29cc072" dependencies = [ "base64ct", "blake2", @@ -1100,9 +1100,9 @@ checksum = "f9236877021b66ad90f833d8a73a7acb702b985b64c5986682d9f1f1a184f0fb" [[package]] name = "cpufeatures" -version = "0.2.9" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "a17b76ff3a4162b0b27f354a0c87015ddad39d35f9c0c36607a3bdd175dde1f1" +checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" dependencies = [ "libc", ] diff --git a/passwords/Cargo.toml b/passwords/Cargo.toml index 8adcf75a2e..4f3922a7a5 100644 --- a/passwords/Cargo.toml +++ b/passwords/Cargo.toml @@ -5,7 +5,7 @@ edition = "2021" license = "MPL-2.0" [dependencies] -argon2 = { version = "0.5.2", features = ["alloc", "password-hash", "rand", "std"] } +argon2 = { version = "0.5.3", features = ["alloc", "password-hash", "rand", "std"] } rand.workspace = true thiserror.workspace = true schemars.workspace = true From d978cd6cedc1653643c7ab2ab529634a10b46c0d Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 09:39:53 +0000 Subject: [PATCH 18/49] Update Rust crate tabled to 0.15 (#4847) --- Cargo.lock | 12 ++++++------ Cargo.toml | 2 +- oximeter/db/src/bin/oxdb.rs | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d58b5f6cd9..d179ad4b0f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5726,9 +5726,9 @@ dependencies = [ [[package]] name = "papergrid" -version = "0.10.0" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2ccbe15f2b6db62f9a9871642746427e297b0ceb85f9a7f1ee5ff47d184d0c8" +checksum = "9ad43c07024ef767f9160710b3a6773976194758c7919b17e63b863db0bdf7fb" dependencies = [ "bytecount", "fnv", @@ -8528,9 +8528,9 @@ dependencies = [ [[package]] name = "tabled" -version = "0.14.0" +version = "0.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfe9c3632da101aba5131ed63f9eed38665f8b3c68703a6bb18124835c1a5d22" +checksum = "4c998b0c8b921495196a48aabaf1901ff28be0760136e31604f7967b0792050e" dependencies = [ "papergrid", "tabled_derive", @@ -8539,9 +8539,9 @@ dependencies = [ [[package]] name = "tabled_derive" -version = "0.6.0" +version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99f688a08b54f4f02f0a3c382aefdb7884d3d69609f785bd253dc033243e3fe4" +checksum = "4c138f99377e5d653a371cdad263615634cfc8467685dfe8e73e2b8e98f44b17" dependencies = [ "heck 0.4.1", "proc-macro-error", diff --git a/Cargo.toml b/Cargo.toml index 3d0be64380..2e538656ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -371,7 +371,7 @@ supports-color = "2.1.0" swrite = "0.1.0" libsw = { version = "3.3.1", features = ["tokio"] } syn = { version = "2.0" } -tabled = "0.14" +tabled = "0.15.0" tar = "0.4" tempdir = "0.3" tempfile = "3.9" diff --git a/oximeter/db/src/bin/oxdb.rs b/oximeter/db/src/bin/oxdb.rs index 17f05c24e2..02a8054da0 100644 --- a/oximeter/db/src/bin/oxdb.rs +++ b/oximeter/db/src/bin/oxdb.rs @@ -391,7 +391,7 @@ async fn describe_virtual_table( } let mut builder = tabled::builder::Builder::default(); - builder.set_header(cols); + builder.push_record(cols); // first record is the header builder.push_record(types); println!( "{}", @@ -553,7 +553,7 @@ async fn sql_shell( println!(); let mut builder = tabled::builder::Builder::default(); - builder.set_header(&table.column_names); + builder.push_record(&table.column_names); // first record is the header for row in table.rows.iter() { builder.push_record( row.iter().map(ToString::to_string), From 255cb83822098f80c2f60811152922259ca8e82c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 09:57:33 +0000 Subject: [PATCH 
19/49] Update Rust crate ciborium to 0.2.2 (#4882) --- Cargo.lock | 20 ++++++++++++-------- Cargo.toml | 2 +- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index d179ad4b0f..4d531366f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -892,9 +892,9 @@ dependencies = [ [[package]] name = "ciborium" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "effd91f6c78e5a4ace8a5d3c0b6bfaec9e2baaef55f3efc00e45fb2e477ee926" +checksum = "42e69ffd6f0917f5c029256a24d0161db17cea3997d185db0d35926308770f0e" dependencies = [ "ciborium-io", "ciborium-ll", @@ -903,15 +903,15 @@ dependencies = [ [[package]] name = "ciborium-io" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cdf919175532b369853f5d5e20b26b43112613fd6fe7aee757e35f7a44642656" +checksum = "05afea1e0a06c9be33d539b876f1ce3692f4afea2cb41f740e7743225ed1c757" [[package]] name = "ciborium-ll" -version = "0.2.1" +version = "0.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defaa24ecc093c77630e6c15e17c51f5e187bf35ee514f4e2d67baaa96dae22b" +checksum = "57663b653d948a338bfb3eeba9bb2fd5fcfaecb9e199e87e1eda4d9e8b240fd9" dependencies = [ "ciborium-io", "half", @@ -2726,9 +2726,13 @@ dependencies = [ [[package]] name = "half" -version = "1.8.2" +version = "2.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eabb4a44450da02c90444cf74558da904edde8fb4e9035a9a6a4e15445af0bd7" +checksum = "bc52e53916c08643f1b56ec082790d1e86a32e58dc5268f897f313fbae7b4872" +dependencies = [ + "cfg-if", + "crunchy", +] [[package]] name = "hash32" diff --git a/Cargo.toml b/Cargo.toml index 2e538656ac..5364f4b4e1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -170,7 +170,7 @@ camino = "1.1" camino-tempfile = "1.1.1" cancel-safe-futures = "0.1.5" chacha20poly1305 = "0.10.1" -ciborium = "0.2.1" +ciborium = "0.2.2" cfg-if = "1.0" chrono = { version = "0.4", features = [ "serde" ] } clap = { version = "4.4", features = ["cargo", "derive", "env", "wrap_help"] } From 8f1134a911532792cac29fdea7ad42b9403ac233 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 24 Jan 2024 08:36:41 -0800 Subject: [PATCH 20/49] [wicket] update to ratatui 0.25.0 (#4876) This was mostly a smooth upgrade, with just a small change required to our use of tui-tree-widget. Tested the TUI locally -- works great. 
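For reference, the tui-tree-widget part of the change is that 0.16 keys
every `TreeItem` on an explicit identifier and rejects duplicate sibling
identifiers, so the constructors now return `Result`. A minimal sketch of
the new shape, inferred from the diff below (`build` and `draw` are
hypothetical helpers; the `usize` identifiers mirror what wicket uses):

```rust
use ratatui::Frame;
use tui_tree_widget::{Tree, TreeItem, TreeState};

// The identifier comes first now, and `TreeItem::new` fails on duplicate
// sibling identifiers, hence the `.expect(...)` calls through this patch.
fn build() -> (Vec<TreeItem<'static, usize>>, TreeState<usize>) {
    let items = vec![
        TreeItem::new(0, "parent", vec![TreeItem::new_leaf(0, "child")])
            .expect("sibling identifiers are unique"),
        TreeItem::new(1, "other", vec![])
            .expect("no children, so no duplicates"),
    ];
    let mut state = TreeState::default();
    // `select_first` now takes the items so it knows what "first" is.
    state.select_first(&items);
    (items, state)
}

fn draw(frame: &mut Frame<'_>) {
    let (items, mut state) = build();
    // `Tree::new` performs the same duplicate-identifier check.
    let tree = Tree::new(items).expect("tree identifiers are unique");
    frame.render_stateful_widget(tree, frame.size(), &mut state);
}
```

The other mechanical churn below comes from ratatui 0.25 itself: `Frame`
loses its backend type parameter (so the local `Frame` alias in `runner.rs`
goes away) and `Table::new` now takes its width constraints directly.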
--- Cargo.lock | 51 +++++++++++++++------ Cargo.toml | 3 +- wicket/Cargo.toml | 2 +- wicket/src/runner.rs | 1 - wicket/src/state/update.rs | 1 + wicket/src/ui/controls/mod.rs | 4 +- wicket/src/ui/main.rs | 3 +- wicket/src/ui/panes/overview.rs | 3 +- wicket/src/ui/panes/rack_setup.rs | 2 +- wicket/src/ui/panes/update.rs | 68 ++++++++++++++++------------ wicket/src/ui/splash.rs | 3 +- wicket/src/ui/widgets/fade.rs | 11 +---- wicket/src/ui/widgets/status_view.rs | 3 +- workspace-hack/Cargo.toml | 6 ++- 14 files changed, 96 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4d531366f6..f5b80f9262 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -74,6 +74,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "allocator-api2" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5" + [[package]] name = "android-tzdata" version = "0.1.1" @@ -2772,6 +2778,10 @@ name = "hashbrown" version = "0.14.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f93e7192158dbcda357bdec5fb5788eebf8bbac027f3f33e719d29135ae84156" +dependencies = [ + "ahash", + "allocator-api2", +] [[package]] name = "headers" @@ -3595,15 +3605,6 @@ dependencies = [ "either", ] -[[package]] -name = "itertools" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" -dependencies = [ - "either", -] - [[package]] name = "itertools" version = "0.12.0" @@ -3885,6 +3886,15 @@ dependencies = [ "zerocopy 0.6.4", ] +[[package]] +name = "lru" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2994eeba8ed550fd9b47a0b38f0242bc3344e496483c6180b69139cc2fa5d1d7" +dependencies = [ + "hashbrown 0.14.2", +] + [[package]] name = "lru-cache" version = "0.1.2" @@ -5213,6 +5223,7 @@ dependencies = [ "getrandom 0.2.10", "group", "hashbrown 0.13.2", + "hashbrown 0.14.2", "hex", "hmac", "hyper 0.14.27", @@ -6631,16 +6642,18 @@ dependencies = [ [[package]] name = "ratatui" -version = "0.23.0" +version = "0.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e2e4cd95294a85c3b4446e63ef054eea43e0205b1fd60120c16b74ff7ff96ad" +checksum = "a5659e52e4ba6e07b2dad9f1158f578ef84a73762625ddb51536019f34d180eb" dependencies = [ "bitflags 2.4.0", "cassowary", "crossterm", "indoc 2.0.3", - "itertools 0.11.0", + "itertools 0.12.0", + "lru", "paste", + "stability", "strum", "unicode-segmentation", "unicode-width", @@ -8276,6 +8289,16 @@ dependencies = [ "syn 2.0.46", ] +[[package]] +name = "stability" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebd1b177894da2a2d9120208c3386066af06a488255caabc5de8ddca22dbc3ce" +dependencies = [ + "quote", + "syn 1.0.109", +] + [[package]] name = "stable_deref_trait" version = "1.2.0" @@ -9351,9 +9374,9 @@ dependencies = [ [[package]] name = "tui-tree-widget" -version = "0.13.0" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f01f9172fb8f4f925fb1e259c2f411be14af031ab8b35d517fd05cb78c0784d5" +checksum = "136011b328c4f392499a02c4b5b78d509fb297bf9c10f2bda5d11d65cb946e4c" dependencies = [ "ratatui", "unicode-width", diff --git a/Cargo.toml b/Cargo.toml index 5364f4b4e1..d47b5727ad 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -310,7 +310,7 @@ propolis-mock-server = { git = 
"https://github.com/oxidecomputer/propolis", rev proptest = "1.4.0" quote = "1.0" rand = "0.8.5" -ratatui = "0.23.0" +ratatui = "0.25.0" rayon = "1.8" rcgen = "0.12.0" reedline = "0.28.0" @@ -396,6 +396,7 @@ trust-dns-server = "0.22" trybuild = "1.0.89" tufaceous = { path = "tufaceous" } tufaceous-lib = { path = "tufaceous-lib" } +tui-tree-widget = "0.16.0" unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } diff --git a/wicket/Cargo.toml b/wicket/Cargo.toml index efb8e51dff..140c011511 100644 --- a/wicket/Cargo.toml +++ b/wicket/Cargo.toml @@ -37,7 +37,7 @@ tokio = { workspace = true, features = ["full"] } tokio-util.workspace = true toml.workspace = true toml_edit.workspace = true -tui-tree-widget = "0.13.0" +tui-tree-widget.workspace = true unicode-width.workspace = true zeroize.workspace = true diff --git a/wicket/src/runner.rs b/wicket/src/runner.rs index 32fabde53e..e83d321459 100644 --- a/wicket/src/runner.rs +++ b/wicket/src/runner.rs @@ -34,7 +34,6 @@ use crate::{Action, Cmd, Event, KeyHandler, Recorder, State, TICK_INTERVAL}; // We can avoid a bunch of unnecessary type parameters by picking them ahead of time. pub type Term = Terminal>; -pub type Frame<'a> = ratatui::Frame<'a, CrosstermBackend>; const MAX_RECORDED_EVENTS: usize = 10000; diff --git a/wicket/src/state/update.rs b/wicket/src/state/update.rs index 6d8a168614..77bbdd83d2 100644 --- a/wicket/src/state/update.rs +++ b/wicket/src/state/update.rs @@ -333,6 +333,7 @@ impl UpdateItem { } } +#[derive(Debug, Copy, Clone)] pub enum UpdateState { NotStarted, Starting, diff --git a/wicket/src/ui/controls/mod.rs b/wicket/src/ui/controls/mod.rs index 4305fb5809..a2682b8052 100644 --- a/wicket/src/ui/controls/mod.rs +++ b/wicket/src/ui/controls/mod.rs @@ -2,8 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -use crate::{Action, Cmd, Frame, State}; -use ratatui::layout::Rect; +use crate::{Action, Cmd, State}; +use ratatui::{layout::Rect, Frame}; /// A [`Control`] is the an item on a screen that can be selected and interacted with. /// Control's render [`ratatui::widgets::Widget`]s when drawn. 
diff --git a/wicket/src/ui/main.rs b/wicket/src/ui/main.rs
index 58ea6c1771..379cbd03af 100644
--- a/wicket/src/ui/main.rs
+++ b/wicket/src/ui/main.rs
@@ -8,11 +8,12 @@ use super::{Control, OverviewPane, RackSetupPane, StatefulList, UpdatePane};
 use crate::ui::defaults::colors::*;
 use crate::ui::defaults::style;
 use crate::ui::widgets::Fade;
-use crate::{Action, Cmd, Frame, State, Term};
+use crate::{Action, Cmd, State, Term};
 use ratatui::layout::{Alignment, Constraint, Direction, Layout, Rect};
 use ratatui::style::{Modifier, Style};
 use ratatui::text::{Line, Span};
 use ratatui::widgets::{Block, BorderType, Borders, List, ListItem, Paragraph};
+use ratatui::Frame;
 use slog::{o, Logger};
 use wicketd_client::types::GetLocationResponse;
 
diff --git a/wicket/src/ui/panes/overview.rs b/wicket/src/ui/panes/overview.rs
index e8cf50bb32..f2d4d4a7ab 100644
--- a/wicket/src/ui/panes/overview.rs
+++ b/wicket/src/ui/panes/overview.rs
@@ -16,11 +16,12 @@ use crate::ui::defaults::style;
 use crate::ui::widgets::IgnitionPopup;
 use crate::ui::widgets::{BoxConnector, BoxConnectorKind, Rack};
 use crate::ui::wrap::wrap_text;
-use crate::{Action, Cmd, Frame, State};
+use crate::{Action, Cmd, State};
 use ratatui::layout::{Constraint, Direction, Layout, Rect};
 use ratatui::style::Style;
 use ratatui::text::{Line, Span, Text};
 use ratatui::widgets::{Block, BorderType, Borders, Paragraph};
+use ratatui::Frame;
 use wicketd_client::types::RotState;
 use wicketd_client::types::SpComponentCaboose;
 use wicketd_client::types::SpComponentInfo;
diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs
index 086d01ce9d..ab85c63819 100644
--- a/wicket/src/ui/panes/rack_setup.rs
+++ b/wicket/src/ui/panes/rack_setup.rs
@@ -16,7 +16,6 @@ use crate::ui::widgets::PopupScrollOffset;
 use crate::Action;
 use crate::Cmd;
 use crate::Control;
-use crate::Frame;
 use crate::State;
 use ratatui::layout::Constraint;
 use ratatui::layout::Direction;
@@ -29,6 +28,7 @@ use ratatui::widgets::Block;
 use ratatui::widgets::BorderType;
 use ratatui::widgets::Borders;
 use ratatui::widgets::Paragraph;
+use ratatui::Frame;
 use std::borrow::Cow;
 use wicketd_client::types::Baseboard;
 use wicketd_client::types::CurrentRssUserConfig;
diff --git a/wicket/src/ui/panes/update.rs b/wicket/src/ui/panes/update.rs
index d14b90dfab..be21984997 100644
--- a/wicket/src/ui/panes/update.rs
+++ b/wicket/src/ui/panes/update.rs
@@ -17,7 +17,7 @@ use crate::ui::widgets::{
     PopupScrollOffset, StatusView,
 };
 use crate::ui::wrap::wrap_text;
-use crate::{Action, Cmd, Frame, State};
+use crate::{Action, Cmd, State};
 use indexmap::IndexMap;
 use omicron_common::api::internal::nexus::KnownArtifactKind;
 use ratatui::layout::{Alignment, Constraint, Direction, Layout, Rect};
@@ -26,6 +26,7 @@ use ratatui::widgets::{
     Block, BorderType, Borders, Cell, List, ListItem, ListState, Paragraph,
     Row, Table,
 };
+use ratatui::Frame;
 use slog::{info, o, Logger};
 use tui_tree_widget::{Tree, TreeItem, TreeState};
 use update_engine::{
@@ -148,8 +149,11 @@ pub struct UpdatePane {
     /// TODO: Move following state into global `State` so that recorder snapshots
     /// capture all state.
-    tree_state: TreeState,
-    items: Vec<TreeItem<'static>>,
+    ///
+    /// TODO: The generic parameter is carried over from earlier versions
+    /// of tui-tree-widget, but there's likely a better index type.
+    tree_state: TreeState<usize>,
+    items: Vec<TreeItem<'static, usize>>,
 
     // Per-component update state that isn't serializable.
component_state: BTreeMap,
 
@@ -175,14 +179,20 @@ impl UpdatePane {
     pub fn new(log: &Logger) -> UpdatePane {
         let log = log.new(o!("component" => "UpdatePane"));
         let mut tree_state = TreeState::default();
-        tree_state.select_first();
+        let items = ALL_COMPONENT_IDS
+            .iter()
+            .enumerate()
+            .map(|(index, id)| {
+                TreeItem::new(index, id.to_string_uppercase(), vec![])
+                    .expect("no children so no duplicate identifiers")
+            })
+            .collect::<Vec<_>>();
+        tree_state.select_first(&items);
+
         UpdatePane {
             log,
             tree_state,
-            items: ALL_COMPONENT_IDS
-                .iter()
-                .map(|id| TreeItem::new(id.to_string_uppercase(), vec![]))
-                .collect(),
+            items,
             help: vec![
                 ("Expand", ""),
                 ("Collapse", ""),
@@ -826,7 +836,8 @@ impl UpdatePane {
             .update_state
             .items
             .iter()
-            .map(|(id, states)| {
+            .enumerate()
+            .map(|(index, (id, states))| {
                 let children: Vec<_> = states
                     .iter()
                     .flat_map(|(component, s)| {
                         let version =
                             artifact_version(id, component, &versions);
                         let installed_versions =
                             all_installed_versions(id, component, inventory);
-                        let contents_rect = self.contents_rect;
                         installed_versions.into_iter().map(move |v| {
-                            let spans = vec![
+                            vec![
                                 Span::styled(v.title, style::selected()),
                                 Span::styled(v.version, style::selected_line()),
                                 Span::styled(
                                     version.clone(),
                                     style::selected(),
                                 ),
                                 Span::styled(s.to_string(), s.style()),
-                            ];
-                            TreeItem::new_leaf(align_by(
-                                0,
-                                MAX_COLUMN_WIDTH,
-                                contents_rect,
-                                spans,
-                            ))
+                            ]
                         })
                     })
+                    .enumerate()
+                    .map(|(leaf_index, spans)| {
+                        let contents_rect = self.contents_rect;
+                        TreeItem::new_leaf(
+                            leaf_index,
+                            align_by(0, MAX_COLUMN_WIDTH, contents_rect, spans),
+                        )
+                    })
                     .collect();
-                TreeItem::new(id.to_string_uppercase(), children)
+                TreeItem::new(index, id.to_string_uppercase(), children)
+                    .expect("tree does not contain duplicate identifiers")
             })
             .collect();
     }
@@ -1365,6 +1378,7 @@ impl UpdatePane {
 
         // Draw the contents
         let tree = Tree::new(self.items.clone())
+            .expect("tree does not have duplicate identifiers")
            .block(block.clone().borders(Borders::LEFT | Borders::RIGHT))
            .style(style::plain_text())
            .highlight_style(style::highlighted());
@@ -1421,12 +1435,11 @@ impl UpdatePane {
             Constraint::Length(cell_width),
             Constraint::Length(cell_width),
         ];
-        let header_table = Table::new(std::iter::empty())
+        let header_table = Table::new(std::iter::empty(), &width_constraints)
             .header(
                 Row::new(vec!["COMPONENT", "VERSION", "TARGET", "STATUS"])
                     .style(header_style),
             )
-            .widths(&width_constraints)
             .block(block.clone().title("OVERVIEW (* = active)"));
         frame.render_widget(header_table, self.table_headers_rect);
 
@@ -1458,12 +1471,11 @@ impl UpdatePane {
                 ])
             })
         });
-        let version_table =
-            Table::new(version_rows).widths(&width_constraints).block(
-                block
-                    .clone()
-                    .borders(Borders::LEFT | Borders::RIGHT | Borders::BOTTOM),
-            );
+        let version_table = Table::new(version_rows, &width_constraints).block(
+            block
+                .clone()
+                .borders(Borders::LEFT | Borders::RIGHT | Borders::BOTTOM),
+        );
         frame.render_widget(version_table, self.status_view_version_rect);
 
         // Ensure the version table is connected to the table headers
@@ -2413,7 +2425,7 @@ impl Control for UpdatePane {
                 Some(Action::Redraw)
             }
             Cmd::GotoTop => {
-                self.tree_state.select_first();
+                self.tree_state.select_first(&self.items);
                 state.rack_state.selected = ALL_COMPONENT_IDS[0];
                 Some(Action::Redraw)
             }
diff --git a/wicket/src/ui/splash.rs b/wicket/src/ui/splash.rs
index cc8ab0bff8..9da9fa8648 100644
--- a/wicket/src/ui/splash.rs
+++ b/wicket/src/ui/splash.rs
@@ -10,9 +10,10 @@ use
super::defaults::colors::*; use super::defaults::dimensions::RectExt; use super::defaults::style; use super::widgets::{Logo, LogoState, LOGO_HEIGHT, LOGO_WIDTH}; -use crate::{Cmd, Frame, Term}; +use crate::{Cmd, Term}; use ratatui::style::Style; use ratatui::widgets::Block; +use ratatui::Frame; const TOTAL_FRAMES: usize = 100; diff --git a/wicket/src/ui/widgets/fade.rs b/wicket/src/ui/widgets/fade.rs index d1669cd5b7..5462a4ecf2 100644 --- a/wicket/src/ui/widgets/fade.rs +++ b/wicket/src/ui/widgets/fade.rs @@ -9,15 +9,6 @@ pub struct Fade {} impl Widget for Fade { fn render(self, area: Rect, buf: &mut Buffer) { - for x in area.left()..area.right() { - for y in area.top()..area.bottom() { - buf.set_string( - x, - y, - buf.get(x, y).symbol.clone(), - style::faded_background(), - ); - } - } + buf.set_style(area, style::faded_background()); } } diff --git a/wicket/src/ui/widgets/status_view.rs b/wicket/src/ui/widgets/status_view.rs index 7418fed512..b9e981c9bc 100644 --- a/wicket/src/ui/widgets/status_view.rs +++ b/wicket/src/ui/widgets/status_view.rs @@ -6,10 +6,9 @@ use ratatui::{ layout::{Alignment, Rect}, text::Text, widgets::{Block, Borders, List, Paragraph, StatefulWidget, Widget}, + Frame, }; -use crate::Frame; - use super::{BoxConnector, BoxConnectorKind}; /// A displayer for the status view. diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b6d61d9ea5..a15b972554 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -54,7 +54,8 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.13.2" } +hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.2", features = ["raw"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.27", features = ["full"] } @@ -160,7 +161,8 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.10", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.13.2" } +hashbrown-582f2526e08bb6a0 = { package = "hashbrown", version = "0.14.2", features = ["raw"] } +hashbrown-594e8ee84c453af0 = { package = "hashbrown", version = "0.13.2" } hex = { version = "0.4.3", features = ["serde"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "0.14.27", features = ["full"] } From e85af5b1e51bba3f8d0c8eea633307b90a2cb2a8 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:37:19 -0800 Subject: [PATCH 21/49] Update Rust crate uuid to 1.7.0 (#4885) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f5b80f9262..2c58c7c33c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -9705,9 +9705,9 @@ checksum = 
"711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" [[package]] name = "uuid" -version = "1.6.1" +version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e395fcf16a7a3d8127ec99782007af141946b4795001f876d54fb0d55978560" +checksum = "f00cc9702ca12d3c81455259621e676d0f7251cec66a21e98fe2e9a37db93b2a" dependencies = [ "getrandom 0.2.10", "serde", diff --git a/Cargo.toml b/Cargo.toml index d47b5727ad..f5e5a13e0c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -401,7 +401,7 @@ unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } usdt = "0.3" -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } walkdir = "2.4" wicket = { path = "wicket" } wicket-common = { path = "wicket-common" } diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index a15b972554..33434766d3 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -114,7 +114,7 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.31", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } @@ -222,7 +222,7 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } -uuid = { version = "1.6.1", features = ["serde", "v4"] } +uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } zerocopy = { version = "0.7.31", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } From 7bd6ea60e504c288a63db79a0deb44944897e84c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:37:41 -0800 Subject: [PATCH 22/49] Update Rust crate serde_with to 3.5.1 (#4884) --- Cargo.lock | 8 ++++---- Cargo.toml | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2c58c7c33c..bdb288be18 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7693,9 +7693,9 @@ dependencies = [ [[package]] name = "serde_with" -version = "3.4.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64cd236ccc1b7a29e7e2739f27c0b2dd199804abc4290e32f59f3b68d6405c23" +checksum = "f5c9fdb6b00a489875b22efd4b78fe2b363b72265cc5f6eb2e2b9ee270e6140c" dependencies = [ "base64", "chrono", @@ -7710,9 +7710,9 @@ dependencies = [ [[package]] name = "serde_with_macros" -version = "3.4.0" +version = "3.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "93634eb5f75a2323b16de4748022ac4297f9e76b6dced2be287a099f41b5e788" +checksum = "dbff351eb4b33600a2e138dfa0b10b65a238ea8ff8fb2387c422c5022a3e8298" dependencies = [ "darling 0.20.3", "proc-macro2", diff --git a/Cargo.toml b/Cargo.toml index f5e5a13e0c..fc8afecc9c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -336,7 +336,7 @@ serde_json = "1.0.111" serde_path_to_error = "0.1.15" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" -serde_with = "3.4.0" +serde_with = "3.5.1" sha2 = 
"0.10.8" sha3 = "0.10.8" shell-words = "1.1.0" From c392c76f0cc8a0bdaca5cf7690c6d95dd071e7db Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Wed, 24 Jan 2024 08:38:34 -0800 Subject: [PATCH 23/49] Update Rust crate regex to 1.10.3 (#4883) --- Cargo.lock | 13 +++++++------ Cargo.toml | 2 +- workspace-hack/Cargo.toml | 10 ++++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bdb288be18..2451404a1e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5182,6 +5182,7 @@ name = "omicron-workspace-hack" version = "0.1.0" dependencies = [ "ahash", + "aho-corasick", "anyhow", "base16ct", "bit-set", @@ -5254,7 +5255,7 @@ dependencies = [ "rand 0.8.5", "rand_chacha 0.3.1", "regex", - "regex-automata 0.4.3", + "regex-automata 0.4.4", "regex-syntax 0.8.2", "reqwest", "ring 0.17.7", @@ -6780,13 +6781,13 @@ dependencies = [ [[package]] name = "regex" -version = "1.10.2" +version = "1.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "380b951a9c5e80ddfd6136919eef32310721aa4aacd4889a8d39124b026ab343" +checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.3", + "regex-automata 0.4.4", "regex-syntax 0.8.2", ] @@ -6804,9 +6805,9 @@ checksum = "c2f401f4955220693b56f8ec66ee9c78abffd8d1c4f23dc41a23839eb88f0795" [[package]] name = "regex-automata" -version = "0.4.3" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f804c7828047e88b2d32e2d7fe5a105da8ee3264f01902f796c8e067dc2483f" +checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" dependencies = [ "aho-corasick", "memchr", diff --git a/Cargo.toml b/Cargo.toml index fc8afecc9c..fb4327f575 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -315,7 +315,7 @@ rayon = "1.8" rcgen = "0.12.0" reedline = "0.28.0" ref-cast = "1.0" -regex = "1.10.2" +regex = "1.10.3" regress = "0.7.1" reqwest = { version = "0.11", default-features = false } ring = "0.17.7" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 33434766d3..cda4426c9b 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -15,6 +15,7 @@ publish = false ### BEGIN HAKARI SECTION [dependencies] ahash = { version = "0.8.6" } +aho-corasick = { version = "1.0.4" } anyhow = { version = "1.0.75", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } @@ -83,8 +84,8 @@ predicates = { version = "3.1.0" } proc-macro2 = { version = "1.0.74" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } -regex = { version = "1.10.2" } -regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } +regex = { version = "1.10.3" } +regex-automata = { version = "0.4.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } @@ -122,6 +123,7 @@ zip = { version = "0.6.6", default-features = false, features = ["bzip2", "defla [build-dependencies] ahash = { version = "0.8.6" } +aho-corasick = { version = "1.0.4" 
} anyhow = { version = "1.0.75", features = ["backtrace"] } base16ct = { version = "0.2.0", default-features = false, features = ["alloc"] } bit-set = { version = "0.5.3" } @@ -190,8 +192,8 @@ predicates = { version = "3.1.0" } proc-macro2 = { version = "1.0.74" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } -regex = { version = "1.10.2" } -regex-automata = { version = "0.4.3", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } +regex = { version = "1.10.3" } +regex-automata = { version = "0.4.4", default-features = false, features = ["dfa-onepass", "hybrid", "meta", "nfa-backtrack", "perf-inline", "perf-literal", "unicode"] } regex-syntax = { version = "0.8.2" } reqwest = { version = "0.11.22", features = ["blocking", "json", "rustls-tls", "stream"] } ring = { version = "0.17.7", features = ["std"] } From 9ac047e54985d581a4e6b3f77077bd69d5ff20f5 Mon Sep 17 00:00:00 2001 From: Ryan Goodfellow Date: Wed, 24 Jan 2024 12:50:26 -0800 Subject: [PATCH 24/49] updates for tunnel routing (#3859) --- .github/buildomat/jobs/deploy.sh | 21 +++- Cargo.lock | 19 ++- Cargo.toml | 4 +- clients/ddm-admin-client/src/lib.rs | 18 ++- common/src/api/external/mod.rs | 2 +- docs/boundary-services-a-to-z.adoc | 117 ++---------------- docs/networking.adoc | 10 +- illumos-utils/src/opte/mod.rs | 20 --- illumos-utils/src/opte/port_manager.rs | 3 - nexus/src/app/rack.rs | 76 +----------- .../app/sagas/switch_port_settings_apply.rs | 84 +++++++++++++ .../app/sagas/switch_port_settings_clear.rs | 112 ++++++++++++++++- .../app/sagas/switch_port_settings_common.rs | 41 +----- package-manifest.toml | 12 +- sled-agent/src/bootstrap/early_networking.rs | 94 ++++++-------- smf/sled-agent/non-gimlet/config-rss.toml | 2 +- tools/ci_check_opte_ver.sh | 5 + tools/install_opte.sh | 10 ++ tools/maghemite_ddm_openapi_version | 4 +- tools/maghemite_mg_openapi_version | 4 +- tools/maghemite_mgd_checksums | 4 +- tools/opte_version | 2 +- tools/opte_version_override | 5 + 23 files changed, 342 insertions(+), 327 deletions(-) create mode 100644 tools/opte_version_override diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index f4f1e0a999..e69cfb0078 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -2,7 +2,7 @@ #: #: name = "helios / deploy" #: variety = "basic" -#: target = "lab-2.0-opte-0.27" +#: target = "lab-2.0-opte-0.28" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", @@ -33,6 +33,9 @@ _exit_trap() { local status=$? [[ $status -eq 0 ]] && exit 0 + # XXX paranoia + pfexec cp /tmp/opteadm /opt/oxide/opte/bin/opteadm + set +o errexit set -o xtrace banner evidence @@ -50,6 +53,7 @@ _exit_trap() { standalone \ dump-state pfexec /opt/oxide/opte/bin/opteadm list-ports + pfexec /opt/oxide/opte/bin/opteadm dump-v2b z_swadm link ls z_swadm addr list z_swadm route list @@ -97,6 +101,19 @@ z_swadm () { pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm $@ } +# XXX remove. This is just to test against a development branch of OPTE in CI. 
+set +x +OPTE_COMMIT="73d4669ea213d0b7aca35c4babb6fd09ed51d29e" +curl -sSfOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde +pfexec rem_drv xde || true +pfexec mv xde /kernel/drv/amd64/xde +pfexec add_drv xde || true +curl -sSfOL https://buildomat.eng.oxide.computer/wg/0/artefact/01HM09S4M15WNXB2B2MX8R1GBT/yLalJU5vT4S4IEpwSeY4hPuspxw3JcINokZmlfNU14npHkzG/01HM09SJ2RQSFGW7MVKC9JKZ8D/01HM0A58D888AJ7YP6N1Q6T6ZD/opteadm +chmod +x opteadm +cp opteadm /tmp/opteadm +pfexec mv opteadm /opt/oxide/opte/bin/opteadm +set -x + # # XXX work around 14537 (UFS should not allow directories to be unlinked) which # is probably not yet fixed in xde branch? Once the xde branch merges from @@ -236,7 +253,7 @@ infra_ip_last = \"$UPLINK_IP\" /^routes/c\\ routes = \\[{nexthop = \"$GATEWAY_IP\", destination = \"0.0.0.0/0\"}\\] /^addresses/c\\ -addresses = \\[\"$UPLINK_IP/32\"\\] +addresses = \\[\"$UPLINK_IP/24\"\\] } " pkg/config-rss.toml diff -u pkg/config-rss.toml{~,} || true diff --git a/Cargo.lock b/Cargo.lock index 2451404a1e..0c3eb15179 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3257,7 +3257,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" [[package]] name = "illumos-utils" @@ -3657,7 +3657,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "quote", "syn 2.0.46", @@ -5411,7 +5411,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "cfg-if", "dyn-clone", @@ -5427,7 +5427,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "illumos-sys-hdrs", "ipnetwork", @@ -5439,7 +5439,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "libc", "libnet", @@ -5513,10 +5513,12 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=dd2b7b0306d3f01fa09170b8884d402209e49244#dd2b7b0306d3f01fa09170b8884d402209e49244" +source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ + 
"cfg-if", "illumos-sys-hdrs", "opte", + "poptrie", "serde", "smoltcp 0.11.0", "zerocopy 0.7.31", @@ -6153,6 +6155,11 @@ dependencies = [ "universal-hash", ] +[[package]] +name = "poptrie" +version = "0.1.0" +source = "git+https://github.com/oxidecomputer/poptrie?branch=multipath#ca52bef3f87ff1a67d81b3c6e601dcb5fdbcc165" + [[package]] name = "portable-atomic" version = "1.4.3" diff --git a/Cargo.toml b/Cargo.toml index fb4327f575..093e972b42 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -270,7 +270,7 @@ omicron-sled-agent = { path = "sled-agent" } omicron-test-utils = { path = "test-utils" } omicron-zone-package = "0.10.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1", features = [ "api", "std" ] } once_cell = "1.19.0" openapi-lint = { git = "https://github.com/oxidecomputer/openapi-lint", branch = "main" } openapiv3 = "2.0.0" @@ -278,7 +278,7 @@ openapiv3 = "2.0.0" openssl = "0.10" openssl-sys = "0.9" openssl-probe = "0.1.5" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "dd2b7b0306d3f01fa09170b8884d402209e49244" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" } oso = "0.27" owo-colors = "3.5.0" oximeter = { path = "oximeter/oximeter" } diff --git a/clients/ddm-admin-client/src/lib.rs b/clients/ddm-admin-client/src/lib.rs index 93248c73a1..c32345d1dc 100644 --- a/clients/ddm-admin-client/src/lib.rs +++ b/clients/ddm-admin-client/src/lib.rs @@ -20,7 +20,7 @@ pub use inner::types; pub use inner::Error; use either::Either; -use inner::types::Ipv6Prefix; +use inner::types::{Ipv6Prefix, TunnelOrigin}; use inner::Client as InnerClient; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -108,6 +108,22 @@ impl Client { }); } + pub fn advertise_tunnel_endpoint(&self, endpoint: TunnelOrigin) { + let me = self.clone(); + tokio::spawn(async move { + retry_notify(retry_policy_internal_service_aggressive(), || async { + me.inner.advertise_tunnel_endpoints(&vec![endpoint.clone()]).await?; + Ok(()) + }, |err, duration| { + info!( + me.log, + "Failed to notify ddmd of tunnel endpoint (retry in {duration:?}"; + "err" => %err, + ); + }).await.unwrap(); + }); + } + /// Returns the addresses of connected sleds. /// /// Note: These sleds have not yet been verified. diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 68fcb0f9fa..a8aff00afa 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -3253,7 +3253,7 @@ mod test { let net_des = serde_json::from_str::(&ser).unwrap(); assert_eq!(net, net_des); - let net_str = "fd00:99::1/64"; + let net_str = "fd00:47::1/64"; let net = IpNet::from_str(net_str).unwrap(); let ser = serde_json::to_string(&net).unwrap(); diff --git a/docs/boundary-services-a-to-z.adoc b/docs/boundary-services-a-to-z.adoc index 6f4f2fcea6..e4c47ac7f9 100644 --- a/docs/boundary-services-a-to-z.adoc +++ b/docs/boundary-services-a-to-z.adoc @@ -1,115 +1,18 @@ = Boundary Services A-Z -NOTE: The instructions for _deploying_ SoftNPU with Omicron have been folded into xref:how-to-run.adoc[the main how-to-run docs]. +NOTE: The instructions for _deploying_ SoftNPU with Omicron have been folded +into xref:how-to-run.adoc[the main how-to-run docs]. 
-The virtual hardware making up SoftNPU is a bit different than what was previously used. What we now have looks like this. +The virtual hardware making up SoftNPU is depicted in the diagram below. image::plumbing.png[] -The `softnpu` zone will be configured and launched during the `create_virtual_hardware.sh` script. +The `softnpu` zone will be configured and launched during the +`create_virtual_hardware.sh` script. Once the control plane is running, `softnpu` can be configured via `dendrite` -using the `swadm` binary located in the `oxz_switch` zone. -An example script is provided in `tools/scrimlet/softnpu-init.sh`. -This script should work without modification for basic development setups, -but feel free to tweak it as needed. - ----- -$ ./tools/scrimlet/softnpu-init.sh -++ netstat -rn -f inet -++ grep default -++ awk -F ' ' '{print $2}' -+ GATEWAY_IP=10.85.0.1 -+ echo 'Using 10.85.0.1 as gateway ip' -Using 10.85.0.1 as gateway ip -++ arp 10.85.0.1 -++ awk -F ' ' '{print $4}' -+ GATEWAY_MAC=68:d7:9a:1f:77:a1 -+ echo 'Using 68:d7:9a:1f:77:a1 as gateway mac' -Using 68:d7:9a:1f:77:a1 as gateway mac -+ z_swadm link create rear0 --speed 100G --fec RS -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link create rear0 --speed 100G --fec RS -+ z_swadm link create qsfp0 --speed 100G --fec RS -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link create qsfp0 --speed 100G --fec RS -+ z_swadm addr add rear0/0 fe80::aae1:deff:fe01:701c -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add rear0/0 fe80::aae1:deff:fe01:701c -+ z_swadm addr add qsfp0/0 fe80::aae1:deff:fe01:701d -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add qsfp0/0 fe80::aae1:deff:fe01:701d -+ z_swadm addr add rear0/0 fd00:99::1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr add rear0/0 fd00:99::1 -+ z_swadm route add fd00:1122:3344:0101::/64 rear0/0 fe80::aae1:deff:fe00:1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route add fd00:1122:3344:0101::/64 rear0/0 fe80::aae1:deff:fe00:1 -+ z_swadm arp add fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp add fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 -+ z_swadm arp add 10.85.0.1 68:d7:9a:1f:77:a1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp add 10.85.0.1 68:d7:9a:1f:77:a1 -+ z_swadm route add 0.0.0.0/0 qsfp0/0 10.85.0.1 -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route add 0.0.0.0/0 qsfp0/0 10.85.0.1 -+ z_swadm link ls -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm link ls -Port/Link Media Speed FEC Enabled Link MAC -rear0/0 Copper 100G RS true Up a8:40:25:46:55:e3 -qsfp0/0 Copper 100G RS true Up a8:40:25:46:55:e4 -+ z_swadm addr list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm addr list -Link IPv4 IPv6 -rear0/0 fe80::aae1:deff:fe01:701c - fd00:99::1 -qsfp0/0 fe80::aae1:deff:fe01:701d -+ z_swadm route list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm route list -Subnet Port Link Gateway -0.0.0.0/0 qsfp0 0 10.85.0.1 -fd00:1122:3344:101::/64 rear0 0 fe80::aae1:deff:fe00:1 -+ z_swadm arp list -+ pfexec zlogin oxz_switch /opt/oxide/dendrite/bin/swadm arp list -host mac age -10.85.0.1 68:d7:9a:1f:77:a1 0s -fe80::aae1:deff:fe00:1 a8:e1:de:00:00:01 0s ----- - -While following -https://github.com/oxidecomputer/omicron/blob/main/docs/how-to-run.adoc[how-to-run.adoc] -to set up IPs, images, disks, instances etc, pay particular attention to the -following. 
- -- The address range in the IP pool should be on a subnet in your local network that - can NAT out to the Internet. -- Be sure to set up an external IP for the instance you create. - -You will need to set up `proxy-arp` if your VM external IP addresses are on the -same L2 network as the router or other non-oxide hosts: ----- -pfexec /opt/oxide/softnpu/stuff/scadm \ - --server /opt/oxide/softnpu/stuff/server \ - --client /opt/oxide/softnpu/stuff/client \ - standalone \ - add-proxy-arp \ - $ip_pool_start \ - $ip_pool_end \ - $softnpu_mac ----- - -By the end, we have an instance up and running with external connectivity -configured via boundary services: ----- -ry@korgano:~/omicron$ ~/propolis/target/release/propolis-cli --server fd00:1122:3344:101::c serial - -debian login: root -Linux debian 5.10.0-9-amd64 #1 SMP Debian 5.10.70-1 (2021-09-30) x86_64 - -The programs included with the Debian GNU/Linux system are free software; -the exact distribution terms for each program are described in the -individual files in /usr/share/doc/*/copyright. - -Debian GNU/Linux comes with ABSOLUTELY NO WARRANTY, to the extent -permitted by applicable law. -root@debian:~# host oxide.computer -oxide.computer has address 76.76.21.61 -oxide.computer has address 76.76.21.22 -oxide.computer mail is handled by 5 alt2.aspmx.l.google.com. -oxide.computer mail is handled by 1 aspmx.l.google.com. -oxide.computer mail is handled by 10 aspmx3.googlemail.com. -oxide.computer mail is handled by 5 alt1.aspmx.l.google.com. -oxide.computer mail is handled by 10 aspmx2.googlemail.com. ----- +using the `swadm` binary located in the `oxz_switch` zone. This is not necessary +under normal operation, as the switch state will be managed automatically by the +control plane and networking daemons. An example script is provided in +`tools/scrimlet/softnpu-init.sh`. This script should work without modification +for basic development setups, but feel free to tweak it as needed. diff --git a/docs/networking.adoc b/docs/networking.adoc index 2ebad97842..84c95832c0 100644 --- a/docs/networking.adoc +++ b/docs/networking.adoc @@ -669,13 +669,13 @@ fdb0:a840:2504:352::/64 fe80::aa40:25ff:fe05:c UG 2 640 cxgbe0 fd00:1122:3344:1::/64 fe80::aa40:25ff:fe05:c UG 2 2401 cxgbe0 fd00:1122:3344:1::/64 fe80::aa40:25ff:fe05:40c UG 2 51 cxgbe1 fdb0:a840:2504:352::/64 fe80::aa40:25ff:fe05:40c UG 2 11090 cxgbe1 -fd00:99::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 +fdb2:ceeb:3ab7:8c9d::1/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:1d1::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:393::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:191::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fdb0:a840:2504:353::/64 fe80::aa40:25ff:fe05:c UG 1 0 cxgbe0 fd00:1122:3344:101::/64 fe80::aa40:25ff:fe05:c UG 2 634578 cxgbe0 -fd00:99::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 +fd96:354:c1dc:606d::1/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 fd00:1122:3344:101::/64 fe80::aa40:25ff:fe05:40c UG 2 14094545 cxgbe1 fdb0:a840:2504:1d1::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 fdb0:a840:2504:353::/64 fe80::aa40:25ff:fe05:40c UG 1 0 cxgbe1 @@ -733,7 +733,11 @@ fd00:1122:3344:3::/64 fe80::aa40:25ff:fe05:c UG 2 2437 cxgbe0 Recall that cxgbe0 and cxgbe1 are connected to separate switches in the rack. So we're seeing the prefixes for the other sleds in this deployment. We have two routes to reach each sled: one through each switch. The gateway is the link-local address _of each switch_ on the corresponding link. 
One notable exception: the route for this same sled (`fd00:1122:3344:104::/64`) points to `underlay0`, the GZ's VNIC on the sled's underlay network. In this way, traffic leaving the GZ (whether it originated in this GZ or arrived from one of the switches) is directed to the sled's underlay network etherstub and from there to the right zone VNIC. -(Questions: Why does 107 only have one route? What are the `fd00:99::` routes?) +(Questions: Why does 107 only have one route?) + +The `fdb2:ceeb:3ab7:8c9d::1/64` and `fd96:354:c1dc:606d::1/64` routes are +randomly generated boundary services tunnel endpoint addresses. See RFD 404 for +more details. There are similar routes for other sleds' prefixes on the bootstrap network. diff --git a/illumos-utils/src/opte/mod.rs b/illumos-utils/src/opte/mod.rs index 710e783181..d06b6b26e5 100644 --- a/illumos-utils/src/opte/mod.rs +++ b/illumos-utils/src/opte/mod.rs @@ -29,26 +29,6 @@ pub use oxide_vpc::api::DhcpCfg; pub use oxide_vpc::api::Vni; use std::net::IpAddr; -fn default_boundary_services() -> BoundaryServices { - use oxide_vpc::api::Ipv6Addr; - use oxide_vpc::api::MacAddr; - // TODO-completeness: Don't hardcode any of these values. - // - // Boundary Services will be started on several Sidecars during rack - // setup, and those addresses and VNIs will need to be propagated here. - // See https://github.com/oxidecomputer/omicron/issues/1382 - let ip = Ipv6Addr::from([0xfd00, 0x99, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01]); - - // This MAC address is entirely irrelevant to the functionality of OPTE and - // the Oxide VPC. It's never used to actually forward packets. It only - // represents the "logical" destination of Boundary Services as a - // destination that OPTE as a virtual gateway forwards packets to as its - // next hop. - let mac = MacAddr::from_const([0xa8, 0x40, 0x25, 0xf9, 0x99, 0x99]); - let vni = Vni::new(99_u32).unwrap(); - BoundaryServices { ip, mac, vni } -} - /// Information about the gateway for an OPTE port #[derive(Debug, Clone, Copy)] #[allow(dead_code)] diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs index 3558ef1c78..c472996598 100644 --- a/illumos-utils/src/opte/port_manager.rs +++ b/illumos-utils/src/opte/port_manager.rs @@ -4,7 +4,6 @@ //! Manager for all OPTE ports on a Helios system -use crate::opte::default_boundary_services; use crate::opte::opte_firewall_rules; use crate::opte::params::DeleteVirtualNetworkInterfaceHost; use crate::opte::params::SetVirtualNetworkInterfaceHost; @@ -110,7 +109,6 @@ impl PortManager { let subnet = IpNetwork::from(nic.subnet); let vpc_subnet = IpCidr::from(subnet); let gateway = Gateway::from_subnet(&subnet); - let boundary_services = default_boundary_services(); // Describe the external IP addresses for this port. macro_rules! ip_cfg { @@ -219,7 +217,6 @@ impl PortManager { gateway_mac: MacAddr::from(gateway.mac.into_array()), vni, phys_ip: self.inner.underlay_ip.into(), - boundary_services, }; // Create the xde device. 
diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs index 23ee39415f..17e7a17444 100644 --- a/nexus/src/app/rack.rs +++ b/nexus/src/app/rack.rs @@ -33,7 +33,7 @@ use nexus_types::external_api::params::RouteConfig; use nexus_types::external_api::params::SwitchPortConfigCreate; use nexus_types::external_api::params::UninitializedSledId; use nexus_types::external_api::params::{ - AddressLotCreate, BgpPeerConfig, LoopbackAddressCreate, Route, SiloCreate, + AddressLotCreate, BgpPeerConfig, Route, SiloCreate, SwitchPortSettingsCreate, }; use nexus_types::external_api::shared::Baseboard; @@ -375,24 +375,7 @@ impl super::Nexus { let ipv4_block = AddressLotBlockCreate { first_address, last_address }; - let first_address = - IpAddr::from_str("fd00:99::1").map_err(|e| { - Error::internal_error(&format!( - "failed to parse `fd00:99::1` as `IpAddr`: {e}" - )) - })?; - - let last_address = - IpAddr::from_str("fd00:99::ffff").map_err(|e| { - Error::internal_error(&format!( - "failed to parse `fd00:99::ffff` as `IpAddr`: {e}" - )) - })?; - - let ipv6_block = - AddressLotBlockCreate { first_address, last_address }; - - let blocks = vec![ipv4_block, ipv6_block]; + let blocks = vec![ipv4_block]; let address_lot_params = AddressLotCreate { identity, kind, blocks }; @@ -412,24 +395,6 @@ impl super::Nexus { }, }?; - let address_lot_lookup = self - .address_lot_lookup( - &opctx, - NameOrId::Name(address_lot_name.clone()), - ) - .map_err(|e| { - Error::internal_error(&format!( - "unable to lookup infra address_lot: {e}" - )) - })?; - - let (.., authz_address_lot) = address_lot_lookup - .lookup_for(authz::Action::Modify) - .await - .map_err(|e| { - Error::internal_error(&format!("unable to retrieve authz_address_lot for infra address_lot: {e}")) - })?; - let mut bgp_configs = HashMap::new(); for bgp_config in &rack_network_config.bgp { @@ -542,43 +507,6 @@ impl super::Nexus { )) })?; - // TODO: #3603 Use separate address lots for loopback addresses and infra ips - let loopback_address_params = LoopbackAddressCreate { - address_lot: NameOrId::Name(address_lot_name.clone()), - rack_id, - switch_location: switch_location.clone(), - address: first_address, - mask: 64, - anycast: true, - }; - - if self - .loopback_address_lookup( - &opctx, - rack_id, - switch_location.clone().into(), - ipnetwork::IpNetwork::new( - loopback_address_params.address, - loopback_address_params.mask, - ) - .map_err(|_| { - Error::invalid_request("invalid loopback address") - })? - .into(), - )? - .lookup_for(authz::Action::Read) - .await - .is_err() - { - self.db_datastore - .loopback_address_create( - opctx, - &loopback_address_params, - None, - &authz_address_lot, - ) - .await?; - } let uplink_name = format!("default-uplink{idx}"); let name = Name::from_str(&uplink_name).unwrap(); diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 0d6bb52421..979ec54afd 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -15,6 +15,10 @@ use crate::app::sagas::{ use anyhow::Error; use db::datastore::SwitchPortSettingsCombinedResult; use dpd_client::types::PortId; +use mg_admin_client::types::{ + AddStaticRoute4Request, DeleteStaticRoute4Request, Prefix4, StaticRoute4, + StaticRoute4List, +}; use nexus_db_model::NETWORK_KEY; use nexus_db_queries::db::datastore::UpdatePrecondition; use nexus_db_queries::{authn, db}; @@ -52,6 +56,10 @@ declare_saga_actions! 
{ + spa_ensure_switch_port_settings - spa_undo_ensure_switch_port_settings } + ENSURE_SWITCH_ROUTES -> "ensure_switch_routes" { + + spa_ensure_switch_routes + - spa_undo_ensure_switch_routes + } ENSURE_SWITCH_PORT_UPLINK -> "ensure_switch_port_uplink" { + spa_ensure_switch_port_uplink - spa_undo_ensure_switch_port_uplink @@ -210,6 +218,82 @@ async fn spa_ensure_switch_port_settings( Ok(()) } +async fn spa_ensure_switch_routes( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let settings = sagactx + .lookup::("switch_port_settings")?; + + let mut rq = AddStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + for r in settings.routes { + let nexthop = match r.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.gw.ip() { + IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + + let mg_client: Arc = + select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; + + mg_client.inner.static_add_v4_route(&rq).await.map_err(|e| { + ActionError::action_failed(format!("mgd static route add {e}")) + })?; + + Ok(()) +} + +async fn spa_undo_ensure_switch_routes( + sagactx: NexusActionContext, +) -> Result<(), Error> { + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + let settings = sagactx + .lookup::("switch_port_settings")?; + + let mut rq = DeleteStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + + for r in settings.routes { + let nexthop = match r.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.gw.ip() { + IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + + let mg_client: Arc = + select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; + + mg_client.inner.static_remove_v4_route(&rq).await.map_err(|e| { + ActionError::action_failed(format!("mgd static route remove {e}")) + })?; + + Ok(()) +} + async fn spa_undo_ensure_switch_port_settings( sagactx: NexusActionContext, ) -> Result<(), Error> { diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs index 0d876f8159..ff79de8e8e 100644 --- a/nexus/src/app/sagas/switch_port_settings_clear.rs +++ b/nexus/src/app/sagas/switch_port_settings_clear.rs @@ -15,12 +15,16 @@ use crate::app::sagas::{ }; use anyhow::Error; use dpd_client::types::PortId; -use mg_admin_client::types::DeleteNeighborRequest; +use mg_admin_client::types::{ + AddStaticRoute4Request, DeleteNeighborRequest, DeleteStaticRoute4Request, + Prefix4, StaticRoute4, StaticRoute4List, +}; use nexus_db_model::NETWORK_KEY; use nexus_db_queries::authn; use nexus_db_queries::db::datastore::UpdatePrecondition; use omicron_common::api::external::{self, NameOrId, SwitchLocation}; use serde::{Deserialize, Serialize}; +use std::net::IpAddr; use std::str::FromStr; use std::sync::Arc; use steno::ActionError; @@ -43,6 +47,10 @@ declare_saga_actions! 
{ + spa_clear_switch_port_settings - spa_undo_clear_switch_port_settings } + CLEAR_SWITCH_PORT_ROUTES -> "clear_switch_port_routes" { + + spa_clear_switch_port_routes + - spa_undo_clear_switch_port_routes + } CLEAR_SWITCH_PORT_UPLINK -> "clear_switch_port_uplink" { + spa_clear_switch_port_uplink - spa_undo_clear_switch_port_uplink @@ -351,6 +359,108 @@ async fn spa_undo_clear_switch_port_bgp_settings( .await?) } +async fn spa_clear_switch_port_routes( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let nexus = osagactx.nexus(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let orig_port_settings_id = + sagactx.lookup::>("original_switch_port_settings_id")?; + + let id = match orig_port_settings_id { + Some(id) => id, + None => return Ok(()), + }; + + let settings = nexus + .switch_port_settings_get(&opctx, &NameOrId::Id(id)) + .await + .map_err(ActionError::action_failed)?; + + let mut rq = DeleteStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + + for r in settings.routes { + let nexthop = match r.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.gw.ip() { + IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + + let mg_client: Arc = + select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; + + mg_client.inner.static_remove_v4_route(&rq).await.map_err(|e| { + ActionError::action_failed(format!("mgd static route remove {e}")) + })?; + + Ok(()) +} + +async fn spa_undo_clear_switch_port_routes( + sagactx: NexusActionContext, +) -> Result<(), Error> { + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let nexus = osagactx.nexus(); + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let orig_port_settings_id = + sagactx.lookup::>("original_switch_port_settings_id")?; + + let id = match orig_port_settings_id { + Some(id) => id, + None => return Ok(()), + }; + + let settings = nexus + .switch_port_settings_get(&opctx, &NameOrId::Id(id)) + .await + .map_err(ActionError::action_failed)?; + + let mut rq = AddStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + + for r in settings.routes { + let nexthop = match r.gw.ip() { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.gw.ip() { + IpAddr::V4(v4) => Prefix4 { value: v4, length: r.gw.prefix() }, + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + + let mg_client: Arc = + select_mg_client(&sagactx, &opctx, params.switch_port_id).await?; + + mg_client.inner.static_add_v4_route(&rq).await.map_err(|e| { + ActionError::action_failed(format!("mgd static route remove {e}")) + })?; + + Ok(()) +} + async fn spa_clear_switch_port_bootstore_network_settings( sagactx: NexusActionContext, ) -> Result<(), ActionError> { diff --git a/nexus/src/app/sagas/switch_port_settings_common.rs b/nexus/src/app/sagas/switch_port_settings_common.rs index 9ef23ebf44..9c710d837d 100644 --- a/nexus/src/app/sagas/switch_port_settings_common.rs +++ b/nexus/src/app/sagas/switch_port_settings_common.rs @@ -1,12 +1,14 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. 
If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + use super::NexusActionContext; use crate::app::map_switch_zone_addrs; use crate::Nexus; use db::datastore::SwitchPortSettingsCombinedResult; use dpd_client::types::{ LinkCreate, LinkId, LinkSettings, PortFec, PortSettings, PortSpeed, - RouteSettingsV4, RouteSettingsV6, }; -use dpd_client::{Ipv4Cidr, Ipv6Cidr}; use internal_dns::ServiceName; use ipnetwork::IpNetwork; use mg_admin_client::types::Prefix4; @@ -85,41 +87,6 @@ pub(crate) fn api_to_dpd_port_settings( ); } - for r in &settings.routes { - match &r.dst { - IpNetwork::V4(n) => { - let gw = match r.gw.ip() { - IpAddr::V4(gw) => gw, - IpAddr::V6(_) => { - return Err( - "IPv4 destination cannot have IPv6 nexthop".into() - ) - } - }; - dpd_port_settings.v4_routes.insert( - Ipv4Cidr { prefix: n.ip(), prefix_len: n.prefix() } - .to_string(), - vec![RouteSettingsV4 { link_id: link_id.0, nexthop: gw }], - ); - } - IpNetwork::V6(n) => { - let gw = match r.gw.ip() { - IpAddr::V6(gw) => gw, - IpAddr::V4(_) => { - return Err( - "IPv6 destination cannot have IPv4 nexthop".into() - ) - } - }; - dpd_port_settings.v6_routes.insert( - Ipv6Cidr { prefix: n.ip(), prefix_len: n.prefix() } - .to_string(), - vec![RouteSettingsV6 { link_id: link_id.0, nexthop: gw }], - ); - } - } - } - Ok(dpd_port_settings) } diff --git a/package-manifest.toml b/package-manifest.toml index fa6bba7a96..3525b121e4 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -438,10 +438,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "38851c79c85d53e997db748520fb27c82299ce7e58a550e35646a548498f1271" +source.sha256 = "1cf9cb514d11275d93c4e4760500539a778f23039374508ca07528fcaf0ba3f8" output.type = "tarball" [package.mg-ddm] @@ -454,10 +454,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "8cd94e9a6f6175081ce78f0281085a08a5306cde453d8e21deb28050945b1d88" +source.sha256 = "a9b959b4287ac2ec7b45ed99ccd00e1f134b8e3d501099cd669cee5de9525ae3" output.type = "zone" output.intermediate_only = true @@ -469,10 +469,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "2fd39b75df696961e5ea190c7d74dd91f4849cd3" +source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "802636775fa77dc6eec193e65fde87e403f6a11531745d47ef5e7ff13b242890" +source.sha256 = "ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" output.type = "zone" output.intermediate_only = true diff --git a/sled-agent/src/bootstrap/early_networking.rs b/sled-agent/src/bootstrap/early_networking.rs index 75958a2f37..acad2b8d3c 100644 --- a/sled-agent/src/bootstrap/early_networking.rs +++ b/sled-agent/src/bootstrap/early_networking.rs @@ -6,21 +6,23 @@ use anyhow::{anyhow, Context}; use bootstore::schemes::v0 as bootstore; -use ddm_admin_client::{Client as DdmAdminClient, DdmError}; -use dpd_client::types::{Ipv6Entry, RouteSettingsV6}; +use ddm_admin_client::DdmError; use dpd_client::types::{ - LinkCreate, LinkId, LinkSettings, PortId, PortSettings, RouteSettingsV4, + LinkCreate, LinkId, LinkSettings, PortId, PortSettings, }; use dpd_client::Client as DpdClient; use futures::future; use gateway_client::Client as MgsClient; use internal_dns::resolver::{ResolveError, Resolver as DnsResolver}; use internal_dns::ServiceName; -use ipnetwork::{IpNetwork, Ipv6Network}; -use mg_admin_client::types::{ApplyRequest, BgpPeerConfig, Prefix4}; +use ipnetwork::Ipv6Network; +use mg_admin_client::types::{ + AddStaticRoute4Request, ApplyRequest, BgpPeerConfig, Prefix4, StaticRoute4, + StaticRoute4List, +}; use mg_admin_client::Client as MgdClient; -use omicron_common::address::{Ipv6Subnet, MGD_PORT, MGS_PORT}; -use omicron_common::address::{DDMD_PORT, DENDRITE_PORT}; +use omicron_common::address::DENDRITE_PORT; +use omicron_common::address::{MGD_PORT, MGS_PORT}; use omicron_common::api::internal::shared::{ BgpConfig, PortConfigV1, PortFec, PortSpeed, RackNetworkConfig, RackNetworkConfigV1, SwitchLocation, UplinkConfig, @@ -38,7 +40,6 @@ use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6}; use std::time::{Duration, Instant}; use thiserror::Error; -static BOUNDARY_SERVICES_ADDR: &str = "fd00:99::1"; const BGP_SESSION_RESOLUTION: u64 = 100; /// Errors that can occur during early network setup @@ -421,22 +422,11 @@ impl<'a> EarlyNetworkSetup<'a> { // configure uplink for each requested uplink in configuration that // matches our switch_location for port_config in &our_ports { - let (ipv6_entry, dpd_port_settings, port_id) = + let (dpd_port_settings, port_id) = self.build_port_config(port_config)?; self.wait_for_dendrite(&dpd).await; - info!( - self.log, - "Configuring boundary services loopback address on switch"; - "config" => #?ipv6_entry - ); - dpd.loopback_ipv6_create(&ipv6_entry).await.map_err(|e| { - EarlyNetworkSetupError::Dendrite(format!( - "unable to create inital switch loopback address: {e}" - )) - })?; - info!( self.log, "Configuring default uplink on switch"; @@ -453,13 +443,6 @@ impl<'a> EarlyNetworkSetup<'a> { "unable to apply uplink port configuration: {e}" )) })?; - - info!(self.log, "advertising boundary services loopback address"); - - let ddmd_addr = - SocketAddrV6::new(switch_zone_underlay_ip, DDMD_PORT, 0, 0); - let ddmd_client = DdmAdminClient::new(&self.log, ddmd_addr)?; - ddmd_client.advertise_prefix(Ipv6Subnet::new(ipv6_entry.addr)); } let mgd = MgdClient::new( @@ -548,22 +531,40 @@ impl<'a> EarlyNetworkSetup<'a> { } } + // Iterate through ports and apply static routing config. 
+ let mut rq = AddStaticRoute4Request { + routes: StaticRoute4List { list: Vec::new() }, + }; + for port in &our_ports { + for r in &port.routes { + let nexthop = match r.nexthop { + IpAddr::V4(v4) => v4, + IpAddr::V6(_) => continue, + }; + let prefix = match r.destination.ip() { + IpAddr::V4(v4) => { + Prefix4 { value: v4, length: r.destination.prefix() } + } + IpAddr::V6(_) => continue, + }; + let sr = StaticRoute4 { nexthop, prefix }; + rq.routes.list.push(sr); + } + } + mgd.inner.static_add_v4_route(&rq).await.map_err(|e| { + EarlyNetworkSetupError::BgpConfigurationError(format!( + "static routing configuration failed: {e}", + )) + })?; + Ok(our_ports) } fn build_port_config( &self, port_config: &PortConfigV1, - ) -> Result<(Ipv6Entry, PortSettings, PortId), EarlyNetworkSetupError> { + ) -> Result<(PortSettings, PortId), EarlyNetworkSetupError> { info!(self.log, "Building Port Configuration"); - let ipv6_entry = Ipv6Entry { - addr: BOUNDARY_SERVICES_ADDR.parse().map_err(|e| { - EarlyNetworkSetupError::BadConfig(format!( - "failed to parse `BOUNDARY_SERVICES_ADDR` as `Ipv6Addr`: {e}" - )) - })?, - tag: OMICRON_DPD_TAG.into(), - }; let mut dpd_port_settings = PortSettings { links: HashMap::new(), v4_routes: HashMap::new(), @@ -600,26 +601,7 @@ impl<'a> EarlyNetworkSetup<'a> { )) })?; - for r in &port_config.routes { - if let (IpNetwork::V4(dst), IpAddr::V4(nexthop)) = - (r.destination, r.nexthop) - { - dpd_port_settings.v4_routes.insert( - dst.to_string(), - vec![RouteSettingsV4 { link_id: link_id.0, nexthop }], - ); - } - if let (IpNetwork::V6(dst), IpAddr::V6(nexthop)) = - (r.destination, r.nexthop) - { - dpd_port_settings.v6_routes.insert( - dst.to_string(), - vec![RouteSettingsV6 { link_id: link_id.0, nexthop }], - ); - } - } - - Ok((ipv6_entry, dpd_port_settings, port_id)) + Ok((dpd_port_settings, port_id)) } async fn wait_for_dendrite(&self, dpd: &DpdClient) { diff --git a/smf/sled-agent/non-gimlet/config-rss.toml b/smf/sled-agent/non-gimlet/config-rss.toml index fdc81c0f8f..12cb2afd24 100644 --- a/smf/sled-agent/non-gimlet/config-rss.toml +++ b/smf/sled-agent/non-gimlet/config-rss.toml @@ -103,7 +103,7 @@ bgp = [] # Routes associated with this port. routes = [{nexthop = "192.168.1.199", destination = "0.0.0.0/0"}] # Addresses associated with this port. -addresses = ["192.168.1.30/32"] +addresses = ["192.168.1.30/24"] # Name of the uplink port. This should always be "qsfp0" when using softnpu. port = "qsfp0" # The speed of this port. diff --git a/tools/ci_check_opte_ver.sh b/tools/ci_check_opte_ver.sh index 26382690e1..7f05ec1f36 100755 --- a/tools/ci_check_opte_ver.sh +++ b/tools/ci_check_opte_ver.sh @@ -1,6 +1,11 @@ #!/bin/bash set -euo pipefail +source tools/opte_version_override +if [[ "x$OPTE_COMMIT" != "x" ]]; then + exit 0 +fi + # Grab all the oxidecomputer/opte dependencies' revisions readarray -t opte_deps_revs < <(toml get Cargo.toml workspace.dependencies | jq -r 'to_entries | .[] | select(.value.git? | contains("oxidecomputer/opte")?) | .value.rev') OPTE_REV="${opte_deps_revs[0]}" diff --git a/tools/install_opte.sh b/tools/install_opte.sh index 20a33b05a5..b572c305a7 100755 --- a/tools/install_opte.sh +++ b/tools/install_opte.sh @@ -97,3 +97,13 @@ if [[ "$RC" -ne 0 ]]; then echo "The \`opteadm\` administration tool is not on your path." echo "You may add \"/opt/oxide/opte/bin\" to your path to access it." 
fi + +source $OMICRON_TOP/tools/opte_version_override + +if [[ "x$OPTE_COMMIT" != "x" ]]; then + set +x + curl -fOL https://buildomat.eng.oxide.computer/public/file/oxidecomputer/opte/module/$OPTE_COMMIT/xde + pfexec rem_drv xde || true + pfexec mv xde /kernel/drv/amd64/xde + pfexec add_drv xde || true +fi diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index 37c099d7f5..be8772b7e6 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="2fd39b75df696961e5ea190c7d74dd91f4849cd3" -SHA2="9737906555a60911636532f00f1dc2866dc7cd6553beb106e9e57beabad41cdf" +COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +SHA2="0b0dbc2f8bbc5d2d9be92d64c4865f8f9335355aae62f7de9f67f81dfb3f1803" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 329c05fc42..6bf1999c61 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="2fd39b75df696961e5ea190c7d74dd91f4849cd3" -SHA2="931efa310d972b1f8afba2308751fc6a2035afbaebba77b3a40a8358c123ba3c" +COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +SHA2="7618511f905d26394ef7c552339dd78835ce36a6def0d85b05b6d1e363a5e7b4" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index 1d3cf98f94..b5fe84b662 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="802636775fa77dc6eec193e65fde87e403f6a11531745d47ef5e7ff13b242890" -MGD_LINUX_SHA256="1bcadfd700902e3640843e0bb53d3defdbcd8d86c3279efa0953ae8d6437e2b0" \ No newline at end of file +CIDL_SHA256="ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" +MGD_LINUX_SHA256="93331c1001e3aa506a8c1b83346abba1995e489910bff2c94a86730b96617a34" \ No newline at end of file diff --git a/tools/opte_version b/tools/opte_version index 82d79dcf28..0a04873e11 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.27.214 +0.28.215 diff --git a/tools/opte_version_override b/tools/opte_version_override new file mode 100644 index 0000000000..80a6529b24 --- /dev/null +++ b/tools/opte_version_override @@ -0,0 +1,5 @@ +#!/bin/bash + +# only set this if you want to override the version of opte/xde installed by the +# install_opte.sh script +OPTE_COMMIT="" From cc643045191d92e539faa839d662a4198d81d718 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Wed, 24 Jan 2024 21:05:37 +0000 Subject: [PATCH 25/49] Live attach/detach of external IPs (#4694) This PR adds new endpoints to attach and detach external IPs to/from an individual instance at runtime, when instances are either stopped or started. These new endpoints are: * POST `/v1/floating-ips/{floating_ip}/attach` * POST `/v1/floating-ips/{floating_ip}/detach` * POST `/v1/instances/{instance}/external-ips/ephemeral` * DELETE `/v1/instances/{instance}/external-ips/ephemeral` These follow and enforce the same rules as external IPs registered during instance creation: at most one ephemeral IP, and at most 32 external IPs total. `/v1/floating-ips/{floating_ip}/attach` includes a `kind` field to account for future API resources which a FIP may be bound to -- such as internet gateways, load balancers, and services. ## Interaction with other instance lifecycle changes and sagas Both external IP modify sagas begin with an atomic update to external IP attach state conditioned on $\mathit{state}\in[ \mathit{started},\mathit{stopped}]$. 
As a result, we know that an external IP saga can only ever start before any
other instance state change occurs. We then only need to think about how these
other sagas/events must behave when called *during* an attach/detach, keeping
in mind that these are worst-case orderings: attach/detach are likely to
complete quickly.

### Instance start & migrate

Both of these sagas alter an instance's functional sled ID, which controls
whether NAT entry insertion and OPTE port state updates are performed. If an
IP attach/detach is incomplete when either saga reaches
`instance_ensure_dpd_config` or `instance_ensure_registered` (e.g., any IP
associated with the target instance is in the attaching/detaching state), the
start/migrate will unwind with an HTTP 503. Generally, neither should undo in
practice, since IP attach/detach are fast operations -- particularly when an
instance is already stopped. This gating exists solely to guarantee that only
one saga is accessing a given external IP at a time, and that the update
target remains unchanged.

### Instance stop & delete

These operations are either not sagaized (stop) or cannot unwind (delete),
and so we cannot block them using IP attach state. IP attach/detach will
unwind if a given sled-agent is no longer responsible for an instance.
Instance delete will force-detach IP addresses bound to an instance, and if
this is seen then IP attach will deliberately unwind to potentially clean up
NAT state. OPTE/DPD undo operations are best-effort in such a case to prevent
stuck sagas.

Instance stop and IP attach may interleave such that the latter adds
additional NAT entries after other network state is cleared. Because we
cannot unwind in this case, `instance_ensure_dpd_config` will now attempt to
remove leftover conflicting RPW entries if they are detected, since we know
they are a deviation from intended state.

## Additional/supporting changes

* Pool/floating IP specifiers in instance create now take `NameOrId`;
  parameter names changed to match.
* External IP create/bind in instance create no longer double-resolves names
  on saga unwind.
* `views::ExternalIp` can now contain a `FloatingIp` body.
* DPD NAT insert/remove functions now perform a single rule update via ID
  instead of an index into the EIP list -- index-based updates were unstable
  under live addition/removal.
* NAT RPW ensure is now more authoritative, and will remove conflicting
  entries if an initial insert fails.
* Pool `NameOrId` resolution for floating IP allocation is pulled up from
  `Datastore` into `Nexus`.

---

Closes #4630 and #4628.
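To make the gating described above concrete, here is a minimal sketch of the
attach-state machine. `IpAttachState` mirrors the new model type added by
this PR; everything else -- `InstanceState`, `begin_attach`, and the
in-memory compare-and-swap -- is illustrative only, standing in for the
conditional database update the sagas actually perform as their first node:

```rust
// A minimal model of the saga gating -- NOT the Nexus datastore code, which
// performs this compare-and-swap as a single conditional UPDATE in CRDB.
#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq)]
enum InstanceState {
    Stopped,
    Started,
    // Transitional states (starting, migrating, ...) count as "unstable".
    Starting,
}

#[allow(dead_code)]
#[derive(Clone, Copy, Debug, PartialEq)]
enum IpAttachState {
    Detached,
    Attaching,
    Attached,
    Detaching,
}

/// First saga node: atomically move Detached -> Attaching, but only while
/// the instance is in a stable (started/stopped) state. Any other
/// combination fails the node, so the saga unwinds before touching any
/// sled-agent, OPTE, or DPD state.
fn begin_attach(
    instance: InstanceState,
    ip: &mut IpAttachState,
) -> Result<(), &'static str> {
    match (instance, *ip) {
        (
            InstanceState::Stopped | InstanceState::Started,
            IpAttachState::Detached,
        ) => {
            *ip = IpAttachState::Attaching;
            Ok(())
        }
        (InstanceState::Starting, _) => {
            Err("instance state is unstable; retry later (HTTP 503)")
        }
        _ => Err("external IP is already attached or mid-operation"),
    }
}

fn main() {
    let mut ip = IpAttachState::Detached;

    // Saga A claims the IP while the instance is stable.
    assert!(begin_attach(InstanceState::Stopped, &mut ip).is_ok());

    // Saga B, racing with A, fails its first node and unwinds immediately:
    // only one saga may operate on a given external IP at a time.
    assert!(begin_attach(InstanceState::Stopped, &mut ip).is_err());

    // A's final node records completion; detach walks the reverse path
    // (Attached -> Detaching -> Detached) under the same rules.
    ip = IpAttachState::Attached;
    assert_eq!(ip, IpAttachState::Attached);
}
```

Because the real check-and-set happens in one database statement, a saga
either owns the IP for its whole lifetime or fails its first node and
unwinds without having changed any networking state.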
--- dev-tools/omdb/src/bin/omdb/db.rs | 3 + end-to-end-tests/src/instance_launch.rs | 13 +- illumos-utils/src/opte/illumos.rs | 10 + illumos-utils/src/opte/non_illumos.rs | 10 + illumos-utils/src/opte/port_manager.rs | 116 +++ nexus/db-model/src/external_ip.rs | 120 ++- nexus/db-model/src/instance.rs | 17 +- nexus/db-model/src/instance_state.rs | 6 + nexus/db-model/src/ipv4_nat_entry.rs | 3 +- nexus/db-model/src/macaddr.rs | 13 +- nexus/db-model/src/schema.rs | 1 + nexus/db-queries/src/db/datastore/disk.rs | 4 +- .../src/db/datastore/external_ip.rs | 707 +++++++++++++++--- nexus/db-queries/src/db/datastore/instance.rs | 102 +-- .../src/db/datastore/ipv4_nat_entry.rs | 29 +- nexus/db-queries/src/db/datastore/mod.rs | 26 +- nexus/db-queries/src/db/pool_connection.rs | 1 + .../db-queries/src/db/queries/external_ip.rs | 131 +++- nexus/src/app/external_ip.rs | 84 ++- nexus/src/app/instance.rs | 78 ++ nexus/src/app/instance_network.rs | 384 ++++++---- nexus/src/app/mod.rs | 4 +- nexus/src/app/sagas/instance_common.rs | 336 ++++++++- nexus/src/app/sagas/instance_create.rs | 141 +++- nexus/src/app/sagas/instance_delete.rs | 2 +- nexus/src/app/sagas/instance_ip_attach.rs | 583 +++++++++++++++ nexus/src/app/sagas/instance_ip_detach.rs | 551 ++++++++++++++ nexus/src/app/sagas/instance_start.rs | 27 +- nexus/src/app/sagas/mod.rs | 8 + nexus/src/external_api/http_entrypoints.rs | 140 ++++ nexus/test-utils/src/resource_helpers.rs | 4 +- nexus/tests/integration_tests/disks.rs | 1 + nexus/tests/integration_tests/endpoints.rs | 62 +- nexus/tests/integration_tests/external_ips.rs | 490 +++++++++++- nexus/tests/integration_tests/instances.rs | 49 +- .../integration_tests/subnet_allocation.rs | 1 + nexus/tests/output/nexus_tags.txt | 4 + nexus/types/src/external_api/params.rs | 45 +- nexus/types/src/external_api/shared.rs | 4 +- nexus/types/src/external_api/views.rs | 53 +- openapi/nexus.json | 363 ++++++++- openapi/sled-agent.json | 115 +++ schema/crdb/25.0.0/up01.sql | 6 + schema/crdb/25.0.0/up02.sql | 4 + schema/crdb/25.0.0/up03.sql | 7 + schema/crdb/25.0.0/up04.sql | 7 + schema/crdb/25.0.0/up05.sql | 2 + schema/crdb/25.0.0/up06.sql | 4 + schema/crdb/25.0.0/up07.sql | 4 + schema/crdb/25.0.0/up08.sql | 2 + schema/crdb/25.0.0/up09.sql | 4 + schema/crdb/dbinit.sql | 38 +- sled-agent/src/http_entrypoints.rs | 36 +- sled-agent/src/instance.rs | 154 +++- sled-agent/src/instance_manager.rs | 37 + sled-agent/src/params.rs | 10 + sled-agent/src/sim/http_entrypoints.rs | 41 +- sled-agent/src/sim/sled_agent.rs | 64 +- sled-agent/src/sled_agent.rs | 40 +- 59 files changed, 4776 insertions(+), 525 deletions(-) create mode 100644 nexus/src/app/sagas/instance_ip_attach.rs create mode 100644 nexus/src/app/sagas/instance_ip_detach.rs create mode 100644 schema/crdb/25.0.0/up01.sql create mode 100644 schema/crdb/25.0.0/up02.sql create mode 100644 schema/crdb/25.0.0/up03.sql create mode 100644 schema/crdb/25.0.0/up04.sql create mode 100644 schema/crdb/25.0.0/up05.sql create mode 100644 schema/crdb/25.0.0/up06.sql create mode 100644 schema/crdb/25.0.0/up07.sql create mode 100644 schema/crdb/25.0.0/up08.sql create mode 100644 schema/crdb/25.0.0/up09.sql diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 23e9206506..a465183351 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -44,6 +44,7 @@ use nexus_db_model::ExternalIp; use nexus_db_model::HwBaseboardId; use nexus_db_model::Instance; use nexus_db_model::InvCollection; +use 
nexus_db_model::IpAttachState; use nexus_db_model::Project; use nexus_db_model::Region; use nexus_db_model::RegionSnapshot; @@ -1705,6 +1706,7 @@ async fn cmd_db_eips( ip: ipnetwork::IpNetwork, ports: PortRange, kind: String, + state: IpAttachState, owner: Owner, } @@ -1789,6 +1791,7 @@ async fn cmd_db_eips( first: ip.first_port.into(), last: ip.last_port.into(), }, + state: ip.state, kind: format!("{:?}", ip.kind), owner, }; diff --git a/end-to-end-tests/src/instance_launch.rs b/end-to-end-tests/src/instance_launch.rs index b3d1406070..2efd66bf91 100644 --- a/end-to-end-tests/src/instance_launch.rs +++ b/end-to-end-tests/src/instance_launch.rs @@ -5,9 +5,9 @@ use anyhow::{ensure, Context as _, Result}; use async_trait::async_trait; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; use oxide_client::types::{ - ByteCount, DiskCreate, DiskSource, ExternalIpCreate, InstanceCpuCount, - InstanceCreate, InstanceDiskAttachment, InstanceNetworkInterfaceAttachment, - SshKeyCreate, + ByteCount, DiskCreate, DiskSource, ExternalIp, ExternalIpCreate, + InstanceCpuCount, InstanceCreate, InstanceDiskAttachment, + InstanceNetworkInterfaceAttachment, SshKeyCreate, }; use oxide_client::{ClientDisksExt, ClientInstancesExt, ClientSessionExt}; use russh::{ChannelMsg, Disconnect}; @@ -70,7 +70,7 @@ async fn instance_launch() -> Result<()> { name: disk_name.clone(), }], network_interfaces: InstanceNetworkInterfaceAttachment::Default, - external_ips: vec![ExternalIpCreate::Ephemeral { pool_name: None }], + external_ips: vec![ExternalIpCreate::Ephemeral { pool: None }], user_data: String::new(), start: true, }) @@ -87,7 +87,10 @@ async fn instance_launch() -> Result<()> { .items .first() .context("no external IPs")? - .ip; + .clone(); + let ExternalIp::Ephemeral { ip: ip_addr } = ip_addr else { + anyhow::bail!("IP bound to instance was not ephemeral as required.") + }; eprintln!("instance external IP: {}", ip_addr); // poll serial for login prompt, waiting 5 min max diff --git a/illumos-utils/src/opte/illumos.rs b/illumos-utils/src/opte/illumos.rs index 88e8d343b1..527172b976 100644 --- a/illumos-utils/src/opte/illumos.rs +++ b/illumos-utils/src/opte/illumos.rs @@ -11,6 +11,7 @@ use omicron_common::api::internal::shared::NetworkInterfaceKind; use opte_ioctl::OpteHdl; use slog::info; use slog::Logger; +use std::net::IpAddr; #[derive(thiserror::Error, Debug)] pub enum Error { @@ -46,6 +47,15 @@ pub enum Error { #[error("Tried to release non-existent port ({0}, {1:?})")] ReleaseMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error("Tried to update external IPs on non-existent port ({0}, {1:?})")] + ExternalIpUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind), + + #[error("Could not find Primary NIC")] + NoPrimaryNic, + + #[error("Can't attach new ephemeral IP {0}, currently have {1}")] + ImplicitEphemeralIpDetach(IpAddr, IpAddr), } /// Delete all xde devices on the system. 
diff --git a/illumos-utils/src/opte/non_illumos.rs b/illumos-utils/src/opte/non_illumos.rs
index ccd4990d5f..bf61249fb1 100644
--- a/illumos-utils/src/opte/non_illumos.rs
+++ b/illumos-utils/src/opte/non_illumos.rs
@@ -8,6 +8,7 @@ use slog::Logger;

 use crate::addrobj::AddrObject;
 use omicron_common::api::internal::shared::NetworkInterfaceKind;
+use std::net::IpAddr;

 #[derive(thiserror::Error, Debug)]
 pub enum Error {
@@ -16,6 +17,15 @@ pub enum Error {

     #[error("Tried to release non-existent port ({0}, {1:?})")]
     ReleaseMissingPort(uuid::Uuid, NetworkInterfaceKind),
+
+    #[error("Tried to update external IPs on non-existent port ({0}, {1:?})")]
+    ExternalIpUpdateMissingPort(uuid::Uuid, NetworkInterfaceKind),
+
+    #[error("Could not find Primary NIC")]
+    NoPrimaryNic,
+
+    #[error("Can't attach new ephemeral IP {0}, currently have {1}")]
+    ImplicitEphemeralIpDetach(IpAddr, IpAddr),
 }

 pub fn initialize_xde_driver(
diff --git a/illumos-utils/src/opte/port_manager.rs b/illumos-utils/src/opte/port_manager.rs
index c472996598..2b2f622070 100644
--- a/illumos-utils/src/opte/port_manager.rs
+++ b/illumos-utils/src/opte/port_manager.rs
@@ -28,6 +28,7 @@ use oxide_vpc::api::MacAddr;
 use oxide_vpc::api::RouterTarget;
 use oxide_vpc::api::SNat4Cfg;
 use oxide_vpc::api::SNat6Cfg;
+use oxide_vpc::api::SetExternalIpsReq;
 use oxide_vpc::api::VpcCfg;
 use slog::debug;
 use slog::error;
@@ -398,6 +399,121 @@ impl PortManager {
         Ok((port, ticket))
     }

+    /// Ensure external IPs for an OPTE port are up to date.
+    #[cfg_attr(not(target_os = "illumos"), allow(unused_variables))]
+    pub fn external_ips_ensure(
+        &self,
+        nic_id: Uuid,
+        nic_kind: NetworkInterfaceKind,
+        source_nat: Option<SourceNatConfig>,
+        ephemeral_ip: Option<IpAddr>,
+        floating_ips: &[IpAddr],
+    ) -> Result<(), Error> {
+        let ports = self.inner.ports.lock().unwrap();
+        let port = ports.get(&(nic_id, nic_kind)).ok_or_else(|| {
+            Error::ExternalIpUpdateMissingPort(nic_id, nic_kind)
+        })?;
+
+        // XXX: duplicates parts of macro logic in `create_port`.
+        macro_rules! ext_ip_cfg {
+            ($ip:expr, $log_prefix:literal, $ip_t:path, $cidr_t:path,
+             $ipcfg_e:path, $ipcfg_t:ident, $snat_t:ident) => {{
+                let snat = match source_nat {
+                    Some(snat) => {
+                        let $ip_t(snat_ip) = snat.ip else {
+                            error!(
+                                self.inner.log,
+                                concat!($log_prefix, " SNAT config");
+                                "snat_ip" => ?snat.ip,
+                            );
+                            return Err(Error::InvalidPortIpConfig);
+                        };
+                        let ports = snat.first_port..=snat.last_port;
+                        Some($snat_t { external_ip: snat_ip.into(), ports })
+                    }
+                    None => None,
+                };
+                let ephemeral_ip = match ephemeral_ip {
+                    Some($ip_t(ip)) => Some(ip.into()),
+                    Some(_) => {
+                        error!(
+                            self.inner.log,
+                            concat!($log_prefix, " ephemeral IP");
+                            "ephemeral_ip" => ?ephemeral_ip,
+                        );
+                        return Err(Error::InvalidPortIpConfig);
+                    }
+                    None => None,
+                };
+                let floating_ips: Vec<_> = floating_ips
+                    .iter()
+                    .copied()
+                    .map(|ip| match ip {
+                        $ip_t(ip) => Ok(ip.into()),
+                        _ => {
+                            error!(
+                                self.inner.log,
+                                concat!($log_prefix, " floating IP");
+                                "floating_ip" => ?ip,
+                            );
+                            Err(Error::InvalidPortIpConfig)
+                        }
+                    })
+                    .collect::<Result<Vec<_>, _>>()?;
+
+                ExternalIpCfg {
+                    ephemeral_ip,
+                    snat,
+                    floating_ips,
+                }
+            }}
+        }
+
+        // TODO-completeness: support dual-stack. We'll need to explicitly store
+        // a v4 and a v6 ephemeral IP + SNat + gateway + ... in `InstanceInner`
+        // to have enough info to build both.
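+        // Build the config for whichever family the port's gateway uses;
+        // the other family stays `None` in the `SetExternalIpsReq` below.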
+ let mut v4_cfg = None; + let mut v6_cfg = None; + match port.gateway().ip { + IpAddr::V4(_) => { + v4_cfg = Some(ext_ip_cfg!( + ip, + "Expected IPv4", + IpAddr::V4, + IpCidr::Ip4, + IpCfg::Ipv4, + Ipv4Cfg, + SNat4Cfg + )) + } + IpAddr::V6(_) => { + v6_cfg = Some(ext_ip_cfg!( + ip, + "Expected IPv6", + IpAddr::V6, + IpCidr::Ip6, + IpCfg::Ipv6, + Ipv6Cfg, + SNat6Cfg + )) + } + } + + let req = SetExternalIpsReq { + port_name: port.name().into(), + external_ips_v4: v4_cfg, + external_ips_v6: v6_cfg, + }; + + #[cfg(target_os = "illumos")] + let hdl = opte_ioctl::OpteHdl::open(opte_ioctl::OpteHdl::XDE_CTL)?; + + #[cfg(target_os = "illumos")] + hdl.set_external_ips(&req)?; + + Ok(()) + } + #[cfg(target_os = "illumos")] pub fn firewall_rules_ensure( &self, diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index e95185658f..1e9def4182 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -23,6 +23,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadata; use serde::Deserialize; use serde::Serialize; +use sled_agent_client::types::InstanceExternalIpBody; use std::convert::TryFrom; use std::net::IpAddr; use uuid::Uuid; @@ -32,7 +33,7 @@ impl_enum_type!( #[diesel(postgres_type(name = "ip_kind", schema = "public"))] pub struct IpKindEnum; - #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq)] + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Deserialize, Serialize)] #[diesel(sql_type = IpKindEnum)] pub enum IpKind; @@ -41,6 +42,42 @@ impl_enum_type!( Floating => b"floating" ); +impl_enum_type!( + #[derive(SqlType, Debug, Clone, Copy, QueryId)] + #[diesel(postgres_type(name = "ip_attach_state"))] + pub struct IpAttachStateEnum; + + #[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, PartialEq, Deserialize, Serialize)] + #[diesel(sql_type = IpAttachStateEnum)] + pub enum IpAttachState; + + Detached => b"detached" + Attached => b"attached" + Detaching => b"detaching" + Attaching => b"attaching" +); + +impl std::fmt::Display for IpAttachState { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + IpAttachState::Detached => "Detached", + IpAttachState::Attached => "Attached", + IpAttachState::Detaching => "Detaching", + IpAttachState::Attaching => "Attaching", + }) + } +} + +impl std::fmt::Display for IpKind { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(match self { + IpKind::Floating => "floating", + IpKind::Ephemeral => "ephemeral", + IpKind::SNat => "SNAT", + }) + } +} + /// The main model type for external IP addresses for instances /// and externally-facing services. /// @@ -51,7 +88,9 @@ impl_enum_type!( /// addresses and port ranges, while source NAT IPs are not discoverable in the /// API at all, and only provide outbound connectivity to instances, not /// inbound. 
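+/// An external IP's `state` column drives the attach/detach sagas: rows in
+/// `attaching`/`detaching` are in-flight, so concurrent operations on the
+/// same IP either fail cleanly or retry.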
-#[derive(Debug, Clone, Selectable, Queryable, Insertable)] +#[derive( + Debug, Clone, Selectable, Queryable, Insertable, Deserialize, Serialize, +)] #[diesel(table_name = external_ip)] pub struct ExternalIp { pub id: Uuid, @@ -76,6 +115,7 @@ pub struct ExternalIp { pub last_port: SqlU16, // Only Some(_) for instance Floating IPs pub project_id: Option, + pub state: IpAttachState, } /// A view type constructed from `ExternalIp` used to represent Floating IP @@ -125,6 +165,7 @@ pub struct IncompleteExternalIp { parent_id: Option, pool_id: Uuid, project_id: Option, + state: IpAttachState, // Optional address requesting that a specific IP address be allocated. explicit_ip: Option, // Optional range when requesting a specific SNAT range be allocated. @@ -137,34 +178,38 @@ impl IncompleteExternalIp { instance_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: false, parent_id: Some(instance_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } - pub fn for_ephemeral(id: Uuid, instance_id: Uuid, pool_id: Uuid) -> Self { + pub fn for_ephemeral(id: Uuid, pool_id: Uuid) -> Self { + let kind = IpKind::Ephemeral; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::Ephemeral, + kind, is_service: false, - parent_id: Some(instance_id), + parent_id: None, pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -175,18 +220,20 @@ impl IncompleteExternalIp { project_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: false, parent_id: None, pool_id, project_id: Some(project_id), explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -198,18 +245,20 @@ impl IncompleteExternalIp { explicit_ip: IpAddr, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: false, parent_id: None, pool_id, project_id: Some(project_id), explicit_ip: Some(explicit_ip.into()), explicit_port_range: None, + state: kind.initial_state(), } } @@ -233,6 +282,7 @@ impl IncompleteExternalIp { project_id: None, explicit_ip: Some(IpNetwork::from(address)), explicit_port_range: None, + state: IpAttachState::Attached, } } @@ -250,18 +300,20 @@ impl IncompleteExternalIp { NUM_SOURCE_NAT_PORTS, ); let explicit_port_range = Some((first_port.into(), last_port.into())); + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: Some(IpNetwork::from(address)), explicit_port_range, + state: kind.initial_state(), } } @@ -272,34 +324,38 @@ impl IncompleteExternalIp { service_id: Uuid, pool_id: Uuid, ) -> Self { + let kind = IpKind::Floating; Self { id, name: Some(name.clone()), description: Some(description.to_string()), time_created: Utc::now(), - kind: IpKind::Floating, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: IpAttachState::Attached, } } pub fn for_service_snat(id: Uuid, service_id: Uuid, 
pool_id: Uuid) -> Self { + let kind = IpKind::SNat; Self { id, name: None, description: None, time_created: Utc::now(), - kind: IpKind::SNat, + kind, is_service: true, parent_id: Some(service_id), pool_id, project_id: None, explicit_ip: None, explicit_port_range: None, + state: kind.initial_state(), } } @@ -339,6 +395,10 @@ impl IncompleteExternalIp { &self.project_id } + pub fn state(&self) -> &IpAttachState { + &self.state + } + pub fn explicit_ip(&self) -> &Option { &self.explicit_ip } @@ -348,6 +408,18 @@ impl IncompleteExternalIp { } } +impl IpKind { + /// The initial state which a new non-service IP should + /// be allocated in. + pub fn initial_state(&self) -> IpAttachState { + match &self { + IpKind::SNat => IpAttachState::Attached, + IpKind::Ephemeral => IpAttachState::Detached, + IpKind::Floating => IpAttachState::Detached, + } + } +} + impl TryFrom for shared::IpKind { type Error = Error; @@ -371,8 +443,15 @@ impl TryFrom for views::ExternalIp { "Service IPs should not be exposed in the API", )); } - let kind = ip.kind.try_into()?; - Ok(views::ExternalIp { kind, ip: ip.ip.ip() }) + match ip.kind { + IpKind::Floating => Ok(views::ExternalIp::Floating(ip.try_into()?)), + IpKind::Ephemeral => { + Ok(views::ExternalIp::Ephemeral { ip: ip.ip.ip() }) + } + IpKind::SNat => Err(Error::internal_error( + "SNAT IP addresses should not be exposed in the API", + )), + } } } @@ -450,3 +529,18 @@ impl From for views::FloatingIp { } } } + +impl TryFrom for InstanceExternalIpBody { + type Error = Error; + + fn try_from(value: ExternalIp) -> Result { + let ip = value.ip.ip(); + match value.kind { + IpKind::Ephemeral => Ok(InstanceExternalIpBody::Ephemeral(ip)), + IpKind::Floating => Ok(InstanceExternalIpBody::Floating(ip)), + IpKind::SNat => Err(Error::invalid_request( + "cannot dynamically add/remove SNAT allocation", + )), + } + } +} diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 9252926547..e10f8c2603 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -2,9 +2,11 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use super::{ByteCount, Disk, Generation, InstanceCpuCount, InstanceState}; +use super::{ + ByteCount, Disk, ExternalIp, Generation, InstanceCpuCount, InstanceState, +}; use crate::collection::DatastoreAttachTargetConfig; -use crate::schema::{disk, instance}; +use crate::schema::{disk, external_ip, instance}; use chrono::{DateTime, Utc}; use db_macros::Resource; use nexus_types::external_api::params; @@ -101,6 +103,17 @@ impl DatastoreAttachTargetConfig for Instance { type ResourceTimeDeletedColumn = disk::dsl::time_deleted; } +impl DatastoreAttachTargetConfig for Instance { + type Id = Uuid; + + type CollectionIdColumn = instance::dsl::id; + type CollectionTimeDeletedColumn = instance::dsl::time_deleted; + + type ResourceIdColumn = external_ip::dsl::id; + type ResourceCollectionIdColumn = external_ip::dsl::parent_id; + type ResourceTimeDeletedColumn = external_ip::dsl::time_deleted; +} + /// Runtime state of the Instance, including the actual running state and minimal /// metadata /// diff --git a/nexus/db-model/src/instance_state.rs b/nexus/db-model/src/instance_state.rs index 7b98850b43..dca809758f 100644 --- a/nexus/db-model/src/instance_state.rs +++ b/nexus/db-model/src/instance_state.rs @@ -65,3 +65,9 @@ impl From for sled_agent_client::types::InstanceState { } } } + +impl From for InstanceState { + fn from(state: external::InstanceState) -> Self { + Self::new(state) + } +} diff --git a/nexus/db-model/src/ipv4_nat_entry.rs b/nexus/db-model/src/ipv4_nat_entry.rs index 570a46b5e9..b0fa2b8eb9 100644 --- a/nexus/db-model/src/ipv4_nat_entry.rs +++ b/nexus/db-model/src/ipv4_nat_entry.rs @@ -5,6 +5,7 @@ use crate::{schema::ipv4_nat_entry, Ipv4Net, Ipv6Net, SqlU16, Vni}; use chrono::{DateTime, Utc}; use omicron_common::api::external; use schemars::JsonSchema; +use serde::Deserialize; use serde::Serialize; use uuid::Uuid; @@ -21,7 +22,7 @@ pub struct Ipv4NatValues { } /// Database representation of an Ipv4 NAT Entry. -#[derive(Queryable, Debug, Clone, Selectable)] +#[derive(Queryable, Debug, Clone, Selectable, Serialize, Deserialize)] #[diesel(table_name = ipv4_nat_entry)] pub struct Ipv4NatEntry { pub id: Uuid, diff --git a/nexus/db-model/src/macaddr.rs b/nexus/db-model/src/macaddr.rs index dceb8acf48..b3329598bd 100644 --- a/nexus/db-model/src/macaddr.rs +++ b/nexus/db-model/src/macaddr.rs @@ -8,8 +8,19 @@ use diesel::pg::Pg; use diesel::serialize::{self, ToSql}; use diesel::sql_types; use omicron_common::api::external; +use serde::Deserialize; +use serde::Serialize; -#[derive(Clone, Copy, Debug, PartialEq, AsExpression, FromSqlRow)] +#[derive( + Clone, + Copy, + Debug, + PartialEq, + AsExpression, + FromSqlRow, + Serialize, + Deserialize, +)] #[diesel(sql_type = sql_types::BigInt)] pub struct MacAddr(pub external::MacAddr); diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 2e7493716e..11cdf87f6c 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -574,6 +574,7 @@ table! { last_port -> Int4, project_id -> Nullable, + state -> crate::IpAttachStateEnum, } } diff --git a/nexus/db-queries/src/db/datastore/disk.rs b/nexus/db-queries/src/db/datastore/disk.rs index 2055287e62..390376e627 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -206,7 +206,7 @@ impl DataStore { let (instance, disk) = query.attach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?) 
.await - .or_else(|e| { + .or_else(|e: AttachError| { match e { AttachError::CollectionNotFound => { Err(Error::not_found_by_id( @@ -348,7 +348,7 @@ impl DataStore { ) .detach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?) .await - .or_else(|e| { + .or_else(|e: DetachError| { match e { DetachError::CollectionNotFound => { Err(Error::not_found_by_id( diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 02ce950118..9d4d947476 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -9,6 +9,10 @@ use crate::authz; use crate::authz::ApiResource; use crate::context::OpContext; use crate::db; +use crate::db::collection_attach::AttachError; +use crate::db::collection_attach::DatastoreAttachTarget; +use crate::db::collection_detach::DatastoreDetachTarget; +use crate::db::collection_detach::DetachError; use crate::db::error::public_error_from_diesel; use crate::db::error::retryable; use crate::db::error::ErrorHandler; @@ -22,11 +26,17 @@ use crate::db::model::Name; use crate::db::pagination::paginated; use crate::db::pool::DbConnection; use crate::db::queries::external_ip::NextExternalIp; +use crate::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE; +use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES; +use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES_CREATING; +use crate::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_model::Instance; +use nexus_db_model::IpAttachState; use nexus_types::external_api::params; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -35,13 +45,14 @@ use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; -use omicron_common::api::external::NameOrId; use omicron_common::api::external::ResourceType; use omicron_common::api::external::UpdateResult; use ref_cast::RefCast; use std::net::IpAddr; use uuid::Uuid; +const MAX_EXTERNAL_IPS_PLUS_SNAT: u32 = MAX_EXTERNAL_IPS_PER_INSTANCE + 1; + impl DataStore { /// Create an external IP address for source NAT for an instance. pub async fn allocate_instance_snat_ip( @@ -60,23 +71,43 @@ impl DataStore { } /// Create an Ephemeral IP address for an instance. + /// + /// For consistency between instance create and External IP attach/detach + /// operations, this IP will be created in the `Attaching` state to block + /// concurrent access. + /// Callers must call `external_ip_complete_op` on saga completion to move + /// the IP to `Attached`. + /// + /// To better handle idempotent attachment, this method returns an + /// additional bool: + /// - true: EIP was detached or attaching. proceed with saga. + /// - false: EIP was attached. No-op for remainder of saga. 
pub async fn allocate_instance_ephemeral_ip( &self, opctx: &OpContext, ip_id: Uuid, instance_id: Uuid, - pool_name: Option, - ) -> CreateResult { - let pool = match pool_name { - Some(name) => { - let (.., authz_pool, pool) = LookupPath::new(opctx, &self) - .ip_pool_name(&name) + pool: Option, + creating_instance: bool, + ) -> CreateResult<(ExternalIp, bool)> { + // This is slightly hacky: we need to create an unbound ephemeral IP, and + // then attempt to bind it to respect two separate constraints: + // - At most one Ephemeral IP per instance + // - At most MAX external IPs per instance + // Naturally, we now *need* to destroy the ephemeral IP if the newly alloc'd + // IP was not attached, including on idempotent success. + let pool = match pool { + Some(authz_pool) => { + let (.., pool) = LookupPath::new(opctx, &self) + .ip_pool_id(authz_pool.id()) // any authenticated user can CreateChild on an IP pool. this is // meant to represent allocating an IP .fetch_for(authz::Action::CreateChild) .await?; // If this pool is not linked to the current silo, 404 + // As name resolution happens one layer up, we need to use the *original* + // authz Pool. if self.ip_pool_fetch_link(opctx, pool.id()).await.is_err() { return Err(authz_pool.not_found()); } @@ -91,9 +122,49 @@ impl DataStore { }; let pool_id = pool.identity.id; - let data = - IncompleteExternalIp::for_ephemeral(ip_id, instance_id, pool_id); - self.allocate_external_ip(opctx, data).await + let data = IncompleteExternalIp::for_ephemeral(ip_id, pool_id); + + // We might not be able to acquire a new IP, but in the event of an + // idempotent or double attach this failure is allowed. + let temp_ip = self.allocate_external_ip(opctx, data).await; + if let Err(e) = temp_ip { + let eip = self + .instance_lookup_ephemeral_ip(opctx, instance_id) + .await? + .ok_or(e)?; + + return Ok((eip, false)); + } + let temp_ip = temp_ip?; + + match self + .begin_attach_ip( + opctx, + temp_ip.id, + instance_id, + IpKind::Ephemeral, + creating_instance, + ) + .await + { + Err(e) => { + self.deallocate_external_ip(opctx, temp_ip.id).await?; + Err(e) + } + // Idempotent case: attach failed due to a caught UniqueViolation. + Ok(None) => { + self.deallocate_external_ip(opctx, temp_ip.id).await?; + let eip = self + .instance_lookup_ephemeral_ip(opctx, instance_id) + .await? + .ok_or_else(|| Error::internal_error( + "failed to lookup current ephemeral IP for idempotent attach" + ))?; + let do_saga = eip.state != IpAttachState::Attached; + Ok((eip, do_saga)) + } + Ok(Some(v)) => Ok(v), + } } /// Allocates an IP address for internal service usage. @@ -140,33 +211,34 @@ impl DataStore { opctx: &OpContext, project_id: Uuid, params: params::FloatingIpCreate, + pool: Option, ) -> CreateResult { let ip_id = Uuid::new_v4(); - // TODO: NameOrId resolution should happen a level higher, in the nexus function - let (.., authz_pool, pool) = match params.pool { - Some(NameOrId::Name(name)) => { - LookupPath::new(opctx, self) - .ip_pool_name(&Name(name)) - .fetch_for(authz::Action::Read) - .await? + // This implements the same pattern as in `allocate_instance_ephemeral_ip` to + // check that a chosen pool is valid from within the current silo. 
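+        // (`ip_pool_fetch_link` failing means the pool is not linked to the
+        // caller's silo; we report that as a 404 on the pool itself.)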
+ let pool = match pool { + Some(authz_pool) => { + let (.., pool) = LookupPath::new(opctx, &self) + .ip_pool_id(authz_pool.id()) + .fetch_for(authz::Action::CreateChild) + .await?; + + if self.ip_pool_fetch_link(opctx, pool.id()).await.is_err() { + return Err(authz_pool.not_found()); + } + + pool } - Some(NameOrId::Id(id)) => { - LookupPath::new(opctx, self) - .ip_pool_id(id) - .fetch_for(authz::Action::Read) - .await? + // If no name given, use the default logic + None => { + let (.., pool) = self.ip_pools_fetch_default(&opctx).await?; + pool } - None => self.ip_pools_fetch_default(opctx).await?, }; let pool_id = pool.id(); - // If this pool is not linked to the current silo, 404 - if self.ip_pool_fetch_link(opctx, pool_id).await.is_err() { - return Err(authz_pool.not_found()); - } - let data = if let Some(ip) = params.address { IncompleteExternalIp::for_floating_explicit( ip_id, @@ -228,6 +300,7 @@ impl DataStore { ) } } + // Floating IP: name conflict DatabaseError(UniqueViolation, ..) if name.is_some() => { TransactionError::CustomError(public_error_from_diesel( e, @@ -299,7 +372,266 @@ impl DataStore { self.allocate_external_ip(opctx, data).await } - /// Deallocate the external IP address with the provided ID. + /// Attempt to move a target external IP from detached to attaching, + /// checking that its parent instance does not have too many addresses + /// and is in a valid state. + /// + /// Returns the `ExternalIp` which was modified, where possible. This + /// is only nullable when trying to double-attach ephemeral IPs. + /// To better handle idempotent attachment, this method returns an + /// additional bool: + /// - true: EIP was detached or attaching. proceed with saga. + /// - false: EIP was attached. No-op for remainder of saga. + async fn begin_attach_ip( + &self, + opctx: &OpContext, + ip_id: Uuid, + instance_id: Uuid, + kind: IpKind, + creating_instance: bool, + ) -> Result, Error> { + use db::schema::external_ip::dsl; + use db::schema::external_ip::table; + use db::schema::instance::dsl as inst_dsl; + use db::schema::instance::table as inst_table; + use diesel::result::DatabaseErrorKind::UniqueViolation; + use diesel::result::Error::DatabaseError; + + let safe_states = if creating_instance { + &SAFE_TO_ATTACH_INSTANCE_STATES_CREATING[..] + } else { + &SAFE_TO_ATTACH_INSTANCE_STATES[..] + }; + + let query = Instance::attach_resource( + instance_id, + ip_id, + inst_table + .into_boxed() + .filter(inst_dsl::state.eq_any(safe_states)) + .filter(inst_dsl::migration_id.is_null()), + table + .into_boxed() + .filter(dsl::state.eq(IpAttachState::Detached)) + .filter(dsl::kind.eq(kind)) + .filter(dsl::parent_id.is_null()), + MAX_EXTERNAL_IPS_PLUS_SNAT, + diesel::update(dsl::external_ip).set(( + dsl::parent_id.eq(Some(instance_id)), + dsl::time_modified.eq(Utc::now()), + dsl::state.eq(IpAttachState::Attaching), + )), + ); + + let mut do_saga = true; + query.attach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?) 
+        .await
+        .map(|(_, resource)| Some(resource))
+        .or_else(|e: AttachError| match e {
+            AttachError::CollectionNotFound => {
+                Err(Error::not_found_by_id(
+                    ResourceType::Instance,
+                    &instance_id,
+                ))
+            },
+            AttachError::ResourceNotFound => {
+                Err(if kind == IpKind::Ephemeral {
+                    Error::internal_error("call-scoped ephemeral IP was lost")
+                } else {
+                    Error::not_found_by_id(
+                        ResourceType::FloatingIp,
+                        &ip_id,
+                    )
+                })
+            },
+            AttachError::NoUpdate { attached_count, resource, collection } => {
+                match resource.state {
+                    // Idempotent errors: attach is in progress or complete
+                    // for the same resource pair -- this is fine.
+                    IpAttachState::Attaching if resource.parent_id == Some(instance_id) =>
+                        return Ok(Some(resource)),
+                    IpAttachState::Attached if resource.parent_id == Some(instance_id) => {
+                        do_saga = false;
+                        return Ok(Some(resource))
+                    },
+                    IpAttachState::Attached =>
+                        return Err(Error::invalid_request(&format!(
+                            "{kind} IP cannot be attached to one \
+                            instance while still attached to another"
+                        ))),
+                    // User can reattempt depending on how the current saga unfolds.
+                    // NB: only floating IP can return this case; eph will return
+                    // a UniqueViolation.
+                    IpAttachState::Attaching | IpAttachState::Detaching
+                        => return Err(Error::unavail(&format!(
+                            "tried to attach {kind} IP mid-attach/detach: \
+                            attach will be safe to retry once operation on \
+                            same IP resource completes"
+                        ))),
+
+                    IpAttachState::Detached => {},
+                }
+
+                if collection.runtime_state.migration_id.is_some() {
+                    return Err(Error::unavail(&format!(
+                        "tried to attach {kind} IP while instance was migrating: \
+                        attach will be safe to retry once migrate completes"
+                    )))
+                }
+
+                Err(match &collection.runtime_state.nexus_state {
+                    state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state)
+                        => Error::unavail(&format!(
+                        "tried to attach {kind} IP while instance was changing state: \
+                        attach will be safe to retry once start/stop completes"
+                    )),
+                    state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => {
+                        if attached_count >= MAX_EXTERNAL_IPS_PLUS_SNAT as i64 {
+                            Error::invalid_request(&format!(
+                                "an instance may not have more than \
+                                {MAX_EXTERNAL_IPS_PER_INSTANCE} external IP addresses",
+                            ))
+                        } else {
+                            Error::internal_error(&format!("failed to attach {kind} IP"))
+                        }
+                    },
+                    state => Error::invalid_request(&format!(
+                        "cannot attach {kind} IP to instance in {state} state"
+                    )),
+                })
+            },
+            // This case occurs for both currently attaching and attached ephemeral IPs:
+            AttachError::DatabaseError(DatabaseError(UniqueViolation, ..))
+                if kind == IpKind::Ephemeral => {
+                Ok(None)
+            },
+            AttachError::DatabaseError(e) => {
+                Err(public_error_from_diesel(e, ErrorHandler::Server))
+            },
+        })
+        .map(|eip| eip.map(|v| (v, do_saga)))
+    }
+
+    /// Attempt to move a target external IP from attached to detaching,
+    /// checking that its parent instance is in a valid state.
+    ///
+    /// Returns the `ExternalIp` which was modified, where possible. This
+    /// is only nullable when trying to double-detach ephemeral IPs.
+    /// To better handle idempotent detachment, this method returns an
+    /// additional bool:
+    /// - true: EIP was attached or detaching. proceed with saga.
+    /// - false: EIP was detached. No-op for remainder of saga.
+    async fn begin_detach_ip(
+        &self,
+        opctx: &OpContext,
+        ip_id: Uuid,
+        instance_id: Uuid,
+        kind: IpKind,
+        creating_instance: bool,
+    ) -> UpdateResult<Option<(ExternalIp, bool)>> {
+        use db::schema::external_ip::dsl;
+        use db::schema::external_ip::table;
+        use db::schema::instance::dsl as inst_dsl;
+        use db::schema::instance::table as inst_table;
+
+        let safe_states = if creating_instance {
+            &SAFE_TO_ATTACH_INSTANCE_STATES_CREATING[..]
+        } else {
+            &SAFE_TO_ATTACH_INSTANCE_STATES[..]
+        };
+
+        let query = Instance::detach_resource(
+            instance_id,
+            ip_id,
+            inst_table
+                .into_boxed()
+                .filter(inst_dsl::state.eq_any(safe_states))
+                .filter(inst_dsl::migration_id.is_null()),
+            table
+                .into_boxed()
+                .filter(dsl::state.eq(IpAttachState::Attached))
+                .filter(dsl::kind.eq(kind)),
+            diesel::update(dsl::external_ip).set((
+                dsl::time_modified.eq(Utc::now()),
+                dsl::state.eq(IpAttachState::Detaching),
+            )),
+        );
+
+        let mut do_saga = true;
+        query.detach_and_get_result_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map(Some)
+            .or_else(|e: DetachError| Err(match e {
+                DetachError::CollectionNotFound => {
+                    Error::not_found_by_id(
+                        ResourceType::Instance,
+                        &instance_id,
+                    )
+                },
+                DetachError::ResourceNotFound => {
+                    if kind == IpKind::Ephemeral {
+                        return Ok(None);
+                    } else {
+                        Error::not_found_by_id(
+                            ResourceType::FloatingIp,
+                            &ip_id,
+                        )
+                    }
+                },
+                DetachError::NoUpdate { resource, collection } => {
+                    let parent_match = resource.parent_id == Some(instance_id);
+                    match resource.state {
+                        // Idempotent cases: already detached OR detaching from same instance.
+                        IpAttachState::Detached => {
+                            do_saga = false;
+                            return Ok(Some(resource))
+                        },
+                        IpAttachState::Detaching if parent_match => return Ok(Some(resource)),
+                        IpAttachState::Attached if !parent_match
+                            => return Err(Error::invalid_request(&format!(
+                                "{kind} IP is not attached to the target instance",
+                            ))),
+                        // User can reattempt depending on how the current saga unfolds.
+                        IpAttachState::Attaching
+                            | IpAttachState::Detaching => return Err(Error::unavail(&format!(
+                                "tried to detach {kind} IP mid-attach/detach: \
+                                detach will be safe to retry once operation on \
+                                same IP resource completes"
+                            ))),
+                        IpAttachState::Attached => {},
+                    }
+
+                    if collection.runtime_state.migration_id.is_some() {
+                        return Err(Error::unavail(&format!(
+                            "tried to detach {kind} IP while instance was migrating: \
+                            detach will be safe to retry once migrate completes"
+                        )))
+                    }
+
+                    match collection.runtime_state.nexus_state {
+                        state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state) => Error::unavail(&format!(
+                            "tried to detach {kind} IP while instance was changing state: \
+                            detach will be safe to retry once start/stop completes"
+                        )),
+                        state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => {
+                            Error::internal_error(&format!("failed to detach {kind} IP"))
+                        },
+                        state => Error::invalid_request(&format!(
+                            "cannot detach {kind} IP from instance in {state} state"
+                        )),
+                    }
+                },
+                DetachError::DatabaseError(e) => {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                },
+
+            }))
+            .map(|eip| eip.map(|v| (v, do_saga)))
+    }
+
+    /// Deallocate the external IP address with the provided ID. This is a complete
+    /// removal of the IP entry, in contrast with `begin_deallocate_ephemeral_ip`,
+    /// and should only be used for SNAT entries or cleanup of short-lived ephemeral
+    /// IPs on failure.
     ///
     /// To support idempotency, such as in saga operations, this method returns
     /// an extra boolean, rather than the usual `DeleteResult`. The meaning of
@@ -329,7 +661,34 @@
         .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }

-    /// Delete all external IP addresses associated with the provided instance
+    /// Moves an instance's ephemeral IP from 'Attached' to 'Detaching'.
+    ///
+    /// To support idempotency, this method will succeed if the instance
+    /// has no ephemeral IP or one is actively being removed. As a result,
+    /// information on an actual `ExternalIp` is best-effort.
+    pub async fn begin_deallocate_ephemeral_ip(
+        &self,
+        opctx: &OpContext,
+        ip_id: Uuid,
+        instance_id: Uuid,
+    ) -> Result<Option<ExternalIp>, Error> {
+        let _ = LookupPath::new(&opctx, self)
+            .instance_id(instance_id)
+            .lookup_for(authz::Action::Modify)
+            .await?;
+
+        self.begin_detach_ip(
+            opctx,
+            ip_id,
+            instance_id,
+            IpKind::Ephemeral,
+            false,
+        )
+        .await
+        .map(|res| res.map(|(ip, _do_saga)| ip))
+    }
+
+    /// Delete all non-floating IP addresses associated with the provided instance
     /// ID.
     ///
     /// This method returns the number of records deleted, rather than the usual
@@ -347,16 +706,22 @@
             .filter(dsl::is_service.eq(false))
             .filter(dsl::parent_id.eq(instance_id))
             .filter(dsl::kind.ne(IpKind::Floating))
-            .set(dsl::time_deleted.eq(now))
+            .set((
+                dsl::time_deleted.eq(now),
+                dsl::state.eq(IpAttachState::Detached),
+            ))
             .execute_async(&*self.pool_connection_authorized(opctx).await?)
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }

-    /// Detach an individual Floating IP address from its parent instance.
+    /// Detach all Floating IP addresses from their parent instance.
     ///
     /// As in `deallocate_external_ip_by_instance_id`, this method returns the
     /// number of records altered, rather than an `UpdateResult`.
+    ///
+    /// This method ignores ongoing state transitions, and is only safely
+    /// usable from within the instance_delete saga.
     pub async fn detach_floating_ips_by_instance_id(
         &self,
         opctx: &OpContext,
@@ -368,13 +733,18 @@
             .filter(dsl::is_service.eq(false))
             .filter(dsl::parent_id.eq(instance_id))
             .filter(dsl::kind.eq(IpKind::Floating))
-            .set(dsl::parent_id.eq(Option::<Uuid>::None))
+            .set((
+                dsl::time_modified.eq(Utc::now()),
+                dsl::parent_id.eq(Option::<Uuid>::None),
+                dsl::state.eq(IpAttachState::Detached),
+            ))
             .execute_async(&*self.pool_connection_authorized(opctx).await?)
             .await
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }

     /// Fetch all external IP addresses of any kind for the provided instance
+    /// in all attachment states.
     pub async fn instance_lookup_external_ips(
         &self,
         opctx: &OpContext,
@@ -391,6 +761,20 @@
             .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))
     }

+    /// Fetch the ephemeral IP address assigned to the provided instance, if
+    /// one has been configured.
+    pub async fn instance_lookup_ephemeral_ip(
+        &self,
+        opctx: &OpContext,
+        instance_id: Uuid,
+    ) -> LookupResult<Option<ExternalIp>> {
+        Ok(self
+            .instance_lookup_external_ips(opctx, instance_id)
+            .await?
+            .into_iter()
+            .find(|v| v.kind == IpKind::Ephemeral))
+    }
+
     /// Fetch all Floating IP addresses for the provided project.
     pub async fn floating_ips_list(
         &self,
         opctx: &OpContext,
@@ -425,26 +809,20 @@
         &self,
         opctx: &OpContext,
         authz_fip: &authz::FloatingIp,
-        db_fip: &FloatingIp,
     ) -> DeleteResult {
         use db::schema::external_ip::dsl;

-        // Verify this FIP is not attached to any instances/services.
- if db_fip.parent_id.is_some() { - return Err(Error::invalid_request( - "Floating IP cannot be deleted while attached to an instance", - )); - } - opctx.authorize(authz::Action::Delete, authz_fip).await?; let now = Utc::now(); - let updated_rows = diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) + let result = diesel::update(dsl::external_ip) + .filter(dsl::id.eq(authz_fip.id())) .filter(dsl::time_deleted.is_null()) .filter(dsl::parent_id.is_null()) + .filter(dsl::state.eq(IpAttachState::Detached)) .set(dsl::time_deleted.eq(now)) - .execute_async(&*self.pool_connection_authorized(opctx).await?) + .check_if_exists::(authz_fip.id()) + .execute_and_check(&*self.pool_connection_authorized(opctx).await?) .await .map_err(|e| { public_error_from_diesel( @@ -453,103 +831,208 @@ impl DataStore { ) })?; - if updated_rows == 0 { - return Err(Error::invalid_request( - "deletion failed due to concurrent modification", - )); + match result.status { + // Verify this FIP is not attached to any instances/services. + UpdateStatus::NotUpdatedButExists if result.found.parent_id.is_some() => Err(Error::invalid_request( + "Floating IP cannot be deleted while attached to an instance", + )), + // Only remaining cause of `NotUpdated` is earlier soft-deletion. + // Return success in this case to maintain idempotency. + UpdateStatus::Updated | UpdateStatus::NotUpdatedButExists => Ok(()), } - Ok(()) } /// Attaches a Floating IP address to an instance. - pub async fn floating_ip_attach( + /// + /// This moves a floating IP into the 'attaching' state. Callers are + /// responsible for calling `external_ip_complete_op` to finalise the + /// IP in 'attached' state at saga completion. + /// + /// To better handle idempotent attachment, this method returns an + /// additional bool: + /// - true: EIP was detached or attaching. proceed with saga. + /// - false: EIP was attached. No-op for remainder of saga. + pub async fn floating_ip_begin_attach( &self, opctx: &OpContext, authz_fip: &authz::FloatingIp, - db_fip: &FloatingIp, instance_id: Uuid, - ) -> UpdateResult { - use db::schema::external_ip::dsl; - - // Verify this FIP is not attached to any instances/services. - if db_fip.parent_id.is_some() { - return Err(Error::invalid_request( - "Floating IP cannot be attached to one instance while still attached to another", - )); - } - - let (.., authz_instance, _db_instance) = LookupPath::new(&opctx, self) + creating_instance: bool, + ) -> UpdateResult<(ExternalIp, bool)> { + let (.., authz_instance) = LookupPath::new(&opctx, self) .instance_id(instance_id) - .fetch_for(authz::Action::Modify) + .lookup_for(authz::Action::Modify) .await?; opctx.authorize(authz::Action::Modify, authz_fip).await?; opctx.authorize(authz::Action::Modify, &authz_instance).await?; - diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) - .filter(dsl::kind.eq(IpKind::Floating)) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::parent_id.is_null()) - .set(( - dsl::parent_id.eq(Some(instance_id)), - dsl::time_modified.eq(Utc::now()), - )) - .returning(ExternalIp::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
- .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_fip), + self.begin_attach_ip( + opctx, + authz_fip.id(), + instance_id, + IpKind::Floating, + creating_instance, + ) + .await + .and_then(|v| { + v.ok_or_else(|| { + Error::internal_error( + "floating IP should never return `None` from begin_attach", ) }) - .and_then(|r| FloatingIp::try_from(r)) - .map_err(|e| Error::internal_error(&format!("{e}"))) + }) } /// Detaches a Floating IP address from an instance. - pub async fn floating_ip_detach( + /// + /// This moves a floating IP into the 'detaching' state. Callers are + /// responsible for calling `external_ip_complete_op` to finalise the + /// IP in 'detached' state at saga completion. + /// + /// To better handle idempotent detachment, this method returns an + /// additional bool: + /// - true: EIP was attached or detaching. proceed with saga. + /// - false: EIP was detached. No-op for remainder of saga. + pub async fn floating_ip_begin_detach( &self, opctx: &OpContext, authz_fip: &authz::FloatingIp, - db_fip: &FloatingIp, - ) -> UpdateResult { - use db::schema::external_ip::dsl; - - let Some(instance_id) = db_fip.parent_id else { - return Err(Error::invalid_request( - "Floating IP is not attached to an instance", - )); - }; - - let (.., authz_instance, _db_instance) = LookupPath::new(&opctx, self) + instance_id: Uuid, + creating_instance: bool, + ) -> UpdateResult<(ExternalIp, bool)> { + let (.., authz_instance) = LookupPath::new(&opctx, self) .instance_id(instance_id) - .fetch_for(authz::Action::Modify) + .lookup_for(authz::Action::Modify) .await?; opctx.authorize(authz::Action::Modify, authz_fip).await?; opctx.authorize(authz::Action::Modify, &authz_instance).await?; - diesel::update(dsl::external_ip) - .filter(dsl::id.eq(db_fip.id())) - .filter(dsl::kind.eq(IpKind::Floating)) - .filter(dsl::time_deleted.is_null()) - .filter(dsl::parent_id.eq(instance_id)) - .set(( - dsl::parent_id.eq(Option::::None), - dsl::time_modified.eq(Utc::now()), - )) - .returning(ExternalIp::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_fip), + self.begin_detach_ip( + opctx, + authz_fip.id(), + instance_id, + IpKind::Floating, + creating_instance, + ) + .await + .and_then(|v| { + v.ok_or_else(|| { + Error::internal_error( + "floating IP should never return `None` from begin_detach", ) }) - .and_then(|r| FloatingIp::try_from(r)) - .map_err(|e| Error::internal_error(&format!("{e}"))) + }) + } + + /// Move an external IP from a transitional state (attaching, detaching) + /// to its intended end state. + /// + /// Returns the number of rows modified, this may be zero on: + /// - instance delete by another saga + /// - saga action rerun + /// + /// This is valid in both cases for idempotency. 
+ pub async fn external_ip_complete_op( + &self, + opctx: &OpContext, + ip_id: Uuid, + ip_kind: IpKind, + expected_state: IpAttachState, + target_state: IpAttachState, + ) -> Result { + use db::schema::external_ip::dsl; + + if matches!( + expected_state, + IpAttachState::Attached | IpAttachState::Detached + ) { + return Err(Error::internal_error(&format!( + "{expected_state:?} is not a valid transition state for attach/detach" + ))); + } + + let part_out = diesel::update(dsl::external_ip) + .filter(dsl::id.eq(ip_id)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::state.eq(expected_state)); + + let now = Utc::now(); + let conn = self.pool_connection_authorized(opctx).await?; + match (ip_kind, expected_state, target_state) { + (IpKind::SNat, _, _) => return Err(Error::internal_error( + "SNAT should not be removed via `external_ip_complete_op`, \ + use `deallocate_external_ip`", + )), + + (IpKind::Ephemeral, _, IpAttachState::Detached) => { + part_out + .set(( + dsl::parent_id.eq(Option::::None), + dsl::time_modified.eq(now), + dsl::time_deleted.eq(now), + dsl::state.eq(target_state), + )) + .execute_async(&*conn) + .await + } + + (IpKind::Floating, _, IpAttachState::Detached) => { + part_out + .set(( + dsl::parent_id.eq(Option::::None), + dsl::time_modified.eq(now), + dsl::state.eq(target_state), + )) + .execute_async(&*conn) + .await + } + + // Attaching->Attached gets separate logic because we choose to fail + // and unwind on instance delete. This covers two cases: + // - External IP is deleted. + // - Floating IP is suddenly `detached`. + (_, IpAttachState::Attaching, IpAttachState::Attached) => { + return part_out + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::state.eq(target_state), + )) + .check_if_exists::(ip_id) + .execute_and_check( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + }) + .and_then(|r| match r.status { + UpdateStatus::Updated => Ok(1), + UpdateStatus::NotUpdatedButExists + if r.found.state == IpAttachState::Detached + || r.found.time_deleted.is_some() => + { + Err(Error::internal_error( + "unwinding due to concurrent instance delete", + )) + } + UpdateStatus::NotUpdatedButExists => Ok(0), + }) + } + + // Unwind from failed detach. 
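+            // (Detaching -> Attached restores the original state; `parent_id`
+            // is left untouched.)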
+ (_, _, IpAttachState::Attached) => { + part_out + .set(( + dsl::time_modified.eq(Utc::now()), + dsl::state.eq(target_state), + )) + .execute_async(&*conn) + .await + } + _ => return Err(Error::internal_error("unreachable")), + } + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } } diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 188f5c30c9..c01f40e791 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -11,6 +11,7 @@ use crate::context::OpContext; use crate::db; use crate::db::collection_detach_many::DatastoreDetachManyTarget; use crate::db::collection_detach_many::DetachManyError; +use crate::db::collection_detach_many::DetachManyFromCollectionStatement; use crate::db::collection_insert::AsyncInsertError; use crate::db::collection_insert::DatastoreCollection; use crate::db::error::public_error_from_diesel; @@ -28,6 +29,7 @@ use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; +use nexus_db_model::Disk; use nexus_db_model::VmmRuntimeState; use omicron_common::api; use omicron_common::api::external::http_pagination::PaginatedBy; @@ -405,59 +407,63 @@ impl DataStore { let ok_to_detach_disk_state_labels: Vec<_> = ok_to_detach_disk_states.iter().map(|s| s.label()).collect(); - let _instance = Instance::detach_resources( - authz_instance.id(), - instance::table.into_boxed().filter( - instance::dsl::state - .eq_any(ok_to_delete_instance_states) - .and(instance::dsl::active_propolis_id.is_null()), - ), - disk::table.into_boxed().filter( - disk::dsl::disk_state.eq_any(ok_to_detach_disk_state_labels), - ), - diesel::update(instance::dsl::instance).set(( - instance::dsl::state.eq(destroyed), - instance::dsl::time_deleted.eq(Utc::now()), - )), - diesel::update(disk::dsl::disk).set(( - disk::dsl::disk_state.eq(detached_label), - disk::dsl::attach_instance_id.eq(Option::::None), - disk::dsl::slot.eq(Option::::None), - )), - ) - .detach_and_get_result_async( - &*self.pool_connection_authorized(opctx).await?, - ) - .await - .map_err(|e| match e { - DetachManyError::CollectionNotFound => Error::not_found_by_id( - ResourceType::Instance, - &authz_instance.id(), - ), - DetachManyError::NoUpdate { collection } => { - if collection.runtime_state.propolis_id.is_some() { - return Error::invalid_request( + let stmt: DetachManyFromCollectionStatement = + Instance::detach_resources( + authz_instance.id(), + instance::table.into_boxed().filter( + instance::dsl::state + .eq_any(ok_to_delete_instance_states) + .and(instance::dsl::active_propolis_id.is_null()), + ), + disk::table.into_boxed().filter( + disk::dsl::disk_state + .eq_any(ok_to_detach_disk_state_labels), + ), + diesel::update(instance::dsl::instance).set(( + instance::dsl::state.eq(destroyed), + instance::dsl::time_deleted.eq(Utc::now()), + )), + diesel::update(disk::dsl::disk).set(( + disk::dsl::disk_state.eq(detached_label), + disk::dsl::attach_instance_id.eq(Option::::None), + disk::dsl::slot.eq(Option::::None), + )), + ); + + let _instance = stmt + .detach_and_get_result_async( + &*self.pool_connection_authorized(opctx).await?, + ) + .await + .map_err(|e| match e { + DetachManyError::CollectionNotFound => Error::not_found_by_id( + ResourceType::Instance, + &authz_instance.id(), + ), + DetachManyError::NoUpdate { collection } => { + if collection.runtime_state.propolis_id.is_some() { + return Error::invalid_request( "cannot delete 
instance: instance is running or has \ not yet fully stopped", ); - } - let instance_state = - collection.runtime_state.nexus_state.state(); - match instance_state { - api::external::InstanceState::Stopped - | api::external::InstanceState::Failed => { - Error::internal_error("cannot delete instance") } - _ => Error::invalid_request(&format!( - "instance cannot be deleted in state \"{}\"", - instance_state, - )), + let instance_state = + collection.runtime_state.nexus_state.state(); + match instance_state { + api::external::InstanceState::Stopped + | api::external::InstanceState::Failed => { + Error::internal_error("cannot delete instance") + } + _ => Error::invalid_request(&format!( + "instance cannot be deleted in state \"{}\"", + instance_state, + )), + } } - } - DetachManyError::DatabaseError(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - })?; + DetachManyError::DatabaseError(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; Ok(()) } diff --git a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs index a44fed4cdf..655a267fe1 100644 --- a/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs +++ b/nexus/db-queries/src/db/datastore/ipv4_nat_entry.rs @@ -23,12 +23,14 @@ impl DataStore { &self, opctx: &OpContext, nat_entry: Ipv4NatValues, - ) -> CreateResult<()> { + ) -> CreateResult { use db::schema::ipv4_nat_entry::dsl; use diesel::sql_types; // Look up any NAT entries that already have the exact parameters // we're trying to INSERT. + // We want to return any existing entry, but not to mask the UniqueViolation + // when trying to use an existing IP + port range with a different target. let matching_entry_subquery = dsl::ipv4_nat_entry .filter(dsl::external_address.eq(nat_entry.external_address)) .filter(dsl::first_port.eq(nat_entry.first_port)) @@ -58,7 +60,7 @@ impl DataStore { )) .filter(diesel::dsl::not(diesel::dsl::exists(matching_entry_subquery))); - diesel::insert_into(dsl::ipv4_nat_entry) + let out = diesel::insert_into(dsl::ipv4_nat_entry) .values(new_entry_subquery) .into_columns(( dsl::external_address, @@ -68,11 +70,24 @@ impl DataStore { dsl::vni, dsl::mac, )) - .execute_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; - - Ok(()) + .returning(Ipv4NatEntry::as_returning()) + .get_result_async(&*self.pool_connection_authorized(opctx).await?) + .await; + + match out { + Ok(o) => Ok(o), + Err(diesel::result::Error::NotFound) => { + // Idempotent ensure. Annoyingly, we can't easily extract + // the existing row as part of the insert query: + // - (SELECT ..) UNION (INSERT INTO .. RETURNING ..) isn't + // allowed by crdb. + // - Can't ON CONFLICT with a partial constraint, so we can't + // do a no-op write and return the row that way either. + // So, we do another lookup. 
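+                // `nat_entry` still holds the values we tried to insert, so
+                // the lookup below is keyed on exactly that row.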
+ self.ipv4_nat_find_by_values(opctx, nat_entry).await + } + Err(e) => Err(public_error_from_diesel(e, ErrorHandler::Server)), + } } pub async fn ipv4_nat_delete( diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index d61ff15a3d..5fd16e2633 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -408,6 +408,7 @@ mod test { use chrono::{Duration, Utc}; use futures::stream; use futures::StreamExt; + use nexus_db_model::IpAttachState; use nexus_test_utils::db::test_setup_database; use nexus_types::external_api::params; use omicron_common::api::external::DataPageParams; @@ -1625,7 +1626,8 @@ mod test { // Create a few records. let now = Utc::now(); let instance_id = Uuid::new_v4(); - let ips = (0..4) + let kinds = [IpKind::SNat, IpKind::Ephemeral]; + let ips = (0..2) .map(|i| ExternalIp { id: Uuid::new_v4(), name: None, @@ -1638,12 +1640,13 @@ mod test { project_id: None, is_service: false, parent_id: Some(instance_id), - kind: IpKind::Ephemeral, + kind: kinds[i as usize], ip: ipnetwork::IpNetwork::from(IpAddr::from(Ipv4Addr::new( 10, 0, 0, i, ))), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }) .collect::>(); diesel::insert_into(dsl::external_ip) @@ -1705,6 +1708,7 @@ mod test { ))), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }; diesel::insert_into(dsl::external_ip) .values(ip.clone()) @@ -1775,6 +1779,7 @@ mod test { ip: addresses.next().unwrap().into(), first_port: crate::db::model::SqlU16(0), last_port: crate::db::model::SqlU16(10), + state: nexus_db_model::IpAttachState::Attached, }; // Combinations of NULL and non-NULL for: @@ -1782,6 +1787,7 @@ mod test { // - description // - parent (instance / service) UUID // - project UUID + // - attach state let names = [None, Some("foo")]; let descriptions = [None, Some("foo".to_string())]; let parent_ids = [None, Some(Uuid::new_v4())]; @@ -1822,6 +1828,12 @@ mod test { continue; } + let state = if parent_id.is_some() { + IpAttachState::Attached + } else { + IpAttachState::Detached + }; + let new_ip = ExternalIp { id: Uuid::new_v4(), name: name_local.clone(), @@ -1830,6 +1842,7 @@ mod test { is_service, parent_id: *parent_id, project_id: *project_id, + state, ..ip }; @@ -1902,6 +1915,11 @@ mod test { let name_local = name.map(|v| { db::model::Name(Name::try_from(v.to_string()).unwrap()) }); + let state = if parent_id.is_some() { + IpAttachState::Attached + } else { + IpAttachState::Detached + }; let new_ip = ExternalIp { id: Uuid::new_v4(), name: name_local, @@ -1911,6 +1929,7 @@ mod test { is_service, parent_id: *parent_id, project_id: *project_id, + state, ..ip }; let res = diesel::insert_into(dsl::external_ip) @@ -1918,9 +1937,10 @@ mod test { .execute_async(&*conn) .await; let ip_type = if is_service { "Service" } else { "Instance" }; + let null_snat_parent = parent_id.is_none() && kind == IpKind::SNat; if name.is_none() && description.is_none() - && parent_id.is_some() + && !null_snat_parent && project_id.is_none() { // Name/description must be NULL, instance ID cannot diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index 090c6865b7..e8ef721e98 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -47,6 +47,7 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ 
"hw_rot_slot", "identity_type", "instance_state", + "ip_attach_state", "ip_kind", "ip_pool_resource_type", "network_interface_kind", diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 49403aac61..8114b9e363 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -26,10 +26,42 @@ use diesel::Column; use diesel::Expression; use diesel::QueryResult; use diesel::RunQueryDsl; +use nexus_db_model::InstanceState as DbInstanceState; +use nexus_db_model::IpAttachState; +use nexus_db_model::IpAttachStateEnum; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external; +use omicron_common::api::external::InstanceState as ApiInstanceState; use uuid::Uuid; +// Broadly, we want users to be able to attach/detach at will +// once an instance is created and functional. +pub const SAFE_TO_ATTACH_INSTANCE_STATES_CREATING: [DbInstanceState; 3] = [ + DbInstanceState(ApiInstanceState::Stopped), + DbInstanceState(ApiInstanceState::Running), + DbInstanceState(ApiInstanceState::Creating), +]; +pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] = [ + DbInstanceState(ApiInstanceState::Stopped), + DbInstanceState(ApiInstanceState::Running), +]; +// If we're in a state which will naturally resolve to either +// stopped/running, we want users to know that the request can be +// retried safely via Error::unavail. +// TODO: We currently stop if there's a migration or other state change. +// There may be a good case for RPWing +// external_ip_state -> { NAT RPW, sled-agent } in future. +pub const SAFE_TRANSIENT_INSTANCE_STATES: [DbInstanceState; 5] = [ + DbInstanceState(ApiInstanceState::Starting), + DbInstanceState(ApiInstanceState::Stopping), + DbInstanceState(ApiInstanceState::Creating), + DbInstanceState(ApiInstanceState::Rebooting), + DbInstanceState(ApiInstanceState::Migrating), +]; + +/// The maximum number of disks that can be attached to an instance. +pub const MAX_EXTERNAL_IPS_PER_INSTANCE: u32 = 32; + type FromClause = diesel::internal::table_macro::StaticQueryFragmentInstance; type IpPoolRangeFromClause = FromClause; @@ -99,7 +131,8 @@ const MAX_PORT: u16 = u16::MAX; /// candidate_ip AS ip, /// CAST(candidate_first_port AS INT4) AS first_port, /// CAST(candidate_last_port AS INT4) AS last_port, -/// AS project_id +/// AS project_id, +/// AS state /// FROM /// SELECT * FROM ( /// -- Select all IP addresses by pool and range. @@ -378,6 +411,14 @@ impl NextExternalIp { out.push_bind_param::, Option>(self.ip.project_id())?; out.push_sql(" AS "); out.push_identifier(dsl::project_id::NAME)?; + out.push_sql(", "); + + // Initial state, mainly needed by Ephemeral/Floating IPs. 
+ out.push_bind_param::( + self.ip.state(), + )?; + out.push_sql(" AS "); + out.push_identifier(dsl::state::NAME)?; out.push_sql(" FROM ("); self.push_address_sequence_subquery(out.reborrow())?; @@ -822,10 +863,12 @@ impl RunQueryDsl for NextExternalIp {} #[cfg(test)] mod tests { + use crate::authz; use crate::context::OpContext; use crate::db::datastore::DataStore; use crate::db::datastore::SERVICE_IP_POOL_NAME; use crate::db::identity::Resource; + use crate::db::lookup::LookupPath; use crate::db::model::IpKind; use crate::db::model::IpPool; use crate::db::model::IpPoolRange; @@ -833,9 +876,13 @@ mod tests { use async_bb8_diesel::AsyncRunQueryDsl; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use dropshot::test_util::LogContext; + use nexus_db_model::ByteCount; + use nexus_db_model::Instance; + use nexus_db_model::InstanceCpuCount; use nexus_db_model::IpPoolResource; use nexus_db_model::IpPoolResourceType; use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::params::InstanceCreate; use nexus_types::external_api::shared::IpRange; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external::Error; @@ -878,7 +925,7 @@ mod tests { name: &str, range: IpRange, is_default: bool, - ) { + ) -> authz::IpPool { let pool = IpPool::new(&IdentityMetadataCreateParams { name: String::from(name).parse().unwrap(), description: format!("ip pool {}", name), @@ -902,6 +949,13 @@ mod tests { .expect("Failed to associate IP pool with silo"); self.initialize_ip_pool(name, range).await; + + LookupPath::new(&self.opctx, &self.db_datastore) + .ip_pool_id(pool.id()) + .lookup_for(authz::Action::Read) + .await + .unwrap() + .0 } async fn initialize_ip_pool(&self, name: &str, range: IpRange) { @@ -937,6 +991,37 @@ mod tests { .expect("Failed to create IP Pool range"); } + async fn create_instance(&self, name: &str) -> Uuid { + let instance_id = Uuid::new_v4(); + let project_id = Uuid::new_v4(); + let instance = Instance::new(instance_id, project_id, &InstanceCreate { + identity: IdentityMetadataCreateParams { name: String::from(name).parse().unwrap(), description: format!("instance {}", name) }, + ncpus: InstanceCpuCount(omicron_common::api::external::InstanceCpuCount(1)).into(), + memory: ByteCount(omicron_common::api::external::ByteCount::from_gibibytes_u32(1)).into(), + hostname: "test".into(), + user_data: vec![], + network_interfaces: Default::default(), + external_ips: vec![], + disks: vec![], + start: false, + }); + + let conn = self + .db_datastore + .pool_connection_authorized(&self.opctx) + .await + .unwrap(); + + use crate::db::schema::instance::dsl as instance_dsl; + diesel::insert_into(instance_dsl::instance) + .values(instance.clone()) + .execute_async(&*conn) + .await + .expect("Failed to create Instance"); + + instance_id + } + async fn default_pool_id(&self) -> Uuid { let (.., pool) = self .db_datastore @@ -1021,7 +1106,7 @@ mod tests { // Allocate an Ephemeral IP, which should take the entire port range of // the only address in the pool. 
- let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("for-eph").await; let ephemeral_ip = context .db_datastore .allocate_instance_ephemeral_ip( @@ -1029,16 +1114,18 @@ mod tests { Uuid::new_v4(), instance_id, /* pool_name = */ None, + true, ) .await - .expect("Failed to allocate Ephemeral IP when there is space"); + .expect("Failed to allocate Ephemeral IP when there is space") + .0; assert_eq!(ephemeral_ip.ip.ip(), range.last_address()); assert_eq!(ephemeral_ip.first_port.0, 0); assert_eq!(ephemeral_ip.last_port.0, super::MAX_PORT); // At this point, we should be able to allocate neither a new Ephemeral // nor any SNAT IPs. - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("for-snat").await; let res = context .db_datastore .allocate_instance_snat_ip( @@ -1069,6 +1156,7 @@ mod tests { Uuid::new_v4(), instance_id, /* pool_name = */ None, + true, ) .await; assert!( @@ -1203,7 +1291,7 @@ mod tests { .unwrap(); context.create_ip_pool("default", range, true).await; - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("all-the-ports").await; let id = Uuid::new_v4(); let pool_name = None; @@ -1214,9 +1302,11 @@ mod tests { id, instance_id, pool_name, + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; assert_eq!(ip.kind, IpKind::Ephemeral); assert_eq!(ip.ip.ip(), range.first_address()); assert_eq!(ip.first_port.0, 0); @@ -1729,13 +1819,12 @@ mod tests { Ipv4Addr::new(10, 0, 0, 6), )) .unwrap(); - context.create_ip_pool("p1", second_range, false).await; + let p1 = context.create_ip_pool("p1", second_range, false).await; // Allocating an address on an instance in the second pool should be // respected, even though there are IPs available in the first. - let instance_id = Uuid::new_v4(); + let instance_id = context.create_instance("test").await; let id = Uuid::new_v4(); - let pool_name = Some(Name("p1".parse().unwrap())); let ip = context .db_datastore @@ -1743,10 +1832,12 @@ mod tests { &context.opctx, id, instance_id, - pool_name, + Some(p1), + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; assert_eq!(ip.kind, IpKind::Ephemeral); assert_eq!(ip.ip.ip(), second_range.first_address()); assert_eq!(ip.first_port.0, 0); @@ -1772,24 +1863,26 @@ mod tests { let last_address = Ipv4Addr::new(10, 0, 0, 6); let second_range = IpRange::try_from((first_address, last_address)).unwrap(); - context.create_ip_pool("p1", second_range, false).await; + let p1 = context.create_ip_pool("p1", second_range, false).await; // Allocate all available addresses in the second pool. 
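The exhaustion test below walks the last octet of a small range. Iterating an inclusive IPv4 range needs nothing beyond the standard library's `u32` conversions; a sketch:

```rust
use std::net::Ipv4Addr;

// Yields every address from `first` to `last`, inclusive.
fn addrs_inclusive(
    first: Ipv4Addr,
    last: Ipv4Addr,
) -> impl Iterator<Item = Ipv4Addr> {
    (u32::from(first)..=u32::from(last)).map(Ipv4Addr::from)
}

fn main() {
    let addrs: Vec<_> =
        addrs_inclusive(Ipv4Addr::new(10, 0, 0, 4), Ipv4Addr::new(10, 0, 0, 6))
            .collect();
    assert_eq!(addrs.len(), 3);
    assert_eq!(addrs[2], Ipv4Addr::new(10, 0, 0, 6));
}
```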
- let instance_id = Uuid::new_v4(); - let pool_name = Some(Name("p1".parse().unwrap())); let first_octet = first_address.octets()[3]; let last_octet = last_address.octets()[3]; for octet in first_octet..=last_octet { + let instance_id = + context.create_instance(&format!("o{octet}")).await; let ip = context .db_datastore .allocate_instance_ephemeral_ip( &context.opctx, Uuid::new_v4(), instance_id, - pool_name.clone(), + Some(p1.clone()), + true, ) .await - .expect("Failed to allocate instance ephemeral IP address"); + .expect("Failed to allocate instance ephemeral IP address") + .0; println!("{ip:#?}"); if let IpAddr::V4(addr) = ip.ip.ip() { assert_eq!(addr.octets()[3], octet); @@ -1799,13 +1892,15 @@ mod tests { } // Allocating another address should _fail_, and not use the first pool. + let instance_id = context.create_instance("final").await; context .db_datastore .allocate_instance_ephemeral_ip( &context.opctx, Uuid::new_v4(), instance_id, - pool_name, + Some(p1), + true, ) .await .expect_err("Should not use IP addresses from a different pool"); diff --git a/nexus/src/app/external_ip.rs b/nexus/src/app/external_ip.rs index 404f597288..45b05fbb0b 100644 --- a/nexus/src/app/external_ip.rs +++ b/nexus/src/app/external_ip.rs @@ -4,14 +4,18 @@ //! External IP addresses for instances +use std::sync::Arc; + use crate::external_api::views::ExternalIp; use crate::external_api::views::FloatingIp; +use nexus_db_model::IpAttachState; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::lookup; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::model::IpKind; use nexus_types::external_api::params; +use nexus_types::external_api::views; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DeleteResult; @@ -19,6 +23,7 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; +use omicron_common::api::external::UpdateResult; impl super::Nexus { pub(crate) async fn instance_list_external_ips( @@ -34,7 +39,9 @@ impl super::Nexus { .await? .into_iter() .filter_map(|ip| { - if ip.kind == IpKind::SNat { + if ip.kind == IpKind::SNat + || ip.state != IpAttachState::Attached + { None } else { Some(ip.try_into().unwrap()) @@ -102,9 +109,19 @@ impl super::Nexus { let (.., authz_project) = project_lookup.lookup_for(authz::Action::CreateChild).await?; + let pool = match ¶ms.pool { + Some(pool) => Some( + self.ip_pool_lookup(opctx, pool)? + .lookup_for(authz::Action::Read) + .await? + .0, + ), + None => None, + }; + Ok(self .db_datastore - .allocate_floating_ip(opctx, authz_project.id(), params) + .allocate_floating_ip(opctx, authz_project.id(), params, pool) .await? 
            .try_into()
            .unwrap())
    }
@@ -115,9 +132,68 @@
         opctx: &OpContext,
         ip_lookup: lookup::FloatingIp<'_>,
     ) -> DeleteResult {
+        let (.., authz_fip) =
+            ip_lookup.lookup_for(authz::Action::Delete).await?;
+
+        self.db_datastore.floating_ip_delete(opctx, &authz_fip).await
+    }
+
+    pub(crate) async fn floating_ip_attach(
+        self: &Arc<Self>,
+        opctx: &OpContext,
+        fip_selector: params::FloatingIpSelector,
+        target: params::FloatingIpAttach,
+    ) -> UpdateResult<views::FloatingIp> {
+        match target.kind {
+            params::FloatingIpParentKind::Instance => {
+                let instance_selector = params::InstanceSelector {
+                    project: fip_selector.project,
+                    instance: target.parent,
+                };
+                let instance =
+                    self.instance_lookup(opctx, instance_selector)?;
+                let attach_params = &params::ExternalIpCreate::Floating {
+                    floating_ip: fip_selector.floating_ip,
+                };
+                self.instance_attach_external_ip(
+                    opctx,
+                    &instance,
+                    attach_params,
+                )
+                .await
+                .and_then(FloatingIp::try_from)
+            }
+        }
+    }
+
+    pub(crate) async fn floating_ip_detach(
+        self: &Arc<Self>,
+        opctx: &OpContext,
+        ip_lookup: lookup::FloatingIp<'_>,
+    ) -> UpdateResult<views::FloatingIp> {
+        // XXX: Today, this only happens for instances.
+        // In future, we will need to separate out by the *type* of
+        // parent attached to a floating IP. We don't yet store this
+        // in db for user-facing FIPs (is_service => internal-only
+        // at this point).
         let (.., authz_fip, db_fip) =
-            ip_lookup.fetch_for(authz::Action::Delete).await?;
+            ip_lookup.fetch_for(authz::Action::Modify).await?;
+
+        let Some(parent_id) = db_fip.parent_id else {
+            return Ok(db_fip.into());
+        };
+
+        let instance_selector = params::InstanceSelector {
+            project: None,
+            instance: parent_id.into(),
+        };
+        let instance = self.instance_lookup(opctx, instance_selector)?;
+        let attach_params = &params::ExternalIpDetach::Floating {
+            floating_ip: authz_fip.id().into(),
+        };

-        self.db_datastore.floating_ip_delete(opctx, &authz_fip, &db_fip).await
+        self.instance_detach_external_ip(opctx, &instance, attach_params)
+            .await
+            .and_then(FloatingIp::try_from)
     }
 }
diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs
index 778c5e2fe1..f924653525 100644
--- a/nexus/src/app/instance.rs
+++ b/nexus/src/app/instance.rs
@@ -17,6 +17,7 @@ use crate::external_api::params;
 use cancel_safe_futures::prelude::*;
 use futures::future::Fuse;
 use futures::{FutureExt, SinkExt, StreamExt};
+use nexus_db_model::IpAttachState;
 use nexus_db_model::IpKind;
 use nexus_db_queries::authn;
 use nexus_db_queries::authz;
@@ -26,6 +27,7 @@
 use nexus_db_queries::db::datastore::InstanceAndActiveVmm;
 use nexus_db_queries::db::identity::Resource;
 use nexus_db_queries::db::lookup;
 use nexus_db_queries::db::lookup::LookupPath;
+use nexus_types::external_api::views;
 use omicron_common::address::PROPOLIS_PORT;
 use omicron_common::api::external::http_pagination::PaginatedBy;
 use omicron_common::api::external::ByteCount;
@@ -1052,6 +1054,15 @@
         ));
     }

+        // If there are any external IPs not yet fully attached/detached, then
+        // there are attach/detach sagas in progress. That should complete in
+        // its own time, so return a 503 to indicate a possible retry.
+        if external_ips.iter().any(|v| v.state != IpAttachState::Attached) {
+            return Err(Error::unavail(
+                "External IP attach/detach is in progress during instance_ensure_registered"
+            ));
+        }
+
         // Partition remaining external IPs by class: we can have at most
         // one ephemeral ip.
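The partition described in the comment above (at most one ephemeral IP, any number of floating IPs) can be sketched standalone. `IpKind` and `ExternalIp` here are local stand-ins for the model types, and the invariant becomes an explicit error:

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IpKind { SNat, Ephemeral, Floating }

#[derive(Debug)]
struct ExternalIp { kind: IpKind }

// Splits non-SNAT IPs into (optional ephemeral, floating list), rejecting
// more than one ephemeral entry.
fn split_ips(
    ips: Vec<ExternalIp>,
) -> Result<(Option<ExternalIp>, Vec<ExternalIp>), String> {
    let (ephemeral, floating): (Vec<_>, Vec<_>) = ips
        .into_iter()
        .filter(|ip| ip.kind != IpKind::SNat)
        .partition(|ip| ip.kind == IpKind::Ephemeral);
    if ephemeral.len() > 1 {
        return Err(format!(
            "expected at most 1 ephemeral IP, got {}",
            ephemeral.len()
        ));
    }
    Ok((ephemeral.into_iter().next(), floating))
}

fn main() {
    let ips = vec![
        ExternalIp { kind: IpKind::SNat },
        ExternalIp { kind: IpKind::Ephemeral },
        ExternalIp { kind: IpKind::Floating },
    ];
    let (eph, floating) = split_ips(ips).unwrap();
    assert!(eph.is_some());
    assert_eq!(floating.len(), 1);
}
```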
let (ephemeral_ips, floating_ips): (Vec<_>, Vec<_>) = external_ips @@ -1904,6 +1915,73 @@ impl super::Nexus { Ok(()) } + + /// Attach an external IP to an instance. + pub(crate) async fn instance_attach_external_ip( + self: &Arc, + opctx: &OpContext, + instance_lookup: &lookup::Instance<'_>, + ext_ip: ¶ms::ExternalIpCreate, + ) -> UpdateResult { + let (.., authz_project, authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + let saga_params = sagas::instance_ip_attach::Params { + create_params: ext_ip.clone(), + authz_instance, + project_id: authz_project.id(), + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + }; + + let saga_outputs = self + .execute_saga::( + saga_params, + ) + .await?; + + saga_outputs + .lookup_node_output::("output") + .map_err(|e| Error::internal_error(&format!("{:#}", &e))) + .internal_context("looking up output from ip attach saga") + } + + /// Detach an external IP from an instance. + pub(crate) async fn instance_detach_external_ip( + self: &Arc, + opctx: &OpContext, + instance_lookup: &lookup::Instance<'_>, + ext_ip: ¶ms::ExternalIpDetach, + ) -> UpdateResult { + let (.., authz_project, authz_instance) = + instance_lookup.lookup_for(authz::Action::Modify).await?; + + let saga_params = sagas::instance_ip_detach::Params { + delete_params: ext_ip.clone(), + authz_instance, + project_id: authz_project.id(), + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + }; + + let saga_outputs = self + .execute_saga::( + saga_params, + ) + .await?; + + saga_outputs + .lookup_node_output::>("output") + .map_err(|e| Error::internal_error(&format!("{:#}", &e))) + .internal_context("looking up output from ip detach saga") + .and_then(|eip| { + // Saga idempotency means we'll get Ok(None) on double detach + // of an ephemeral IP. Convert this case to an error here. + eip.ok_or_else(|| { + Error::invalid_request( + "instance does not have an ephemeral IP attached", + ) + }) + }) + } } #[cfg(test)] diff --git a/nexus/src/app/instance_network.rs b/nexus/src/app/instance_network.rs index 8f97642c88..c0bc5d237b 100644 --- a/nexus/src/app/instance_network.rs +++ b/nexus/src/app/instance_network.rs @@ -7,6 +7,9 @@ use crate::app::sagas::retry_until_known_result; use ipnetwork::IpNetwork; use ipnetwork::Ipv6Network; +use nexus_db_model::ExternalIp; +use nexus_db_model::IpAttachState; +use nexus_db_model::Ipv4NatEntry; use nexus_db_model::Ipv4NatValues; use nexus_db_model::Vni as DbVni; use nexus_db_queries::authz; @@ -24,7 +27,6 @@ use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost; use sled_agent_client::types::SetVirtualNetworkInterfaceHost; use std::collections::HashSet; use std::str::FromStr; -use std::sync::Arc; use uuid::Uuid; impl super::Nexus { @@ -276,6 +278,10 @@ impl super::Nexus { /// Ensures that the Dendrite configuration for the supplied instance is /// up-to-date. /// + /// Returns a list of live NAT RPW table entries from this call. Generally + /// these should only be needed for specific unwind operations, like in + /// the IP attach saga. + /// /// # Parameters /// /// - `opctx`: An operation context that grants read and list-children @@ -283,22 +289,21 @@ impl super::Nexus { /// - `instance_id`: The ID of the instance to act on. /// - `sled_ip_address`: The internal IP address assigned to the sled's /// sled agent. - /// - `ip_index_filter`: An optional filter on the index into the instance's + /// - `ip_filter`: An optional filter on the index into the instance's /// external IP array. 
- /// - If this is `Some(n)`, this routine configures DPD state for only the - /// Nth external IP in the collection returned from CRDB. The caller is - /// responsible for ensuring that the IP collection has stable indices - /// when making this call. + /// - If this is `Some(id)`, this routine configures DPD state for only the + /// external IP with `id` in the collection returned from CRDB. This will + /// proceed even when the target IP is 'attaching'. /// - If this is `None`, this routine configures DPD for all external - /// IPs. + /// IPs and *will back out* if any IPs are not yet fully attached to + /// the instance. pub(crate) async fn instance_ensure_dpd_config( &self, opctx: &OpContext, instance_id: Uuid, sled_ip_address: &std::net::SocketAddrV6, - ip_index_filter: Option, - dpd_client: &Arc, - ) -> Result<(), Error> { + ip_filter: Option, + ) -> Result, Error> { let log = &self.log; info!(log, "looking up instance's primary network interface"; @@ -309,6 +314,9 @@ impl super::Nexus { .lookup_for(authz::Action::ListChildren) .await?; + // XXX: Need to abstract over v6 and v4 entries here. + let mut nat_entries = vec![]; + // All external IPs map to the primary network interface, so find that // interface. If there is no such interface, there's no way to route // traffic destined to those IPs, so there's nothing to configure and @@ -324,7 +332,7 @@ impl super::Nexus { None => { info!(log, "Instance has no primary network interface"; "instance_id" => %instance_id); - return Ok(()); + return Ok(nat_entries); } }; @@ -344,49 +352,104 @@ impl super::Nexus { .instance_lookup_external_ips(&opctx, instance_id) .await?; - if let Some(wanted_index) = ip_index_filter { - if let None = ips.get(wanted_index) { + let (ips_of_interest, must_all_be_attached) = if let Some(wanted_id) = + ip_filter + { + if let Some(ip) = ips.iter().find(|v| v.id == wanted_id) { + (std::slice::from_ref(ip), false) + } else { return Err(Error::internal_error(&format!( - "failed to find external ip address at index: {}", - wanted_index + "failed to find external ip address with id: {wanted_id}, saw {ips:?}", ))); } + } else { + (&ips[..], true) + }; + + // This is performed so that an IP attach/detach will block the + // instance_start saga. Return service unavailable to indicate + // the request is retryable. + if must_all_be_attached + && ips_of_interest + .iter() + .any(|ip| ip.state != IpAttachState::Attached) + { + return Err(Error::unavail( + "cannot push all DPD state: IP attach/detach in progress", + )); } let sled_address = Ipv6Net(Ipv6Network::new(*sled_ip_address.ip(), 128).unwrap()); - for target_ip in ips - .iter() - .enumerate() - .filter(|(index, _)| { - if let Some(wanted_index) = ip_index_filter { - *index == wanted_index - } else { - true - } - }) - .map(|(_, ip)| ip) - { + // If all of our IPs are attached or are guaranteed to be owned + // by the saga calling this fn, then we need to disregard and + // remove conflicting rows. No other instance/service should be + // using these as its own, and we are dealing with detritus, e.g., + // the case where we have a concurrent stop -> detach followed + // by an attach to another instance, or other ongoing attach saga + // cleanup. 
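The block that follows tracks its first failure in `err_and_limit` and then unwinds only the NAT entries it actually created. That "apply all, roll back the applied prefix on failure" shape, distilled into a generic helper with no Omicron types:

```rust
use std::cell::RefCell;

// Applies `apply` to each item; on the first failure, runs `undo` over the
// successfully-applied prefix and returns the error.
fn apply_all<T, E>(
    items: &[T],
    mut apply: impl FnMut(&T) -> Result<(), E>,
    mut undo: impl FnMut(&T),
) -> Result<(), E> {
    let mut failed: Option<(E, usize)> = None;
    for (i, item) in items.iter().enumerate() {
        if let Err(e) = apply(item) {
            failed = Some((e, i));
            break;
        }
    }
    if let Some((e, limit)) = failed {
        // items[limit] never took effect; undo only items[..limit].
        for item in &items[..limit] {
            undo(item);
        }
        return Err(e);
    }
    Ok(())
}

fn main() {
    let applied = RefCell::new(Vec::new());
    let result = apply_all(
        &[1, 2, 3, 4],
        |&x| {
            if x == 3 {
                return Err("boom");
            }
            applied.borrow_mut().push(x);
            Ok(())
        },
        |&x| applied.borrow_mut().retain(|&y| y != x),
    );
    assert_eq!(result, Err("boom"));
    assert!(applied.borrow().is_empty()); // 1 and 2 were rolled back
}
```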
+ let mut err_and_limit = None; + for (i, external_ip) in ips_of_interest.iter().enumerate() { // For each external ip, add a nat entry to the database - self.ensure_nat_entry( - target_ip, - sled_address, - &network_interface, - mac_address, - opctx, - ) - .await?; + if let Ok(id) = self + .ensure_nat_entry( + external_ip, + sled_address, + &network_interface, + mac_address, + opctx, + ) + .await + { + nat_entries.push(id); + continue; + } + + // We seem to be blocked by a bad row -- take it out and retry. + // This will return Ok() for a non-existent row. + if let Err(e) = self + .external_ip_delete_dpd_config_inner(opctx, external_ip) + .await + { + err_and_limit = Some((e, i)); + break; + }; + + match self + .ensure_nat_entry( + external_ip, + sled_address, + &network_interface, + mac_address, + opctx, + ) + .await + { + Ok(id) => nat_entries.push(id), + Err(e) => { + err_and_limit = Some((e, i)); + break; + } + } } - // Notify dendrite that there are changes for it to reconcile. - // In the event of a failure to notify dendrite, we'll log an error - // and rely on dendrite's RPW timer to catch it up. - if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { - error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); - }; + // In the event of an unresolvable failure, we need to remove + // the entries we just added because the undo won't call into + // `instance_delete_dpd_config`. These entries won't stop a + // future caller, but it's better not to pollute switch state. + if let Some((e, max)) = err_and_limit { + for external_ip in &ips_of_interest[..max] { + let _ = self + .external_ip_delete_dpd_config_inner(opctx, external_ip) + .await; + } + return Err(e); + } - Ok(()) + self.notify_dendrite_nat_state(Some(instance_id), true).await?; + + Ok(nat_entries) } async fn ensure_nat_entry( @@ -396,7 +459,7 @@ impl super::Nexus { network_interface: &sled_agent_client::types::NetworkInterface, mac_address: macaddr::MacAddr6, opctx: &OpContext, - ) -> Result<(), Error> { + ) -> Result { match target_ip.ip { IpNetwork::V4(v4net) => { let nat_entry = Ipv4NatValues { @@ -409,9 +472,10 @@ impl super::Nexus { omicron_common::api::external::MacAddr(mac_address), ), }; - self.db_datastore + Ok(self + .db_datastore .ensure_ipv4_nat_entry(opctx, nat_entry) - .await?; + .await?) } IpNetwork::V6(_v6net) => { // TODO: implement handling of v6 nat. @@ -419,13 +483,16 @@ impl super::Nexus { internal_message: "ipv6 nat is not yet implemented".into(), }); } - }; - Ok(()) + } } /// Attempts to delete all of the Dendrite NAT configuration for the /// instance identified by `authz_instance`. /// + /// Unlike `instance_ensure_dpd_config`, this function will disregard the + /// attachment states of any external IPs because likely callers (instance + /// delete) cannot be piecewise undone. + /// /// # Return value /// /// - `Ok(())` if all NAT entries were successfully deleted. @@ -435,6 +502,12 @@ impl super::Nexus { /// - If an operation fails while this routine is walking NAT entries, it /// will continue trying to delete subsequent entries but will return the /// first error it encountered. + /// - `ip_filter`: An optional filter on the index into the instance's + /// external IP array. + /// - If this is `Some(id)`, this routine configures DPD state for only the + /// external IP with `id` in the collection returned from CRDB. + /// - If this is `None`, this routine configures DPD for all external + /// IPs. 
pub(crate) async fn instance_delete_dpd_config( &self, opctx: &OpContext, @@ -451,37 +524,122 @@ impl super::Nexus { .instance_lookup_external_ips(opctx, instance_id) .await?; - let mut errors = vec![]; for entry in external_ips { - // Soft delete the NAT entry - match self - .db_datastore - .ipv4_nat_delete_by_external_ip(&opctx, &entry) - .await - { - Ok(_) => Ok(()), - Err(err) => match err { - Error::ObjectNotFound { .. } => { - warn!(log, "no matching nat entries to soft delete"); - Ok(()) - } - _ => { - let message = format!( - "failed to delete nat entry due to error: {err:?}" - ); - error!(log, "{}", message); - Err(Error::internal_error(&message)) - } - }, - }?; + self.external_ip_delete_dpd_config_inner(opctx, &entry).await?; } + self.notify_dendrite_nat_state(Some(instance_id), false).await + } + + /// Attempts to delete Dendrite NAT configuration for a single external IP. + /// + /// This function is primarily used to detach an IP which currently belongs + /// to a known instance. + pub(crate) async fn external_ip_delete_dpd_config( + &self, + opctx: &OpContext, + external_ip: &ExternalIp, + ) -> Result<(), Error> { + let log = &self.log; + let instance_id = external_ip.parent_id; + + info!(log, "deleting individual NAT entry from dpd configuration"; + "instance_id" => ?instance_id, + "external_ip" => %external_ip.ip); + + self.external_ip_delete_dpd_config_inner(opctx, external_ip).await?; + + self.notify_dendrite_nat_state(instance_id, false).await + } + + /// Attempts to soft-delete Dendrite NAT configuration for a specific entry + /// via ID. + /// + /// This function is needed to safely cleanup in at least one unwind scenario + /// where a potential second user could need to use the same (IP, portset) pair, + /// e.g. a rapid reattach or a reallocated ephemeral IP. + pub(crate) async fn delete_dpd_config_by_entry( + &self, + opctx: &OpContext, + nat_entry: &Ipv4NatEntry, + ) -> Result<(), Error> { + let log = &self.log; + + info!(log, "deleting individual NAT entry from dpd configuration"; + "id" => ?nat_entry.id, + "version_added" => %nat_entry.external_address.0); + + match self.db_datastore.ipv4_nat_delete(&opctx, nat_entry).await { + Ok(_) => {} + Err(err) => match err { + Error::ObjectNotFound { .. } => { + warn!(log, "no matching nat entries to soft delete"); + } + _ => { + let message = format!( + "failed to delete nat entry due to error: {err:?}" + ); + error!(log, "{}", message); + return Err(Error::internal_error(&message)); + } + }, + } + + self.notify_dendrite_nat_state(None, false).await + } + + /// Soft-delete an individual external IP from the NAT RPW, without + /// triggering a Dendrite notification. + async fn external_ip_delete_dpd_config_inner( + &self, + opctx: &OpContext, + external_ip: &ExternalIp, + ) -> Result<(), Error> { + let log = &self.log; + + // Soft delete the NAT entry + match self + .db_datastore + .ipv4_nat_delete_by_external_ip(&opctx, external_ip) + .await + { + Ok(_) => Ok(()), + Err(err) => match err { + Error::ObjectNotFound { .. } => { + warn!(log, "no matching nat entries to soft delete"); + Ok(()) + } + _ => { + let message = format!( + "failed to delete nat entry due to error: {err:?}" + ); + error!(log, "{}", message); + Err(Error::internal_error(&message)) + } + }, + } + } + + /// Informs all available boundary switches that the set of NAT entries + /// has changed. + /// + /// When `fail_fast` is set, this function will return on any error when + /// acquiring a handle to a DPD client. 
Otherwise, it will attempt to notify + /// all clients and then finally return the first error. + async fn notify_dendrite_nat_state( + &self, + instance_id: Option, + fail_fast: bool, + ) -> Result<(), Error> { + // Querying boundary switches also requires fleet access and the use of the + // instance allocator context. let boundary_switches = self.boundary_switches(&self.opctx_alloc).await?; + let mut errors = vec![]; for switch in &boundary_switches { debug!(&self.log, "notifying dendrite of updates"; - "instance_id" => %authz_instance.id(), + "instance_id" => ?instance_id, "switch" => switch.to_string()); let client_result = self.dpd_clients.get(switch).ok_or_else(|| { @@ -494,7 +652,11 @@ impl super::Nexus { Ok(client) => client, Err(new_error) => { errors.push(new_error); - continue; + if fail_fast { + break; + } else { + continue; + } } }; @@ -506,7 +668,7 @@ impl super::Nexus { }; } - if let Some(e) = errors.into_iter().nth(0) { + if let Some(e) = errors.into_iter().next() { return Err(e); } @@ -525,58 +687,9 @@ impl super::Nexus { ) -> Result<(), Error> { self.delete_instance_v2p_mappings(opctx, authz_instance.id()).await?; - let external_ips = self - .datastore() - .instance_lookup_external_ips(opctx, authz_instance.id()) - .await?; - - let boundary_switches = self.boundary_switches(opctx).await?; - for external_ip in external_ips { - match self - .db_datastore - .ipv4_nat_delete_by_external_ip(&opctx, &external_ip) - .await - { - Ok(_) => Ok(()), - Err(err) => match err { - Error::ObjectNotFound { .. } => { - warn!( - self.log, - "no matching nat entries to soft delete" - ); - Ok(()) - } - _ => { - let message = format!( - "failed to delete nat entry due to error: {err:?}" - ); - error!(self.log, "{}", message); - Err(Error::internal_error(&message)) - } - }, - }?; - } - - for switch in &boundary_switches { - debug!(&self.log, "notifying dendrite of updates"; - "instance_id" => %authz_instance.id(), - "switch" => switch.to_string()); - - let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { - Error::internal_error(&format!( - "unable to find dendrite client for {switch}" - )) - })?; + self.instance_delete_dpd_config(opctx, authz_instance).await?; - // Notify dendrite that there are changes for it to reconcile. - // In the event of a failure to notify dendrite, we'll log an error - // and rely on dendrite's RPW timer to catch it up. 
- if let Err(e) = dpd_client.ipv4_nat_trigger_update().await { - error!(self.log, "failed to notify dendrite of nat updates"; "error" => ?e); - }; - } - - Ok(()) + self.notify_dendrite_nat_state(Some(authz_instance.id()), true).await } /// Given old and new instance runtime states, determines the desired @@ -715,24 +828,13 @@ impl super::Nexus { .fetch() .await?; - let boundary_switches = - self.boundary_switches(&self.opctx_alloc).await?; - - for switch in &boundary_switches { - let dpd_client = self.dpd_clients.get(switch).ok_or_else(|| { - Error::internal_error(&format!( - "could not find dpd client for {switch}" - )) - })?; - self.instance_ensure_dpd_config( - opctx, - instance_id, - &sled.address(), - None, - dpd_client, - ) - .await?; - } + self.instance_ensure_dpd_config( + opctx, + instance_id, + &sled.address(), + None, + ) + .await?; Ok(()) } diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs index 80bfd5ef22..d643969924 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -87,7 +87,9 @@ pub(crate) const MAX_NICS_PER_INSTANCE: usize = 8; // The value here is arbitrary, but we need *a* limit for the instance // create saga to have a bounded DAG. We might want to only enforce // this during instance create (rather than live attach) in future. -pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = 32; +pub(crate) const MAX_EXTERNAL_IPS_PER_INSTANCE: usize = + nexus_db_queries::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE + as usize; pub(crate) const MAX_EPHEMERAL_IPS_PER_INSTANCE: usize = 1; pub const MAX_VCPU_PER_INSTANCE: u16 = 64; diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index 8f9197b03b..445abd5daf 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -8,12 +8,22 @@ use std::net::{IpAddr, Ipv6Addr}; use crate::Nexus; use chrono::Utc; -use nexus_db_model::{ByteCount, SledReservationConstraints, SledResource}; -use nexus_db_queries::{context::OpContext, db, db::DataStore}; +use nexus_db_model::{ + ByteCount, ExternalIp, IpAttachState, Ipv4NatEntry, + SledReservationConstraints, SledResource, +}; +use nexus_db_queries::authz; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; +use nexus_db_queries::{authn, context::OpContext, db, db::DataStore}; +use omicron_common::api::external::Error; use omicron_common::api::external::InstanceState; +use serde::{Deserialize, Serialize}; use steno::ActionError; use uuid::Uuid; +use super::NexusActionContext; + /// Reserves resources for a new VMM whose instance has `ncpus` guest logical /// processors and `guest_memory` bytes of guest RAM. The selected sled is /// random within the set of sleds allowed by the supplied `constraints`. @@ -133,3 +143,325 @@ pub(super) async fn allocate_vmm_ipv6( .await .map_err(ActionError::action_failed) } + +/// External IP state needed for IP attach/detachment. +/// +/// This holds a record of the mid-processing external IP, where possible. +/// there are cases where this might not be known (e.g., double detach of an +/// ephemeral IP). +/// In particular we need to explicitly no-op if not `do_saga`, to prevent +/// failures borne from instance state changes from knocking out a valid IP binding. 
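The struct defined next carries an IP through these transitions. For orientation, here is a sketch of the state machine that `external_ip_complete_op` is asked to enforce; the transition set is inferred from the sagas in this patch (begin/complete/undo for attach and detach) and is not claimed to be the datastore's full rule set:

```rust
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum IpAttachState { Detached, Attaching, Attached, Detaching }

fn transition_allowed(from: IpAttachState, to: IpAttachState) -> bool {
    use IpAttachState::*;
    matches!(
        (from, to),
        (Detached, Attaching)   // begin attach
        | (Attaching, Attached) // complete attach
        | (Attaching, Detached) // undo a half-done attach
        | (Attached, Detaching) // begin detach
        | (Detaching, Detached) // complete detach
        | (Detaching, Attached) // undo a half-done detach
    )
}

fn main() {
    use IpAttachState::*;
    assert!(transition_allowed(Detached, Attaching));
    assert!(!transition_allowed(Detached, Attached)); // must pass Attaching
}
```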
+#[derive(Debug, Deserialize, Serialize)]
+pub struct ModifyStateForExternalIp {
+    pub external_ip: Option<ExternalIp>,
+    pub do_saga: bool,
+}
+
+/// Move an external IP from one state to another as a saga operation,
+/// returning `Ok(true)` if the record was successfully moved and `Ok(false)`
+/// if the record was lost.
+///
+/// Returns `Err` if given an illegal state transition or several rows
+/// were updated, which are programmer errors.
+pub async fn instance_ip_move_state(
+    sagactx: &NexusActionContext,
+    serialized_authn: &authn::saga::Serialized,
+    from: IpAttachState,
+    to: IpAttachState,
+    new_ip: &ModifyStateForExternalIp,
+) -> Result<bool, ActionError> {
+    let osagactx = sagactx.user_data();
+    let datastore = osagactx.datastore();
+    let opctx =
+        crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
+
+    if !new_ip.do_saga {
+        return Ok(true);
+    }
+    let Some(new_ip) = new_ip.external_ip.as_ref() else {
+        return Err(ActionError::action_failed(Error::internal_error(
+            "tried to `do_saga` without valid external IP",
+        )));
+    };
+
+    match datastore
+        .external_ip_complete_op(&opctx, new_ip.id, new_ip.kind, from, to)
+        .await
+        .map_err(ActionError::action_failed)?
+    {
+        0 => Ok(false),
+        1 => Ok(true),
+        _ => Err(ActionError::action_failed(Error::internal_error(
+            "ip state change affected > 1 row",
+        ))),
+    }
+}
+
+pub async fn instance_ip_get_instance_state(
+    sagactx: &NexusActionContext,
+    serialized_authn: &authn::saga::Serialized,
+    authz_instance: &authz::Instance,
+    verb: &str,
+) -> Result<Option<Uuid>, ActionError> {
+    // XXX: we can get instance state (but not sled ID) in same transaction
+    //      as attach (but not detach) with current design. We need to re-query
+    //      for sled ID anyhow, so keep consistent between attach/detach.
+    let osagactx = sagactx.user_data();
+    let datastore = osagactx.datastore();
+    let opctx =
+        crate::context::op_context_for_saga_action(&sagactx, serialized_authn);
+
+    let inst_and_vmm = datastore
+        .instance_fetch_with_vmm(&opctx, authz_instance)
+        .await
+        .map_err(ActionError::action_failed)?;
+
+    let found_state = inst_and_vmm.instance().runtime_state.nexus_state.0;
+    let mut sled_id = inst_and_vmm.sled_id();
+
+    // Arriving here means we started in a correct state (running/stopped).
+    // We need to consider how we interact with the other sagas/ops:
+    // - starting: our claim on an IP will block it from moving past
+    //             DPD_ensure and instance_start will undo. If we complete
+    //             before then, it can move past and will fill in routes/opte.
+    //             Act as though we have no sled_id.
+    // - stopping: this is not sagaized, and the propolis/sled-agent might
+    //             go away. Act as though stopped if we catch it here,
+    //             otherwise convert OPTE ensure to 'service unavailable'
+    //             and undo.
+    // - deleting: can only be called from stopped -- we won't push to dpd
+    //             or sled-agent, and IP record might be deleted or forcibly
+    //             detached. Catch here just in case.
+    match found_state {
+        InstanceState::Stopped
+        | InstanceState::Starting
+        | InstanceState::Stopping => {
+            sled_id = None;
+        }
+        InstanceState::Running => {}
+        state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state.into()) => {
+            return Err(ActionError::action_failed(Error::unavail(&format!(
+                "can't {verb} in transient state {state}"
+            ))))
+        }
+        InstanceState::Destroyed => {
+            return Err(ActionError::action_failed(Error::not_found_by_id(
+                omicron_common::api::external::ResourceType::Instance,
+                &authz_instance.id(),
+            )))
+        }
+        // Final cases are repairing/failed.
+ _ => { + return Err(ActionError::action_failed(Error::invalid_request( + "cannot modify instance IPs, instance is in unhealthy state", + ))) + } + } + + Ok(sled_id) +} + +/// Adds a NAT entry to DPD, routing packets bound for `target_ip` to a +/// target sled. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_add_nat( + sagactx: &NexusActionContext, + serialized_authn: &authn::saga::Serialized, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result, ActionError> { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let opctx = + crate::context::op_context_for_saga_action(&sagactx, serialized_authn); + + // No physical sled? Don't push NAT. + let Some(sled_uuid) = sled_uuid else { + return Ok(None); + }; + + if !target_ip.do_saga { + return Ok(None); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + // Querying sleds requires fleet access; use the instance allocator context + // for this. + let (.., sled) = LookupPath::new(&osagactx.nexus().opctx_alloc, &datastore) + .sled_id(sled_uuid) + .fetch() + .await + .map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .instance_ensure_dpd_config( + &opctx, + authz_instance.id(), + &sled.address(), + Some(target_ip.id), + ) + .await + .and_then(|v| { + v.into_iter().next().map(Some).ok_or_else(|| { + Error::internal_error( + "NAT RPW failed to return concrete NAT entry", + ) + }) + }) + .map_err(ActionError::action_failed) +} + +/// Remove a single NAT entry from DPD, dropping packets bound for `target_ip`. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_remove_nat( + sagactx: &NexusActionContext, + serialized_authn: &authn::saga::Serialized, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + let opctx = + crate::context::op_context_for_saga_action(&sagactx, serialized_authn); + + // No physical sled? Don't push NAT. + if sled_uuid.is_none() { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + osagactx + .nexus() + .external_ip_delete_dpd_config(&opctx, &target_ip) + .await + .map_err(ActionError::action_failed)?; + + Ok(()) +} + +/// Inform the OPTE port for a running instance that it should start +/// sending/receiving traffic on a given IP address. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_add_opte( + sagactx: &NexusActionContext, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + // No physical sled? Don't inform OPTE. 
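Each of these helpers opens with the same three-step guard: no sled means a successful no-op, `do_saga == false` (a detected double attach/detach) means a deliberate no-op, and `do_saga` without an IP record is a programmer error. Distilled, with `u128` standing in for `Uuid` and `String` for the error type:

```rust
struct ExternalIp { id: u128 }

struct ModifyStateForExternalIp {
    external_ip: Option<ExternalIp>,
    do_saga: bool,
}

// Returns Ok(None) for the legitimate no-op cases, Ok(Some(..)) when the
// caller should actually program DPD/OPTE, and Err for the impossible state.
fn guard(
    sled_uuid: Option<u128>,
    target_ip: ModifyStateForExternalIp,
) -> Result<Option<(u128, ExternalIp)>, String> {
    // 1. Instance has no physical sled (e.g. stopped): nothing to program.
    let Some(sled) = sled_uuid else { return Ok(None) };
    // 2. Idempotent repeat: the saga must deliberately do nothing.
    if !target_ip.do_saga {
        return Ok(None);
    }
    // 3. do_saga without a record can only be a bug in the saga itself.
    let ip = target_ip
        .external_ip
        .ok_or_else(|| "tried to `do_saga` without valid external IP".to_string())?;
    let _ = ip.id; // a real caller would use the record here
    Ok(Some((sled, ip)))
}

fn main() {
    // Stopped instance (no sled): quiet success, not an error.
    let r = guard(None, ModifyStateForExternalIp { external_ip: None, do_saga: true });
    assert!(matches!(r, Ok(None)));
}
```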
+ let Some(sled_uuid) = sled_uuid else { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + let sled_agent_body = + target_ip.try_into().map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .sled_client(&sled_uuid) + .await + .map_err(|_| { + ActionError::action_failed(Error::unavail( + "sled agent client went away mid-attach/detach", + )) + })? + .instance_put_external_ip(&authz_instance.id(), &sled_agent_body) + .await + .map_err(|e| { + ActionError::action_failed(match e { + progenitor_client::Error::CommunicationError(_) => { + Error::unavail( + "sled agent client went away mid-attach/detach", + ) + } + e => Error::internal_error(&format!("{e}")), + }) + })?; + + Ok(()) +} + +/// Inform the OPTE port for a running instance that it should cease +/// sending/receiving traffic on a given IP address. +/// +/// This call is a no-op if `sled_uuid` is `None` or the saga is explicitly +/// set to be inactive in event of double attach/detach (`!target_ip.do_saga`). +pub async fn instance_ip_remove_opte( + sagactx: &NexusActionContext, + authz_instance: &authz::Instance, + sled_uuid: Option, + target_ip: ModifyStateForExternalIp, +) -> Result<(), ActionError> { + let osagactx = sagactx.user_data(); + + // No physical sled? Don't inform OPTE. + let Some(sled_uuid) = sled_uuid else { + return Ok(()); + }; + + if !target_ip.do_saga { + return Ok(()); + } + let Some(target_ip) = target_ip.external_ip else { + return Err(ActionError::action_failed(Error::internal_error( + "tried to `do_saga` without valid external IP", + ))); + }; + + let sled_agent_body = + target_ip.try_into().map_err(ActionError::action_failed)?; + + osagactx + .nexus() + .sled_client(&sled_uuid) + .await + .map_err(|_| { + ActionError::action_failed(Error::unavail( + "sled agent client went away mid-attach/detach", + )) + })? 
+ .instance_delete_external_ip(&authz_instance.id(), &sled_agent_body) + .await + .map_err(|e| { + ActionError::action_failed(match e { + progenitor_client::Error::CommunicationError(_) => { + Error::unavail( + "sled agent client went away mid-attach/detach", + ) + } + e => Error::internal_error(&format!("{e}")), + }) + })?; + + Ok(()) +} diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index c4c9c4e083..3aa491d978 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -10,7 +10,7 @@ use crate::app::{ MAX_NICS_PER_INSTANCE, }; use crate::external_api::params; -use nexus_db_model::NetworkInterfaceKind; +use nexus_db_model::{ExternalIp, NetworkInterfaceKind}; use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::LookupPath; use nexus_db_queries::db::queries::network_interface::InsertError as InsertNicError; @@ -21,7 +21,9 @@ use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; +use omicron_common::api::external::NameOrId; use omicron_common::api::internal::shared::SwitchLocation; +use ref_cast::RefCast; use serde::Deserialize; use serde::Serialize; use slog::warn; @@ -223,7 +225,7 @@ impl NexusSaga for SagaInstanceCreate { SagaName::new(&format!("instance-create-external-ip{i}")); let mut subsaga_builder = DagBuilder::new(subsaga_name); subsaga_builder.append(Node::action( - "output", + format!("external-ip-{i}").as_str(), format!("CreateExternalIp{i}").as_str(), CREATE_EXTERNAL_IP.as_ref(), )); @@ -597,7 +599,7 @@ async fn sic_allocate_instance_snat_ip_undo( /// index `ip_index`, and return its ID if one is created (or None). async fn sic_allocate_instance_external_ip( sagactx: NexusActionContext, -) -> Result<(), ActionError> { +) -> Result, ActionError> { // XXX: may wish to restructure partially: we have at most one ephemeral // and then at most $n$ floating. let osagactx = sagactx.user_data(); @@ -607,7 +609,7 @@ async fn sic_allocate_instance_external_ip( let ip_index = repeat_saga_params.which; let Some(ip_params) = saga_params.create_params.external_ips.get(ip_index) else { - return Ok(()); + return Ok(None); }; let opctx = crate::context::op_context_for_saga_action( &sagactx, @@ -615,39 +617,80 @@ async fn sic_allocate_instance_external_ip( ); let instance_id = repeat_saga_params.instance_id; - match ip_params { + // We perform the 'complete_op' in this saga stage because our IPs are + // created in the attaching state, and we need to move them to attached. + // We *can* do so because the `creating` state will block the IP attach/detach + // sagas from running, so we can safely undo in event of later error in this saga + // without worrying they have been detached by another API call. + // Runtime state should never be able to make 'complete_op' fallible. + let ip = match ip_params { // Allocate a new IP address from the target, possibly default, pool - params::ExternalIpCreate::Ephemeral { ref pool_name } => { - let pool_name = - pool_name.as_ref().map(|name| db::model::Name(name.clone())); + params::ExternalIpCreate::Ephemeral { pool } => { + let pool = if let Some(name_or_id) = pool { + Some( + osagactx + .nexus() + .ip_pool_lookup(&opctx, name_or_id) + .map_err(ActionError::action_failed)? + .lookup_for(authz::Action::CreateChild) + .await + .map_err(ActionError::action_failed)? 
+ .0, + ) + } else { + None + }; + let ip_id = repeat_saga_params.new_id; datastore .allocate_instance_ephemeral_ip( &opctx, ip_id, instance_id, - pool_name, + pool, + true, ) .await - .map_err(ActionError::action_failed)?; + .map_err(ActionError::action_failed)? + .0 } // Set the parent of an existing floating IP to the new instance's ID. - params::ExternalIpCreate::Floating { ref floating_ip_name } => { - let floating_ip_name = db::model::Name(floating_ip_name.clone()); - let (.., authz_fip, db_fip) = LookupPath::new(&opctx, &datastore) - .project_id(saga_params.project_id) - .floating_ip_name(&floating_ip_name) - .fetch_for(authz::Action::Modify) - .await - .map_err(ActionError::action_failed)?; + params::ExternalIpCreate::Floating { floating_ip } => { + let (.., authz_fip) = match floating_ip { + NameOrId::Name(name) => LookupPath::new(&opctx, datastore) + .project_id(saga_params.project_id) + .floating_ip_name(db::model::Name::ref_cast(name)), + NameOrId::Id(id) => { + LookupPath::new(&opctx, datastore).floating_ip_id(*id) + } + } + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; datastore - .floating_ip_attach(&opctx, &authz_fip, &db_fip, instance_id) + .floating_ip_begin_attach(&opctx, &authz_fip, instance_id, true) .await - .map_err(ActionError::action_failed)?; + .map_err(ActionError::action_failed)? + .0 } - } - Ok(()) + }; + + // Ignore row count here, this is infallible with correct + // (state, state', kind) but may be zero on repeat call for + // idempotency. + _ = datastore + .external_ip_complete_op( + &opctx, + ip.id, + ip.kind, + nexus_db_model::IpAttachState::Attaching, + nexus_db_model::IpAttachState::Attached, + ) + .await + .map_err(ActionError::action_failed)?; + + Ok(Some(ip)) } async fn sic_allocate_instance_external_ip_undo( @@ -662,6 +705,16 @@ async fn sic_allocate_instance_external_ip_undo( &sagactx, &saga_params.serialized_authn, ); + + // We store and lookup `ExternalIp` so that we can detach + // and/or deallocate without double name resolution. + let new_ip = sagactx + .lookup::>(&format!("external-ip-{ip_index}"))?; + + let Some(ip) = new_ip else { + return Ok(()); + }; + let Some(ip_params) = saga_params.create_params.external_ips.get(ip_index) else { return Ok(()); @@ -669,18 +722,42 @@ async fn sic_allocate_instance_external_ip_undo( match ip_params { params::ExternalIpCreate::Ephemeral { .. } => { - let ip_id = repeat_saga_params.new_id; - datastore.deallocate_external_ip(&opctx, ip_id).await?; + datastore.deallocate_external_ip(&opctx, ip.id).await?; } - params::ExternalIpCreate::Floating { floating_ip_name } => { - let floating_ip_name = db::model::Name(floating_ip_name.clone()); - let (.., authz_fip, db_fip) = LookupPath::new(&opctx, &datastore) - .project_id(saga_params.project_id) - .floating_ip_name(&floating_ip_name) - .fetch_for(authz::Action::Modify) + params::ExternalIpCreate::Floating { .. 
} => { + let (.., authz_fip) = LookupPath::new(&opctx, &datastore) + .floating_ip_id(ip.id) + .lookup_for(authz::Action::Modify) + .await?; + + datastore + .floating_ip_begin_detach( + &opctx, + &authz_fip, + repeat_saga_params.instance_id, + true, + ) .await?; - datastore.floating_ip_detach(&opctx, &authz_fip, &db_fip).await?; + let n_rows = datastore + .external_ip_complete_op( + &opctx, + ip.id, + ip.kind, + nexus_db_model::IpAttachState::Detaching, + nexus_db_model::IpAttachState::Detached, + ) + .await + .map_err(ActionError::action_failed)?; + + if n_rows != 1 { + error!( + osagactx.log(), + "sic_allocate_instance_external_ip_undo: failed to \ + completely detach ip {}", + ip.id + ); + } } } Ok(()) @@ -953,7 +1030,7 @@ pub mod test { network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, + pool: None, }], disks: vec![params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { diff --git a/nexus/src/app/sagas/instance_delete.rs b/nexus/src/app/sagas/instance_delete.rs index 013bececee..aaf5dcb033 100644 --- a/nexus/src/app/sagas/instance_delete.rs +++ b/nexus/src/app/sagas/instance_delete.rs @@ -240,7 +240,7 @@ mod test { network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, + pool: None, }], disks: vec![params::InstanceDiskAttachment::Attach( params::InstanceDiskAttach { name: DISK_NAME.parse().unwrap() }, diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs new file mode 100644 index 0000000000..be7f81368e --- /dev/null +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -0,0 +1,583 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::instance_common::{ + instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, + instance_ip_move_state, instance_ip_remove_opte, ModifyStateForExternalIp, +}; +use super::{ActionRegistry, NexusActionContext, NexusSaga}; +use crate::app::sagas::declare_saga_actions; +use crate::app::{authn, authz, db}; +use crate::external_api::params; +use nexus_db_model::{IpAttachState, Ipv4NatEntry}; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_types::external_api::views; +use omicron_common::api::external::{Error, NameOrId}; +use ref_cast::RefCast; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use uuid::Uuid; + +// The IP attach/detach sagas do some resource locking -- because we +// allow them to be called in [Running, Stopped], they must contend +// with each other/themselves, instance start, instance delete, and +// the instance stop action (noting the latter is not a saga). +// +// The main means of access control here is an external IP's `state`. +// Entering either saga begins with an atomic swap from Attached/Detached +// to Attaching/Detaching. This prevents concurrent attach/detach on the +// same EIP, and prevents instance start and migrate from completing with an +// Error::unavail via instance_ensure_registered and/or DPD. +// +// Overlap with stop is handled by treating comms failures with +// sled-agent as temporary errors and unwinding. 
For the delete case, we +// allow the detach completion to have a missing record -- both instance delete +// and detach will leave NAT in the correct state. For attach, if we make it +// to completion and an IP is `detached`, we unwind as a precaution. +// See `instance_common::instance_ip_get_instance_state` for more info. +// +// One more consequence of sled state being able to change beneath us +// is that the central undo actions (DPD/OPTE state) *must* be best-effort. +// This is not bad per-se: instance stop does not itself remove NAT routing +// rules. The only reason these should fail is because an instance has stopped, +// or DPD has died. + +declare_saga_actions! { + instance_ip_attach; + ATTACH_EXTERNAL_IP -> "target_ip" { + + siia_begin_attach_ip + - siia_begin_attach_ip_undo + } + + INSTANCE_STATE -> "instance_state" { + + siia_get_instance_state + } + + REGISTER_NAT -> "nat_entry" { + + siia_nat + - siia_nat_undo + } + + ENSURE_OPTE_PORT -> "no_result1" { + + siia_update_opte + - siia_update_opte_undo + } + + COMPLETE_ATTACH -> "output" { + + siia_complete_attach + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct Params { + pub create_params: params::ExternalIpCreate, + pub authz_instance: authz::Instance, + pub project_id: Uuid, + /// Authentication context to use to fetch the instance's current state from + /// the database. + pub serialized_authn: authn::saga::Serialized, +} + +async fn siia_begin_attach_ip( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + match ¶ms.create_params { + // Allocate a new IP address from the target, possibly default, pool + params::ExternalIpCreate::Ephemeral { pool } => { + let pool = if let Some(name_or_id) = pool { + Some( + osagactx + .nexus() + .ip_pool_lookup(&opctx, name_or_id) + .map_err(ActionError::action_failed)? + .lookup_for(authz::Action::CreateChild) + .await + .map_err(ActionError::action_failed)? + .0, + ) + } else { + None + }; + + datastore + .allocate_instance_ephemeral_ip( + &opctx, + Uuid::new_v4(), + params.authz_instance.id(), + pool, + false, + ) + .await + .map_err(ActionError::action_failed) + .map(|(external_ip, do_saga)| ModifyStateForExternalIp { + external_ip: Some(external_ip), + do_saga, + }) + } + // Set the parent of an existing floating IP to the new instance's ID. 
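The `Floating` arm just below resolves a `NameOrId`: a floating IP named by string is only meaningful within a project, while an ID needs no scope. A stand-in sketch of that dispatch (local types; the real lookup goes through `LookupPath`):

```rust
enum NameOrId { Name(String), Id(u128) }

#[derive(Debug)]
enum FipSelector<'a> {
    /// Names are unique only within a project, so the project scopes them.
    ByName { project_id: u128, name: &'a str },
    /// IDs are globally unique; no project scoping is needed.
    ById(u128),
}

fn select_fip(project_id: u128, fip: &NameOrId) -> FipSelector<'_> {
    match fip {
        NameOrId::Name(name) => {
            FipSelector::ByName { project_id, name: name.as_str() }
        }
        NameOrId::Id(id) => FipSelector::ById(*id),
    }
}

fn main() {
    println!("{:?}", select_fip(7, &NameOrId::Name("fip0".into())));
    println!("{:?}", select_fip(7, &NameOrId::Id(42)));
}
```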
+ params::ExternalIpCreate::Floating { floating_ip } => { + let (.., authz_fip) = match floating_ip { + NameOrId::Name(name) => LookupPath::new(&opctx, datastore) + .project_id(params.project_id) + .floating_ip_name(db::model::Name::ref_cast(name)), + NameOrId::Id(id) => { + LookupPath::new(&opctx, datastore).floating_ip_id(*id) + } + } + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + datastore + .floating_ip_begin_attach( + &opctx, + &authz_fip, + params.authz_instance.id(), + false, + ) + .await + .map_err(ActionError::action_failed) + .map(|(external_ip, do_saga)| ModifyStateForExternalIp { + external_ip: Some(external_ip), + do_saga, + }) + } + } +} + +async fn siia_begin_attach_ip_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + warn!(log, "siia_begin_attach_ip_undo: Reverting detached->attaching"); + let params = sagactx.saga_params::()?; + let new_ip = sagactx.lookup::("target_ip")?; + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Attaching, + IpAttachState::Detached, + &new_ip, + ) + .await? + { + error!(log, "siia_begin_attach_ip_undo: external IP was deleted") + } + + Ok(()) +} + +async fn siia_get_instance_state( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let params = sagactx.saga_params::()?; + instance_ip_get_instance_state( + &sagactx, + ¶ms.serialized_authn, + ¶ms.authz_instance, + "attach", + ) + .await +} + +// XXX: Need to abstract over v4 and v6 NAT entries when the time comes. +async fn siia_nat( + sagactx: NexusActionContext, +) -> Result, ActionError> { + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + instance_ip_add_nat( + &sagactx, + ¶ms.serialized_authn, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await +} + +async fn siia_nat_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let osagactx = sagactx.user_data(); + let params = sagactx.saga_params::()?; + let nat_entry = sagactx.lookup::>("nat_entry")?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + let Some(nat_entry) = nat_entry else { + // Seeing `None` here means that we never pushed DPD state in + // the first instance. Nothing to undo. + return Ok(()); + }; + + // This requires some explanation in one case, where we can fail because an + // instance may have moved running -> stopped -> deleted. + // An instance delete will cause us to unwind and return to this stage *but* + // the ExternalIp will no longer have a useful parent (or even a + // different parent!). + // + // Internally, we delete the NAT entry *without* checking its instance state because + // it may either be `None`, or another instance may have attached. The + // first case is fine, but we need to consider NAT RPW semantics for the second: + // * The NAT entry table will ensure uniqueness on (external IP, low_port, + // high_port) for non-deleted rows. + // * Instance start and IP attach on a running instance will try to insert such + // a row, fail, and then delete this row before moving forwards. + // - Until either side deletes the row, we're polluting switch NAT. + // - We can't guarantee quick reuse to remove this rule via attach. + // - This will lead to a *new* NAT entry we need to protect, so we need to be careful + // that we only remove *our* incarnation. 
This is likelier to be hit + // if an ephemeral IP is deallocated, reallocated, and reused in a short timeframe. + // * Instance create will successfully set parent, since it won't attempt to ensure + // DPD has correct NAT state unless set to `start: true`. + // So it is safe/necessary to remove using the old entry here to target the + // exact row we created.. + + if let Err(e) = osagactx + .nexus() + .delete_dpd_config_by_entry(&opctx, &nat_entry) + .await + .map_err(ActionError::action_failed) + { + error!(log, "siia_nat_undo: failed to notify DPD: {e}"); + } + + Ok(()) +} + +async fn siia_update_opte( + sagactx: NexusActionContext, +) -> Result<(), ActionError> { + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + instance_ip_add_opte(&sagactx, ¶ms.authz_instance, sled_id, target_ip) + .await +} + +async fn siia_update_opte_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let sled_id = sagactx.lookup::>("instance_state")?; + let target_ip = sagactx.lookup::("target_ip")?; + if let Err(e) = instance_ip_remove_opte( + &sagactx, + ¶ms.authz_instance, + sled_id, + target_ip, + ) + .await + { + error!(log, "siia_update_opte_undo: failed to notify sled-agent: {e}"); + } + Ok(()) +} + +async fn siia_complete_attach( + sagactx: NexusActionContext, +) -> Result { + let log = sagactx.user_data().log(); + let params = sagactx.saga_params::()?; + let target_ip = sagactx.lookup::("target_ip")?; + + // There is a clause in `external_ip_complete_op` which specifically + // causes an unwind here if the instance delete saga fires and an IP is either + // detached or deleted. + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Attaching, + IpAttachState::Attached, + &target_ip, + ) + .await? + { + warn!(log, "siia_complete_attach: call was idempotent") + } + + target_ip + .external_ip + .ok_or_else(|| { + Error::internal_error( + "must always have a defined external IP during instance attach", + ) + }) + .and_then(TryInto::try_into) + .map_err(ActionError::action_failed) +} + +#[derive(Debug)] +pub struct SagaInstanceIpAttach; +impl NexusSaga for SagaInstanceIpAttach { + const NAME: &'static str = "external-ip-attach"; + type Params = Params; + + fn register_actions(registry: &mut ActionRegistry) { + instance_ip_attach_register_actions(registry); + } + + fn make_saga_dag( + _params: &Self::Params, + mut builder: steno::DagBuilder, + ) -> Result { + builder.append(attach_external_ip_action()); + builder.append(instance_state_action()); + builder.append(register_nat_action()); + builder.append(ensure_opte_port_action()); + builder.append(complete_attach_action()); + Ok(builder.build()?) 
+ } +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + use crate::app::{saga::create_saga_dag, sagas::test_helpers}; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; + use dropshot::test_util::ClientTestContext; + use nexus_db_model::{ExternalIp, IpKind}; + use nexus_db_queries::context::OpContext; + use nexus_test_utils::resource_helpers::{ + create_default_ip_pool, create_floating_ip, create_instance, + create_project, + }; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{Name, SimpleIdentity}; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "cafe"; + const INSTANCE_NAME: &str = "menu"; + const FIP_NAME: &str = "affogato"; + + pub async fn ip_manip_test_setup(client: &ClientTestContext) -> Uuid { + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + create_floating_ip( + client, + FIP_NAME, + &project.identity.id.to_string(), + None, + None, + ) + .await; + + project.id() + } + + pub async fn new_test_params( + opctx: &OpContext, + datastore: &db::DataStore, + use_floating: bool, + ) -> Params { + let create_params = if use_floating { + params::ExternalIpCreate::Floating { + floating_ip: FIP_NAME.parse::().unwrap().into(), + } + } else { + params::ExternalIpCreate::Ephemeral { pool: None } + }; + + let (.., authz_project, authz_instance) = + LookupPath::new(opctx, datastore) + .project_name(&db::model::Name(PROJECT_NAME.parse().unwrap())) + .instance_name(&db::model::Name(INSTANCE_NAME.parse().unwrap())) + .lookup_for(authz::Action::Modify) + .await + .unwrap(); + + Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + project_id: authz_project.id(), + create_params, + authz_instance, + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + let sled_agent = &cptestctx.sled_agent.sled_agent; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + + let dag = create_saga_dag::(params).unwrap(); + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + nexus.run_saga(saga).await.expect("Attach saga should succeed"); + } + + let instance_id = instance.id(); + + // Sled agent has a record of the new external IPs. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.iter().any(|v| matches!( + v, + omicron_sled_agent::params::InstanceExternalIpBody::Floating(_) + ))); + assert!(my_eips.iter().any(|v| matches!( + v, + omicron_sled_agent::params::InstanceExternalIpBody::Ephemeral(_) + ))); + + // DB has records for SNAT plus the new IPs. 
+ let db_eips = datastore + .instance_lookup_external_ips(&opctx, instance_id) + .await + .unwrap(); + assert_eq!(db_eips.len(), 3); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Ephemeral)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::Floating)); + assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat)); + } + + pub(crate) async fn verify_clean_slate( + cptestctx: &ControlPlaneTestContext, + instance_id: Uuid, + ) { + use nexus_db_queries::db::schema::external_ip::dsl; + + let sled_agent = &cptestctx.sled_agent.sled_agent; + let datastore = cptestctx.server.apictx().nexus.datastore(); + + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + // No Floating IPs exist in states other than 'detached'. + assert!(dsl::external_ip + .filter(dsl::kind.eq(IpKind::Floating)) + .filter(dsl::time_deleted.is_null()) + .filter(dsl::parent_id.eq(instance_id)) + .filter(dsl::state.ne(IpAttachState::Detached)) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // All ephemeral IPs are removed. + assert!(dsl::external_ip + .filter(dsl::kind.eq(IpKind::Ephemeral)) + .filter(dsl::time_deleted.is_null()) + .select(ExternalIp::as_select()) + .first_async::(&*conn) + .await + .optional() + .unwrap() + .is_none()); + + // No IP bindings remain on sled-agent. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.is_empty()); + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind::( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float) ), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_action_failure_can_unwind_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let log = &cptestctx.logctx.log; + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float in [false, true] { + test_helpers::action_failure_can_unwind_idempotently::< + SagaInstanceIpAttach, + _, + _, + >( + nexus, + || Box::pin(new_test_params(&opctx, datastore, use_float)), + || Box::pin(verify_clean_slate(&cptestctx, instance.id())), + log, + ) + .await; + } + } + + #[nexus_test(server = crate::Server)] + async fn test_actions_succeed_idempotently( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let _instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + for use_float 
in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + let dag = create_saga_dag::(params).unwrap(); + test_helpers::actions_succeed_idempotently(nexus, dag).await; + } + } +} diff --git a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs new file mode 100644 index 0000000000..da6c92077d --- /dev/null +++ b/nexus/src/app/sagas/instance_ip_detach.rs @@ -0,0 +1,551 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::instance_common::{ + instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state, + instance_ip_move_state, instance_ip_remove_nat, instance_ip_remove_opte, + ModifyStateForExternalIp, +}; +use super::{ActionRegistry, NexusActionContext, NexusSaga}; +use crate::app::sagas::declare_saga_actions; +use crate::app::{authn, authz, db}; +use crate::external_api::params; +use nexus_db_model::IpAttachState; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_types::external_api::views; +use omicron_common::api::external::NameOrId; +use ref_cast::RefCast; +use serde::Deserialize; +use serde::Serialize; +use steno::ActionError; +use uuid::Uuid; + +// This runs on similar logic to instance IP attach: see its head +// comment for an explanation of the structure wrt. other sagas. + +declare_saga_actions! { + instance_ip_detach; + DETACH_EXTERNAL_IP -> "target_ip" { + + siid_begin_detach_ip + - siid_begin_detach_ip_undo + } + + INSTANCE_STATE -> "instance_state" { + + siid_get_instance_state + } + + REMOVE_NAT -> "no_result0" { + + siid_nat + - siid_nat_undo + } + + REMOVE_OPTE_PORT -> "no_result1" { + + siid_update_opte + - siid_update_opte_undo + } + + COMPLETE_DETACH -> "output" { + + siid_complete_detach + } +} + +#[derive(Debug, Deserialize, Serialize)] +pub struct Params { + pub delete_params: params::ExternalIpDetach, + pub authz_instance: authz::Instance, + pub project_id: Uuid, + /// Authentication context to use to fetch the instance's current state from + /// the database. 
+ pub serialized_authn: authn::saga::Serialized, +} + +async fn siid_begin_detach_ip( + sagactx: NexusActionContext, +) -> Result { + let osagactx = sagactx.user_data(); + let datastore = osagactx.datastore(); + let params = sagactx.saga_params::()?; + let opctx = crate::context::op_context_for_saga_action( + &sagactx, + ¶ms.serialized_authn, + ); + + match ¶ms.delete_params { + params::ExternalIpDetach::Ephemeral => { + let eip = datastore + .instance_lookup_ephemeral_ip( + &opctx, + params.authz_instance.id(), + ) + .await + .map_err(ActionError::action_failed)?; + + if let Some(eph_ip) = eip { + datastore + .begin_deallocate_ephemeral_ip( + &opctx, + eph_ip.id, + params.authz_instance.id(), + ) + .await + .map_err(ActionError::action_failed) + .map(|external_ip| ModifyStateForExternalIp { + do_saga: external_ip.is_some(), + external_ip, + }) + } else { + Ok(ModifyStateForExternalIp { + do_saga: false, + external_ip: None, + }) + } + } + params::ExternalIpDetach::Floating { floating_ip } => { + let (.., authz_fip) = match floating_ip { + NameOrId::Name(name) => LookupPath::new(&opctx, datastore) + .project_id(params.project_id) + .floating_ip_name(db::model::Name::ref_cast(name)), + NameOrId::Id(id) => { + LookupPath::new(&opctx, datastore).floating_ip_id(*id) + } + } + .lookup_for(authz::Action::Modify) + .await + .map_err(ActionError::action_failed)?; + + datastore + .floating_ip_begin_detach( + &opctx, + &authz_fip, + params.authz_instance.id(), + false, + ) + .await + .map_err(ActionError::action_failed) + .map(|(external_ip, do_saga)| ModifyStateForExternalIp { + external_ip: Some(external_ip), + do_saga, + }) + } + } +} + +async fn siid_begin_detach_ip_undo( + sagactx: NexusActionContext, +) -> Result<(), anyhow::Error> { + let log = sagactx.user_data().log(); + warn!(log, "siid_begin_detach_ip_undo: Reverting attached->detaching"); + let params = sagactx.saga_params::()?; + let new_ip = sagactx.lookup::("target_ip")?; + if !instance_ip_move_state( + &sagactx, + ¶ms.serialized_authn, + IpAttachState::Detaching, + IpAttachState::Attached, + &new_ip, + ) + .await? 
+    {
+        error!(log, "siid_begin_detach_ip_undo: external IP was deleted")
+    }
+
+    Ok(())
+}
+
+async fn siid_get_instance_state(
+    sagactx: NexusActionContext,
+) -> Result<Option<Uuid>, ActionError> {
+    let params = sagactx.saga_params::<Params>()?;
+    instance_ip_get_instance_state(
+        &sagactx,
+        &params.serialized_authn,
+        &params.authz_instance,
+        "detach",
+    )
+    .await
+}
+
+async fn siid_nat(sagactx: NexusActionContext) -> Result<(), ActionError> {
+    let params = sagactx.saga_params::<Params>()?;
+    let sled_id = sagactx.lookup::<Option<Uuid>>("instance_state")?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+    instance_ip_remove_nat(
+        &sagactx,
+        &params.serialized_authn,
+        sled_id,
+        target_ip,
+    )
+    .await
+}
+
+async fn siid_nat_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let log = sagactx.user_data().log();
+    let params = sagactx.saga_params::<Params>()?;
+    let sled_id = sagactx.lookup::<Option<Uuid>>("instance_state")?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+    if let Err(e) = instance_ip_add_nat(
+        &sagactx,
+        &params.serialized_authn,
+        &params.authz_instance,
+        sled_id,
+        target_ip,
+    )
+    .await
+    {
+        error!(log, "siid_nat_undo: failed to notify DPD: {e}");
+    }
+
+    Ok(())
+}
+
+async fn siid_update_opte(
+    sagactx: NexusActionContext,
+) -> Result<(), ActionError> {
+    let params = sagactx.saga_params::<Params>()?;
+    let sled_id = sagactx.lookup::<Option<Uuid>>("instance_state")?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+    instance_ip_remove_opte(
+        &sagactx,
+        &params.authz_instance,
+        sled_id,
+        target_ip,
+    )
+    .await
+}
+
+async fn siid_update_opte_undo(
+    sagactx: NexusActionContext,
+) -> Result<(), anyhow::Error> {
+    let log = sagactx.user_data().log();
+    let params = sagactx.saga_params::<Params>()?;
+    let sled_id = sagactx.lookup::<Option<Uuid>>("instance_state")?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+    if let Err(e) = instance_ip_add_opte(
+        &sagactx,
+        &params.authz_instance,
+        sled_id,
+        target_ip,
+    )
+    .await
+    {
+        error!(log, "siid_update_opte_undo: failed to notify sled-agent: {e}");
+    }
+    Ok(())
+}
+
+async fn siid_complete_detach(
+    sagactx: NexusActionContext,
+) -> Result<Option<views::ExternalIp>, ActionError> {
+    let log = sagactx.user_data().log();
+    let params = sagactx.saga_params::<Params>()?;
+    let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
+
+    if !instance_ip_move_state(
+        &sagactx,
+        &params.serialized_authn,
+        IpAttachState::Detaching,
+        IpAttachState::Detached,
+        &target_ip,
+    )
+    .await?
+    {
+        warn!(
+            log,
+            "siid_complete_detach: external IP was deleted or call was idempotent"
+        )
+    }
+
+    target_ip
+        .external_ip
+        .map(TryInto::try_into)
+        .transpose()
+        .map_err(ActionError::action_failed)
+}
+
+#[derive(Debug)]
+pub struct SagaInstanceIpDetach;
+impl NexusSaga for SagaInstanceIpDetach {
+    const NAME: &'static str = "external-ip-detach";
+    type Params = Params;
+
+    fn register_actions(registry: &mut ActionRegistry) {
+        instance_ip_detach_register_actions(registry);
+    }
+
+    fn make_saga_dag(
+        _params: &Self::Params,
+        mut builder: steno::DagBuilder,
+    ) -> Result<steno::Dag, super::SagaInitError> {
+        builder.append(detach_external_ip_action());
+        builder.append(instance_state_action());
+        builder.append(remove_nat_action());
+        builder.append(remove_opte_port_action());
+        builder.append(complete_detach_action());
+        Ok(builder.build()?)
+ } +} + +#[cfg(test)] +pub(crate) mod test { + use super::*; + use crate::{ + app::{ + saga::create_saga_dag, + sagas::{ + instance_ip_attach::{self, test::ip_manip_test_setup}, + test_helpers, + }, + }, + Nexus, + }; + use async_bb8_diesel::AsyncRunQueryDsl; + use diesel::{ + ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper, + }; + use nexus_db_model::{ExternalIp, IpKind}; + use nexus_db_queries::context::OpContext; + use nexus_test_utils::resource_helpers::create_instance; + use nexus_test_utils_macros::nexus_test; + use omicron_common::api::external::{Name, SimpleIdentity}; + use std::sync::Arc; + + type ControlPlaneTestContext = + nexus_test_utils::ControlPlaneTestContext; + + const PROJECT_NAME: &str = "cafe"; + const INSTANCE_NAME: &str = "menu"; + const FIP_NAME: &str = "affogato"; + + async fn new_test_params( + opctx: &OpContext, + datastore: &db::DataStore, + use_floating: bool, + ) -> Params { + let delete_params = if use_floating { + params::ExternalIpDetach::Floating { + floating_ip: FIP_NAME.parse::().unwrap().into(), + } + } else { + params::ExternalIpDetach::Ephemeral + }; + + let (.., authz_project, authz_instance) = + LookupPath::new(opctx, datastore) + .project_name(&db::model::Name(PROJECT_NAME.parse().unwrap())) + .instance_name(&db::model::Name(INSTANCE_NAME.parse().unwrap())) + .lookup_for(authz::Action::Modify) + .await + .unwrap(); + + Params { + serialized_authn: authn::saga::Serialized::for_opctx(opctx), + project_id: authz_project.id(), + delete_params, + authz_instance, + } + } + + async fn attach_instance_ips(nexus: &Arc, opctx: &OpContext) { + let datastore = &nexus.db_datastore; + + let proj_name = db::model::Name(PROJECT_NAME.parse().unwrap()); + let inst_name = db::model::Name(INSTANCE_NAME.parse().unwrap()); + let lookup = LookupPath::new(opctx, datastore) + .project_name(&proj_name) + .instance_name(&inst_name); + + for use_float in [false, true] { + let params = instance_ip_attach::test::new_test_params( + opctx, datastore, use_float, + ) + .await; + nexus + .instance_attach_external_ip( + opctx, + &lookup, + ¶ms.create_params, + ) + .await + .unwrap(); + } + } + + #[nexus_test(server = crate::Server)] + async fn test_saga_basic_usage_succeeds( + cptestctx: &ControlPlaneTestContext, + ) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + let sled_agent = &cptestctx.sled_agent.sled_agent; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _ = ip_manip_test_setup(&client).await; + let instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + + let dag = create_saga_dag::(params).unwrap(); + let saga = nexus.create_runnable_saga(dag).await.unwrap(); + nexus.run_saga(saga).await.expect("Detach saga should succeed"); + } + + let instance_id = instance.id(); + + // Sled agent has removed its records of the external IPs. + let mut eips = sled_agent.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + assert!(my_eips.is_empty()); + + // DB only has record for SNAT. 
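+        // (Both attach-time IPs were moved detaching -> detached by the
+        // sagas above, so only the auto-allocated SNAT entry should remain.)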
+        let db_eips = datastore
+            .instance_lookup_external_ips(&opctx, instance_id)
+            .await
+            .unwrap();
+        assert_eq!(db_eips.len(), 1);
+        assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat));
+    }
+
+    pub(crate) async fn verify_clean_slate(
+        cptestctx: &ControlPlaneTestContext,
+        instance_id: Uuid,
+    ) {
+        use nexus_db_queries::db::schema::external_ip::dsl;
+
+        let opctx = test_helpers::test_opctx(cptestctx);
+        let sled_agent = &cptestctx.sled_agent.sled_agent;
+        let datastore = cptestctx.server.apictx().nexus.datastore();
+
+        let conn = datastore.pool_connection_for_tests().await.unwrap();
+
+        // No IPs in transitional states w/ current instance.
+        assert!(dsl::external_ip
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::parent_id.eq(instance_id))
+            .filter(dsl::state.ne(IpAttachState::Attached))
+            .select(ExternalIp::as_select())
+            .first_async::<ExternalIp>(&*conn)
+            .await
+            .optional()
+            .unwrap()
+            .is_none());
+
+        // No external IPs in detached state.
+        assert!(dsl::external_ip
+            .filter(dsl::time_deleted.is_null())
+            .filter(dsl::state.eq(IpAttachState::Detached))
+            .select(ExternalIp::as_select())
+            .first_async::<ExternalIp>(&*conn)
+            .await
+            .optional()
+            .unwrap()
+            .is_none());
+
+        // Instance still has one Ephemeral IP, and one Floating IP.
+        let db_eips = datastore
+            .instance_lookup_external_ips(&opctx, instance_id)
+            .await
+            .unwrap();
+        assert_eq!(db_eips.len(), 3);
+        assert!(db_eips.iter().any(|v| v.kind == IpKind::Ephemeral));
+        assert!(db_eips.iter().any(|v| v.kind == IpKind::Floating));
+        assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat));
+
+        // Both IP bindings (ephemeral + floating) remain on sled-agent,
+        // since an unwound detach leaves the IPs attached.
+        let eips = &*sled_agent.external_ips.lock().await;
+        for (_nic_id, eip_set) in eips {
+            assert_eq!(eip_set.len(), 2);
+        }
+    }
+
+    #[nexus_test(server = crate::Server)]
+    async fn test_action_failure_can_unwind(
+        cptestctx: &ControlPlaneTestContext,
+    ) {
+        let log = &cptestctx.logctx.log;
+        let client = &cptestctx.external_client;
+        let apictx = &cptestctx.server.apictx();
+        let nexus = &apictx.nexus;
+
+        let opctx = test_helpers::test_opctx(cptestctx);
+        let datastore = &nexus.db_datastore;
+        let _project_id = ip_manip_test_setup(&client).await;
+        let instance =
+            create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;
+
+        attach_instance_ips(nexus, &opctx).await;
+
+        for use_float in [false, true] {
+            test_helpers::action_failure_can_unwind::<SagaInstanceIpDetach, _, _>(
+                nexus,
+                || Box::pin(new_test_params(&opctx, datastore, use_float)),
+                || Box::pin(verify_clean_slate(&cptestctx, instance.id())),
+                log,
+            )
+            .await;
+        }
+    }
+
+    #[nexus_test(server = crate::Server)]
+    async fn test_action_failure_can_unwind_idempotently(
+        cptestctx: &ControlPlaneTestContext,
+    ) {
+        let log = &cptestctx.logctx.log;
+        let client = &cptestctx.external_client;
+        let apictx = &cptestctx.server.apictx();
+        let nexus = &apictx.nexus;
+
+        let opctx = test_helpers::test_opctx(cptestctx);
+        let datastore = &nexus.db_datastore;
+        let _project_id = ip_manip_test_setup(&client).await;
+        let instance =
+            create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;
+
+        attach_instance_ips(nexus, &opctx).await;
+
+        for use_float in [false, true] {
+            test_helpers::action_failure_can_unwind_idempotently::<
+                SagaInstanceIpDetach,
+                _,
+                _,
+            >(
+                nexus,
+                || Box::pin(new_test_params(&opctx, datastore, use_float)),
+                || Box::pin(verify_clean_slate(&cptestctx, instance.id())),
+                log,
+            )
+            .await;
+        }
+    }
+
+    #[nexus_test(server = crate::Server)]
+    async fn test_actions_succeed_idempotently(
+        cptestctx: &ControlPlaneTestContext,
+    ) {
+        let client
= &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + let opctx = test_helpers::test_opctx(cptestctx); + let datastore = &nexus.db_datastore; + let _project_id = ip_manip_test_setup(&client).await; + let _instance = + create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + + attach_instance_ips(nexus, &opctx).await; + + for use_float in [false, true] { + let params = new_test_params(&opctx, datastore, use_float).await; + let dag = create_saga_dag::(params).unwrap(); + test_helpers::actions_succeed_idempotently(nexus, dag).await; + } + } +} diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index 9d12bd8031..92c927e1ce 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -405,35 +405,12 @@ async fn sis_dpd_ensure( .await .map_err(ActionError::action_failed)?; - // Querying boundary switches also requires fleet access and the use of the - // instance allocator context. - let boundary_switches = osagactx + osagactx .nexus() - .boundary_switches(&osagactx.nexus().opctx_alloc) + .instance_ensure_dpd_config(&opctx, instance_id, &sled.address(), None) .await .map_err(ActionError::action_failed)?; - for switch in boundary_switches { - let dpd_client = - osagactx.nexus().dpd_clients.get(&switch).ok_or_else(|| { - ActionError::action_failed(Error::internal_error(&format!( - "unable to find client for switch {switch}" - ))) - })?; - - osagactx - .nexus() - .instance_ensure_dpd_config( - &opctx, - instance_id, - &sled.address(), - None, - dpd_client, - ) - .await - .map_err(ActionError::action_failed)?; - } - Ok(()) } diff --git a/nexus/src/app/sagas/mod.rs b/nexus/src/app/sagas/mod.rs index c5918d32ef..1bd85ecf32 100644 --- a/nexus/src/app/sagas/mod.rs +++ b/nexus/src/app/sagas/mod.rs @@ -26,6 +26,8 @@ pub mod image_delete; mod instance_common; pub mod instance_create; pub mod instance_delete; +pub mod instance_ip_attach; +pub mod instance_ip_detach; pub mod instance_migrate; pub mod instance_start; pub mod loopback_address_create; @@ -130,6 +132,12 @@ fn make_action_registry() -> ActionRegistry { ::register_actions( &mut registry, ); + ::register_actions( + &mut registry, + ); + ::register_actions( + &mut registry, + ); ::register_actions( &mut registry, ); diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index 65b03a9fdf..a6cb9e80fe 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -142,6 +142,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(floating_ip_create)?; api.register(floating_ip_view)?; api.register(floating_ip_delete)?; + api.register(floating_ip_attach)?; + api.register(floating_ip_detach)?; api.register(disk_list)?; api.register(disk_create)?; @@ -200,6 +202,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(instance_network_interface_delete)?; api.register(instance_external_ip_list)?; + api.register(instance_ephemeral_ip_attach)?; + api.register(instance_ephemeral_ip_detach)?; api.register(vpc_router_list)?; api.register(vpc_router_view)?; @@ -1976,6 +1980,69 @@ async fn floating_ip_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Attach a floating IP to an instance or other resource +#[endpoint { + method = POST, + path = "/v1/floating-ips/{floating_ip}/attach", + tags = ["floating-ips"], +}] +async fn floating_ip_attach( + rqctx: RequestContext>, + 
path_params: Path, + query_params: Query, + target: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let floating_ip_selector = params::FloatingIpSelector { + floating_ip: path.floating_ip, + project: query.project, + }; + let ip = nexus + .floating_ip_attach( + &opctx, + floating_ip_selector, + target.into_inner(), + ) + .await?; + Ok(HttpResponseAccepted(ip)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Detach a floating IP from an instance or other resource +#[endpoint { + method = POST, + path = "/v1/floating-ips/{floating_ip}/detach", + tags = ["floating-ips"], +}] +async fn floating_ip_detach( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let floating_ip_selector = params::FloatingIpSelector { + floating_ip: path.floating_ip, + project: query.project, + }; + let fip_lookup = + nexus.floating_ip_lookup(&opctx, floating_ip_selector)?; + let ip = nexus.floating_ip_detach(&opctx, fip_lookup).await?; + Ok(HttpResponseAccepted(ip)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Disks /// List disks @@ -3884,6 +3951,79 @@ async fn instance_external_ip_list( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } +/// Allocate and attach an ephemeral IP to an instance +#[endpoint { + method = POST, + path = "/v1/instances/{instance}/external-ips/ephemeral", + tags = ["instances"], +}] +async fn instance_ephemeral_ip_attach( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, + ip_to_create: TypedBody, +) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, instance_selector)?; + let ip = nexus + .instance_attach_external_ip( + &opctx, + &instance_lookup, + ¶ms::ExternalIpCreate::Ephemeral { + pool: ip_to_create.into_inner().pool, + }, + ) + .await?; + Ok(HttpResponseAccepted(ip)) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + +/// Detach and deallocate an ephemeral IP from an instance +#[endpoint { + method = DELETE, + path = "/v1/instances/{instance}/external-ips/ephemeral", + tags = ["instances"], +}] +async fn instance_ephemeral_ip_detach( + rqctx: RequestContext>, + path_params: Path, + query_params: Query, +) -> Result { + let apictx = rqctx.context(); + let handler = async { + let opctx = crate::context::op_context_for_external_api(&rqctx).await?; + let nexus = &apictx.nexus; + let path = path_params.into_inner(); + let query = query_params.into_inner(); + let instance_selector = params::InstanceSelector { + project: query.project, + instance: path.instance, + }; + let instance_lookup = + nexus.instance_lookup(&opctx, 
instance_selector)?; + nexus + .instance_detach_external_ip( + &opctx, + &instance_lookup, + ¶ms::ExternalIpDetach::Ephemeral, + ) + .await?; + Ok(HttpResponseDeleted()) + }; + apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await +} + // Snapshots /// List snapshots diff --git a/nexus/test-utils/src/resource_helpers.rs b/nexus/test-utils/src/resource_helpers.rs index 4fe03f204c..d82a934686 100644 --- a/nexus/test-utils/src/resource_helpers.rs +++ b/nexus/test-utils/src/resource_helpers.rs @@ -492,6 +492,7 @@ pub async fn create_instance( Vec::::new(), // External IPs= Vec::::new(), + true, ) .await } @@ -504,6 +505,7 @@ pub async fn create_instance_with( nics: ¶ms::InstanceNetworkInterfaceAttachment, disks: Vec, external_ips: Vec, + start: bool, ) -> Instance { let url = format!("/v1/instances?project={}", project_name); object_create( @@ -523,7 +525,7 @@ pub async fn create_instance_with( network_interfaces: nics.clone(), external_ips, disks, - start: true, + start, }, ) .await diff --git a/nexus/tests/integration_tests/disks.rs b/nexus/tests/integration_tests/disks.rs index b9023a8212..379042c849 100644 --- a/nexus/tests/integration_tests/disks.rs +++ b/nexus/tests/integration_tests/disks.rs @@ -1747,6 +1747,7 @@ async fn create_instance_with_disk(client: &ClientTestContext) { params::InstanceDiskAttach { name: DISK_NAME.parse().unwrap() }, )], Vec::::new(), + true, ) .await; } diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 8beffe43a5..4f606f2bff 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -390,6 +390,12 @@ pub static DEMO_INSTANCE_DISKS_DETACH_URL: Lazy = Lazy::new(|| { *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR ) }); +pub static DEMO_INSTANCE_EPHEMERAL_IP_URL: Lazy = Lazy::new(|| { + format!( + "/v1/instances/{}/external-ips/ephemeral?{}", + *DEMO_INSTANCE_NAME, *DEMO_PROJECT_SELECTOR + ) +}); pub static DEMO_INSTANCE_NICS_URL: Lazy = Lazy::new(|| { format!( "/v1/network-interfaces?project={}&instance={}", @@ -414,7 +420,7 @@ pub static DEMO_INSTANCE_CREATE: Lazy = user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some(DEMO_IP_POOL_NAME.clone()), + pool: Some(DEMO_IP_POOL_NAME.clone().into()), }], disks: vec![], start: true, @@ -720,6 +726,19 @@ pub static DEMO_FLOAT_IP_URL: Lazy = Lazy::new(|| { ) }); +pub static DEMO_FLOATING_IP_ATTACH_URL: Lazy = Lazy::new(|| { + format!( + "/v1/floating-ips/{}/attach?{}", + *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_SELECTOR + ) +}); +pub static DEMO_FLOATING_IP_DETACH_URL: Lazy = Lazy::new(|| { + format!( + "/v1/floating-ips/{}/detach?{}", + *DEMO_FLOAT_IP_NAME, *DEMO_PROJECT_SELECTOR + ) +}); + pub static DEMO_FLOAT_IP_CREATE: Lazy = Lazy::new(|| params::FloatingIpCreate { identity: IdentityMetadataCreateParams { @@ -730,6 +749,13 @@ pub static DEMO_FLOAT_IP_CREATE: Lazy = pool: None, }); +pub static DEMO_FLOAT_IP_ATTACH: Lazy = + Lazy::new(|| params::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: DEMO_FLOAT_IP_NAME.clone().into(), + }); +pub static DEMO_EPHEMERAL_IP_ATTACH: Lazy = + Lazy::new(|| params::EphemeralIpCreate { pool: None }); // Identity providers pub const IDENTITY_PROVIDERS_URL: &'static str = "/v1/system/identity-providers?silo=demo-silo"; @@ -1767,6 +1793,18 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { allowed_methods: 
vec![AllowedMethod::Get], }, + VerifyEndpoint { + url: &DEMO_INSTANCE_EPHEMERAL_IP_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_EPHEMERAL_IP_ATTACH).unwrap() + ), + AllowedMethod::Delete, + ], + }, + /* IAM */ VerifyEndpoint { @@ -2240,5 +2278,27 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { AllowedMethod::Delete, ], }, + + VerifyEndpoint { + url: &DEMO_FLOATING_IP_ATTACH_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_FLOAT_IP_ATTACH).unwrap(), + ), + ], + }, + + VerifyEndpoint { + url: &DEMO_FLOATING_IP_DETACH_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&()).unwrap(), + ), + ], + }, ] }); diff --git a/nexus/tests/integration_tests/external_ips.rs b/nexus/tests/integration_tests/external_ips.rs index 3b6127ceb1..57f813d505 100644 --- a/nexus/tests/integration_tests/external_ips.rs +++ b/nexus/tests/integration_tests/external_ips.rs @@ -7,6 +7,7 @@ use std::net::IpAddr; use std::net::Ipv4Addr; +use crate::integration_tests::instances::fetch_instance_external_ips; use crate::integration_tests::instances::instance_simulate; use dropshot::test_util::ClientTestContext; use dropshot::HttpErrorResponseBody; @@ -30,12 +31,14 @@ use nexus_test_utils::resource_helpers::object_delete_error; use nexus_test_utils_macros::nexus_test; use nexus_types::external_api::params; use nexus_types::external_api::shared; +use nexus_types::external_api::views; use nexus_types::external_api::views::FloatingIp; use nexus_types::identity::Resource; use omicron_common::address::IpRange; use omicron_common::address::Ipv4Range; use omicron_common::api::external::IdentityMetadataCreateParams; use omicron_common::api::external::Instance; +use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use uuid::Uuid; @@ -47,10 +50,33 @@ const PROJECT_NAME: &str = "rootbeer-float"; const FIP_NAMES: &[&str] = &["vanilla", "chocolate", "strawberry", "pistachio", "caramel"]; +const INSTANCE_NAMES: &[&str] = &["anonymous-diner", "anonymous-restaurant"]; + pub fn get_floating_ips_url(project_name: &str) -> String { format!("/v1/floating-ips?project={project_name}") } +pub fn instance_ephemeral_ip_url( + instance_name: &str, + project_name: &str, +) -> String { + format!("/v1/instances/{instance_name}/external-ips/ephemeral?project={project_name}") +} + +pub fn attach_floating_ip_url( + floating_ip_name: &str, + project_name: &str, +) -> String { + format!("/v1/floating-ips/{floating_ip_name}/attach?project={project_name}") +} + +pub fn detach_floating_ip_url( + floating_ip_name: &str, + project_name: &str, +) -> String { + format!("/v1/floating-ips/{floating_ip_name}/detach?project={project_name}") +} + pub fn get_floating_ip_by_name_url( fip_name: &str, project_name: &str, @@ -392,7 +418,9 @@ async fn test_floating_ip_delete(cptestctx: &ControlPlaneTestContext) { } #[nexus_test] -async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { +async fn test_floating_ip_create_attachment( + cptestctx: &ControlPlaneTestContext, +) { let client = &cptestctx.external_client; let apictx = &cptestctx.server.apictx(); let nexus = &apictx.nexus; @@ -410,16 +438,13 @@ async fn test_floating_ip_attachment(cptestctx: 
&ControlPlaneTestContext) { .await; // Bind the floating IP to an instance at create time. - let instance_name = "anonymous-diner"; - let instance = create_instance_with( - &client, - PROJECT_NAME, + let instance_name = INSTANCE_NAMES[0]; + let instance = instance_for_external_ips( + client, instance_name, - ¶ms::InstanceNetworkInterfaceAttachment::Default, - vec![], - vec![params::ExternalIpCreate::Floating { - floating_ip_name: FIP_NAMES[0].parse().unwrap(), - }], + true, + false, + &FIP_NAMES[..1], ) .await; @@ -430,20 +455,12 @@ async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { assert_eq!(fetched_fip.instance_id, Some(instance.identity.id)); // Try to delete the floating IP, which should fail. - let error: HttpErrorResponseBody = NexusRequest::new( - RequestBuilder::new( - client, - Method::DELETE, - &get_floating_ip_by_id_url(&fip.identity.id), - ) - .expect_status(Some(StatusCode::BAD_REQUEST)), + let error = object_delete_error( + client, + &get_floating_ip_by_id_url(&fip.identity.id), + StatusCode::BAD_REQUEST, ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap() - .parsed_body() - .unwrap(); + .await; assert_eq!( error.message, format!("Floating IP cannot be deleted while attached to an instance"), @@ -497,6 +514,340 @@ async fn test_floating_ip_attachment(cptestctx: &ControlPlaneTestContext) { .unwrap(); } +#[nexus_test] +async fn test_external_ip_live_attach_detach( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 2 instances, and a floating IP for each instance. + // One instance will be started, and one will be stopped. + let mut fips = vec![]; + for i in 0..2 { + fips.push( + create_floating_ip( + client, + FIP_NAMES[i], + project.identity.name.as_str(), + None, + None, + ) + .await, + ); + } + + let mut instances = vec![]; + for (i, start) in [false, true].iter().enumerate() { + let instance = instance_for_external_ips( + client, + INSTANCE_NAMES[i], + *start, + false, + &[], + ) + .await; + + if *start { + instance_simulate(nexus, &instance.identity.id).await; + instance_simulate(nexus, &instance.identity.id).await; + } + + // Verify that each instance has no external IPs. + assert_eq!( + fetch_instance_external_ips( + client, + INSTANCE_NAMES[i], + PROJECT_NAME + ) + .await + .len(), + 0 + ); + + instances.push(instance); + } + + // Attach a floating IP and ephemeral IP to each instance. + let mut recorded_ephs = vec![]; + for (instance, fip) in instances.iter().zip(&fips) { + let instance_name = instance.identity.name.as_str(); + let eph_resp = ephemeral_ip_attach(client, instance_name, None).await; + let fip_resp = floating_ip_attach( + client, + instance_name, + fip.identity.name.as_str(), + ) + .await; + + // Verify both appear correctly. + // This implicitly checks FIP parent_id matches the instance, + // and state has fully moved into 'Attached'. + let eip_list = + fetch_instance_external_ips(client, instance_name, PROJECT_NAME) + .await; + + assert_eq!(eip_list.len(), 2); + assert!(eip_list.contains(&eph_resp)); + assert!(eip_list + .iter() + .any(|v| matches!(v, views::ExternalIp::Floating(..)) + && v.ip() == fip_resp.ip)); + assert_eq!(fip.ip, fip_resp.ip); + + // Check for idempotency: repeat requests should return same values. 
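+        // (Attach is expected to short-circuit rather than error when the IP
+        // is already in the 'attached' state, so both repeated calls below
+        // should observe bodies identical to the first responses.)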
+ let eph_resp_2 = ephemeral_ip_attach(client, instance_name, None).await; + let fip_resp_2 = floating_ip_attach( + client, + instance_name, + fip.identity.name.as_str(), + ) + .await; + + assert_eq!(eph_resp, eph_resp_2); + assert_eq!(fip_resp.ip, fip_resp_2.ip); + + recorded_ephs.push(eph_resp); + } + + // Detach a floating IP and ephemeral IP from each instance. + for (instance, fip) in instances.iter().zip(&fips) { + let instance_name = instance.identity.name.as_str(); + ephemeral_ip_detach(client, instance_name).await; + let fip_resp = + floating_ip_detach(client, fip.identity.name.as_str()).await; + + // Verify both are removed, and that their bodies match the known FIP/EIP combo. + let eip_list = + fetch_instance_external_ips(client, instance_name, PROJECT_NAME) + .await; + + assert_eq!(eip_list.len(), 0); + assert_eq!(fip.ip, fip_resp.ip); + + // Check for idempotency: repeat requests should return same values for FIP, + // but in ephemeral case there is no currently known IP so we return an error. + let fip_resp_2 = + floating_ip_detach(client, fip.identity.name.as_str()).await; + assert_eq!(fip_resp.ip, fip_resp_2.ip); + + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + let error = + object_delete_error(client, &url, StatusCode::BAD_REQUEST).await; + assert_eq!( + error.message, + "instance does not have an ephemeral IP attached".to_string() + ); + } +} + +#[nexus_test] +async fn test_external_ip_attach_detach_fail_if_in_use_by_other( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + let apictx = &cptestctx.server.apictx(); + let nexus = &apictx.nexus; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 2 instances, bind a FIP to each. + let mut instances = vec![]; + let mut fips = vec![]; + for i in 0..2 { + let fip = create_floating_ip( + client, + FIP_NAMES[i], + project.identity.name.as_str(), + None, + None, + ) + .await; + let instance = instance_for_external_ips( + client, + INSTANCE_NAMES[i], + true, + false, + &[FIP_NAMES[i]], + ) + .await; + + instance_simulate(nexus, &instance.identity.id).await; + instance_simulate(nexus, &instance.identity.id).await; + + instances.push(instance); + fips.push(fip); + } + + // Attach in-use FIP to *other* instance should fail. + let url = + attach_floating_ip_url(fips[1].identity.name.as_str(), PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: INSTANCE_NAMES[0].parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!(error.message, "floating IP cannot be attached to one instance while still attached to another".to_string()); +} + +#[nexus_test] +async fn test_external_ip_attach_fails_after_maximum( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + let project = create_project(client, PROJECT_NAME).await; + + // Create 33 floating IPs, and bind the first 32 to an instance. 
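+    // (32 appears to be the per-instance external IP cap enforced by Nexus;
+    // the 33rd floating IP and any ephemeral IP are both rejected below with
+    // the "may not have more than 32 external IP addresses" error.)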
+ let mut fip_names = vec![]; + for i in 0..33 { + let fip_name = format!("fip-{i}"); + create_floating_ip( + client, + &fip_name, + project.identity.name.as_str(), + None, + None, + ) + .await; + fip_names.push(fip_name); + } + + let fip_name_slice = + fip_names.iter().map(String::as_str).collect::>(); + let instance_name = INSTANCE_NAMES[0]; + instance_for_external_ips( + client, + instance_name, + true, + false, + &fip_name_slice[..32], + ) + .await; + + // Attempt to attach the final FIP should fail. + let url = attach_floating_ip_url(fip_name_slice[32], PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: instance_name.parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "an instance may not have more than 32 external IP addresses" + .to_string() + ); + + // Attempt to attach an ephemeral IP should fail. + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::EphemeralIpCreate { pool: None })) + .expect_status(Some(StatusCode::BAD_REQUEST)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "an instance may not have more than 32 external IP addresses" + .to_string() + ); +} + +#[nexus_test] +async fn test_external_ip_attach_ephemeral_at_pool_exhaustion( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; + let other_pool_range = IpRange::V4( + Ipv4Range::new(Ipv4Addr::new(10, 1, 0, 1), Ipv4Addr::new(10, 1, 0, 1)) + .unwrap(), + ); + create_ip_pool(&client, "other-pool", Some(other_pool_range)).await; + let silo_id = DEFAULT_SILO.id(); + link_ip_pool(&client, "other-pool", &silo_id, false).await; + + create_project(client, PROJECT_NAME).await; + + // Create two instances, to which we will later add eph IPs from 'other-pool'. + for name in &INSTANCE_NAMES[..2] { + instance_for_external_ips(client, name, false, false, &[]).await; + } + + let pool_name: Name = "other-pool".parse().unwrap(); + + // Attach a new EIP from other-pool to both instances. + // This should succeed for the first, and fail for the second + // due to pool exhaustion. + let eph_resp = ephemeral_ip_attach( + client, + INSTANCE_NAMES[0], + Some(pool_name.as_str()), + ) + .await; + assert_eq!(eph_resp.ip(), other_pool_range.first_address()); + assert_eq!(eph_resp.ip(), other_pool_range.last_address()); + + let url = instance_ephemeral_ip_url(INSTANCE_NAMES[1], PROJECT_NAME); + let error: HttpErrorResponseBody = NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::ExternalIpCreate::Ephemeral { + pool: Some(pool_name.clone().into()), + })) + .expect_status(Some(StatusCode::INSUFFICIENT_STORAGE)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap(); + assert_eq!( + error.message, + "Insufficient capacity: No external IP addresses available".to_string() + ); + + // Idempotent re-add to the first instance should succeed even if + // an internal attempt to alloc a new EIP would fail. 
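+    // (The instance already holds other-pool's single address in the
+    // 'attached' state, so the repeated attach below should return that
+    // address rather than attempt a fresh allocation.)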
+ let eph_resp_2 = ephemeral_ip_attach( + client, + INSTANCE_NAMES[0], + Some(pool_name.as_str()), + ) + .await; + assert_eq!(eph_resp_2, eph_resp); +} + pub async fn floating_ip_get( client: &ClientTestContext, fip_url: &str, @@ -521,3 +872,96 @@ async fn floating_ip_get_as( panic!("failed to make \"get\" request to {fip_url}: {e}") }) } + +async fn instance_for_external_ips( + client: &ClientTestContext, + instance_name: &str, + start: bool, + use_ephemeral_ip: bool, + floating_ip_names: &[&str], +) -> Instance { + let mut fips: Vec<_> = floating_ip_names + .iter() + .map(|s| params::ExternalIpCreate::Floating { + floating_ip: s.parse::().unwrap().into(), + }) + .collect(); + if use_ephemeral_ip { + fips.push(params::ExternalIpCreate::Ephemeral { pool: None }) + } + create_instance_with( + &client, + PROJECT_NAME, + instance_name, + ¶ms::InstanceNetworkInterfaceAttachment::Default, + vec![], + fips, + start, + ) + .await +} + +async fn ephemeral_ip_attach( + client: &ClientTestContext, + instance_name: &str, + pool_name: Option<&str>, +) -> views::ExternalIp { + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::EphemeralIpCreate { + pool: pool_name.map(|v| v.parse::().unwrap().into()), + })) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} + +async fn ephemeral_ip_detach(client: &ClientTestContext, instance_name: &str) { + let url = instance_ephemeral_ip_url(instance_name, PROJECT_NAME); + object_delete(client, &url).await; +} + +async fn floating_ip_attach( + client: &ClientTestContext, + instance_name: &str, + floating_ip_name: &str, +) -> views::FloatingIp { + let url = attach_floating_ip_url(floating_ip_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .body(Some(¶ms::FloatingIpAttach { + kind: params::FloatingIpParentKind::Instance, + parent: instance_name.parse::().unwrap().into(), + })) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} + +async fn floating_ip_detach( + client: &ClientTestContext, + floating_ip_name: &str, +) -> views::FloatingIp { + let url = detach_floating_ip_url(floating_ip_name, PROJECT_NAME); + NexusRequest::new( + RequestBuilder::new(client, Method::POST, &url) + .expect_status(Some(StatusCode::ACCEPTED)), + ) + .authn_as(AuthnMode::PrivilegedUser) + .execute() + .await + .unwrap() + .parsed_body() + .unwrap() +} diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs index 2f4e913185..8d97df6cda 100644 --- a/nexus/tests/integration_tests/instances.rs +++ b/nexus/tests/integration_tests/instances.rs @@ -649,6 +649,7 @@ async fn test_instance_migrate(cptestctx: &ControlPlaneTestContext) { ¶ms::InstanceNetworkInterfaceAttachment::Default, Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -752,6 +753,7 @@ async fn test_instance_migrate_v2p(cptestctx: &ControlPlaneTestContext) { // located with their instances. 
Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -1104,6 +1106,7 @@ async fn test_instance_metrics_with_migration( ¶ms::InstanceNetworkInterfaceAttachment::Default, Vec::::new(), Vec::::new(), + true, ) .await; let instance_id = instance.identity.id; @@ -3644,7 +3647,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( let ip = fetch_instance_ephemeral_ip(client, "pool1-inst").await; assert!( - ip.ip >= range1.first_address() && ip.ip <= range1.last_address(), + ip.ip() >= range1.first_address() && ip.ip() <= range1.last_address(), "Expected ephemeral IP to come from pool1" ); @@ -3652,7 +3655,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( create_instance_with_pool(client, "pool2-inst", Some("pool2")).await; let ip = fetch_instance_ephemeral_ip(client, "pool2-inst").await; assert!( - ip.ip >= range2.first_address() && ip.ip <= range2.last_address(), + ip.ip() >= range2.first_address() && ip.ip() <= range2.last_address(), "Expected ephemeral IP to come from pool2" ); @@ -3667,7 +3670,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( create_instance_with_pool(client, "pool2-inst2", None).await; let ip = fetch_instance_ephemeral_ip(client, "pool2-inst2").await; assert!( - ip.ip >= range2.first_address() && ip.ip <= range2.last_address(), + ip.ip() >= range2.first_address() && ip.ip() <= range2.last_address(), "Expected ephemeral IP to come from pool2" ); @@ -3705,7 +3708,7 @@ async fn test_instance_ephemeral_ip_from_correct_pool( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("pool1".parse().unwrap()), + pool: Some("pool1".parse::().unwrap().into()), }], disks: vec![], start: true, @@ -3769,7 +3772,7 @@ async fn test_instance_ephemeral_ip_from_orphan_pool( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("orphan-pool".parse().unwrap()), + pool: Some("orphan-pool".parse::().unwrap().into()), }], disks: vec![], start: true, @@ -3829,7 +3832,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: None, // <--- the only important thing here + pool: None, // <--- the only important thing here }], disks: vec![], start: true, @@ -3845,7 +3848,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error( // same deal if you specify a pool that doesn't exist let body = params::InstanceCreate { external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some("nonexistent-pool".parse().unwrap()), + pool: Some("nonexistent-pool".parse::().unwrap().into()), }], ..body }; @@ -3879,7 +3882,7 @@ async fn test_instance_attach_several_external_ips( // Create several floating IPs for the instance, totalling 8 IPs. 
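+    // (The vec below starts with one ephemeral entry and the loop pushes
+    // seven floating IPs, giving the eight external IPs noted above.)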
let mut external_ip_create = - vec![params::ExternalIpCreate::Ephemeral { pool_name: None }]; + vec![params::ExternalIpCreate::Ephemeral { pool: None }]; let mut fips = vec![]; for i in 1..8 { let name = format!("fip-{i}"); @@ -3887,7 +3890,7 @@ async fn test_instance_attach_several_external_ips( create_floating_ip(&client, &name, PROJECT_NAME, None, None).await, ); external_ip_create.push(params::ExternalIpCreate::Floating { - floating_ip_name: name.parse().unwrap(), + floating_ip: name.parse::().unwrap().into(), }); } @@ -3900,30 +3903,31 @@ async fn test_instance_attach_several_external_ips( ¶ms::InstanceNetworkInterfaceAttachment::Default, vec![], external_ip_create, + true, ) .await; // Verify that all external IPs are visible on the instance and have // been allocated in order. let external_ips = - fetch_instance_external_ips(&client, instance_name).await; + fetch_instance_external_ips(&client, instance_name, PROJECT_NAME).await; assert_eq!(external_ips.len(), 8); eprintln!("{external_ips:?}"); for (i, eip) in external_ips .iter() - .sorted_unstable_by(|a, b| a.ip.cmp(&b.ip)) + .sorted_unstable_by(|a, b| a.ip().cmp(&b.ip())) .enumerate() { let last_octet = i + if i != external_ips.len() - 1 { - assert_eq!(eip.kind, IpKind::Floating); + assert_eq!(eip.kind(), IpKind::Floating); 1 } else { // SNAT will occupy 1.0.0.8 here, since it it alloc'd before // the ephemeral. - assert_eq!(eip.kind, IpKind::Ephemeral); + assert_eq!(eip.kind(), IpKind::Ephemeral); 2 }; - assert_eq!(eip.ip, Ipv4Addr::new(10, 0, 0, last_octet as u8)); + assert_eq!(eip.ip(), Ipv4Addr::new(10, 0, 0, last_octet as u8)); } // Verify that all floating IPs are bound to their parent instance. @@ -3948,7 +3952,7 @@ async fn test_instance_allow_only_one_ephemeral_ip( // don't need any IP pools because request fails at parse time let ephemeral_create = params::ExternalIpCreate::Ephemeral { - pool_name: Some("default".parse().unwrap()), + pool: Some("default".parse::().unwrap().into()), }; let create_params = params::InstanceCreate { identity: IdentityMetadataCreateParams { @@ -3992,19 +3996,20 @@ async fn create_instance_with_pool( ¶ms::InstanceNetworkInterfaceAttachment::Default, vec![], vec![params::ExternalIpCreate::Ephemeral { - pool_name: pool_name.map(|name| name.parse().unwrap()), + pool: pool_name.map(|name| name.parse::().unwrap().into()), }], + true, ) .await } -async fn fetch_instance_external_ips( +pub async fn fetch_instance_external_ips( client: &ClientTestContext, instance_name: &str, + project_name: &str, ) -> Vec { let ips_url = format!( - "/v1/instances/{}/external-ips?project={}", - instance_name, PROJECT_NAME + "/v1/instances/{instance_name}/external-ips?project={project_name}", ); let ips = NexusRequest::object_get(client, &ips_url) .authn_as(AuthnMode::PrivilegedUser) @@ -4020,10 +4025,10 @@ async fn fetch_instance_ephemeral_ip( client: &ClientTestContext, instance_name: &str, ) -> views::ExternalIp { - fetch_instance_external_ips(client, instance_name) + fetch_instance_external_ips(client, instance_name, PROJECT_NAME) .await .into_iter() - .find(|v| v.kind == IpKind::Ephemeral) + .find(|v| v.kind() == IpKind::Ephemeral) .unwrap() } @@ -4087,7 +4092,7 @@ async fn test_instance_create_in_silo(cptestctx: &ControlPlaneTestContext) { user_data: vec![], network_interfaces: params::InstanceNetworkInterfaceAttachment::Default, external_ips: vec![params::ExternalIpCreate::Ephemeral { - pool_name: Some(Name::try_from(String::from("default")).unwrap()), + pool: Some("default".parse::().unwrap().into()), }], 
disks: vec![], start: true, diff --git a/nexus/tests/integration_tests/subnet_allocation.rs b/nexus/tests/integration_tests/subnet_allocation.rs index 91a933754c..e36b213f7e 100644 --- a/nexus/tests/integration_tests/subnet_allocation.rs +++ b/nexus/tests/integration_tests/subnet_allocation.rs @@ -143,6 +143,7 @@ async fn test_subnet_allocation(cptestctx: &ControlPlaneTestContext) { Vec::::new(), // External IPs= Vec::::new(), + true, ) .await; } diff --git a/nexus/tests/output/nexus_tags.txt b/nexus/tests/output/nexus_tags.txt index bd79a9c3e9..8bd2f34de5 100644 --- a/nexus/tests/output/nexus_tags.txt +++ b/nexus/tests/output/nexus_tags.txt @@ -12,8 +12,10 @@ disk_view GET /v1/disks/{disk} API operations found with tag "floating-ips" OPERATION ID METHOD URL PATH +floating_ip_attach POST /v1/floating-ips/{floating_ip}/attach floating_ip_create POST /v1/floating-ips floating_ip_delete DELETE /v1/floating-ips/{floating_ip} +floating_ip_detach POST /v1/floating-ips/{floating_ip}/detach floating_ip_list GET /v1/floating-ips floating_ip_view GET /v1/floating-ips/{floating_ip} @@ -40,6 +42,8 @@ instance_delete DELETE /v1/instances/{instance} instance_disk_attach POST /v1/instances/{instance}/disks/attach instance_disk_detach POST /v1/instances/{instance}/disks/detach instance_disk_list GET /v1/instances/{instance}/disks +instance_ephemeral_ip_attach POST /v1/instances/{instance}/external-ips/ephemeral +instance_ephemeral_ip_detach DELETE /v1/instances/{instance}/external-ips/ephemeral instance_external_ip_list GET /v1/instances/{instance}/external-ips instance_list GET /v1/instances instance_migrate POST /v1/instances/{instance}/migrate diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 750e83c2a2..62c8224461 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -71,7 +71,7 @@ path_param!(VpcPath, vpc, "VPC"); path_param!(SubnetPath, subnet, "subnet"); path_param!(RouterPath, router, "router"); path_param!(RoutePath, route, "route"); -path_param!(FloatingIpPath, floating_ip, "Floating IP"); +path_param!(FloatingIpPath, floating_ip, "floating IP"); path_param!(DiskPath, disk, "disk"); path_param!(SnapshotPath, snapshot, "snapshot"); path_param!(ImagePath, image, "image"); @@ -890,6 +890,23 @@ pub struct FloatingIpCreate { pub pool: Option, } +/// The type of resource that a floating IP is attached to +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(rename_all = "snake_case")] +pub enum FloatingIpParentKind { + Instance, +} + +/// Parameters for attaching a floating IP address to another resource +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +pub struct FloatingIpAttach { + /// Name or ID of the resource that this IP address should be attached to + pub parent: NameOrId, + + /// The type of `parent`'s resource + pub kind: FloatingIpParentKind, +} + // INSTANCES /// Describes an attachment of an `InstanceNetworkInterface` to an `Instance`, @@ -954,14 +971,30 @@ pub struct InstanceDiskAttach { #[serde(tag = "type", rename_all = "snake_case")] pub enum ExternalIpCreate { /// An IP address providing both inbound and outbound access. The address is - /// automatically-assigned from the provided IP Pool, or all available pools - /// if not specified. - Ephemeral { pool_name: Option }, + /// automatically-assigned from the provided IP Pool, or the current silo's + /// default pool if not specified. 
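+    /// If no pool is specified and the silo has no default pool, the request
+    /// fails (see `test_instance_ephemeral_ip_no_default_pool_error`). A
+    /// sketch of explicitly naming a pool, assuming a pool called `default`
+    /// is linked to the silo:
+    ///
+    /// ```ignore
+    /// let ip = params::ExternalIpCreate::Ephemeral {
+    ///     pool: Some("default".parse::<Name>().unwrap().into()),
+    /// };
+    /// ```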
+ Ephemeral { pool: Option }, /// An IP address providing both inbound and outbound access. The address is - /// an existing Floating IP object assigned to the current project. + /// an existing floating IP object assigned to the current project. /// /// The floating IP must not be in use by another instance or service. - Floating { floating_ip_name: Name }, + Floating { floating_ip: NameOrId }, +} + +/// Parameters for creating an ephemeral IP address for an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub struct EphemeralIpCreate { + /// Name or ID of the IP pool used to allocate an address + pub pool: Option, +} + +/// Parameters for detaching an external IP from an instance. +#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] +#[serde(tag = "type", rename_all = "snake_case")] +pub enum ExternalIpDetach { + Ephemeral, + Floating { floating_ip: NameOrId }, } /// Create-time parameters for an `Instance` diff --git a/nexus/types/src/external_api/shared.rs b/nexus/types/src/external_api/shared.rs index a4c5ae1e62..f6b4db18a3 100644 --- a/nexus/types/src/external_api/shared.rs +++ b/nexus/types/src/external_api/shared.rs @@ -221,7 +221,9 @@ pub enum ServiceUsingCertificate { } /// The kind of an external IP address for an instance -#[derive(Debug, Clone, Copy, Deserialize, Serialize, JsonSchema, PartialEq)] +#[derive( + Debug, Clone, Copy, Deserialize, Eq, Serialize, JsonSchema, PartialEq, +)] #[serde(rename_all = "snake_case")] pub enum IpKind { Ephemeral, diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 314dd4ed00..5e31be7af8 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -12,8 +12,8 @@ use api_identity::ObjectIdentity; use chrono::DateTime; use chrono::Utc; use omicron_common::api::external::{ - ByteCount, Digest, IdentityMetadata, InstanceState, Ipv4Net, Ipv6Net, Name, - ObjectIdentity, RoleName, SemverVersion, SimpleIdentity, + ByteCount, Digest, Error, IdentityMetadata, InstanceState, Ipv4Net, + Ipv6Net, Name, ObjectIdentity, RoleName, SemverVersion, SimpleIdentity, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -337,16 +337,34 @@ pub struct IpPoolRange { // INSTANCE EXTERNAL IP ADDRESSES -#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)] -#[serde(rename_all = "snake_case")] -pub struct ExternalIp { - pub ip: IpAddr, - pub kind: IpKind, +#[derive(Debug, Clone, Deserialize, PartialEq, Serialize, JsonSchema)] +#[serde(tag = "kind", rename_all = "snake_case")] +pub enum ExternalIp { + Ephemeral { ip: IpAddr }, + Floating(FloatingIp), +} + +impl ExternalIp { + pub fn ip(&self) -> IpAddr { + match self { + Self::Ephemeral { ip } => *ip, + Self::Floating(float) => float.ip, + } + } + + pub fn kind(&self) -> IpKind { + match self { + Self::Ephemeral { .. } => IpKind::Ephemeral, + Self::Floating(_) => IpKind::Floating, + } + } } /// A Floating IP is a well-known IP address which can be attached /// and detached from instances. 
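+/// A detached floating IP keeps its address and may later be attached to
+/// another instance in the same project.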
-#[derive(ObjectIdentity, Debug, Clone, Deserialize, Serialize, JsonSchema)] +#[derive( + ObjectIdentity, Debug, PartialEq, Clone, Deserialize, Serialize, JsonSchema, +)] #[serde(rename_all = "snake_case")] pub struct FloatingIp { #[serde(flatten)] @@ -360,6 +378,25 @@ pub struct FloatingIp { pub instance_id: Option<Uuid>, } +impl From<FloatingIp> for ExternalIp { + fn from(value: FloatingIp) -> Self { + ExternalIp::Floating(value) + } +} + +impl TryFrom<ExternalIp> for FloatingIp { + type Error = Error; + + fn try_from(value: ExternalIp) -> Result<Self, Self::Error> { + match value { + ExternalIp::Ephemeral { .. } => Err(Error::internal_error( + "tried to convert an ephemeral IP into a floating IP", + )), + ExternalIp::Floating(v) => Ok(v), + } + } +} + // RACKS /// View of an Rack diff --git a/openapi/nexus.json b/openapi/nexus.json index 2dd4037430..59206ed010 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -930,7 +930,7 @@ { "in": "path", "name": "floating_ip", - "description": "Name or ID of the Floating IP", + "description": "Name or ID of the floating IP", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" } }, @@ -974,7 +974,7 @@ { "in": "path", "name": "floating_ip", - "description": "Name or ID of the Floating IP", + "description": "Name or ID of the floating IP", "required": true, "schema": { "$ref": "#/components/schemas/NameOrId" } }, @@ -1002,6 +1002,108 @@ } } }, + "/v1/floating-ips/{floating_ip}/attach": { + "post": { + "tags": [ + "floating-ips" + ], + "summary": "Attach a floating IP to an instance or other resource", + "operationId": "floating_ip_attach", + "parameters": [ + { + "in": "path", + "name": "floating_ip", + "description": "Name or ID of the floating IP", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIpAttach" + } + } + }, + "required": true + }, + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/v1/floating-ips/{floating_ip}/detach": { + "post": { + "tags": [ + "floating-ips" + ], + "summary": "Detach a floating IP from an instance or other resource", + "operationId": "floating_ip_detach", + "parameters": [ + { + "in": "path", + "name": "floating_ip", + "description": "Name or ID of the floating IP", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/FloatingIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/groups": { "get": { "tags": [ @@ -1826,6 +1928,99 @@ } } }, + "/v1/instances/{instance}/external-ips/ephemeral": { + "post": { + "tags": [ + "instances" + ], + "summary": "Allocate and attach an ephemeral IP to an instance", + "operationId":
"instance_ephemeral_ip_attach", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/EphemeralIpCreate" + } + } + }, + "required": true + }, + "responses": { + "202": { + "description": "successfully enqueued operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ExternalIp" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "tags": [ + "instances" + ], + "summary": "Detach and deallocate an ephemeral IP from an instance", + "operationId": "instance_ephemeral_ip_detach", + "parameters": [ + { + "in": "path", + "name": "instance", + "description": "Name or ID of the instance", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + }, + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "responses": { + "204": { + "description": "successful deletion" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/instances/{instance}/migrate": { "post": { "tags": [ @@ -11005,6 +11200,21 @@ } ] }, + "EphemeralIpCreate": { + "description": "Parameters for creating an ephemeral IP address for an instance.", + "type": "object", + "properties": { + "pool": { + "nullable": true, + "description": "Name or ID of the IP pool used to allocate an address", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + } + } + }, "Error": { "description": "Error information from a response.", "type": "object", @@ -11025,33 +11235,105 @@ ] }, "ExternalIp": { - "type": "object", - "properties": { - "ip": { - "type": "string", - "format": "ip" + "oneOf": [ + { + "type": "object", + "properties": { + "ip": { + "type": "string", + "format": "ip" + }, + "kind": { + "type": "string", + "enum": [ + "ephemeral" + ] + } + }, + "required": [ + "ip", + "kind" + ] }, - "kind": { - "$ref": "#/components/schemas/IpKind" + { + "description": "A Floating IP is a well-known IP address which can be attached and detached from instances.", + "type": "object", + "properties": { + "description": { + "description": "human-readable free-form text about a resource", + "type": "string" + }, + "id": { + "description": "unique, immutable, system-controlled identifier for each resource", + "type": "string", + "format": "uuid" + }, + "instance_id": { + "nullable": true, + "description": "The ID of the instance that this Floating IP is attached to, if it is presently in use.", + "type": "string", + "format": "uuid" + }, + "ip": { + "description": "The IP address held by this resource.", + "type": "string", + "format": "ip" + }, + "kind": { + "type": "string", + "enum": [ + "floating" + ] + }, + "name": { + "description": "unique, mutable, user-controlled identifier for each resource", + "allOf": [ + { + "$ref": "#/components/schemas/Name" + } + ] + }, + "project_id": { + "description": "The project this resource exists within.", + "type": "string", + "format": "uuid" + }, + "time_created": { 
+ "description": "timestamp when this resource was created", + "type": "string", + "format": "date-time" + }, + "time_modified": { + "description": "timestamp when this resource was last modified", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "description", + "id", + "ip", + "kind", + "name", + "project_id", + "time_created", + "time_modified" + ] } - }, - "required": [ - "ip", - "kind" ] }, "ExternalIpCreate": { "description": "Parameters for creating an external IP address for instances.", "oneOf": [ { - "description": "An IP address providing both inbound and outbound access. The address is automatically-assigned from the provided IP Pool, or all available pools if not specified.", + "description": "An IP address providing both inbound and outbound access. The address is automatically-assigned from the provided IP Pool, or the current silo's default pool if not specified.", "type": "object", "properties": { - "pool_name": { + "pool": { "nullable": true, "allOf": [ { - "$ref": "#/components/schemas/Name" + "$ref": "#/components/schemas/NameOrId" } ] }, @@ -11067,11 +11349,11 @@ ] }, { - "description": "An IP address providing both inbound and outbound access. The address is an existing Floating IP object assigned to the current project.\n\nThe floating IP must not be in use by another instance or service.", + "description": "An IP address providing both inbound and outbound access. The address is an existing floating IP object assigned to the current project.\n\nThe floating IP must not be in use by another instance or service.", "type": "object", "properties": { - "floating_ip_name": { - "$ref": "#/components/schemas/Name" + "floating_ip": { + "$ref": "#/components/schemas/NameOrId" }, "type": { "type": "string", @@ -11081,7 +11363,7 @@ } }, "required": [ - "floating_ip_name", + "floating_ip", "type" ] } @@ -11226,6 +11508,32 @@ "time_modified" ] }, + "FloatingIpAttach": { + "description": "Parameters for attaching a floating IP address to another resource", + "type": "object", + "properties": { + "kind": { + "description": "The type of `parent`'s resource", + "allOf": [ + { + "$ref": "#/components/schemas/FloatingIpParentKind" + } + ] + }, + "parent": { + "description": "Name or ID of the resource that this IP address should be attached to", + "allOf": [ + { + "$ref": "#/components/schemas/NameOrId" + } + ] + } + }, + "required": [ + "kind", + "parent" + ] + }, "FloatingIpCreate": { "description": "Parameters for creating a new floating IP address for instances.", "type": "object", @@ -11257,6 +11565,13 @@ "name" ] }, + "FloatingIpParentKind": { + "description": "The type of resource that a floating IP is attached to", + "type": "string", + "enum": [ + "instance" + ] + }, "FloatingIpResultsPage": { "description": "A single page of results", "type": "object", @@ -12481,14 +12796,6 @@ } ] }, - "IpKind": { - "description": "The kind of an external IP address for an instance", - "type": "string", - "enum": [ - "ephemeral", - "floating" - ] - }, "IpNet": { "oneOf": [ { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index b5b9d3fd5b..3e3f6abec6 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -327,6 +327,78 @@ } } }, + "/instances/{instance_id}/external-ip": { + "put": { + "operationId": "instance_put_external_ip", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": 
{ + "$ref": "#/components/schemas/InstanceExternalIpBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, + "delete": { + "operationId": "instance_delete_external_ip", + "parameters": [ + { + "in": "path", + "name": "instance_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/InstanceExternalIpBody" + } + } + }, + "required": true + }, + "responses": { + "204": { + "description": "resource updated" + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/instances/{instance_id}/migration-ids": { "put": { "operationId": "instance_put_migration_ids", @@ -4541,6 +4613,49 @@ "vmm_runtime" ] }, + "InstanceExternalIpBody": { + "description": "Used to dynamically update external IPs attached to an instance.", + "oneOf": [ + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "ephemeral" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": [ + "floating" + ] + }, + "value": { + "type": "string", + "format": "ip" + } + }, + "required": [ + "type", + "value" + ] + } + ] + }, "InstanceHardware": { "description": "Describes the instance hardware.", "type": "object", diff --git a/schema/crdb/25.0.0/up01.sql b/schema/crdb/25.0.0/up01.sql new file mode 100644 index 0000000000..0cb511fb91 --- /dev/null +++ b/schema/crdb/25.0.0/up01.sql @@ -0,0 +1,6 @@ +CREATE TYPE IF NOT EXISTS omicron.public.ip_attach_state AS ENUM ( + 'detached', + 'attached', + 'detaching', + 'attaching' +); diff --git a/schema/crdb/25.0.0/up02.sql b/schema/crdb/25.0.0/up02.sql new file mode 100644 index 0000000000..324a907dd4 --- /dev/null +++ b/schema/crdb/25.0.0/up02.sql @@ -0,0 +1,4 @@ +-- Intentionally nullable for now as we need to backfill using the current +-- value of parent_id. +ALTER TABLE omicron.public.external_ip +ADD COLUMN IF NOT EXISTS state omicron.public.ip_attach_state; diff --git a/schema/crdb/25.0.0/up03.sql b/schema/crdb/25.0.0/up03.sql new file mode 100644 index 0000000000..ea1d461250 --- /dev/null +++ b/schema/crdb/25.0.0/up03.sql @@ -0,0 +1,7 @@ +-- initialise external ip state for detached IPs. +set + local disallow_full_table_scans = off; + +UPDATE omicron.public.external_ip +SET state = 'detached' +WHERE parent_id IS NULL; diff --git a/schema/crdb/25.0.0/up04.sql b/schema/crdb/25.0.0/up04.sql new file mode 100644 index 0000000000..7bf89d6626 --- /dev/null +++ b/schema/crdb/25.0.0/up04.sql @@ -0,0 +1,7 @@ +-- initialise external ip state for attached IPs. +set + local disallow_full_table_scans = off; + +UPDATE omicron.public.external_ip +SET state = 'attached' +WHERE parent_id IS NOT NULL; diff --git a/schema/crdb/25.0.0/up05.sql b/schema/crdb/25.0.0/up05.sql new file mode 100644 index 0000000000..894806a3dc --- /dev/null +++ b/schema/crdb/25.0.0/up05.sql @@ -0,0 +1,2 @@ +-- Now move the new column to its intended state of non-nullable. 
+ALTER TABLE omicron.public.external_ip ALTER COLUMN state SET NOT NULL; diff --git a/schema/crdb/25.0.0/up06.sql b/schema/crdb/25.0.0/up06.sql new file mode 100644 index 0000000000..ca19081e37 --- /dev/null +++ b/schema/crdb/25.0.0/up06.sql @@ -0,0 +1,4 @@ +ALTER TABLE omicron.public.external_ip +ADD CONSTRAINT IF NOT EXISTS detached_null_parent_id CHECK ( + (state = 'detached') OR (parent_id IS NOT NULL) +); diff --git a/schema/crdb/25.0.0/up07.sql b/schema/crdb/25.0.0/up07.sql new file mode 100644 index 0000000000..00f9310c2e --- /dev/null +++ b/schema/crdb/25.0.0/up07.sql @@ -0,0 +1,4 @@ +CREATE UNIQUE INDEX IF NOT EXISTS one_ephemeral_ip_per_instance ON omicron.public.external_ip ( + parent_id +) + WHERE kind = 'ephemeral' AND parent_id IS NOT NULL AND time_deleted IS NULL; diff --git a/schema/crdb/25.0.0/up08.sql b/schema/crdb/25.0.0/up08.sql new file mode 100644 index 0000000000..3d85aaad05 --- /dev/null +++ b/schema/crdb/25.0.0/up08.sql @@ -0,0 +1,2 @@ +ALTER TABLE IF EXISTS omicron.public.external_ip +DROP CONSTRAINT IF EXISTS null_non_fip_parent_id; diff --git a/schema/crdb/25.0.0/up09.sql b/schema/crdb/25.0.0/up09.sql new file mode 100644 index 0000000000..bac963cce5 --- /dev/null +++ b/schema/crdb/25.0.0/up09.sql @@ -0,0 +1,4 @@ +ALTER TABLE IF EXISTS omicron.public.external_ip +ADD CONSTRAINT IF NOT EXISTS null_snat_parent_id CHECK ( + (kind != 'snat') OR (parent_id IS NOT NULL) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index f3ca5c4b85..86d88f5fe9 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1669,6 +1669,13 @@ CREATE TYPE IF NOT EXISTS omicron.public.ip_kind AS ENUM ( 'floating' ); +CREATE TYPE IF NOT EXISTS omicron.public.ip_attach_state AS ENUM ( + 'detached', + 'attached', + 'detaching', + 'attaching' +); + /* * External IP addresses used for guest instances and externally-facing * services. @@ -1714,6 +1721,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( /* FK to the `project` table. */ project_id UUID, + /* State of this IP with regard to instance attach/detach + * operations. This is mainly used to prevent concurrent use + * across sagas and allow rollback to correct state. + */ + state omicron.public.ip_attach_state NOT NULL, + /* The name must be non-NULL iff this is a floating IP. */ CONSTRAINT null_fip_name CHECK ( (kind != 'floating' AND name IS NULL) OR @@ -1735,16 +1748,27 @@ CREATE TABLE IF NOT EXISTS omicron.public.external_ip ( ), /* - * Only nullable if this is a floating IP, which may exist not - * attached to any instance or service yet. + * Only nullable if this is a floating/ephemeral IP, which may exist not + * attached to any instance or service yet. Ephemeral IPs should not generally + * exist without parent instances/services, but need to temporarily exist in + * this state for live attachment. */ - CONSTRAINT null_non_fip_parent_id CHECK ( - (kind != 'floating' AND parent_id is NOT NULL) OR (kind = 'floating') + CONSTRAINT null_snat_parent_id CHECK ( + (kind != 'snat') OR (parent_id IS NOT NULL) ), /* Ephemeral IPs are not supported for services. */ CONSTRAINT ephemeral_kind_service CHECK ( (kind = 'ephemeral' AND is_service = FALSE) OR (kind != 'ephemeral') + ), + + /* + * (Not detached) => non-null parent_id. + * This is not a two-way implication because SNAT IPs + * cannot have a null parent_id. 
+ */ + CONSTRAINT detached_null_parent_id CHECK ( + (state = 'detached') OR (parent_id IS NOT NULL) ) ); @@ -1777,6 +1801,12 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_external_ip_by_parent ON omicron.public ) WHERE parent_id IS NOT NULL AND time_deleted IS NULL; +/* Enforce a limit of one Ephemeral IP per instance */ +CREATE UNIQUE INDEX IF NOT EXISTS one_ephemeral_ip_per_instance ON omicron.public.external_ip ( + parent_id +) + WHERE kind = 'ephemeral' AND parent_id IS NOT NULL AND time_deleted IS NULL; + /* Enforce name-uniqueness of floating (service) IPs at fleet level. */ CREATE UNIQUE INDEX IF NOT EXISTS lookup_floating_ip_by_name on omicron.public.external_ip ( name diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 39d1ae26a0..0798aed664 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -9,7 +9,7 @@ use crate::bootstrap::early_networking::EarlyNetworkConfig; use crate::bootstrap::params::AddSledRequest; use crate::params::{ CleanupContextUpdate, DiskEnsureBody, InstanceEnsureBody, - InstancePutMigrationIdsBody, InstancePutStateBody, + InstanceExternalIpBody, InstancePutMigrationIdsBody, InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRulesEnsureBody, ZoneBundleId, ZoneBundleMetadata, Zpool, @@ -53,6 +53,8 @@ pub fn api() -> SledApiDescription { api.register(instance_issue_disk_snapshot_request)?; api.register(instance_put_migration_ids)?; api.register(instance_put_state)?; + api.register(instance_put_external_ip)?; + api.register(instance_delete_external_ip)?; api.register(instance_register)?; api.register(instance_unregister)?; api.register(omicron_zones_get)?; @@ -467,6 +469,38 @@ async fn instance_put_migration_ids( )) } +#[endpoint { + method = PUT, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_put_external_ip( + rqctx: RequestContext<SledAgent>, + path_params: Path<InstancePathParam>, + body: TypedBody<InstanceExternalIpBody>, +) -> Result<HttpResponseUpdatedNoContent, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + +#[endpoint { + method = DELETE, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_delete_external_ip( + rqctx: RequestContext<SledAgent>, + path_params: Path<InstancePathParam>, + body: TypedBody<InstanceExternalIpBody>, +) -> Result<HttpResponseUpdatedNoContent, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + /// Path parameters for Disk requests (sled agent API) #[derive(Deserialize, JsonSchema)] struct DiskPathParam { diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 057402c57a..3bbe0762f8 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -10,8 +10,8 @@ use crate::common::instance::{ }; use crate::instance_manager::{InstanceManagerServices, InstanceTicket}; use crate::nexus::NexusClientWithResolver; -use crate::params::ZoneBundleCause; use crate::params::ZoneBundleMetadata; +use crate::params::{InstanceExternalIpBody, ZoneBundleCause}; use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstanceMigrationTargetParams, InstanceStateRequested, VpcFirewallRule, @@ -558,6 +558,110 @@ impl InstanceInner { Ok(()) } + + pub async fn add_external_ip( + &mut self, + ip:
&InstanceExternalIpBody, + ) -> Result<(), Error> { + // v4 + v6 handling is delegated to `external_ips_ensure`. + // If OPTE is unhappy, we undo at `Instance` level. + + match ip { + // For idempotency of add/delete, we want to return + // success on 'already done'. + InstanceExternalIpBody::Ephemeral(ip) + if Some(ip) == self.ephemeral_ip.as_ref() => + { + return Ok(()); + } + InstanceExternalIpBody::Floating(ip) + if self.floating_ips.contains(ip) => + { + return Ok(()); + } + // New Ephemeral IP while current exists -- error without + // explicit delete. + InstanceExternalIpBody::Ephemeral(ip) + if self.ephemeral_ip.is_some() => + { + return Err(Error::Opte( + illumos_utils::opte::Error::ImplicitEphemeralIpDetach( + *ip, + self.ephemeral_ip.unwrap(), + ), + )); + } + // Not found, proceed with OPTE update. + InstanceExternalIpBody::Ephemeral(ip) => { + self.ephemeral_ip = Some(*ip); + } + InstanceExternalIpBody::Floating(ip) => { + self.floating_ips.push(*ip); + } + } + + let Some(primary_nic) = self.requested_nics.get(0) else { + return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); + }; + + self.port_manager.external_ips_ensure( + primary_nic.id, + primary_nic.kind, + Some(self.source_nat), + self.ephemeral_ip, + &self.floating_ips, + )?; + + Ok(()) + } + + pub async fn delete_external_ip( + &mut self, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + // v4 + v6 handling is delegated to `external_ips_ensure`. + // If OPTE is unhappy, we undo at `Instance` level. + + match ip { + // For idempotency of add/delete, we want to return + // success on 'already done'. + // IP Mismatch and 'deleted in past' can't really be + // disambiguated here. + InstanceExternalIpBody::Ephemeral(ip) + if self.ephemeral_ip != Some(*ip) => + { + return Ok(()); + } + InstanceExternalIpBody::Ephemeral(_) => { + self.ephemeral_ip = None; + } + InstanceExternalIpBody::Floating(ip) => { + let floating_index = + self.floating_ips.iter().position(|v| v == ip); + if let Some(pos) = floating_index { + // Swap remove is valid here, OPTE is not sensitive + // to Floating Ip ordering. + self.floating_ips.swap_remove(pos); + } else { + return Ok(()); + } + } + } + + let Some(primary_nic) = self.requested_nics.get(0) else { + return Err(Error::Opte(illumos_utils::opte::Error::NoPrimaryNic)); + }; + + self.port_manager.external_ips_ensure( + primary_nic.id, + primary_nic.kind, + Some(self.source_nat), + self.ephemeral_ip, + &self.floating_ips, + )?; + + Ok(()) + } } /// A reference to a single instance running a running Propolis server. @@ -1094,4 +1198,52 @@ impl Instance { Err(Error::InstanceNotRunning(inner.properties.id)) } } + + pub async fn add_external_ip( + &self, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + let mut inner = self.inner.lock().await; + + // The internal call can either fail on adding the IP + // to the list, or on the OPTE step. + // Be cautious and reset state if either fails. + // Note we don't need to re-ensure port manager/OPTE state + // since that's the last call we make internally. 
+ let old_eph = inner.ephemeral_ip; + let out = inner.add_external_ip(ip).await; + + if out.is_err() { + inner.ephemeral_ip = old_eph; + if let InstanceExternalIpBody::Floating(ip) = ip { + inner.floating_ips.retain(|v| v != ip); + } + } + + out + } + + pub async fn delete_external_ip( + &self, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + let mut inner = self.inner.lock().await; + + // Similar logic to `add_external_ip`, except here we + // need to readd the floating IP if it was removed. + // OPTE doesn't care about the order of floating IPs. + let old_eph = inner.ephemeral_ip; + let out = inner.delete_external_ip(ip).await; + + if out.is_err() { + inner.ephemeral_ip = old_eph; + if let InstanceExternalIpBody::Floating(ip) = ip { + if !inner.floating_ips.contains(ip) { + inner.floating_ips.push(*ip); + } + } + } + + out + } } diff --git a/sled-agent/src/instance_manager.rs b/sled-agent/src/instance_manager.rs index c1b7e402a4..b66b0400e1 100644 --- a/sled-agent/src/instance_manager.rs +++ b/sled-agent/src/instance_manager.rs @@ -7,6 +7,7 @@ use crate::instance::propolis_zone_name; use crate::instance::Instance; use crate::nexus::NexusClientWithResolver; +use crate::params::InstanceExternalIpBody; use crate::params::ZoneBundleMetadata; use crate::params::{ InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse, @@ -434,6 +435,42 @@ impl InstanceManager { }; instance.request_zone_bundle().await } + + pub async fn add_external_ip( + &self, + instance_id: Uuid, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + let instance = { + let instances = self.inner.instances.lock().unwrap(); + instances.get(&instance_id).map(|(_id, v)| v.clone()) + }; + + let Some(instance) = instance else { + return Err(Error::NoSuchInstance(instance_id)); + }; + + instance.add_external_ip(ip).await?; + Ok(()) + } + + pub async fn delete_external_ip( + &self, + instance_id: Uuid, + ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + let instance = { + let instances = self.inner.instances.lock().unwrap(); + instances.get(&instance_id).map(|(_id, v)| v.clone()) + }; + + let Some(instance) = instance else { + return Err(Error::NoSuchInstance(instance_id)); + }; + + instance.delete_external_ip(ip).await?; + Ok(()) + } } /// Represents membership of an instance in the [`InstanceManager`]. diff --git a/sled-agent/src/params.rs b/sled-agent/src/params.rs index 9120bafa9a..f14a13aa41 100644 --- a/sled-agent/src/params.rs +++ b/sled-agent/src/params.rs @@ -818,6 +818,16 @@ pub struct CleanupContextUpdate { pub storage_limit: Option, } +/// Used to dynamically update external IPs attached to an instance. +#[derive( + Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, JsonSchema, Serialize, +)] +#[serde(rename_all = "snake_case", tag = "type", content = "value")] +pub enum InstanceExternalIpBody { + Ephemeral(IpAddr), + Floating(IpAddr), +} + // Our SledRole and Baseboard types do not have to be identical to the Nexus // ones, but they generally should be, and this avoids duplication. If it // becomes easier to maintain a separate copy, we should do that. 
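The serde attributes on `InstanceExternalIpBody` above pin down the wire format these sled-agent endpoints speak: an adjacently tagged JSON object with the variant name under `type` and the address under `value`, matching the `InstanceExternalIpBody` schema in sled-agent.json. A minimal standalone sketch of that round trip (the `JsonSchema` derive is dropped here so it builds with only serde and serde_json; this harness is illustrative and not part of the patch):

```rust
use serde::{Deserialize, Serialize};
use std::net::IpAddr;

// Re-declaration of the enum from sled-agent/src/params.rs, minus JsonSchema.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash, Deserialize, Serialize)]
#[serde(rename_all = "snake_case", tag = "type", content = "value")]
enum InstanceExternalIpBody {
    Ephemeral(IpAddr),
    Floating(IpAddr),
}

fn main() -> Result<(), serde_json::Error> {
    // Adjacent tagging: the variant goes under "type", the payload under "value".
    let body = InstanceExternalIpBody::Floating("203.0.113.5".parse().unwrap());
    assert_eq!(
        serde_json::to_string(&body)?,
        r#"{"type":"floating","value":"203.0.113.5"}"#
    );

    // The same shape deserializes back; this is the body that both PUT and
    // DELETE on /instances/{instance_id}/external-ip accept.
    let back: InstanceExternalIpBody =
        serde_json::from_str(r#"{"type":"ephemeral","value":"10.1.2.3"}"#)?;
    assert_eq!(
        back,
        InstanceExternalIpBody::Ephemeral("10.1.2.3".parse().unwrap())
    );
    Ok(())
}
```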
diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index e5d7752511..09ffdf5dc4 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -8,9 +8,10 @@ use crate::bootstrap::early_networking::{ EarlyNetworkConfig, EarlyNetworkConfigBody, }; use crate::params::{ - DiskEnsureBody, InstanceEnsureBody, InstancePutMigrationIdsBody, - InstancePutStateBody, InstancePutStateResponse, InstanceUnregisterResponse, - Inventory, OmicronZonesConfig, VpcFirewallRulesEnsureBody, + DiskEnsureBody, InstanceEnsureBody, InstanceExternalIpBody, + InstancePutMigrationIdsBody, InstancePutStateBody, + InstancePutStateResponse, InstanceUnregisterResponse, Inventory, + OmicronZonesConfig, VpcFirewallRulesEnsureBody, }; use dropshot::endpoint; use dropshot::ApiDescription; @@ -45,6 +46,8 @@ pub fn api() -> SledApiDescription { api.register(instance_put_state)?; api.register(instance_register)?; api.register(instance_unregister)?; + api.register(instance_put_external_ip)?; + api.register(instance_delete_external_ip)?; api.register(instance_poke_post)?; api.register(disk_put)?; api.register(disk_poke_post)?; @@ -152,6 +155,38 @@ async fn instance_put_migration_ids( )) } +#[endpoint { + method = PUT, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_put_external_ip( + rqctx: RequestContext<Arc<SledAgent>>, + path_params: Path<InstancePathParam>, + body: TypedBody<InstanceExternalIpBody>, +) -> Result<HttpResponseUpdatedNoContent, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_put_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + +#[endpoint { + method = DELETE, + path = "/instances/{instance_id}/external-ip", +}] +async fn instance_delete_external_ip( + rqctx: RequestContext<Arc<SledAgent>>, + path_params: Path<InstancePathParam>, + body: TypedBody<InstanceExternalIpBody>, +) -> Result<HttpResponseUpdatedNoContent, HttpError> { + let sa = rqctx.context(); + let instance_id = path_params.into_inner().instance_id; + let body_args = body.into_inner(); + sa.instance_delete_external_ip(instance_id, &body_args).await?; + Ok(HttpResponseUpdatedNoContent()) +} + #[endpoint { method = POST, path = "/instances/{instance_id}/poke", diff --git a/sled-agent/src/sim/sled_agent.rs b/sled-agent/src/sim/sled_agent.rs index 8a76bf6abc..56cfaf57c8 100644 --- a/sled-agent/src/sim/sled_agent.rs +++ b/sled-agent/src/sim/sled_agent.rs @@ -12,9 +12,10 @@ use super::storage::CrucibleData; use super::storage::Storage; use crate::nexus::NexusClient; use crate::params::{ - DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, + DiskStateRequested, InstanceExternalIpBody, InstanceHardware, + InstanceMigrationSourceParams, InstancePutStateResponse, + InstanceStateRequested, InstanceUnregisterResponse, Inventory, + OmicronZonesConfig, SledRole, }; use crate::sim::simulatable::Simulatable; use crate::updates::UpdateManager; @@ -41,7 +42,7 @@ use propolis_client::{ }; use propolis_mock_server::Context as PropolisContext; use slog::Logger; -use std::collections::HashMap; +use std::collections::{HashMap, HashSet}; use std::net::{IpAddr, Ipv6Addr, SocketAddr}; use std::str::FromStr; use std::sync::Arc; @@ -69,6 +70,8 @@ pub struct SledAgent { pub v2p_mappings: Mutex<HashMap<Uuid, Vec<SetVirtualNetworkInterfaceHost>>>, mock_propolis: Mutex>, PropolisClient)>>, + /// lists of external IPs assigned to instances + pub external_ips: Mutex<HashMap<Uuid, HashSet<InstanceExternalIpBody>>>, config: Config, fake_zones: Mutex<OmicronZonesConfig>, instance_ensure_state_error: Mutex<Option<Error>>, @@ -162,6
+165,7 @@ impl SledAgent { nexus_client, disk_id_to_region_ids: Mutex::new(HashMap::new()), v2p_mappings: Mutex::new(HashMap::new()), + external_ips: Mutex::new(HashMap::new()), mock_propolis: Mutex::new(None), config: config.clone(), fake_zones: Mutex::new(OmicronZonesConfig { @@ -627,6 +631,58 @@ impl SledAgent { Ok(()) } + pub async fn instance_put_external_ip( + &self, + instance_id: Uuid, + body_args: &InstanceExternalIpBody, + ) -> Result<(), Error> { + if !self.instances.contains_key(&instance_id).await { + return Err(Error::internal_error( + "can't alter IP state for nonexistent instance", + )); + } + + let mut eips = self.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + + // High-level behaviour: this should always succeed UNLESS + // trying to add a double ephemeral. + if let InstanceExternalIpBody::Ephemeral(curr_ip) = &body_args { + if my_eips.iter().any(|v| { + if let InstanceExternalIpBody::Ephemeral(other_ip) = v { + curr_ip != other_ip + } else { + false + } + }) { + return Err(Error::invalid_request("cannot replace existing ephemeral IP without explicit removal")); + } + } + + my_eips.insert(*body_args); + + Ok(()) + } + + pub async fn instance_delete_external_ip( + &self, + instance_id: Uuid, + body_args: &InstanceExternalIpBody, + ) -> Result<(), Error> { + if !self.instances.contains_key(&instance_id).await { + return Err(Error::internal_error( + "can't alter IP state for nonexistent instance", + )); + } + + let mut eips = self.external_ips.lock().await; + let my_eips = eips.entry(instance_id).or_default(); + + my_eips.remove(&body_args); + + Ok(()) + } + /// Used for integration tests that require a component to talk to a /// mocked propolis-server API. // TODO: fix schemas so propolis-server's port isn't hardcoded in nexus diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 71fe3584f0..eaf354db26 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -16,10 +16,11 @@ use crate::long_running_tasks::LongRunningTaskHandles; use crate::metrics::MetricsManager; use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue}; use crate::params::{ - DiskStateRequested, InstanceHardware, InstanceMigrationSourceParams, - InstancePutStateResponse, InstanceStateRequested, - InstanceUnregisterResponse, Inventory, OmicronZonesConfig, SledRole, - TimeSync, VpcFirewallRule, ZoneBundleMetadata, Zpool, + DiskStateRequested, InstanceExternalIpBody, InstanceHardware, + InstanceMigrationSourceParams, InstancePutStateResponse, + InstanceStateRequested, InstanceUnregisterResponse, Inventory, + OmicronZonesConfig, SledRole, TimeSync, VpcFirewallRule, + ZoneBundleMetadata, Zpool, }; use crate::services::{self, ServiceManager}; use crate::storage_monitor::UnderlayAccess; @@ -979,6 +980,37 @@ impl SledAgent { .map_err(|e| Error::Instance(e)) } + /// Idempotently ensures that an instance's OPTE/port state includes the + /// specified external IP address. + /// + /// This method will return an error when trying to register an ephemeral IP which + /// does not match the current ephemeral IP. 
+ pub async fn instance_put_external_ip( + &self, + instance_id: Uuid, + external_ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + self.inner + .instances + .add_external_ip(instance_id, external_ip) + .await + .map_err(|e| Error::Instance(e)) + } + + /// Idempotently ensures that an instance's OPTE/port state does not include the + /// specified external IP address in either its ephemeral or floating IP set. + pub async fn instance_delete_external_ip( + &self, + instance_id: Uuid, + external_ip: &InstanceExternalIpBody, + ) -> Result<(), Error> { + self.inner + .instances + .delete_external_ip(instance_id, external_ip) + .await + .map_err(|e| Error::Instance(e)) + } + /// Idempotently ensures that the given virtual disk is attached (or not) as /// specified. /// From e261a960cb365ad92f103a35b262713118ea6441 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 24 Jan 2024 15:38:25 -0800 Subject: [PATCH 26/49] [meta] update samael to 0.0.14 (#4878) Required to unblock a bunch of other updates. The behavior of a test changed, but in a way that to my understanding based on [the Duo article](https://duo.com/blog/duo-finds-saml-vulnerabilities-affecting-multiple-implementations) is still safe. See the comment included in the PR for more. --- Cargo.lock | 18 +++++++-------- Cargo.toml | 2 +- nexus/tests/integration_tests/saml.rs | 33 +++++++++++++++++++++++---- workspace-hack/Cargo.toml | 2 -- 4 files changed, 38 insertions(+), 17 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0c3eb15179..6ee028bbc5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -479,11 +479,11 @@ dependencies = [ [[package]] name = "bindgen" -version = "0.65.1" +version = "0.69.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cfdf7b466f9a4903edc73f95d6d2bcd5baf8ae620638762244d3f60143643cc5" +checksum = "a4c69fae65a523209d34240b60abe0c42d33d1045d445c0839d8a4894a736e2d" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.0", "cexpr", "clang-sys", "lazy_static", @@ -5267,7 +5267,6 @@ dependencies = [ "sha2", "similar", "slog", - "snafu", "socket2 0.5.5", "spin 0.9.8", "string_cache", @@ -6496,9 +6495,9 @@ checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" [[package]] name = "quick-xml" -version = "0.23.1" +version = "0.30.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "11bafc859c6815fbaffbbbf4229ecb767ac913fecb27f9ad4343662e9ef099ea" +checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956" dependencies = [ "memchr", "serde", @@ -7372,8 +7371,9 @@ dependencies = [ [[package]] name = "samael" -version = "0.0.10" -source = "git+https://github.com/njaremko/samael?branch=master#52028e45d11ceb7114bf0c730a9971207e965602" +version = "0.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b75583aad4a51c50fc0af69c230d18078c9d5a69a98d0f6013d01053acf744f4" dependencies = [ "base64", "bindgen", @@ -7391,7 +7391,7 @@ dependencies = [ "quick-xml", "rand 0.8.5", "serde", - "snafu", + "thiserror", "url", "uuid", ] diff --git a/Cargo.toml b/Cargo.toml index 093e972b42..ed54ae8c6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -325,7 +325,7 @@ rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.0.0" rustyline = "12.0.0" -samael = { git = "https://github.com/njaremko/samael", features = ["xmlsec"], branch = "master" } +samael = { version = "0.0.14", features = ["xmlsec"] } schemars = "0.8.16" secrecy = "0.8.0" semver = { version = "1.0.21", features = ["std", "serde"] } diff --git 
a/nexus/tests/integration_tests/saml.rs b/nexus/tests/integration_tests/saml.rs index fc04bbf908..b1b0429c2e 100644 --- a/nexus/tests/integration_tests/saml.rs +++ b/nexus/tests/integration_tests/saml.rs @@ -964,12 +964,33 @@ fn test_reject_unsigned_saml_response() { assert!(result.is_err()); } -// Test rejecting a correct SAML response that contains a XML comment in -// saml:NameID. +// Test accepting a correct SAML response that contains an XML comment in +// saml:NameID, and ensuring that the full text node is extracted (and not a +// substring). // -// See: https://duo.com/blog/duo-finds-saml-vulnerabilities-affecting-multiple-implementations +// This used to be a test that _rejected_ such responses, but a change to an +// upstream dependency (quick-xml) caused the behavior around text nodes with +// embedded comments to change. Specifically, consider: +// +// <saml:NameID>user@example.com<!-- comment -->.evil.com</saml:NameID> +// +// What should the text node for this element be? +// +// * Some XML parsing libraries just return "user@example.com". That leads to a +// vulnerability, where an attacker can get a response signed with a +// different email address than intended. +// * Some XML libraries return "user@example.com.evil.com". This is safe, +// because the text after the comment hasn't been dropped. This is the behavior +// with quick-xml 0.30, and the one that we're testing here. +// * Some XML libraries are unable to deserialize the document. This is also +// safe (and not particularly problematic because typically SAML responses +// aren't going to contain comments), and was the behavior with quick-xml +// 0.23. +// +// See: +// https://duo.com/blog/duo-finds-saml-vulnerabilities-affecting-multiple-implementations #[test] -fn test_reject_saml_response_with_xml_comment() { +fn test_handle_saml_response_with_xml_comment() { let silo_saml_identity_provider = SamlIdentityProvider { idp_metadata_document_string: SAML_RESPONSE_IDP_DESCRIPTOR.to_string(), @@ -1004,7 +1025,9 @@ fn test_reject_saml_response_with_xml_comment() { ), ); - assert!(result.is_err()); + let (authenticated_subject, _) = + result.expect("expected validation to succeed"); + assert_eq!(authenticated_subject.external_id, "some@customer.com"); } // Test receiving a correct SAML response that has group attributes diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index cda4426c9b..25a72838a0 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -96,7 +96,6 @@ serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } -snafu = { version = "0.7.5", features = ["futures"] } socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } @@ -204,7 +203,6 @@ serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } -snafu = { version = "0.7.5", features = ["futures"] } socket2 = { version = "0.5.5", default-features = false, features = ["all"] } spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } From
97318e91017124629c4231a7cdfc473a90f99270 Mon Sep 17 00:00:00 2001 From: Kyle Simpson Date: Thu, 25 Jan 2024 00:14:37 +0000 Subject: [PATCH 27/49] Correctly bump schema version for floating IP changes. (#4890) Two closely-occurring merges hit at high speed! This PR bumps the schema to 26.0.0, as was missed at the time. --- nexus/db-model/src/schema.rs | 2 +- schema/crdb/{25.0.0 => 26.0.0}/up01.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up02.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up03.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up04.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up05.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up06.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up07.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up08.sql | 0 schema/crdb/{25.0.0 => 26.0.0}/up09.sql | 0 schema/crdb/dbinit.sql | 2 +- 11 files changed, 2 insertions(+), 2 deletions(-) rename schema/crdb/{25.0.0 => 26.0.0}/up01.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up02.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up03.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up04.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up05.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up06.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up07.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up08.sql (100%) rename schema/crdb/{25.0.0 => 26.0.0}/up09.sql (100%) diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 11cdf87f6c..954647f70d 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(25, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(26, 0, 0); table! 
{ disk (id) { diff --git a/schema/crdb/25.0.0/up01.sql b/schema/crdb/26.0.0/up01.sql similarity index 100% rename from schema/crdb/25.0.0/up01.sql rename to schema/crdb/26.0.0/up01.sql diff --git a/schema/crdb/25.0.0/up02.sql b/schema/crdb/26.0.0/up02.sql similarity index 100% rename from schema/crdb/25.0.0/up02.sql rename to schema/crdb/26.0.0/up02.sql diff --git a/schema/crdb/25.0.0/up03.sql b/schema/crdb/26.0.0/up03.sql similarity index 100% rename from schema/crdb/25.0.0/up03.sql rename to schema/crdb/26.0.0/up03.sql diff --git a/schema/crdb/25.0.0/up04.sql b/schema/crdb/26.0.0/up04.sql similarity index 100% rename from schema/crdb/25.0.0/up04.sql rename to schema/crdb/26.0.0/up04.sql diff --git a/schema/crdb/25.0.0/up05.sql b/schema/crdb/26.0.0/up05.sql similarity index 100% rename from schema/crdb/25.0.0/up05.sql rename to schema/crdb/26.0.0/up05.sql diff --git a/schema/crdb/25.0.0/up06.sql b/schema/crdb/26.0.0/up06.sql similarity index 100% rename from schema/crdb/25.0.0/up06.sql rename to schema/crdb/26.0.0/up06.sql diff --git a/schema/crdb/25.0.0/up07.sql b/schema/crdb/26.0.0/up07.sql similarity index 100% rename from schema/crdb/25.0.0/up07.sql rename to schema/crdb/26.0.0/up07.sql diff --git a/schema/crdb/25.0.0/up08.sql b/schema/crdb/26.0.0/up08.sql similarity index 100% rename from schema/crdb/25.0.0/up08.sql rename to schema/crdb/26.0.0/up08.sql diff --git a/schema/crdb/25.0.0/up09.sql b/schema/crdb/26.0.0/up09.sql similarity index 100% rename from schema/crdb/25.0.0/up09.sql rename to schema/crdb/26.0.0/up09.sql diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 86d88f5fe9..79a43d3c89 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3296,7 +3296,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '25.0.0', NULL) + ( TRUE, NOW(), NOW(), '26.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 9e08978c5d932a8fe89c248abae06ff54161daf9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Karen=20C=C3=A1rcamo?= Date: Thu, 25 Jan 2024 16:43:45 +1300 Subject: [PATCH 28/49] [sled-agent] Oximeter self-assembling zone (#4534) Related https://github.com/oxidecomputer/omicron/issues/1898 Closes: https://github.com/oxidecomputer/omicron/issues/2883 --- .github/buildomat/jobs/package.sh | 2 +- oximeter/db/schema/README.md | 2 +- package-manifest.toml | 10 +++- sled-agent/src/services.rs | 98 ++++++++++++++++++++++++++----- smf/oximeter/manifest.xml | 13 +++- 5 files changed, 103 insertions(+), 22 deletions(-) diff --git a/.github/buildomat/jobs/package.sh b/.github/buildomat/jobs/package.sh index b4d10891b9..79590a44df 100755 --- a/.github/buildomat/jobs/package.sh +++ b/.github/buildomat/jobs/package.sh @@ -117,7 +117,7 @@ zones=( out/internal-dns.tar.gz out/omicron-nexus.tar.gz out/omicron-nexus-single-sled.tar.gz - out/oximeter-collector.tar.gz + out/oximeter.tar.gz out/propolis-server.tar.gz out/switch-*.tar.gz out/ntp.tar.gz diff --git a/oximeter/db/schema/README.md b/oximeter/db/schema/README.md index 2f1633138d..929144bccf 100644 --- a/oximeter/db/schema/README.md +++ b/oximeter/db/schema/README.md @@ -32,7 +32,7 @@ To run this program: - Run this tool, pointing it at the desired schema directory, e.g.: ```bash -# /opt/oxide/oximeter/bin/clickhouse-schema-updater \ +# /opt/oxide/oximeter-collector/bin/clickhouse-schema-updater \ --host \ --schema-dir /opt/oxide/oximeter/sql up VERSION diff --git a/package-manifest.toml b/package-manifest.toml index 3525b121e4..36e43157f9 100644 --- a/package-manifest.toml +++ 
b/package-manifest.toml @@ -116,9 +116,16 @@ setup_hint = """ - Run `pkg install library/postgresql-13` to download Postgres libraries """ -[package.oximeter-collector] +[package.oximeter] service_name = "oximeter" only_for_targets.image = "standard" +source.type = "composite" +source.packages = [ "oximeter-collector.tar.gz", "zone-network-setup.tar.gz" ] +output.type = "zone" + +[package.oximeter-collector] +service_name = "oximeter-collector" +only_for_targets.image = "standard" source.type = "local" source.rust.binary_names = ["oximeter", "clickhouse-schema-updater"] source.rust.release = true @@ -127,6 +134,7 @@ source.paths = [ { from = "oximeter/db/schema", to = "/opt/oxide/oximeter/schema" }, ] output.type = "zone" +output.intermediate_only = true [package.clickhouse] service_name = "clickhouse" diff --git a/sled-agent/src/services.rs b/sled-agent/src/services.rs index 211e602bbf..77b6bcbed4 100644 --- a/sled-agent/src/services.rs +++ b/sled-agent/src/services.rs @@ -61,7 +61,6 @@ use illumos_utils::zone::Zones; use illumos_utils::{execute, PFEXEC}; use internal_dns::resolver::Resolver; use itertools::Itertools; -use omicron_common::address::AZ_PREFIX; use omicron_common::address::BOOTSTRAP_ARTIFACT_PORT; use omicron_common::address::CLICKHOUSE_KEEPER_PORT; use omicron_common::address::CLICKHOUSE_PORT; @@ -75,6 +74,7 @@ use omicron_common::address::SLED_PREFIX; use omicron_common::address::WICKETD_NEXUS_PROXY_PORT; use omicron_common::address::WICKETD_PORT; use omicron_common::address::{Ipv6Subnet, NEXUS_TECHPORT_EXTERNAL_PORT}; +use omicron_common::address::{AZ_PREFIX, OXIMETER_PORT}; use omicron_common::api::external::Generation; use omicron_common::api::internal::shared::{ HostPortConfig, RackNetworkConfig, @@ -1798,7 +1798,55 @@ impl ServiceManager { let running_zone = RunningZone::boot(installed_zone).await?; return Ok(running_zone); } + ZoneArgs::Omicron(OmicronZoneConfigLocal { + zone: + OmicronZoneConfig { + id, + zone_type: OmicronZoneType::Oximeter { .. }, + underlay_address, + .. + }, + .. + }) => { + let Some(info) = self.inner.sled_info.get() else { + return Err(Error::SledAgentNotReady); + }; + + // Configure the Oximeter service. + let address = SocketAddr::new( + IpAddr::V6(*underlay_address), + OXIMETER_PORT, + ); + + let listen_addr = &address.ip().to_string(); + + let nw_setup_service = Self::zone_network_setup_install( + info, + &installed_zone, + listen_addr, + )?; + + let oximeter_config = PropertyGroupBuilder::new("config") + .add_property("id", "astring", &id.to_string()) + .add_property("address", "astring", &address.to_string()); + let oximeter_service = ServiceBuilder::new("oxide/oximeter") + .add_instance( + ServiceInstanceBuilder::new("default") + .add_property_group(oximeter_config), + ); + let profile = ProfileBuilder::new("omicron") + .add_service(nw_setup_service) + .add_service(disabled_ssh_service) + .add_service(oximeter_service); + profile + .add_to_zone(&self.inner.log, &installed_zone) + .await + .map_err(|err| { + Error::io("Failed to setup Oximeter profile", err) + })?; + return Ok(RunningZone::boot(installed_zone).await?); + } _ => {} } @@ -2154,14 +2202,6 @@ impl ServiceManager { // service is enabled. 
smfh.refresh()?; } - - OmicronZoneType::Oximeter { address } => { - info!(self.inner.log, "Setting up oximeter service"); - smfh.setprop("config/id", zone_config.zone.id)?; - smfh.setprop("config/address", address.to_string())?; - smfh.refresh()?; - } - OmicronZoneType::BoundaryNtp { ntp_servers, dns_servers, @@ -2227,7 +2267,8 @@ | OmicronZoneType::ClickhouseKeeper { .. } | OmicronZoneType::CockroachDb { .. } | OmicronZoneType::Crucible { .. } - | OmicronZoneType::CruciblePantry { .. } => { + | OmicronZoneType::CruciblePantry { .. } + | OmicronZoneType::Oximeter { .. } => { panic!( "{} is a service which exists as part of a \ self-assembling zone", @@ -3729,7 +3770,7 @@ mod test { const GLOBAL_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; const SWITCH_ZONE_BOOTSTRAP_IP: Ipv6Addr = Ipv6Addr::LOCALHOST; - const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_oximeter"; + const EXPECTED_ZONE_NAME_PREFIX: &str = "oxz_ntp"; const EXPECTED_PORT: u16 = 12223; fn make_bootstrap_networking_config() -> BootstrapNetworking { @@ -3906,7 +3947,12 @@ mgr, id, generation, - OmicronZoneType::Oximeter { address }, + OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, ) .await .expect("Could not create service"); @@ -3945,7 +3991,12 @@ zones: vec![OmicronZoneConfig { id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type: OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, }], }) .await @@ -4314,7 +4365,12 @@ let mut zones = vec![OmicronZoneConfig { id: id1, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type: OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, }]; mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { generation: v2, @@ -4335,7 +4391,12 @@ zones.push(OmicronZoneConfig { id: id2, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type: OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, }); // Now try to apply that list with an older generation number. This @@ -4508,7 +4569,12 @@ zones.push(OmicronZoneConfig { id, underlay_address: Ipv6Addr::LOCALHOST, - zone_type: OmicronZoneType::Oximeter { address }, + zone_type: OmicronZoneType::InternalNtp { + address, + ntp_servers: vec![], + dns_servers: vec![], + domain: None, + }, }); mgr.ensure_all_omicron_zones_persistent(OmicronZonesConfig { generation: vv, diff --git a/smf/oximeter/manifest.xml b/smf/oximeter/manifest.xml index 9c8b30f1f4..fe6c9ac23a 100644 --- a/smf/oximeter/manifest.xml +++ b/smf/oximeter/manifest.xml @@ -4,21 +4,28 @@ [XML hunk garbled in extraction; the surviving text shows the start method changing to exec='ctrun -l child -o noorphan,regent /opt/oxide/oximeter-collector/bin/oximeter run /var/svc/manifest/site/oximeter/config.toml --address %{config/address} --id %{config/id} &' with timeout_seconds='0'.] From e69e6f68154f6efe923f5fa74ef719a26feb17b4 Mon Sep 17 00:00:00 2001 From: Rain Date: Wed, 24 Jan 2024 20:58:40 -0800 Subject: [PATCH 29/49] [nexus] add support for ingesting TUF repos (#4690) Implement basic support for uploading TUF repos via an endpoint. The PR looks pretty big but most of it is fairly mechanical addition and removal (and much of it has to be done in one go because of internal dependencies.)
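For a sense of what callers see from the new upload endpoint, here is a small, hypothetical sketch against the `TufRepoInsertStatus` type added further down in this patch (the type is re-declared so the sketch stands alone; the handling itself is illustrative, not code from this PR):

```rust
use serde::Deserialize;

// Re-declaration of TufRepoInsertStatus from common/src/api/external/mod.rs.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
enum TufRepoInsertStatus {
    AlreadyExists,
    Inserted,
}

fn describe(status: TufRepoInsertStatus) -> &'static str {
    match status {
        // Re-uploading a repo that was already ingested is not an error;
        // the response distinguishes it from a fresh insert instead.
        TufRepoInsertStatus::AlreadyExists => "repo was already ingested",
        TufRepoInsertStatus::Inserted => "repo was newly recorded",
    }
}

fn main() {
    // The snake_case renaming means the JSON encoding is "already_exists".
    let status: TufRepoInsertStatus =
        serde_json::from_str(r#""already_exists""#).unwrap();
    assert_eq!(describe(status), "repo was already ingested");
}
```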
Also include a few more changes: - Move more code to update-common. - Move the `by_hash` and `by_id` maps to update-common's `UpdatePlanBuilder`. - Remove old update-related code and database migrations that will be replaced by newer blueprint design. (This is the vast majority of deleted code, and ideally would be a separate PR except it's a bit inconvenient to have a PR stack with multiple schema migrations.) This PR **does not include** actually storing TUF repos and replicating them among sleds. That work has been deprioritized for now, to instead focus on sled addition and removal. --- Cargo.lock | 7 + clients/nexus-client/src/lib.rs | 26 - common/src/api/external/mod.rs | 105 +- common/src/nexus_config.rs | 4 - common/src/update.rs | 7 + nexus/Cargo.toml | 5 + nexus/db-model/src/lib.rs | 6 +- nexus/db-model/src/schema.rs | 84 +- nexus/db-model/src/semver_version.rs | 2 + nexus/db-model/src/system_update.rs | 306 ------ nexus/db-model/src/tuf_repo.rs | 312 ++++++ nexus/db-model/src/update_artifact.rs | 62 -- nexus/db-queries/Cargo.toml | 1 + nexus/db-queries/src/authz/api_resources.rs | 26 +- nexus/db-queries/src/authz/oso_generic.rs | 5 +- .../src/authz/policy_test/resources.rs | 26 +- nexus/db-queries/src/db/datastore/mod.rs | 117 +-- nexus/db-queries/src/db/datastore/update.rs | 638 ++++++------ nexus/db-queries/src/db/lookup.rs | 50 +- nexus/db-queries/src/db/pool_connection.rs | 3 - nexus/db-queries/tests/output/authz-roles.out | 4 +- nexus/examples/config.toml | 10 +- nexus/src/app/mod.rs | 1 + nexus/src/app/rack.rs | 3 - nexus/src/app/update/mod.rs | 965 +----------------- nexus/src/external_api/http_entrypoints.rs | 343 +------ nexus/src/internal_api/http_entrypoints.rs | 9 +- nexus/src/lib.rs | 1 - nexus/src/updates.rs | 74 -- nexus/test-utils/Cargo.toml | 1 + nexus/test-utils/src/http_testing.rs | 30 + nexus/tests/integration_tests/endpoints.rs | 83 +- nexus/tests/integration_tests/mod.rs | 1 - .../tests/integration_tests/system_updates.rs | 219 ---- nexus/tests/integration_tests/updates.rs | 524 ++++++---- .../output/unexpected-authz-endpoints.txt | 12 +- nexus/types/src/external_api/params.rs | 31 +- nexus/types/src/external_api/views.rs | 61 +- openapi/nexus-internal.json | 19 +- schema/crdb/27.0.0/up01.sql | 1 + schema/crdb/27.0.0/up02.sql | 1 + schema/crdb/27.0.0/up03.sql | 1 + schema/crdb/27.0.0/up04.sql | 1 + schema/crdb/27.0.0/up05.sql | 1 + schema/crdb/27.0.0/up06.sql | 1 + schema/crdb/27.0.0/up07.sql | 1 + schema/crdb/27.0.0/up08.sql | 1 + schema/crdb/27.0.0/up09.sql | 1 + schema/crdb/27.0.0/up10.sql | 33 + schema/crdb/27.0.0/up11.sql | 23 + schema/crdb/27.0.0/up12.sql | 21 + schema/crdb/dbinit.sql | 224 ++-- sled-agent/src/updates.rs | 2 +- tufaceous-lib/src/assemble/manifest.rs | 125 +++ update-common/Cargo.toml | 1 + .../src/artifacts/artifacts_with_plan.rs | 221 +++- .../src/artifacts/extracted_artifacts.rs | 4 +- update-common/src/artifacts/update_plan.rs | 228 ++--- update-common/src/errors.rs | 27 +- wicketd/src/artifacts/store.rs | 22 +- wicketd/src/http_entrypoints.rs | 42 +- wicketd/src/update_tracker.rs | 36 +- 62 files changed, 1896 insertions(+), 3305 deletions(-) delete mode 100644 nexus/db-model/src/system_update.rs create mode 100644 nexus/db-model/src/tuf_repo.rs delete mode 100644 nexus/db-model/src/update_artifact.rs delete mode 100644 nexus/src/updates.rs delete mode 100644 nexus/tests/integration_tests/system_updates.rs create mode 100644 schema/crdb/27.0.0/up01.sql create mode 100644 schema/crdb/27.0.0/up02.sql create mode 100644 
schema/crdb/27.0.0/up03.sql create mode 100644 schema/crdb/27.0.0/up04.sql create mode 100644 schema/crdb/27.0.0/up05.sql create mode 100644 schema/crdb/27.0.0/up06.sql create mode 100644 schema/crdb/27.0.0/up07.sql create mode 100644 schema/crdb/27.0.0/up08.sql create mode 100644 schema/crdb/27.0.0/up09.sql create mode 100644 schema/crdb/27.0.0/up10.sql create mode 100644 schema/crdb/27.0.0/up11.sql create mode 100644 schema/crdb/27.0.0/up12.sql diff --git a/Cargo.lock b/Cargo.lock index 6ee028bbc5..c2f3e1a949 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4273,6 +4273,7 @@ dependencies = [ "steno", "strum", "subprocess", + "swrite", "term", "thiserror", "tokio", @@ -4387,6 +4388,7 @@ dependencies = [ "serde_urlencoded", "slog", "tokio", + "tokio-util", "trust-dns-resolver", "uuid", ] @@ -4847,6 +4849,7 @@ dependencies = [ "async-trait", "base64", "buf-list", + "bytes", "camino", "camino-tempfile", "cancel-safe-futures", @@ -4947,6 +4950,9 @@ dependencies = [ "tokio-postgres", "tough", "trust-dns-resolver", + "tufaceous", + "tufaceous-lib", + "update-common", "uuid", ] @@ -9570,6 +9576,7 @@ dependencies = [ "bytes", "camino", "camino-tempfile", + "chrono", "clap 4.4.3", "debug-ignore", "display-error-chain", diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 1e1cbc31e7..17fb5aa367 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -236,32 +236,6 @@ impl From } } -impl From<omicron_common::api::internal::nexus::KnownArtifactKind> - for types::KnownArtifactKind -{ - fn from( - s: omicron_common::api::internal::nexus::KnownArtifactKind, - ) -> Self { - use omicron_common::api::internal::nexus::KnownArtifactKind; - - match s { - KnownArtifactKind::GimletSp => types::KnownArtifactKind::GimletSp, - KnownArtifactKind::GimletRot => types::KnownArtifactKind::GimletRot, - KnownArtifactKind::Host => types::KnownArtifactKind::Host, - KnownArtifactKind::Trampoline => { - types::KnownArtifactKind::Trampoline - } - KnownArtifactKind::ControlPlane => { - types::KnownArtifactKind::ControlPlane - } - KnownArtifactKind::PscSp => types::KnownArtifactKind::PscSp, - KnownArtifactKind::PscRot => types::KnownArtifactKind::PscRot, - KnownArtifactKind::SwitchSp => types::KnownArtifactKind::SwitchSp, - KnownArtifactKind::SwitchRot => types::KnownArtifactKind::SwitchRot, - } - } -} - impl From<std::time::Duration> for types::Duration { fn from(s: std::time::Duration) -> Self { Self { secs: s.as_secs(), nanos: s.subsec_nanos() } diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index a8aff00afa..dc3537fbb2 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -13,6 +13,8 @@ use dropshot::HttpError; pub use error::*; pub use crate::api::internal::shared::SwitchLocation; +use crate::update::ArtifactHash; +use crate::update::ArtifactId; use anyhow::anyhow; use anyhow::Context; use api_identity::ObjectIdentity; @@ -760,13 +762,9 @@ pub enum ResourceType { Oximeter, MetricProducer, RoleBuiltin, - UpdateArtifact, + TufRepo, + TufArtifact, SwitchPort, - SystemUpdate, - ComponentUpdate, - SystemUpdateComponentUpdate, - UpdateDeployment, - UpdateableComponent, UserBuiltin, Zpool, Vmm, @@ -2625,6 +2623,101 @@ pub struct BgpImportedRouteIpv4 { pub switch: SwitchLocation, } +/// A description of an uploaded TUF repository. +#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)] +pub struct TufRepoDescription { // Information about the repository. + pub repo: TufRepoMeta, + + // Information about the artifacts present in the repository.
+    pub artifacts: Vec<TufArtifactMeta>,
+}
+
+impl TufRepoDescription {
+    /// Sorts the artifacts so that descriptions can be compared.
+    pub fn sort_artifacts(&mut self) {
+        self.artifacts.sort_by(|a, b| a.id.cmp(&b.id));
+    }
+}
+
+/// Metadata about a TUF repository.
+///
+/// Found within a [`TufRepoDescription`].
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
+pub struct TufRepoMeta {
+    /// The hash of the repository.
+    ///
+    /// This is a slight abuse of `ArtifactHash`, since that's the hash of
+    /// individual artifacts within the repository. However, we use it here
+    /// for convenience.
+    pub hash: ArtifactHash,
+
+    /// The version of the targets role.
+    pub targets_role_version: u64,
+
+    /// The time until which the repo is valid.
+    pub valid_until: DateTime<Utc>,
+
+    /// The system version in artifacts.json.
+    pub system_version: SemverVersion,
+
+    /// The file name of the repository.
+    ///
+    /// This is purely used for debugging and may not always be correct (e.g.
+    /// with wicket, we read the file contents from stdin so we don't know the
+    /// correct file name).
+    pub file_name: String,
+}
+
+/// Metadata about an individual TUF artifact.
+///
+/// Found within a [`TufRepoDescription`].
+#[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema)]
+pub struct TufArtifactMeta {
+    /// The artifact ID.
+    pub id: ArtifactId,
+
+    /// The hash of the artifact.
+    pub hash: ArtifactHash,
+
+    /// The size of the artifact in bytes.
+    pub size: u64,
+}
+
+/// Data about a successful TUF repo import into Nexus.
+#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
+#[serde(rename_all = "snake_case")]
+pub struct TufRepoInsertResponse {
+    /// The repository as present in the database.
+    pub recorded: TufRepoDescription,
+
+    /// Whether this repository already existed or is new.
+    pub status: TufRepoInsertStatus,
+}
+
+/// Status of a TUF repo import.
+///
+/// Part of [`TufRepoInsertResponse`].
+#[derive(
+    Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize, JsonSchema,
+)]
+#[serde(rename_all = "snake_case")]
+pub enum TufRepoInsertStatus {
+    /// The repository already existed in the database.
+    AlreadyExists,
+
+    /// The repository did not exist, and was inserted into the database.
+    Inserted,
+}
+
+/// Data about a successful TUF repo get from Nexus.
+#[derive(Debug, Clone, Deserialize, Serialize, JsonSchema)]
+#[serde(rename_all = "snake_case")]
+pub struct TufRepoGetResponse {
+    /// The description of the repository.
+    pub description: TufRepoDescription,
+}
+
 #[cfg(test)]
 mod test {
     use serde::Deserialize;
diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs
index 7f26bd84b0..be4b05ffdf 100644
--- a/common/src/nexus_config.rs
+++ b/common/src/nexus_config.rs
@@ -213,8 +213,6 @@ pub struct ConsoleConfig {
 pub struct UpdatesConfig {
     /// Trusted root.json role for the TUF updates repository.
     pub trusted_root: Utf8PathBuf,
-    /// Default base URL for the TUF repository.
-    pub default_base_url: String,
 }
 
 /// Options to tweak database schema changes.
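The `TufRepoInsertResponse`/`TufRepoInsertStatus` pair defined just above in `common/src/api/external/mod.rs` is how callers tell an idempotent re-upload apart from a first-time import. A minimal sketch of consuming it; the `describe` helper is hypothetical, and this assumes `omicron_common` as a dependency (`SemverVersion` implements `Display`, which the datastore code below also relies on):

```rust
use omicron_common::api::external::{
    TufRepoInsertResponse, TufRepoInsertStatus,
};

// Hypothetical helper, not part of this PR: summarize an insert response.
fn describe(response: &TufRepoInsertResponse) -> String {
    let version = &response.recorded.repo.system_version;
    match response.status {
        TufRepoInsertStatus::AlreadyExists => {
            format!("repo {version} already recorded; returned stored copy")
        }
        TufRepoInsertStatus::Inserted => {
            format!("repo {version} newly inserted")
        }
    }
}
```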
@@ -631,7 +629,6 @@ mod test { address = "[::1]:8123" [updates] trusted_root = "/path/to/root.json" - default_base_url = "http://example.invalid/" [tunables] max_vpc_ipv4_subnet_prefix = 27 [deployment] @@ -728,7 +725,6 @@ mod test { }, updates: Some(UpdatesConfig { trusted_root: Utf8PathBuf::from("/path/to/root.json"), - default_base_url: "http://example.invalid/".into(), }), schema: None, tunables: Tunables { max_vpc_ipv4_subnet_prefix: 27 }, diff --git a/common/src/update.rs b/common/src/update.rs index 28d5ae50a6..9feff1f868 100644 --- a/common/src/update.rs +++ b/common/src/update.rs @@ -95,6 +95,13 @@ pub struct ArtifactId { pub kind: ArtifactKind, } +/// Used for user-friendly messages. +impl fmt::Display for ArtifactId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} v{} ({})", self.name, self.version, self.kind) + } +} + /// A hash-based identifier for an artifact. /// /// Some places, e.g. the installinator, request artifacts by hash rather than diff --git a/nexus/Cargo.toml b/nexus/Cargo.toml index 52ee7034dd..87703cce77 100644 --- a/nexus/Cargo.toml +++ b/nexus/Cargo.toml @@ -13,8 +13,10 @@ assert_matches.workspace = true async-trait.workspace = true base64.workspace = true buf-list.workspace = true +bytes.workspace = true cancel-safe-futures.workspace = true camino.workspace = true +camino-tempfile.workspace = true clap.workspace = true chrono.workspace = true crucible-agent-client.workspace = true @@ -88,6 +90,7 @@ oximeter-instruments = { workspace = true, features = ["http-instruments"] } oximeter-producer.workspace = true rustls = { workspace = true } rustls-pemfile = { workspace = true } +update-common.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] @@ -120,6 +123,8 @@ rustls = { workspace = true } subprocess.workspace = true term.workspace = true trust-dns-resolver.workspace = true +tufaceous.workspace = true +tufaceous-lib.workspace = true httptest.workspace = true strum.workspace = true diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 8fdf05e876..5c0a68c253 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -49,7 +49,6 @@ mod project; mod semver_version; mod switch_interface; mod switch_port; -mod system_update; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. @@ -78,8 +77,8 @@ mod sled_underlay_subnet_allocation; mod snapshot; mod ssh_key; mod switch; +mod tuf_repo; mod unsigned; -mod update_artifact; mod user_builtin; mod utilization; mod virtual_provisioning_collection; @@ -165,8 +164,7 @@ pub use ssh_key::*; pub use switch::*; pub use switch_interface::*; pub use switch_port::*; -pub use system_update::*; -pub use update_artifact::*; +pub use tuf_repo::*; pub use user_builtin::*; pub use utilization::*; pub use virtual_provisioning_collection::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 954647f70d..eb71a12f04 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(26, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(27, 0, 0); table! { disk (id) { @@ -1177,72 +1177,45 @@ table! { } table! 
{ - update_artifact (name, version, kind) { - name -> Text, - version -> Text, - kind -> crate::KnownArtifactKindEnum, + tuf_repo (id) { + id -> Uuid, + time_created -> Timestamptz, + sha256 -> Text, targets_role_version -> Int8, valid_until -> Timestamptz, - target_name -> Text, - target_sha256 -> Text, - target_length -> Int8, + system_version -> Text, + file_name -> Text, } } table! { - system_update (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - + tuf_artifact (name, version, kind) { + name -> Text, version -> Text, - } -} - -table! { - update_deployment (id) { - id -> Uuid, + kind -> Text, time_created -> Timestamptz, - time_modified -> Timestamptz, - - version -> Text, - status -> crate::UpdateStatusEnum, - // TODO: status reason for updateable_component + sha256 -> Text, + artifact_size -> Int8, } } table! { - component_update (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - version -> Text, - component_type -> crate::UpdateableComponentTypeEnum, - } -} - -table! { - updateable_component (id) { - id -> Uuid, - time_created -> Timestamptz, - time_modified -> Timestamptz, - - device_id -> Text, - version -> Text, - system_version -> Text, - component_type -> crate::UpdateableComponentTypeEnum, - status -> crate::UpdateStatusEnum, - // TODO: status reason for updateable_component + tuf_repo_artifact (tuf_repo_id, tuf_artifact_name, tuf_artifact_version, tuf_artifact_kind) { + tuf_repo_id -> Uuid, + tuf_artifact_name -> Text, + tuf_artifact_version -> Text, + tuf_artifact_kind -> Text, } } -table! { - system_update_component_update (system_update_id, component_update_id) { - system_update_id -> Uuid, - component_update_id -> Uuid, - } -} +allow_tables_to_appear_in_same_query!( + tuf_repo, + tuf_repo_artifact, + tuf_artifact +); +joinable!(tuf_repo_artifact -> tuf_repo (tuf_repo_id)); +// Can't specify joinable for a composite primary key (tuf_repo_artifact -> +// tuf_artifact). /* hardware inventory */ @@ -1432,13 +1405,6 @@ table! { } } -allow_tables_to_appear_in_same_query!( - system_update, - component_update, - system_update_component_update, -); -joinable!(system_update_component_update -> component_update (component_update_id)); - allow_tables_to_appear_in_same_query!(ip_pool_range, ip_pool, ip_pool_resource); joinable!(ip_pool_range -> ip_pool (ip_pool_id)); joinable!(ip_pool_resource -> ip_pool (ip_pool_id)); diff --git a/nexus/db-model/src/semver_version.rs b/nexus/db-model/src/semver_version.rs index 8e168e11a2..f314e98ab3 100644 --- a/nexus/db-model/src/semver_version.rs +++ b/nexus/db-model/src/semver_version.rs @@ -24,6 +24,8 @@ use serde::{Deserialize, Serialize}; Serialize, Deserialize, PartialEq, + Eq, + Hash, Display, )] #[diesel(sql_type = sql_types::Text)] diff --git a/nexus/db-model/src/system_update.rs b/nexus/db-model/src/system_update.rs deleted file mode 100644 index 17421936b1..0000000000 --- a/nexus/db-model/src/system_update.rs +++ /dev/null @@ -1,306 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
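A note on the schema change above: since `joinable!` can't express the composite key between `tuf_repo_artifact` and `tuf_artifact`, queries spell the join condition out with an explicit `.on()` clause. A sketch of the shape, assuming the generated schema modules are in scope (the import path is an assumption; in this codebase the schema is usually reached via `db::schema`, and the real version of this query appears as `artifacts_for_repo` in the datastore changes later in this diff):

```rust
use diesel::prelude::*;
use nexus_db_model::schema::{tuf_artifact, tuf_repo_artifact};

fn repo_artifacts_query() {
    // Build the three-column join condition by hand; this is what
    // `joinable!` would have generated for a single-column key.
    let join_on = tuf_artifact::name
        .eq(tuf_repo_artifact::tuf_artifact_name)
        .and(
            tuf_artifact::version
                .eq(tuf_repo_artifact::tuf_artifact_version),
        )
        .and(tuf_artifact::kind.eq(tuf_repo_artifact::tuf_artifact_kind));

    let _query = tuf_repo_artifact::table
        .inner_join(tuf_artifact::table.on(join_on));
}
```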
- -use crate::{ - impl_enum_type, - schema::{ - component_update, system_update, system_update_component_update, - update_deployment, updateable_component, - }, - SemverVersion, -}; -use db_macros::Asset; -use nexus_types::{ - external_api::{params, shared, views}, - identity::Asset, -}; -use omicron_common::api::external; -use serde::{Deserialize, Serialize}; -use uuid::Uuid; - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Asset, - Serialize, - Deserialize, -)] -#[diesel(table_name = system_update)] -pub struct SystemUpdate { - #[diesel(embed)] - pub identity: SystemUpdateIdentity, - pub version: SemverVersion, -} - -impl SystemUpdate { - /// Can fail if version numbers are too high. - pub fn new( - version: external::SemverVersion, - ) -> Result { - Ok(Self { - identity: SystemUpdateIdentity::new(Uuid::new_v4()), - version: SemverVersion(version), - }) - } -} - -impl From for views::SystemUpdate { - fn from(system_update: SystemUpdate) -> Self { - Self { - identity: system_update.identity(), - version: system_update.version.into(), - } - } -} - -impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "update_status", schema = "public"))] - pub struct UpdateStatusEnum; - - #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] - #[diesel(sql_type = UpdateStatusEnum)] - pub enum UpdateStatus; - - Updating => b"updating" - Steady => b"steady" -); - -impl From for views::UpdateStatus { - fn from(status: UpdateStatus) -> Self { - match status { - UpdateStatus::Updating => Self::Updating, - UpdateStatus::Steady => Self::Steady, - } - } -} - -impl_enum_type!( - #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "updateable_component_type", schema = "public"))] - pub struct UpdateableComponentTypeEnum; - - #[derive(Copy, Clone, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)] - #[diesel(sql_type = UpdateableComponentTypeEnum)] - pub enum UpdateableComponentType; - - BootloaderForRot => b"bootloader_for_rot" - BootloaderForSp => b"bootloader_for_sp" - BootloaderForHostProc => b"bootloader_for_host_proc" - HubrisForPscRot => b"hubris_for_psc_rot" - HubrisForPscSp => b"hubris_for_psc_sp" - HubrisForSidecarRot => b"hubris_for_sidecar_rot" - HubrisForSidecarSp => b"hubris_for_sidecar_sp" - HubrisForGimletRot => b"hubris_for_gimlet_rot" - HubrisForGimletSp => b"hubris_for_gimlet_sp" - HeliosHostPhase1 => b"helios_host_phase_1" - HeliosHostPhase2 => b"helios_host_phase_2" - HostOmicron => b"host_omicron" -); - -impl From for UpdateableComponentType { - fn from(component_type: shared::UpdateableComponentType) -> Self { - match component_type { - shared::UpdateableComponentType::BootloaderForRot => { - UpdateableComponentType::BootloaderForRot - } - shared::UpdateableComponentType::BootloaderForSp => { - UpdateableComponentType::BootloaderForSp - } - shared::UpdateableComponentType::BootloaderForHostProc => { - UpdateableComponentType::BootloaderForHostProc - } - shared::UpdateableComponentType::HubrisForPscRot => { - UpdateableComponentType::HubrisForPscRot - } - shared::UpdateableComponentType::HubrisForPscSp => { - UpdateableComponentType::HubrisForPscSp - } - shared::UpdateableComponentType::HubrisForSidecarRot => { - UpdateableComponentType::HubrisForSidecarRot - } - shared::UpdateableComponentType::HubrisForSidecarSp => { - UpdateableComponentType::HubrisForSidecarSp - } - shared::UpdateableComponentType::HubrisForGimletRot => { - 
UpdateableComponentType::HubrisForGimletRot - } - shared::UpdateableComponentType::HubrisForGimletSp => { - UpdateableComponentType::HubrisForGimletSp - } - shared::UpdateableComponentType::HeliosHostPhase1 => { - UpdateableComponentType::HeliosHostPhase1 - } - shared::UpdateableComponentType::HeliosHostPhase2 => { - UpdateableComponentType::HeliosHostPhase2 - } - shared::UpdateableComponentType::HostOmicron => { - UpdateableComponentType::HostOmicron - } - } - } -} - -impl Into for UpdateableComponentType { - fn into(self) -> shared::UpdateableComponentType { - match self { - UpdateableComponentType::BootloaderForRot => { - shared::UpdateableComponentType::BootloaderForRot - } - UpdateableComponentType::BootloaderForSp => { - shared::UpdateableComponentType::BootloaderForSp - } - UpdateableComponentType::BootloaderForHostProc => { - shared::UpdateableComponentType::BootloaderForHostProc - } - UpdateableComponentType::HubrisForPscRot => { - shared::UpdateableComponentType::HubrisForPscRot - } - UpdateableComponentType::HubrisForPscSp => { - shared::UpdateableComponentType::HubrisForPscSp - } - UpdateableComponentType::HubrisForSidecarRot => { - shared::UpdateableComponentType::HubrisForSidecarRot - } - UpdateableComponentType::HubrisForSidecarSp => { - shared::UpdateableComponentType::HubrisForSidecarSp - } - UpdateableComponentType::HubrisForGimletRot => { - shared::UpdateableComponentType::HubrisForGimletRot - } - UpdateableComponentType::HubrisForGimletSp => { - shared::UpdateableComponentType::HubrisForGimletSp - } - UpdateableComponentType::HeliosHostPhase1 => { - shared::UpdateableComponentType::HeliosHostPhase1 - } - UpdateableComponentType::HeliosHostPhase2 => { - shared::UpdateableComponentType::HeliosHostPhase2 - } - UpdateableComponentType::HostOmicron => { - shared::UpdateableComponentType::HostOmicron - } - } - } -} - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Asset, - Serialize, - Deserialize, -)] -#[diesel(table_name = component_update)] -pub struct ComponentUpdate { - #[diesel(embed)] - pub identity: ComponentUpdateIdentity, - pub version: SemverVersion, - pub component_type: UpdateableComponentType, -} - -#[derive( - Queryable, Insertable, Selectable, Clone, Debug, Serialize, Deserialize, -)] -#[diesel(table_name = system_update_component_update)] -pub struct SystemUpdateComponentUpdate { - pub component_update_id: Uuid, - pub system_update_id: Uuid, -} - -impl From for views::ComponentUpdate { - fn from(component_update: ComponentUpdate) -> Self { - Self { - identity: component_update.identity(), - version: component_update.version.into(), - component_type: component_update.component_type.into(), - } - } -} - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Asset, - Serialize, - Deserialize, -)] -#[diesel(table_name = updateable_component)] -pub struct UpdateableComponent { - #[diesel(embed)] - pub identity: UpdateableComponentIdentity, - pub device_id: String, - pub component_type: UpdateableComponentType, - pub version: SemverVersion, - pub system_version: SemverVersion, - pub status: UpdateStatus, - // TODO: point to the actual update artifact -} - -impl TryFrom for UpdateableComponent { - type Error = external::Error; - - fn try_from( - create: params::UpdateableComponentCreate, - ) -> Result { - Ok(Self { - identity: UpdateableComponentIdentity::new(Uuid::new_v4()), - version: SemverVersion(create.version), - system_version: SemverVersion(create.system_version), - component_type: create.component_type.into(), - 
device_id: create.device_id, - status: UpdateStatus::Steady, - }) - } -} - -impl From for views::UpdateableComponent { - fn from(component: UpdateableComponent) -> Self { - Self { - identity: component.identity(), - device_id: component.device_id, - component_type: component.component_type.into(), - version: component.version.into(), - system_version: component.system_version.into(), - status: component.status.into(), - } - } -} - -#[derive( - Queryable, - Insertable, - Selectable, - Clone, - Debug, - Asset, - Serialize, - Deserialize, -)] -#[diesel(table_name = update_deployment)] -pub struct UpdateDeployment { - #[diesel(embed)] - pub identity: UpdateDeploymentIdentity, - pub version: SemverVersion, - pub status: UpdateStatus, -} - -impl From for views::UpdateDeployment { - fn from(deployment: UpdateDeployment) -> Self { - Self { - identity: deployment.identity(), - version: deployment.version.into(), - status: deployment.status.into(), - } - } -} diff --git a/nexus/db-model/src/tuf_repo.rs b/nexus/db-model/src/tuf_repo.rs new file mode 100644 index 0000000000..5fa2a0aac7 --- /dev/null +++ b/nexus/db-model/src/tuf_repo.rs @@ -0,0 +1,312 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::str::FromStr; + +use crate::{ + schema::{tuf_artifact, tuf_repo, tuf_repo_artifact}, + SemverVersion, +}; +use chrono::{DateTime, Utc}; +use diesel::{deserialize::FromSql, serialize::ToSql, sql_types::Text}; +use omicron_common::{ + api::external, + update::{ + ArtifactHash as ExternalArtifactHash, ArtifactId as ExternalArtifactId, + ArtifactKind, + }, +}; +use serde::{Deserialize, Serialize}; +use std::fmt; +use uuid::Uuid; + +/// A description of a TUF update: a repo, along with the artifacts it +/// contains. +/// +/// This is the internal variant of [`external::TufRepoDescription`]. +#[derive(Debug, Clone)] +pub struct TufRepoDescription { + /// The repository. + pub repo: TufRepo, + + /// The artifacts. + pub artifacts: Vec, +} + +impl TufRepoDescription { + /// Creates a new `TufRepoDescription` from an + /// [`external::TufRepoDescription`]. + /// + /// This is not implemented as a `From` impl because we insert new fields + /// as part of the process, which `From` doesn't necessarily communicate + /// and can be surprising. + pub fn from_external(description: external::TufRepoDescription) -> Self { + Self { + repo: TufRepo::from_external(description.repo), + artifacts: description + .artifacts + .into_iter() + .map(TufArtifact::from_external) + .collect(), + } + } + + /// Converts self into [`external::TufRepoDescription`]. + pub fn into_external(self) -> external::TufRepoDescription { + external::TufRepoDescription { + repo: self.repo.into_external(), + artifacts: self + .artifacts + .into_iter() + .map(TufArtifact::into_external) + .collect(), + } + } +} + +/// A record representing an uploaded TUF repository. +/// +/// This is the internal variant of [`external::TufRepoMeta`]. +#[derive( + Queryable, Identifiable, Insertable, Clone, Debug, Selectable, AsChangeset, +)] +#[diesel(table_name = tuf_repo)] +pub struct TufRepo { + pub id: Uuid, + pub time_created: DateTime, + // XXX: We're overloading ArtifactHash here to also mean the hash of the + // repository zip itself. 
+ pub sha256: ArtifactHash, + pub targets_role_version: i64, + pub valid_until: DateTime, + pub system_version: SemverVersion, + pub file_name: String, +} + +impl TufRepo { + /// Creates a new `TufRepo` ready for insertion. + pub fn new( + sha256: ArtifactHash, + targets_role_version: u64, + valid_until: DateTime, + system_version: SemverVersion, + file_name: String, + ) -> Self { + Self { + id: Uuid::new_v4(), + time_created: Utc::now(), + sha256, + targets_role_version: targets_role_version as i64, + valid_until, + system_version, + file_name, + } + } + + /// Creates a new `TufRepo` ready for insertion from an external + /// `TufRepoMeta`. + /// + /// This is not implemented as a `From` impl because we insert new fields + /// as part of the process, which `From` doesn't necessarily communicate + /// and can be surprising. + pub fn from_external(repo: external::TufRepoMeta) -> Self { + Self::new( + repo.hash.into(), + repo.targets_role_version, + repo.valid_until, + repo.system_version.into(), + repo.file_name, + ) + } + + /// Converts self into [`external::TufRepoMeta`]. + pub fn into_external(self) -> external::TufRepoMeta { + external::TufRepoMeta { + hash: self.sha256.into(), + targets_role_version: self.targets_role_version as u64, + valid_until: self.valid_until, + system_version: self.system_version.into(), + file_name: self.file_name, + } + } + + /// Returns the repository's ID. + pub fn id(&self) -> Uuid { + self.id + } + + /// Returns the targets role version. + pub fn targets_role_version(&self) -> u64 { + self.targets_role_version as u64 + } +} + +#[derive(Queryable, Insertable, Clone, Debug, Selectable, AsChangeset)] +#[diesel(table_name = tuf_artifact)] +pub struct TufArtifact { + #[diesel(embed)] + pub id: ArtifactId, + pub time_created: DateTime, + pub sha256: ArtifactHash, + artifact_size: i64, +} + +impl TufArtifact { + /// Creates a new `TufArtifact` ready for insertion. + pub fn new( + id: ArtifactId, + sha256: ArtifactHash, + artifact_size: u64, + ) -> Self { + Self { + id, + time_created: Utc::now(), + sha256, + artifact_size: artifact_size as i64, + } + } + + /// Creates a new `TufArtifact` ready for insertion from an external + /// `TufArtifactMeta`. + /// + /// This is not implemented as a `From` impl because we insert new fields + /// as part of the process, which `From` doesn't necessarily communicate + /// and can be surprising. + pub fn from_external(artifact: external::TufArtifactMeta) -> Self { + Self::new(artifact.id.into(), artifact.hash.into(), artifact.size) + } + + /// Converts self into [`external::TufArtifactMeta`]. + pub fn into_external(self) -> external::TufArtifactMeta { + external::TufArtifactMeta { + id: self.id.into(), + hash: self.sha256.into(), + size: self.artifact_size as u64, + } + } + + /// Returns the artifact's ID. + pub fn id(&self) -> (String, SemverVersion, String) { + (self.id.name.clone(), self.id.version.clone(), self.id.kind.clone()) + } + + /// Returns the artifact length in bytes. + pub fn artifact_size(&self) -> u64 { + self.artifact_size as u64 + } +} + +/// The ID (primary key) of a [`TufArtifact`]. +/// +/// This is the internal variant of a [`ExternalArtifactId`]. 
+#[derive( + Queryable, + Insertable, + Clone, + Debug, + Selectable, + PartialEq, + Eq, + Hash, + Deserialize, + Serialize, +)] +#[diesel(table_name = tuf_artifact)] +pub struct ArtifactId { + pub name: String, + pub version: SemverVersion, + pub kind: String, +} + +impl From for ArtifactId { + fn from(id: ExternalArtifactId) -> Self { + Self { + name: id.name, + version: id.version.into(), + kind: id.kind.as_str().to_owned(), + } + } +} + +impl From for ExternalArtifactId { + fn from(id: ArtifactId) -> Self { + Self { + name: id.name, + version: id.version.into(), + kind: ArtifactKind::new(id.kind), + } + } +} + +impl fmt::Display for ArtifactId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // This is the same as ExternalArtifactId's Display impl. + write!(f, "{} v{} ({})", self.name, self.version, self.kind) + } +} + +/// Required by the authz_resource macro. +impl From for (String, SemverVersion, String) { + fn from(id: ArtifactId) -> Self { + (id.name, id.version, id.kind) + } +} + +/// A many-to-many relationship between [`TufRepo`] and [`TufArtifact`]. +#[derive(Queryable, Insertable, Clone, Debug, Selectable)] +#[diesel(table_name = tuf_repo_artifact)] +pub struct TufRepoArtifact { + pub tuf_repo_id: Uuid, + pub tuf_artifact_name: String, + pub tuf_artifact_version: SemverVersion, + pub tuf_artifact_kind: String, +} + +/// A wrapper around omicron-common's [`ArtifactHash`](ExternalArtifactHash), +/// supported by Diesel. +#[derive( + Copy, + Clone, + Debug, + AsExpression, + FromSqlRow, + Serialize, + Deserialize, + PartialEq, +)] +#[diesel(sql_type = Text)] +#[serde(transparent)] +pub struct ArtifactHash(pub ExternalArtifactHash); + +NewtypeFrom! { () pub struct ArtifactHash(ExternalArtifactHash); } +NewtypeDeref! { () pub struct ArtifactHash(ExternalArtifactHash); } + +impl fmt::Display for ArtifactHash { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + +impl ToSql for ArtifactHash { + fn to_sql<'a>( + &'a self, + out: &mut diesel::serialize::Output<'a, '_, diesel::pg::Pg>, + ) -> diesel::serialize::Result { + >::to_sql( + &self.0.to_string(), + &mut out.reborrow(), + ) + } +} + +impl FromSql for ArtifactHash { + fn from_sql( + bytes: diesel::pg::PgValue<'_>, + ) -> diesel::deserialize::Result { + let s = String::from_sql(bytes)?; + ExternalArtifactHash::from_str(&s) + .map(ArtifactHash) + .map_err(|e| e.into()) + } +} diff --git a/nexus/db-model/src/update_artifact.rs b/nexus/db-model/src/update_artifact.rs deleted file mode 100644 index 97c57b44cc..0000000000 --- a/nexus/db-model/src/update_artifact.rs +++ /dev/null @@ -1,62 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
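The `ToSql`/`FromSql` impls above persist the hash as its canonical string form. A small, self-contained sketch of the round-trip they rely on (no database needed; `ArtifactHash` here is the `omicron_common` type that the wrapper above embeds):

```rust
use std::str::FromStr;
use omicron_common::update::ArtifactHash;

// What ToSql writes is the Display form; what FromSql reads back goes
// through FromStr. The canonical form always re-parses.
fn round_trip(hash: ArtifactHash) -> ArtifactHash {
    let text = hash.to_string();
    ArtifactHash::from_str(&text).expect("canonical form re-parses")
}
```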
- -use super::impl_enum_wrapper; -use crate::schema::update_artifact; -use crate::SemverVersion; -use chrono::{DateTime, Utc}; -use omicron_common::api::internal; -use parse_display::Display; -use serde::Deserialize; -use serde::Serialize; -use std::io::Write; - -impl_enum_wrapper!( - #[derive(SqlType, Debug, QueryId)] - #[diesel(postgres_type(name = "update_artifact_kind", schema = "public"))] - pub struct KnownArtifactKindEnum; - - #[derive(Clone, Copy, Debug, Display, AsExpression, FromSqlRow, PartialEq, Eq, Serialize, Deserialize)] - #[display("{0}")] - #[diesel(sql_type = KnownArtifactKindEnum)] - pub struct KnownArtifactKind(pub internal::nexus::KnownArtifactKind); - - // Enum values - GimletSp => b"gimlet_sp" - GimletRot => b"gimlet_rot" - Host => b"host" - Trampoline => b"trampoline" - ControlPlane => b"control_plane" - PscSp => b"psc_sp" - PscRot => b"psc_rot" - SwitchSp => b"switch_sp" - SwitchRot => b"switch_rot" -); - -#[derive( - Queryable, Insertable, Clone, Debug, Display, Selectable, AsChangeset, -)] -#[diesel(table_name = update_artifact)] -#[display("{kind} \"{name}\" v{version}")] -pub struct UpdateArtifact { - pub name: String, - /// Version of the artifact itself - pub version: SemverVersion, - pub kind: KnownArtifactKind, - /// `version` field of targets.json from the repository - // FIXME this *should* be a NonZeroU64 - pub targets_role_version: i64, - pub valid_until: DateTime, - pub target_name: String, - // FIXME should this be [u8; 32]? - pub target_sha256: String, - // FIXME this *should* be a u64 - pub target_length: i64, -} - -impl UpdateArtifact { - pub fn id(&self) -> (String, SemverVersion, KnownArtifactKind) { - (self.name.clone(), self.version.clone(), self.kind) - } -} diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index cae42a0944..3240c54f3f 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -43,6 +43,7 @@ sled-agent-client.workspace = true slog.workspace = true static_assertions.workspace = true steno.workspace = true +swrite.workspace = true thiserror.workspace = true tokio = { workspace = true, features = [ "full" ] } uuid.workspace = true diff --git a/nexus/db-queries/src/authz/api_resources.rs b/nexus/db-queries/src/authz/api_resources.rs index 444a00d5ad..b4fd4e1890 100644 --- a/nexus/db-queries/src/authz/api_resources.rs +++ b/nexus/db-queries/src/authz/api_resources.rs @@ -36,8 +36,7 @@ use crate::authn; use crate::context::OpContext; use crate::db; use crate::db::fixed_data::FLEET_ID; -use crate::db::model::KnownArtifactKind; -use crate::db::model::SemverVersion; +use crate::db::model::{ArtifactId, SemverVersion}; use crate::db::DataStore; use authz_macros::authz_resource; use futures::future::BoxFuture; @@ -1067,35 +1066,28 @@ authz_resource! { } authz_resource! { - name = "UpdateArtifact", + name = "TufRepo", parent = "Fleet", - primary_key = (String, SemverVersion, KnownArtifactKind), - roles_allowed = false, - polar_snippet = FleetChild, -} - -authz_resource! { - name = "Certificate", - parent = "Silo", primary_key = Uuid, roles_allowed = false, - polar_snippet = Custom, + polar_snippet = FleetChild, } authz_resource! { - name = "SystemUpdate", + name = "TufArtifact", parent = "Fleet", - primary_key = Uuid, + primary_key = (String, SemverVersion, String), + input_key = ArtifactId, roles_allowed = false, polar_snippet = FleetChild, } authz_resource! 
{ - name = "UpdateDeployment", - parent = "Fleet", + name = "Certificate", + parent = "Silo", primary_key = Uuid, roles_allowed = false, - polar_snippet = FleetChild, + polar_snippet = Custom, } authz_resource! { diff --git a/nexus/db-queries/src/authz/oso_generic.rs b/nexus/db-queries/src/authz/oso_generic.rs index 9b842216b4..dd646a1c98 100644 --- a/nexus/db-queries/src/authz/oso_generic.rs +++ b/nexus/db-queries/src/authz/oso_generic.rs @@ -154,12 +154,11 @@ pub fn make_omicron_oso(log: &slog::Logger) -> Result { IdentityProvider::init(), SamlIdentityProvider::init(), Sled::init(), + TufRepo::init(), + TufArtifact::init(), Zpool::init(), Service::init(), - UpdateArtifact::init(), UserBuiltin::init(), - SystemUpdate::init(), - UpdateDeployment::init(), ]; for init in generated_inits { diff --git a/nexus/db-queries/src/authz/policy_test/resources.rs b/nexus/db-queries/src/authz/policy_test/resources.rs index 9cc4e28790..3e87f6db51 100644 --- a/nexus/db-queries/src/authz/policy_test/resources.rs +++ b/nexus/db-queries/src/authz/policy_test/resources.rs @@ -7,6 +7,8 @@ use super::resource_builder::ResourceBuilder; use super::resource_builder::ResourceSet; use crate::authz; +use crate::db::model::ArtifactId; +use nexus_db_model::SemverVersion; use omicron_common::api::external::LookupType; use oso::PolarClass; use std::collections::BTreeSet; @@ -126,20 +128,23 @@ pub async fn make_resources( LookupType::ById(blueprint_id), )); - let system_update_id = - "9c86d713-1bc2-4927-9892-ada3eb6f5f62".parse().unwrap(); - builder.new_resource(authz::SystemUpdate::new( + let tuf_repo_id = "3c52d72f-cbf7-4951-a62f-a4154e74da87".parse().unwrap(); + builder.new_resource(authz::TufRepo::new( authz::FLEET, - system_update_id, - LookupType::ById(system_update_id), + tuf_repo_id, + LookupType::ById(tuf_repo_id), )); - let update_deployment_id = - "c617a035-7c42-49ff-a36a-5dfeee382832".parse().unwrap(); - builder.new_resource(authz::UpdateDeployment::new( + let artifact_id = ArtifactId { + name: "a".to_owned(), + version: SemverVersion("1.0.0".parse().unwrap()), + kind: "b".to_owned(), + }; + let artifact_id_desc = artifact_id.to_string(); + builder.new_resource(authz::TufArtifact::new( authz::FLEET, - update_deployment_id, - LookupType::ById(update_deployment_id), + artifact_id, + LookupType::ByCompositeId(artifact_id_desc), )); let address_lot_id = @@ -375,7 +380,6 @@ pub fn exempted_authz_classes() -> BTreeSet { authz::RouterRoute::get_polar_class(), authz::ConsoleSession::get_polar_class(), authz::RoleBuiltin::get_polar_class(), - authz::UpdateArtifact::get_polar_class(), authz::UserBuiltin::get_polar_class(), ] .into_iter() diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 5fd16e2633..78a7aeda87 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -397,14 +397,12 @@ mod test { use crate::db::identity::Asset; use crate::db::lookup::LookupPath; use crate::db::model::{ - BlockSize, ComponentUpdate, ComponentUpdateIdentity, ConsoleSession, - Dataset, DatasetKind, ExternalIp, PhysicalDisk, PhysicalDiskKind, - Project, Rack, Region, Service, ServiceKind, SiloUser, SledBaseboard, - SledProvisionState, SledSystemHardware, SledUpdate, SshKey, - SystemUpdate, UpdateableComponentType, VpcSubnet, Zpool, + BlockSize, ConsoleSession, Dataset, DatasetKind, ExternalIp, + PhysicalDisk, PhysicalDiskKind, Project, Rack, Region, Service, + ServiceKind, SiloUser, SledBaseboard, SledProvisionState, + SledSystemHardware, 
SledUpdate, SshKey, VpcSubnet, Zpool, }; use crate::db::queries::vpc_subnet::FilterConflictingVpcSubnetRangesQuery; - use assert_matches::assert_matches; use chrono::{Duration, Utc}; use futures::stream; use futures::StreamExt; @@ -413,7 +411,7 @@ mod test { use nexus_types::external_api::params; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::{ - self, ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, + ByteCount, Error, IdentityMetadataCreateParams, LookupType, Name, }; use omicron_common::nexus_config::RegionAllocationStrategy; use omicron_test_utils::dev; @@ -1988,109 +1986,4 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } - - /// Expect DB error if we try to insert a system update with an id that - /// already exists. If version matches, update the existing row (currently - /// only time_modified) - #[tokio::test] - async fn test_system_update_conflict() { - let logctx = dev::test_setup_log("test_system_update_conflict"); - let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = datastore_test(&logctx, &db).await; - - let v1 = external::SemverVersion::new(1, 0, 0); - let update1 = SystemUpdate::new(v1.clone()).unwrap(); - datastore - .upsert_system_update(&opctx, update1.clone()) - .await - .expect("Failed to create system update"); - - // same version, but different ID (generated by constructor). should - // conflict and therefore update time_modified, keeping the old ID - let update2 = SystemUpdate::new(v1).unwrap(); - let updated_update = datastore - .upsert_system_update(&opctx, update2.clone()) - .await - .unwrap(); - assert!(updated_update.identity.id == update1.identity.id); - assert!( - updated_update.identity.time_modified - != update1.identity.time_modified - ); - - // now let's do same ID, but different version. should conflict on the - // ID because it's the PK, but since the version doesn't match an - // existing row, it errors out instead of updating one - let update3 = - SystemUpdate::new(external::SemverVersion::new(2, 0, 0)).unwrap(); - let update3 = SystemUpdate { identity: update1.identity, ..update3 }; - let conflict = - datastore.upsert_system_update(&opctx, update3).await.unwrap_err(); - assert_matches!(conflict, Error::ObjectAlreadyExists { .. 
}); - - db.cleanup().await.unwrap(); - logctx.cleanup_successful(); - } - - /// Expect DB error if we try to insert a component update with a (version, - /// component_type) that already exists - #[tokio::test] - async fn test_component_update_conflict() { - let logctx = dev::test_setup_log("test_component_update_conflict"); - let mut db = test_setup_database(&logctx.log).await; - let (opctx, datastore) = datastore_test(&logctx, &db).await; - - // we need a system update for the component updates to hang off of - let v1 = external::SemverVersion::new(1, 0, 0); - let system_update = SystemUpdate::new(v1.clone()).unwrap(); - datastore - .upsert_system_update(&opctx, system_update.clone()) - .await - .expect("Failed to create system update"); - - // create a component update, that's fine - let cu1 = ComponentUpdate { - identity: ComponentUpdateIdentity::new(Uuid::new_v4()), - component_type: UpdateableComponentType::HubrisForSidecarRot, - version: db::model::SemverVersion::new(1, 0, 0), - }; - datastore - .create_component_update( - &opctx, - system_update.identity.id, - cu1.clone(), - ) - .await - .expect("Failed to create component update"); - - // create a second component update with same version but different - // type, also fine - let cu2 = ComponentUpdate { - identity: ComponentUpdateIdentity::new(Uuid::new_v4()), - component_type: UpdateableComponentType::HubrisForSidecarSp, - version: db::model::SemverVersion::new(1, 0, 0), - }; - datastore - .create_component_update( - &opctx, - system_update.identity.id, - cu2.clone(), - ) - .await - .expect("Failed to create component update"); - - // but same type and version should fail - let cu3 = ComponentUpdate { - identity: ComponentUpdateIdentity::new(Uuid::new_v4()), - ..cu1 - }; - let conflict = datastore - .create_component_update(&opctx, system_update.identity.id, cu3) - .await - .unwrap_err(); - assert_matches!(conflict, Error::ObjectAlreadyExists { .. }); - - db.cleanup().await.unwrap(); - logctx.cleanup_successful(); - } } diff --git a/nexus/db-queries/src/db/datastore/update.rs b/nexus/db-queries/src/db/datastore/update.rs index 0790bd458e..3725797f83 100644 --- a/nexus/db-queries/src/db/datastore/update.rs +++ b/nexus/db-queries/src/db/datastore/update.rs @@ -4,376 +4,368 @@ //! [`DataStore`] methods related to updates and artifacts. 
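Context for the rewritten datastore module below: the new insert path runs under `transaction_retry_wrapper`, using `OptionalError` to carry a typed, non-retryable error out of the retried closure. A schematic sketch of that pattern; `MyError` and `do_queries` are placeholders, and this fragment assumes a `DataStore` receiver and an authorized connection rather than being runnable on its own:

```rust
// Schematic fragment, mirroring the pattern used by
// update_tuf_repo_insert below.
let err = OptionalError::new();
let err2 = err.clone();
let result = self
    .transaction_retry_wrapper("example_op")
    .transaction(&conn, move |conn| {
        let err = err2.clone();
        async move {
            // Domain-level failures exit the retry loop via `bail`:
            //     return Err(err.bail(MyError::Conflict));
            // Plain DieselError values stay retryable.
            do_queries(&conn, &err).await
        }
    })
    .await
    .map_err(|e| {
        if let Some(domain_err) = err.take() {
            domain_err.into() // the typed error smuggled out
        } else {
            public_error_from_diesel(e, ErrorHandler::Server)
        }
    });
```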
+use std::collections::HashMap; + use super::DataStore; use crate::authz; use crate::context::OpContext; use crate::db; use crate::db::error::{public_error_from_diesel, ErrorHandler}; -use crate::db::model::{ - ComponentUpdate, SemverVersion, SystemUpdate, UpdateArtifact, - UpdateDeployment, UpdateStatus, UpdateableComponent, -}; -use crate::db::pagination::paginated; +use crate::db::model::SemverVersion; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; -use chrono::Utc; use diesel::prelude::*; -use nexus_db_model::SystemUpdateComponentUpdate; -use nexus_types::identity::Asset; +use diesel::result::Error as DieselError; +use nexus_db_model::{ArtifactHash, TufArtifact, TufRepo, TufRepoDescription}; use omicron_common::api::external::{ - CreateResult, DataPageParams, DeleteResult, InternalContext, ListResultVec, - LookupResult, LookupType, ResourceType, UpdateResult, + self, CreateResult, LookupResult, LookupType, ResourceType, + TufRepoInsertStatus, }; +use swrite::{swrite, SWrite}; use uuid::Uuid; -impl DataStore { - pub async fn update_artifact_upsert( - &self, - opctx: &OpContext, - artifact: UpdateArtifact, - ) -> CreateResult { - opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; +/// The return value of [`DataStore::update_tuf_repo_description_insert`]. +/// +/// This is similar to [`external::TufRepoInsertResponse`], but uses +/// nexus-db-model's types instead of external types. +pub struct TufRepoInsertResponse { + pub recorded: TufRepoDescription, + pub status: TufRepoInsertStatus, +} - use db::schema::update_artifact::dsl; - diesel::insert_into(dsl::update_artifact) - .values(artifact.clone()) - .on_conflict((dsl::name, dsl::version, dsl::kind)) - .do_update() - .set(artifact.clone()) - .returning(UpdateArtifact::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) +impl TufRepoInsertResponse { + pub fn into_external(self) -> external::TufRepoInsertResponse { + external::TufRepoInsertResponse { + recorded: self.recorded.into_external(), + status: self.status, + } } +} + +async fn artifacts_for_repo( + repo_id: Uuid, + conn: &async_bb8_diesel::Connection, +) -> Result, DieselError> { + use db::schema::tuf_artifact::dsl as tuf_artifact_dsl; + use db::schema::tuf_repo_artifact::dsl as tuf_repo_artifact_dsl; + + let join_on_dsl = tuf_artifact_dsl::name + .eq(tuf_repo_artifact_dsl::tuf_artifact_name) + .and( + tuf_artifact_dsl::version + .eq(tuf_repo_artifact_dsl::tuf_artifact_version), + ) + .and( + tuf_artifact_dsl::kind.eq(tuf_repo_artifact_dsl::tuf_artifact_kind), + ); + // Don't bother paginating because each repo should only have a few (under + // 20) artifacts. + tuf_repo_artifact_dsl::tuf_repo_artifact + .filter(tuf_repo_artifact_dsl::tuf_repo_id.eq(repo_id)) + .inner_join(tuf_artifact_dsl::tuf_artifact.on(join_on_dsl)) + .select(TufArtifact::as_select()) + .load_async(conn) + .await +} - pub async fn update_artifact_hard_delete_outdated( +impl DataStore { + /// Inserts a new TUF repository into the database. + /// + /// Returns the repository just inserted, or an existing + /// `TufRepoDescription` if one was already found. (This is not an upsert, + /// because if we know about an existing repo but with different contents, + /// we reject that.) 
+ pub async fn update_tuf_repo_insert( &self, opctx: &OpContext, - current_targets_role_version: i64, - ) -> DeleteResult { + description: TufRepoDescription, + ) -> CreateResult { opctx.authorize(authz::Action::Modify, &authz::FLEET).await?; + let log = opctx.log.new( + slog::o!( + "method" => "update_tuf_repo_insert", + "uploaded_system_version" => description.repo.system_version.to_string(), + ), + ); - // We use the `targets_role_version` column in the table to delete any - // old rows, keeping the table in sync with the current copy of - // artifacts.json. - use db::schema::update_artifact::dsl; - diesel::delete(dsl::update_artifact) - .filter(dsl::targets_role_version.lt(current_targets_role_version)) - .execute_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map(|_rows_deleted| ()) - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - .internal_context("deleting outdated available artifacts") - } + let err = OptionalError::new(); + let err2 = err.clone(); - pub async fn upsert_system_update( - &self, - opctx: &OpContext, - update: SystemUpdate, - ) -> CreateResult { - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - use db::schema::system_update::dsl::*; - - diesel::insert_into(system_update) - .values(update.clone()) - .on_conflict(version) - .do_update() - // for now the only modifiable field is time_modified, but we intend - // to add more metadata to this model - .set(time_modified.eq(Utc::now())) - .returning(SystemUpdate::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::SystemUpdate, - &update.version.to_string(), - ), + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("update_tuf_repo_insert") + .transaction(&conn, move |conn| { + insert_impl( + log.clone(), + conn, + description.clone(), + err2.clone(), ) }) - } - - // version is unique but not the primary key, so we can't use LookupPath to handle this for us - pub async fn system_update_fetch_by_version( - &self, - opctx: &OpContext, - target: SemverVersion, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::system_update::dsl::*; - - let version_string = target.to_string(); - - system_update - .filter(version.eq(target)) - .select(SystemUpdate::as_select()) - .first_async(&*self.pool_connection_authorized(opctx).await?) 
.await .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByLookup( - ResourceType::SystemUpdate, - LookupType::ByCompositeId(version_string), - ), - ) - }) - } - - pub async fn create_component_update( - &self, - opctx: &OpContext, - system_update_id: Uuid, - update: ComponentUpdate, - ) -> CreateResult { - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - // TODO: make sure system update with that ID exists first - // let (.., db_system_update) = LookupPath::new(opctx, &self) - - use db::schema::component_update; - use db::schema::system_update_component_update as join_table; - - let version_string = update.version.to_string(); - - let conn = self.pool_connection_authorized(opctx).await?; - - self.transaction_retry_wrapper("create_component_update") - .transaction(&conn, |conn| { - let update = update.clone(); - async move { - let db_update = - diesel::insert_into(component_update::table) - .values(update.clone()) - .returning(ComponentUpdate::as_returning()) - .get_result_async(&conn) - .await?; - - diesel::insert_into(join_table::table) - .values(SystemUpdateComponentUpdate { - system_update_id, - component_update_id: update.id(), - }) - .returning(SystemUpdateComponentUpdate::as_returning()) - .get_result_async(&conn) - .await?; - - Ok(db_update) + if let Some(err) = err.take() { + err.into() + } else { + public_error_from_diesel(e, ErrorHandler::Server) } }) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::ComponentUpdate, - &version_string, - ), - ) - }) } - pub async fn system_updates_list_by_id( + /// Returns the TUF repo description corresponding to this system version. + pub async fn update_tuf_repo_get( &self, opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + system_version: SemverVersion, + ) -> LookupResult { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - use db::schema::system_update::dsl::*; + use db::schema::tuf_repo::dsl; - paginated(system_update, id, pagparams) - .select(SystemUpdate::as_select()) - .order(version.desc()) - .load_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn system_update_components_list( - &self, - opctx: &OpContext, - system_update_id: Uuid, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::component_update; - use db::schema::system_update_component_update as join_table; - - component_update::table - .inner_join(join_table::table) - .filter(join_table::columns::system_update_id.eq(system_update_id)) - .select(ComponentUpdate::as_select()) - .get_results_async(&*self.pool_connection_authorized(opctx).await?) 
- .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn create_updateable_component( - &self, - opctx: &OpContext, - component: UpdateableComponent, - ) -> CreateResult { - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - // make sure system version exists - let sys_version = component.system_version.clone(); - self.system_update_fetch_by_version(opctx, sys_version).await?; - - use db::schema::updateable_component::dsl::*; + let conn = self.pool_connection_authorized(opctx).await?; - diesel::insert_into(updateable_component) - .values(component.clone()) - .returning(UpdateableComponent::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) + let repo = dsl::tuf_repo + .filter(dsl::system_version.eq(system_version.clone())) + .select(TufRepo::as_select()) + .first_async::(&*conn) .await .map_err(|e| { public_error_from_diesel( e, - ErrorHandler::Conflict( - ResourceType::UpdateableComponent, - &component.id().to_string(), // TODO: more informative identifier + ErrorHandler::NotFoundByLookup( + ResourceType::TufRepo, + LookupType::ByCompositeId(system_version.to_string()), ), ) - }) - } - - pub async fn updateable_components_list_by_id( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::updateable_component::dsl::*; - - paginated(updateable_component, id, pagparams) - .select(UpdateableComponent::as_select()) - .load_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn lowest_component_system_version( - &self, - opctx: &OpContext, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::updateable_component::dsl::*; - - updateable_component - .select(system_version) - .order(system_version.asc()) - .first_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn highest_component_system_version( - &self, - opctx: &OpContext, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::updateable_component::dsl::*; - - updateable_component - .select(system_version) - .order(system_version.desc()) - .first_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } - - pub async fn create_update_deployment( - &self, - opctx: &OpContext, - deployment: UpdateDeployment, - ) -> CreateResult { - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - use db::schema::update_deployment::dsl::*; + })?; - diesel::insert_into(update_deployment) - .values(deployment.clone()) - .returning(UpdateDeployment::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) 
+ let artifacts = artifacts_for_repo(repo.id, &conn) .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::Conflict( - ResourceType::UpdateDeployment, - &deployment.id().to_string(), - ), - ) - }) + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + Ok(TufRepoDescription { repo, artifacts }) } +} - pub async fn steady_update_deployment( - &self, - opctx: &OpContext, - deployment_id: Uuid, - ) -> UpdateResult { - // TODO: use authz::UpdateDeployment as the input so we can check Modify - // on that instead - opctx.authorize(authz::Action::CreateChild, &authz::FLEET).await?; - - use db::schema::update_deployment::dsl::*; - - diesel::update(update_deployment) - .filter(id.eq(deployment_id)) - .set(( - status.eq(UpdateStatus::Steady), - time_modified.eq(diesel::dsl::now), - )) - .returning(UpdateDeployment::as_returning()) - .get_result_async(&*self.pool_connection_authorized(opctx).await?) +// This is a separate method mostly to make rustfmt not bail out on long lines +// of text. +async fn insert_impl( + log: slog::Logger, + conn: async_bb8_diesel::Connection, + desc: TufRepoDescription, + err: OptionalError, +) -> Result { + let repo = { + use db::schema::tuf_repo::dsl; + + // Load the existing repo by the system version, if + // any. + let existing_repo = dsl::tuf_repo + .filter(dsl::system_version.eq(desc.repo.system_version.clone())) + .select(TufRepo::as_select()) + .first_async::(&conn) .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByLookup( - ResourceType::UpdateDeployment, - LookupType::ById(deployment_id), - ), - ) - }) + .optional()?; + + if let Some(existing_repo) = existing_repo { + // It doesn't matter whether the UUID of the repo matches or not, + // since it's uniquely generated. But do check the hash. + if existing_repo.sha256 != desc.repo.sha256 { + return Err(err.bail(InsertError::RepoHashMismatch { + system_version: desc.repo.system_version, + uploaded: desc.repo.sha256, + existing: existing_repo.sha256, + })); + } + + // Just return the existing repo along with all of its artifacts. + let artifacts = artifacts_for_repo(existing_repo.id, &conn).await?; + + let recorded = + TufRepoDescription { repo: existing_repo, artifacts }; + return Ok(TufRepoInsertResponse { + recorded, + status: TufRepoInsertStatus::AlreadyExists, + }); + } + + // This will fail if this ID or system version already exists with a + // different hash, but that's a weird situation that should error out + // anyway (IDs are not user controlled, hashes are). + diesel::insert_into(dsl::tuf_repo) + .values(desc.repo.clone()) + .execute_async(&conn) + .await?; + desc.repo.clone() + }; + + // Since we've inserted a new repo, we also need to insert the + // corresponding artifacts. + let all_artifacts = { + use db::schema::tuf_artifact::dsl; + + // Multiple repos can have the same artifacts, so we shouldn't error + // out if we find an existing artifact. However, we should check that + // the SHA256 hash and length matches if an existing artifact matches. 
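The hash comparison above is what produces the `RepoHashMismatch` conflict defined later in this file. For illustration only, a hypothetical client-side helper (not part of this PR; assumes the `sha2` and `hex` crates) that precomputes the repo hash the server will compare against:

```rust
use sha2::{Digest, Sha256};

// Hypothetical: hash the repository zip the same way the server records
// it, so an uploader can predict a hash-mismatch conflict up front.
fn repo_sha256(zip_bytes: &[u8]) -> String {
    hex::encode(Sha256::digest(zip_bytes))
}
```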
+ + let mut filter_dsl = dsl::tuf_artifact.into_boxed(); + for artifact in desc.artifacts.clone() { + filter_dsl = filter_dsl.or_filter( + dsl::name + .eq(artifact.id.name) + .and(dsl::version.eq(artifact.id.version)) + .and(dsl::kind.eq(artifact.id.kind)), + ); + } + + let results = filter_dsl + .select(TufArtifact::as_select()) + .load_async(&conn) + .await?; + debug!( + log, + "found {} existing artifacts", results.len(); + "results" => ?results, + ); + + let results_by_id = results + .iter() + .map(|artifact| (&artifact.id, artifact)) + .collect::>(); + + // uploaded_and_existing contains non-matching artifacts in pairs of + // (uploaded, currently in db). + let mut uploaded_and_existing = Vec::new(); + let mut new_artifacts = Vec::new(); + let mut all_artifacts = Vec::new(); + + for uploaded_artifact in desc.artifacts.clone() { + let Some(&existing_artifact) = + results_by_id.get(&uploaded_artifact.id) + else { + // This is a new artifact. + new_artifacts.push(uploaded_artifact.clone()); + all_artifacts.push(uploaded_artifact); + continue; + }; + + if existing_artifact.sha256 != uploaded_artifact.sha256 + || existing_artifact.artifact_size() + != uploaded_artifact.artifact_size() + { + uploaded_and_existing.push(( + uploaded_artifact.clone(), + existing_artifact.clone(), + )); + } else { + all_artifacts.push(uploaded_artifact); + } + } + + if !uploaded_and_existing.is_empty() { + debug!(log, "uploaded artifacts don't match existing artifacts"; + "uploaded_and_existing" => ?uploaded_and_existing, + ); + return Err(err.bail(InsertError::ArtifactMismatch { + uploaded_and_existing, + })); + } + + debug!( + log, + "inserting {} new artifacts", new_artifacts.len(); + "new_artifacts" => ?new_artifacts, + ); + + // Insert new artifacts into the database. + diesel::insert_into(dsl::tuf_artifact) + .values(new_artifacts) + .execute_async(&conn) + .await?; + all_artifacts + }; + + // Finally, insert all the associations into the tuf_repo_artifact table. + { + use db::schema::tuf_repo_artifact::dsl; + + let mut values = Vec::new(); + for artifact in desc.artifacts.clone() { + slog::debug!( + log, + "inserting artifact into tuf_repo_artifact table"; + "artifact" => %artifact.id, + ); + values.push(( + dsl::tuf_repo_id.eq(desc.repo.id), + dsl::tuf_artifact_name.eq(artifact.id.name), + dsl::tuf_artifact_version.eq(artifact.id.version), + dsl::tuf_artifact_kind.eq(artifact.id.kind), + )); + } + + diesel::insert_into(dsl::tuf_repo_artifact) + .values(values) + .execute_async(&conn) + .await?; } - pub async fn update_deployments_list_by_id( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - use db::schema::update_deployment::dsl::*; - - paginated(update_deployment, id, pagparams) - .select(UpdateDeployment::as_select()) - .load_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) - } + let recorded = TufRepoDescription { repo, artifacts: all_artifacts }; + Ok(TufRepoInsertResponse { + recorded, + status: TufRepoInsertStatus::Inserted, + }) +} - pub async fn latest_update_deployment( - &self, - opctx: &OpContext, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; +#[derive(Clone, Debug)] +enum InsertError { + /// The SHA256 of the uploaded repository doesn't match the SHA256 of the + /// existing repository with the same system version. 
+ RepoHashMismatch { + system_version: SemverVersion, + uploaded: ArtifactHash, + existing: ArtifactHash, + }, + /// The SHA256 or length of one or more artifacts doesn't match the + /// corresponding entries in the database. + ArtifactMismatch { + // Pairs of (uploaded, existing) artifacts. + uploaded_and_existing: Vec<(TufArtifact, TufArtifact)>, + }, +} - use db::schema::update_deployment::dsl::*; +impl From for external::Error { + fn from(e: InsertError) -> Self { + match e { + InsertError::RepoHashMismatch { + system_version, + uploaded, + existing, + } => external::Error::conflict(format!( + "Uploaded repository with system version {} has SHA256 hash \ + {}, but existing repository has SHA256 hash {}.", + system_version, uploaded, existing, + )), + InsertError::ArtifactMismatch { uploaded_and_existing } => { + // Build a message out of uploaded and existing artifacts. + let mut message = "Uploaded artifacts don't match existing \ + artifacts with same IDs:\n" + .to_string(); + for (uploaded, existing) in uploaded_and_existing { + swrite!( + message, + "- Uploaded artifact {} has SHA256 hash {} and length \ + {}, but existing artifact {} has SHA256 hash {} and \ + length {}.\n", + uploaded.id, + uploaded.sha256, + uploaded.artifact_size(), + existing.id, + existing.sha256, + existing.artifact_size(), + ); + } - update_deployment - .select(UpdateDeployment::as_returning()) - .order(time_created.desc()) - .first_async(&*self.pool_connection_authorized(opctx).await?) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + external::Error::conflict(message) + } + } } } diff --git a/nexus/db-queries/src/db/lookup.rs b/nexus/db-queries/src/db/lookup.rs index 028694dc4b..1cf14c5a8f 100644 --- a/nexus/db-queries/src/db/lookup.rs +++ b/nexus/db-queries/src/db/lookup.rs @@ -17,7 +17,6 @@ use async_bb8_diesel::AsyncRunQueryDsl; use db_macros::lookup_resource; use diesel::{ExpressionMethods, QueryDsl, SelectableHelper}; use ipnetwork::IpNetwork; -use nexus_db_model::KnownArtifactKind; use nexus_db_model::Name; use omicron_common::api::external::Error; use omicron_common::api::external::InternalContext; @@ -431,27 +430,27 @@ impl<'a> LookupPath<'a> { ) } + /// Select a resource of type TufRepo, identified by its UUID. + pub fn tuf_repo_id(self, id: Uuid) -> TufRepo<'a> { + TufRepo::PrimaryKey(Root { lookup_root: self }, id) + } + /// Select a resource of type UpdateArtifact, identified by its /// `(name, version, kind)` tuple - pub fn update_artifact_tuple( + pub fn tuf_artifact_tuple( self, - name: &str, + name: impl Into, version: db::model::SemverVersion, - kind: KnownArtifactKind, - ) -> UpdateArtifact<'a> { - UpdateArtifact::PrimaryKey( + kind: impl Into, + ) -> TufArtifact<'a> { + TufArtifact::PrimaryKey( Root { lookup_root: self }, - name.to_string(), + name.into(), version, - kind, + kind.into(), ) } - /// Select a resource of type UpdateDeployment, identified by its id - pub fn update_deployment_id(self, id: Uuid) -> UpdateDeployment<'a> { - UpdateDeployment::PrimaryKey(Root { lookup_root: self }, id) - } - /// Select a resource of type UserBuiltin, identified by its `name` pub fn user_builtin_id<'b>(self, id: Uuid) -> UserBuiltin<'b> where @@ -857,21 +856,10 @@ lookup_resource! { } lookup_resource! 
{ - name = "UpdateArtifact", - ancestors = [], - children = [], - lookup_by_name = false, - soft_deletes = false, - primary_key_columns = [ - { column_name = "name", rust_type = String }, - { column_name = "version", rust_type = db::model::SemverVersion }, - { column_name = "kind", rust_type = KnownArtifactKind } - ] -} - -lookup_resource! { - name = "SystemUpdate", + name = "TufRepo", ancestors = [], + // TODO: should this have TufArtifact as a child? This is a many-many + // relationship. children = [], lookup_by_name = false, soft_deletes = false, @@ -879,12 +867,16 @@ lookup_resource! { } lookup_resource! { - name = "UpdateDeployment", + name = "TufArtifact", ancestors = [], children = [], lookup_by_name = false, soft_deletes = false, - primary_key_columns = [ { column_name = "id", rust_type = Uuid } ] + primary_key_columns = [ + { column_name = "name", rust_type = String }, + { column_name = "version", rust_type = db::model::SemverVersion }, + { column_name = "kind", rust_type = String }, + ] } lookup_resource! { diff --git a/nexus/db-queries/src/db/pool_connection.rs b/nexus/db-queries/src/db/pool_connection.rs index e8ef721e98..2d57274909 100644 --- a/nexus/db-queries/src/db/pool_connection.rs +++ b/nexus/db-queries/src/db/pool_connection.rs @@ -67,9 +67,6 @@ static CUSTOM_TYPE_KEYS: &'static [&'static str] = &[ "switch_link_fec", "switch_link_speed", "switch_port_geometry", - "update_artifact_kind", - "update_status", - "updateable_component_type", "user_provision_type", "vpc_firewall_rule_action", "vpc_firewall_rule_direction", diff --git a/nexus/db-queries/tests/output/authz-roles.out b/nexus/db-queries/tests/output/authz-roles.out index 26cc13fc6a..ee55d775f0 100644 --- a/nexus/db-queries/tests/output/authz-roles.out +++ b/nexus/db-queries/tests/output/authz-roles.out @@ -950,7 +950,7 @@ resource: Blueprint id "b9e923f6-caf3-4c83-96f9-8ffe8c627dd2" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! -resource: SystemUpdate id "9c86d713-1bc2-4927-9892-ada3eb6f5f62" +resource: TufRepo id "3c52d72f-cbf7-4951-a62f-a4154e74da87" USER Q R LC RP M MP CC D fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ @@ -964,7 +964,7 @@ resource: SystemUpdate id "9c86d713-1bc2-4927-9892-ada3eb6f5f62" silo1-proj1-viewer ✘ ✘ ✘ ✘ ✘ ✘ ✘ ✘ unauthenticated ! ! ! ! ! ! ! ! -resource: UpdateDeployment id "c617a035-7c42-49ff-a36a-5dfeee382832" +resource: TufArtifact id "a v1.0.0 (b)" USER Q R LC RP M MP CC D fleet-admin ✘ ✔ ✔ ✔ ✔ ✔ ✔ ✔ diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index 9d6bf2d22f..f13ea721b8 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -40,9 +40,13 @@ external_dns_servers = [ "1.1.1.1", "9.9.9.9" ] [deployment.dropshot_external] # IP Address and TCP port on which to listen for the external API bind_address = "127.0.0.1:12220" -# Allow larger request bodies (1MiB) to accomodate firewall endpoints (one -# rule is ~500 bytes) -request_body_max_bytes = 1048576 +# Allow large request bodies to support uploading TUF archives. The number here +# is picked based on the typical size for tuf-mupdate.zip as of 2024-01 +# (~1.5GiB) and multiplying it by 2. +# +# This should be brought back down to a more reasonable value once per-endpoint +# request body limits are implemented. +request_body_max_bytes = 3221225472 # To have Nexus's external HTTP endpoint use TLS, uncomment the line below. You # will also need to provide an initial TLS certificate during rack # initialization. 
If you're using this config file, you're probably running a
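For reference, the 3221225472-byte limit set in the config hunk above is exactly 3 GiB: a typical ~1.5 GiB tuf-mupdate.zip with 2x headroom. A quick arithmetic check (plain Rust, not part of the patch):

```rust
fn main() {
    // 1 GiB in bytes.
    const GIB: u64 = 1024 * 1024 * 1024;
    // ~1.5 GiB for a typical tuf-mupdate.zip, doubled for headroom.
    let limit = 3 * GIB / 2 * 2;
    assert_eq!(limit, 3_221_225_472);
    println!("request_body_max_bytes = {limit}");
}
```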
diff --git a/nexus/src/app/mod.rs b/nexus/src/app/mod.rs
index d643969924..d6ad7c98ea 100644
--- a/nexus/src/app/mod.rs
+++ b/nexus/src/app/mod.rs
@@ -140,6 +140,7 @@ pub struct Nexus {
     timeseries_client: LazyTimeseriesClient,
 
     /// Contents of the trusted root role for the TUF repository.
+    #[allow(dead_code)]
     updates_config: Option<UpdatesConfig>,
 
     /// The tunable parameters from a configuration file
diff --git a/nexus/src/app/rack.rs b/nexus/src/app/rack.rs
index 17e7a17444..38c7861e46 100644
--- a/nexus/src/app/rack.rs
+++ b/nexus/src/app/rack.rs
@@ -151,9 +151,6 @@ impl super::Nexus {
             })
             .collect();
 
-        // internally ignores ObjectAlreadyExists, so will not error on repeat runs
-        let _ = self.populate_mock_system_updates(&opctx).await?;
-
         let dns_zone = request
             .internal_dns_zone_config
             .zones
diff --git a/nexus/src/app/update/mod.rs b/nexus/src/app/update/mod.rs
index 36d4dbcb9e..d4a47375bc 100644
--- a/nexus/src/app/update/mod.rs
+++ b/nexus/src/app/update/mod.rs
@@ -4,27 +4,17 @@
 
 //! Software Updates
 
-use chrono::Utc;
-use hex;
+use bytes::Bytes;
+use dropshot::HttpError;
+use futures::Stream;
+use nexus_db_model::TufRepoDescription;
 use nexus_db_queries::authz;
 use nexus_db_queries::context::OpContext;
-use nexus_db_queries::db;
-use nexus_db_queries::db::identity::Asset;
-use nexus_db_queries::db::lookup::LookupPath;
-use nexus_db_queries::db::model::KnownArtifactKind;
-use nexus_types::external_api::{params, shared};
 use omicron_common::api::external::{
-    self, CreateResult, DataPageParams, Error, ListResultVec, LookupResult,
-    PaginationOrder, UpdateResult,
+    Error, SemverVersion, TufRepoInsertResponse,
 };
-use omicron_common::api::internal::nexus::UpdateArtifactId;
-use rand::Rng;
-use ring::digest;
-use std::convert::TryFrom;
-use std::num::NonZeroU32;
-use std::path::Path;
-use tokio::io::AsyncWriteExt;
-use uuid::Uuid;
+use omicron_common::update::ArtifactId;
+use update_common::artifacts::ArtifactsWithPlan;
 
 mod common_sp_update;
 mod host_phase1_updater;
@@ -47,927 +37,70 @@ pub enum UpdateProgress {
     Failed(String),
 }
 
-static BASE_ARTIFACT_DIR: &str = "/var/tmp/oxide_artifacts";
-
 impl super::Nexus {
-    async fn tuf_base_url(
+    pub(crate) async fn updates_put_repository(
         &self,
         opctx: &OpContext,
-    ) -> Result<Option<String>, Error> {
-        let rack = self.rack_lookup(opctx, &self.rack_id).await?;
-
-        Ok(self.updates_config.as_ref().map(|c| {
-            rack.tuf_base_url.unwrap_or_else(|| c.default_base_url.clone())
-        }))
-    }
-
-    pub(crate) async fn updates_refresh_metadata(
-        &self,
-        opctx: &OpContext,
-    ) -> Result<(), Error> {
+        body: impl Stream<Item = Result<Bytes, HttpError>> + Send + Sync + 'static,
+        file_name: String,
+    ) -> Result<TufRepoInsertResponse, HttpError> {
        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
-        let updates_config = self.updates_config.as_ref().ok_or_else(|| {
-            Error::invalid_request("updates system not configured")
-        })?;
-        let base_url = self.tuf_base_url(opctx).await?.ok_or_else(|| {
-            Error::invalid_request("updates system not configured")
-        })?;
-        let trusted_root = tokio::fs::read(&updates_config.trusted_root)
-            .await
-            .map_err(|e| Error::InternalError {
-                internal_message: format!(
-                    "error trying to read trusted root: {}",
-                    e
-                ),
+        // XXX: this needs to validate against the trusted root!
+        let _updates_config =
+            self.updates_config.as_ref().ok_or_else(|| {
+                Error::internal_error("updates system not initialized")
             })?;
-        let artifacts = crate::updates::read_artifacts(&trusted_root, base_url)
-            .await
-            .map_err(|e| Error::InternalError {
-                internal_message: format!(
-                    "error trying to refresh updates: {}",
-                    e
-                ),
-            })?;
-
-        // FIXME: if we hit an error in any of these database calls, the
-        // available artifact table will be out of sync with the current
-        // artifacts.json. can we do a transaction or something?
+        let artifacts_with_plan =
+            ArtifactsWithPlan::from_stream(body, Some(file_name), &self.log)
+                .await
+                .map_err(|error| error.to_http_error())?;
 
-        let mut current_version = None;
-        for artifact in &artifacts {
-            current_version = Some(artifact.targets_role_version);
-            self.db_datastore
-                .update_artifact_upsert(&opctx, artifact.clone())
-                .await?;
-        }
-
-        // ensure table is in sync with current copy of artifacts.json
-        if let Some(current_version) = current_version {
-            self.db_datastore
-                .update_artifact_hard_delete_outdated(&opctx, current_version)
-                .await?;
-        }
-
-        // demo-grade update logic: tell all sleds to apply all artifacts
-        for sled in self
-            .db_datastore
-            .sled_list(
-                &opctx,
-                &DataPageParams {
-                    marker: None,
-                    direction: PaginationOrder::Ascending,
-                    limit: NonZeroU32::new(100).unwrap(),
-                },
-            )
-            .await?
-        {
-            let client = self.sled_client(&sled.id()).await?;
-            for artifact in &artifacts {
-                info!(
-                    self.log,
-                    "telling sled {} to apply {}",
-                    sled.id(),
-                    artifact.target_name
-                );
-                client
-                    .update_artifact(
-                        &sled_agent_client::types::UpdateArtifactId {
-                            name: artifact.name.clone(),
-                            version: artifact.version.0.clone().into(),
-                            kind: artifact.kind.0.into(),
-                        },
-                    )
-                    .await?;
-            }
-        }
-
-        Ok(())
-    }
-
-    /// Downloads a file from within [`BASE_ARTIFACT_DIR`].
-    pub(crate) async fn download_artifact(
-        &self,
-        opctx: &OpContext,
-        artifact: UpdateArtifactId,
-    ) -> Result<Vec<u8>, Error> {
-        let mut base_url =
-            self.tuf_base_url(opctx).await?.ok_or_else(|| {
-                Error::invalid_request("updates system not configured")
-            })?;
-        if !base_url.ends_with('/') {
-            base_url.push('/');
-        }
-
-        // We cache the artifact based on its checksum, so fetch that from the
-        // database.
-        let (.., artifact_entry) = LookupPath::new(opctx, &self.db_datastore)
-            .update_artifact_tuple(
-                &artifact.name,
-                db::model::SemverVersion(artifact.version.clone()),
-                KnownArtifactKind(artifact.kind),
-            )
-            .fetch()
-            .await?;
-        let filename = format!(
-            "{}.{}.{}-{}",
-            artifact_entry.target_sha256,
-            artifact.kind,
-            artifact.name,
-            artifact.version
+        // Now store the artifacts in the database.
+        let tuf_repo_description = TufRepoDescription::from_external(
+            artifacts_with_plan.description().clone(),
         );
-        let path = Path::new(BASE_ARTIFACT_DIR).join(&filename);
-
-        if !path.exists() {
-            // If the artifact doesn't exist, we should download it.
-            //
-            // TODO: There also exists the question of "when should we *remove*
-            // things from BASE_ARTIFACT_DIR", which we should also resolve.
-            // Demo-quality solution could be "destroy it on boot" or something?
-            // (we aren't doing that yet).
- info!(self.log, "Accessing {} - needs to be downloaded", filename); - tokio::fs::create_dir_all(BASE_ARTIFACT_DIR).await.map_err( - |e| { - Error::internal_error(&format!( - "Failed to create artifacts directory: {}", - e - )) - }, - )?; - - let mut response = reqwest::get(format!( - "{}targets/{}.{}", - base_url, - artifact_entry.target_sha256, - artifact_entry.target_name - )) - .await - .map_err(|e| { - Error::internal_error(&format!( - "Failed to fetch artifact: {}", - e - )) - })?; - // To ensure another request isn't trying to use this target while we're downloading it - // or before we've verified it, write to a random path in the same directory, then move - // it to the correct path after verification. - let temp_path = path.with_file_name(format!( - ".{}.{:x}", - filename, - rand::thread_rng().gen::() - )); - let mut file = - tokio::fs::File::create(&temp_path).await.map_err(|e| { - Error::internal_error(&format!( - "Failed to create file: {}", - e - )) - })?; - - let mut context = digest::Context::new(&digest::SHA256); - let mut length: i64 = 0; - while let Some(chunk) = response.chunk().await.map_err(|e| { - Error::internal_error(&format!( - "Failed to read HTTP body: {}", - e - )) - })? { - file.write_all(&chunk).await.map_err(|e| { - Error::internal_error(&format!( - "Failed to write to file: {}", - e - )) - })?; - context.update(&chunk); - length += i64::try_from(chunk.len()).unwrap(); - - if length > artifact_entry.target_length { - return Err(Error::internal_error(&format!( - "target {} is larger than expected", - artifact_entry.target_name - ))); - } - } - drop(file); - - if hex::encode(context.finish()) == artifact_entry.target_sha256 - && length == artifact_entry.target_length - { - tokio::fs::rename(temp_path, &path).await.map_err(|e| { - Error::internal_error(&format!( - "Failed to rename file after verification: {}", - e - )) - })? - } else { - return Err(Error::internal_error(&format!( - "failed to verify target {}", - artifact_entry.target_name - ))); - } - - info!( - self.log, - "wrote {} to artifact dir", artifact_entry.target_name - ); - } else { - info!(self.log, "Accessing {} - already exists", path.display()); - } - - // TODO: These artifacts could be quite large - we should figure out how to - // stream this file back instead of holding it entirely in-memory in a - // Vec. - // - // Options: - // - RFC 7233 - "Range Requests" (is this HTTP/1.1 only?) - // https://developer.mozilla.org/en-US/docs/Web/HTTP/Range_requests - // - "Roll our own". 
See: - // https://stackoverflow.com/questions/20969331/standard-method-for-http-partial-upload-resume-upload - let body = tokio::fs::read(&path).await.map_err(|e| { - Error::internal_error(&format!( - "Cannot read artifact from filesystem: {}", - e - )) - })?; - Ok(body) - } - - pub async fn upsert_system_update( - &self, - opctx: &OpContext, - create_update: params::SystemUpdateCreate, - ) -> CreateResult { - let update = db::model::SystemUpdate::new(create_update.version)?; - self.db_datastore.upsert_system_update(opctx, update).await - } - - pub async fn create_component_update( - &self, - opctx: &OpContext, - create_update: params::ComponentUpdateCreate, - ) -> CreateResult { - let now = Utc::now(); - let update = db::model::ComponentUpdate { - identity: db::model::ComponentUpdateIdentity { - id: Uuid::new_v4(), - time_created: now, - time_modified: now, - }, - version: db::model::SemverVersion(create_update.version), - component_type: create_update.component_type.into(), - }; - - self.db_datastore - .create_component_update( - opctx, - create_update.system_update_id, - update, - ) - .await - } - - pub(crate) async fn system_update_fetch_by_version( - &self, - opctx: &OpContext, - version: &external::SemverVersion, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - self.db_datastore - .system_update_fetch_by_version(opctx, version.clone().into()) + let response = self + .db_datastore + .update_tuf_repo_insert(opctx, tuf_repo_description) .await + .map_err(HttpError::from)?; + Ok(response.into_external()) } - pub(crate) async fn system_updates_list_by_id( + pub(crate) async fn updates_get_repository( &self, opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - self.db_datastore.system_updates_list_by_id(opctx, pagparams).await - } + system_version: SemverVersion, + ) -> Result { + opctx.authorize(authz::Action::Read, &authz::FLEET).await?; - pub(crate) async fn system_update_list_components( - &self, - opctx: &OpContext, - version: &external::SemverVersion, - ) -> ListResultVec { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; + let _updates_config = + self.updates_config.as_ref().ok_or_else(|| { + Error::internal_error("updates system not initialized") + })?; - let system_update = self + let tuf_repo_description = self .db_datastore - .system_update_fetch_by_version(opctx, version.clone().into()) - .await?; - - self.db_datastore - .system_update_components_list(opctx, system_update.id()) + .update_tuf_repo_get(opctx, system_version.into()) .await - } - - pub async fn create_updateable_component( - &self, - opctx: &OpContext, - create_component: params::UpdateableComponentCreate, - ) -> CreateResult { - let component = - db::model::UpdateableComponent::try_from(create_component)?; - self.db_datastore.create_updateable_component(opctx, component).await - } - - pub(crate) async fn updateable_components_list_by_id( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - self.db_datastore - .updateable_components_list_by_id(opctx, pagparams) - .await - } - - pub(crate) async fn create_update_deployment( - &self, - opctx: &OpContext, - start: params::SystemUpdateStart, - ) -> CreateResult { - // 404 if specified version doesn't exist - // TODO: is 404 the right error for starting an update with a nonexistent version? 
- self.system_update_fetch_by_version(opctx, &start.version).await?; - - // We only need to look at the latest deployment because it's the only - // one that could be running - - let latest_deployment = self.latest_update_deployment(opctx).await; - if let Ok(dep) = latest_deployment { - if dep.status == db::model::UpdateStatus::Updating { - // TODO: should "already updating" conflict be a new kind of error? - return Err(Error::ObjectAlreadyExists { - type_name: external::ResourceType::UpdateDeployment, - object_name: dep.id().to_string(), - }); - } - } - - let deployment = db::model::UpdateDeployment { - identity: db::model::UpdateDeploymentIdentity::new(Uuid::new_v4()), - version: db::model::SemverVersion(start.version), - status: db::model::UpdateStatus::Updating, - }; - self.db_datastore.create_update_deployment(opctx, deployment).await - } - - /// If there's a running update, change it to steady. Otherwise do nothing. - // TODO: codify the state machine around update deployments - pub(crate) async fn steady_update_deployment( - &self, - opctx: &OpContext, - ) -> UpdateResult { - let latest = self.latest_update_deployment(opctx).await?; - // already steady. do nothing in order to avoid updating `time_modified` - if latest.status == db::model::UpdateStatus::Steady { - return Ok(latest); - } - - self.db_datastore.steady_update_deployment(opctx, latest.id()).await - } - - pub(crate) async fn update_deployments_list_by_id( - &self, - opctx: &OpContext, - pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - self.db_datastore.update_deployments_list_by_id(opctx, pagparams).await - } + .map_err(HttpError::from)?; - pub(crate) async fn update_deployment_fetch_by_id( - &self, - opctx: &OpContext, - deployment_id: &Uuid, - ) -> LookupResult { - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - let (.., db_deployment) = LookupPath::new(opctx, &self.db_datastore) - .update_deployment_id(*deployment_id) - .fetch() - .await?; - Ok(db_deployment) + Ok(tuf_repo_description) } - pub(crate) async fn latest_update_deployment( + /// Downloads a file (currently not implemented). 
+ pub(crate) async fn updates_download_artifact( &self, - opctx: &OpContext, - ) -> LookupResult { - self.db_datastore.latest_update_deployment(opctx).await - } - - pub(crate) async fn lowest_component_system_version( - &self, - opctx: &OpContext, - ) -> LookupResult { - self.db_datastore.lowest_component_system_version(opctx).await - } - - pub(crate) async fn highest_component_system_version( - &self, - opctx: &OpContext, - ) -> LookupResult { - self.db_datastore.highest_component_system_version(opctx).await - } - - /// Inner function makes it easier to implement the logic where we ignore - /// ObjectAlreadyExists errors but let the others pass through - async fn populate_mock_system_updates_inner( - &self, - opctx: &OpContext, - ) -> CreateResult<()> { - let types = vec![ - shared::UpdateableComponentType::HubrisForPscRot, - shared::UpdateableComponentType::HubrisForPscSp, - shared::UpdateableComponentType::HubrisForSidecarRot, - shared::UpdateableComponentType::HubrisForSidecarSp, - shared::UpdateableComponentType::HubrisForGimletRot, - shared::UpdateableComponentType::HubrisForGimletSp, - shared::UpdateableComponentType::HeliosHostPhase1, - shared::UpdateableComponentType::HeliosHostPhase2, - shared::UpdateableComponentType::HostOmicron, - ]; - - // create system updates and associated component updates - for v in [1, 2, 3] { - let version = external::SemverVersion::new(v, 0, 0); - let su = self - .upsert_system_update( - opctx, - params::SystemUpdateCreate { version: version.clone() }, - ) - .await?; - - for component_type in types.clone() { - self.create_component_update( - &opctx, - params::ComponentUpdateCreate { - version: external::SemverVersion::new(1, v, 0), - system_update_id: su.identity.id, - component_type, - }, - ) - .await?; - } - } - - // create deployment for v1.0.0, stop it, then create one for v2.0.0. - // This makes plausible the state of the components: all v1 except for one v2 - self.create_update_deployment( - &opctx, - params::SystemUpdateStart { - version: external::SemverVersion::new(1, 0, 0), - }, - ) - .await?; - self.steady_update_deployment(opctx).await?; - - self.create_update_deployment( - &opctx, - params::SystemUpdateStart { - version: external::SemverVersion::new(2, 0, 0), - }, - ) - .await?; - - // now create components, with one component on a different system - // version from the others - - for (i, component_type) in types.iter().enumerate() { - let version = if i == 0 { - external::SemverVersion::new(1, 2, 0) - } else { - external::SemverVersion::new(1, 1, 0) - }; - - let system_version = if i == 0 { - external::SemverVersion::new(2, 0, 0) - } else { - external::SemverVersion::new(1, 0, 0) - }; - - self.create_updateable_component( - opctx, - params::UpdateableComponentCreate { - version, - system_version, - device_id: "a-device".to_string(), - component_type: component_type.clone(), - }, - ) - .await?; - } - - Ok(()) - } - - /// Populate the DB with update-related data. Data is hard-coded until we - /// figure out how to pull it from the TUF repo. - /// - /// We need this to be idempotent because it can be called arbitrarily many - /// times. The service functions we call to create these resources will - /// error on ID or version conflicts, so to remain idempotent we can simply - /// ignore those errors. We let other errors through. 
- pub(crate) async fn populate_mock_system_updates( - &self, - opctx: &OpContext, - ) -> CreateResult<()> { - self.populate_mock_system_updates_inner(opctx).await.or_else(|error| { - match error { - // ignore ObjectAlreadyExists but pass through other errors - external::Error::ObjectAlreadyExists { .. } => Ok(()), - _ => Err(error), - } - }) - } -} - -// TODO: convert system update tests to integration tests now that I know how to -// call nexus functions in those - -#[cfg(test)] -mod tests { - use assert_matches::assert_matches; - use std::num::NonZeroU32; - - use dropshot::PaginationOrder; - use nexus_db_queries::context::OpContext; - use nexus_db_queries::db::model::UpdateStatus; - use nexus_test_utils_macros::nexus_test; - use nexus_types::external_api::{ - params::{ - ComponentUpdateCreate, SystemUpdateCreate, SystemUpdateStart, - UpdateableComponentCreate, - }, - shared::UpdateableComponentType, - }; - use omicron_common::api::external::{self, DataPageParams}; - use uuid::Uuid; - - type ControlPlaneTestContext = - nexus_test_utils::ControlPlaneTestContext; - - pub fn test_opctx(cptestctx: &ControlPlaneTestContext) -> OpContext { - OpContext::for_tests( - cptestctx.logctx.log.new(o!()), - cptestctx.server.apictx.nexus.datastore().clone(), - ) - } - - pub fn test_pagparams() -> DataPageParams<'static, Uuid> { - DataPageParams { - marker: None, - direction: PaginationOrder::Ascending, - limit: NonZeroU32::new(100).unwrap(), - } - } - - #[nexus_test(server = crate::Server)] - async fn test_system_updates(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - // starts out with 3 populated - let system_updates = nexus - .system_updates_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - - assert_eq!(system_updates.len(), 3); - - let su1_create = SystemUpdateCreate { - version: external::SemverVersion::new(5, 0, 0), - }; - let su1 = nexus.upsert_system_update(&opctx, su1_create).await.unwrap(); - - // weird order is deliberate - let su3_create = SystemUpdateCreate { - version: external::SemverVersion::new(10, 0, 0), - }; - nexus.upsert_system_update(&opctx, su3_create).await.unwrap(); - - let su2_create = SystemUpdateCreate { - version: external::SemverVersion::new(0, 7, 0), - }; - let su2 = nexus.upsert_system_update(&opctx, su2_create).await.unwrap(); - - // now there should be a bunch of system updates, sorted by version descending - let versions: Vec = nexus - .system_updates_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap() - .iter() - .map(|su| su.version.to_string()) - .collect(); - - assert_eq!(versions.len(), 6); - assert_eq!(versions[0], "10.0.0".to_string()); - assert_eq!(versions[1], "5.0.0".to_string()); - assert_eq!(versions[2], "3.0.0".to_string()); - assert_eq!(versions[3], "2.0.0".to_string()); - assert_eq!(versions[4], "1.0.0".to_string()); - assert_eq!(versions[5], "0.7.0".to_string()); - - // let's also make sure we can fetch by version - let su1_fetched = nexus - .system_update_fetch_by_version(&opctx, &su1.version) - .await - .unwrap(); - assert_eq!(su1.identity.id, su1_fetched.identity.id); - - // now create two component updates for update 1, one at root, and one - // hanging off the first - nexus - .create_component_update( - &opctx, - ComponentUpdateCreate { - version: external::SemverVersion::new(1, 0, 0), - component_type: UpdateableComponentType::BootloaderForRot, - system_update_id: su1.identity.id, - }, - ) - .await - .expect("Failed to create component update"); - 
nexus - .create_component_update( - &opctx, - ComponentUpdateCreate { - version: external::SemverVersion::new(2, 0, 0), - component_type: UpdateableComponentType::HubrisForGimletSp, - system_update_id: su1.identity.id, - }, - ) - .await - .expect("Failed to create component update"); - - // now there should be two component updates - let cus_for_su1 = nexus - .system_update_list_components(&opctx, &su1.version) - .await - .unwrap(); - - assert_eq!(cus_for_su1.len(), 2); - - // other system update should not be associated with any component updates - let cus_for_su2 = nexus - .system_update_list_components(&opctx, &su2.version) - .await - .unwrap(); - - assert_eq!(cus_for_su2.len(), 0); - } - - #[nexus_test(server = crate::Server)] - async fn test_semver_max(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - let expected = "Invalid Value: version, Major, minor, and patch version must be less than 99999999"; - - // major, minor, and patch are all capped - - let su_create = SystemUpdateCreate { - version: external::SemverVersion::new(100000000, 0, 0), - }; - let error = - nexus.upsert_system_update(&opctx, su_create).await.unwrap_err(); - assert!(error.to_string().contains(expected)); - - let su_create = SystemUpdateCreate { - version: external::SemverVersion::new(0, 100000000, 0), - }; - let error = - nexus.upsert_system_update(&opctx, su_create).await.unwrap_err(); - assert!(error.to_string().contains(expected)); - - let su_create = SystemUpdateCreate { - version: external::SemverVersion::new(0, 0, 100000000), - }; - let error = - nexus.upsert_system_update(&opctx, su_create).await.unwrap_err(); - assert!(error.to_string().contains(expected)); - } - - #[nexus_test(server = crate::Server)] - async fn test_updateable_components(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - // starts out populated - let components = nexus - .updateable_components_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - - assert_eq!(components.len(), 9); - - // with no components these should both 500. as discussed in the - // implementation, this is appropriate because we should never be - // running the external API without components populated - // - // let low = - // nexus.lowest_component_system_version(&opctx).await.unwrap_err(); - // assert_matches!(low, external::Error::InternalError { .. }); - // let high = - // nexus.highest_component_system_version(&opctx).await.unwrap_err(); - // assert_matches!(high, external::Error::InternalError { .. }); - - // creating a component if its system_version doesn't exist is a 404 - let uc_create = UpdateableComponentCreate { - version: external::SemverVersion::new(0, 4, 1), - system_version: external::SemverVersion::new(0, 2, 0), - component_type: UpdateableComponentType::BootloaderForSp, - device_id: "look-a-device".to_string(), - }; - let uc_404 = nexus - .create_updateable_component(&opctx, uc_create.clone()) - .await - .unwrap_err(); - assert_matches!(uc_404, external::Error::ObjectNotFound { .. 
}); - - // create system updates for the component updates to hang off of - let v020 = external::SemverVersion::new(0, 2, 0); - nexus - .upsert_system_update(&opctx, SystemUpdateCreate { version: v020 }) - .await - .expect("Failed to create system update"); - let v3 = external::SemverVersion::new(4, 0, 0); - nexus - .upsert_system_update(&opctx, SystemUpdateCreate { version: v3 }) - .await - .expect("Failed to create system update"); - let v10 = external::SemverVersion::new(10, 0, 0); - nexus - .upsert_system_update(&opctx, SystemUpdateCreate { version: v10 }) - .await - .expect("Failed to create system update"); - - // now uc_create and friends will work - nexus - .create_updateable_component(&opctx, uc_create) - .await - .expect("failed to create updateable component"); - nexus - .create_updateable_component( - &opctx, - UpdateableComponentCreate { - version: external::SemverVersion::new(0, 4, 1), - system_version: external::SemverVersion::new(3, 0, 0), - component_type: UpdateableComponentType::HeliosHostPhase2, - device_id: "another-device".to_string(), - }, - ) - .await - .expect("failed to create updateable component"); - nexus - .create_updateable_component( - &opctx, - UpdateableComponentCreate { - version: external::SemverVersion::new(0, 4, 1), - system_version: external::SemverVersion::new(10, 0, 0), - component_type: UpdateableComponentType::HeliosHostPhase1, - device_id: "a-third-device".to_string(), - }, - ) - .await - .expect("failed to create updateable component"); - - // now there should be 3 more, or 12 - let components = nexus - .updateable_components_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - - assert_eq!(components.len(), 12); - - let low = nexus.lowest_component_system_version(&opctx).await.unwrap(); - assert_eq!(&low.to_string(), "0.2.0"); - let high = - nexus.highest_component_system_version(&opctx).await.unwrap(); - assert_eq!(&high.to_string(), "10.0.0"); - - // TODO: update the version of a component - } - - #[nexus_test(server = crate::Server)] - async fn test_update_deployments(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - // starts out with one populated - let deployments = nexus - .update_deployments_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - - assert_eq!(deployments.len(), 2); - - // start update fails with nonexistent version - let not_found = nexus - .create_update_deployment( - &opctx, - SystemUpdateStart { - version: external::SemverVersion::new(6, 0, 0), - }, - ) - .await - .unwrap_err(); - - assert_matches!(not_found, external::Error::ObjectNotFound { .. }); - - // starting with existing version fails because there's already an - // update running - let start_v3 = SystemUpdateStart { - version: external::SemverVersion::new(3, 0, 0), - }; - let already_updating = nexus - .create_update_deployment(&opctx, start_v3.clone()) - .await - .unwrap_err(); - - assert_matches!( - already_updating, - external::Error::ObjectAlreadyExists { .. 
} - ); - - // stop the running update - nexus - .steady_update_deployment(&opctx) - .await - .expect("Failed to stop running update"); - - // now starting an update succeeds - let d = nexus - .create_update_deployment(&opctx, start_v3) - .await - .expect("Failed to create deployment"); - - let deployment_ids: Vec = nexus - .update_deployments_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap() - .into_iter() - .map(|d| d.identity.id) - .collect(); - - assert_eq!(deployment_ids.len(), 3); - assert!(deployment_ids.contains(&d.identity.id)); - - // latest deployment returns the one just created - let latest_deployment = - nexus.latest_update_deployment(&opctx).await.unwrap(); - - assert_eq!(latest_deployment.identity.id, d.identity.id); - assert_eq!(latest_deployment.status, UpdateStatus::Updating); - assert!( - latest_deployment.identity.time_modified - == d.identity.time_modified - ); - - // stopping update updates both its status and its time_modified - nexus - .steady_update_deployment(&opctx) - .await - .expect("Failed to steady running update"); - - let latest_deployment = - nexus.latest_update_deployment(&opctx).await.unwrap(); - - assert_eq!(latest_deployment.identity.id, d.identity.id); - assert_eq!(latest_deployment.status, UpdateStatus::Steady); - assert!( - latest_deployment.identity.time_modified > d.identity.time_modified - ); - } - - #[nexus_test(server = crate::Server)] - async fn test_populate_mock_system_updates( - cptestctx: &ControlPlaneTestContext, - ) { - let nexus = &cptestctx.server.apictx.nexus; - let opctx = test_opctx(&cptestctx); - - // starts out with updates because they're populated at rack init - let su_count = nexus - .system_updates_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap() - .len(); - assert!(su_count > 0); - - // additional call doesn't error because the conflict gets eaten - let result = nexus.populate_mock_system_updates(&opctx).await; - assert!(result.is_ok()); - - // count didn't change - let system_updates = nexus - .system_updates_list_by_id(&opctx, &test_pagparams()) - .await - .unwrap(); - assert_eq!(system_updates.len(), su_count); + _opctx: &OpContext, + _artifact: ArtifactId, + ) -> Result, Error> { + // TODO: this is part of the TUF repo depot. 
+ return Err(Error::internal_error( + "artifact download not implemented, \ + will be part of TUF repo depot", + )); } } diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a6cb9e80fe..3c3c40d026 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -8,14 +8,13 @@ use super::{ console_api, device_auth, params, views::{ self, Certificate, Group, IdentityProvider, Image, IpPool, IpPoolRange, - PhysicalDisk, Project, Rack, Role, Silo, SiloUtilization, Sled, - Snapshot, SshKey, User, UserBuiltin, Vpc, VpcRouter, VpcSubnet, + PhysicalDisk, Project, Rack, Role, Silo, SiloQuotas, SiloUtilization, + Sled, Snapshot, SshKey, User, UserBuiltin, Utilization, Vpc, VpcRouter, + VpcSubnet, }, }; use crate::external_api::shared; use crate::ServerContext; -use chrono::Utc; -use dropshot::ApiDescription; use dropshot::EmptyScanParams; use dropshot::HttpError; use dropshot::HttpResponseAccepted; @@ -34,6 +33,7 @@ use dropshot::WhichPage; use dropshot::{ channel, endpoint, WebsocketChannelResult, WebsocketConnection, }; +use dropshot::{ApiDescription, StreamingBody}; use ipnetwork::IpNetwork; use nexus_db_queries::authz; use nexus_db_queries::db; @@ -41,9 +41,6 @@ use nexus_db_queries::db::identity::Resource; use nexus_db_queries::db::lookup::ImageLookup; use nexus_db_queries::db::lookup::ImageParentLookup; use nexus_db_queries::db::model::Name; -use nexus_types::external_api::views::SiloQuotas; -use nexus_types::external_api::views::Utilization; -use nexus_types::identity::AssetIdentityMetadata; use omicron_common::api::external::http_pagination::data_page_params_for; use omicron_common::api::external::http_pagination::marker_for_name; use omicron_common::api::external::http_pagination::marker_for_name_or_id; @@ -76,6 +73,8 @@ use omicron_common::api::external::RouterRouteKind; use omicron_common::api::external::SwitchPort; use omicron_common::api::external::SwitchPortSettings; use omicron_common::api::external::SwitchPortSettingsView; +use omicron_common::api::external::TufRepoGetResponse; +use omicron_common::api::external::TufRepoInsertResponse; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_common::api::external::VpcFirewallRules; use omicron_common::bail_unless; @@ -309,16 +308,8 @@ pub(crate) fn external_api() -> NexusApiDescription { api.register(system_metric)?; api.register(silo_metric)?; - api.register(system_update_refresh)?; - api.register(system_version)?; - api.register(system_component_version_list)?; - api.register(system_update_list)?; - api.register(system_update_view)?; - api.register(system_update_start)?; - api.register(system_update_stop)?; - api.register(system_update_components_list)?; - api.register(update_deployments_list)?; - api.register(update_deployment_view)?; + api.register(system_update_put_repository)?; + api.register(system_update_get_repository)?; api.register(user_list)?; api.register(silo_user_list)?; @@ -433,12 +424,6 @@ async fn system_policy_view( apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Path parameters for `/by-id/` endpoints -#[derive(Deserialize, JsonSchema)] -struct ByIdPathParams { - id: Uuid, -} - /// Update the top-level IAM policy #[endpoint { method = PUT, @@ -5376,320 +5361,56 @@ async fn silo_metric( // Updates -/// Refresh update data -#[endpoint { - method = POST, - path = "/v1/system/update/refresh", - tags = ["system/update"], - unpublished = true, -}] -async fn 
system_update_refresh( - rqctx: RequestContext>, -) -> Result { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - nexus.updates_refresh_metadata(&opctx).await?; - Ok(HttpResponseUpdatedNoContent()) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - -/// View system version and update status -#[endpoint { - method = GET, - path = "/v1/system/update/version", - tags = ["system/update"], - unpublished = true, -}] -async fn system_version( - rqctx: RequestContext>, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - opctx.authorize(authz::Action::ListChildren, &authz::FLEET).await?; - - // The only way we have no latest deployment is if the rack was just set - // up and no system updates have ever been run. In this case there is no - // update running, so we can fall back to steady. - let status = nexus - .latest_update_deployment(&opctx) - .await - .map_or(views::UpdateStatus::Steady, |d| d.status.into()); - - // Updateable components, however, are populated at rack setup before - // the external API is even started, so if we get here and there are no - // components, that's a real issue and the 500 we throw is appropriate. - let low = nexus.lowest_component_system_version(&opctx).await?.into(); - let high = nexus.highest_component_system_version(&opctx).await?.into(); - - Ok(HttpResponseOk(views::SystemVersion { - version_range: views::VersionRange { low, high }, - status, - })) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - -/// View version and update status of component tree -#[endpoint { - method = GET, - path = "/v1/system/update/components", - tags = ["system/update"], - unpublished = true, -}] -async fn system_component_version_list( - rqctx: RequestContext>, - query_params: Query, -) -> Result>, HttpError> -{ - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let components = nexus - .updateable_components_list_by_id(&opctx, &pagparams) - .await? - .into_iter() - .map(|u| u.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - components, - &|_, u: &views::UpdateableComponent| u.identity.id, - )?)) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} - -/// List all updates -#[endpoint { - method = GET, - path = "/v1/system/update/updates", - tags = ["system/update"], - unpublished = true, -}] -async fn system_update_list( - rqctx: RequestContext>, - query_params: Query, -) -> Result>, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let query = query_params.into_inner(); - let pagparams = data_page_params_for(&rqctx, &query)?; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let updates = nexus - .system_updates_list_by_id(&opctx, &pagparams) - .await? 
-            .into_iter()
-            .map(|u| u.into())
-            .collect();
-        Ok(HttpResponseOk(ScanById::results_page(
-            &query,
-            updates,
-            &|_, u: &views::SystemUpdate| u.identity.id,
-        )?))
-    };
-    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
-}
-
-/// View system update
-#[endpoint {
-    method = GET,
-    path = "/v1/system/update/updates/{version}",
-    tags = ["system/update"],
-    unpublished = true,
-}]
-async fn system_update_view(
-    rqctx: RequestContext<Arc<ServerContext>>,
-    path_params: Path<params::SystemUpdatePath>,
-) -> Result<HttpResponseOk<views::SystemUpdate>, HttpError> {
-    let apictx = rqctx.context();
-    let nexus = &apictx.nexus;
-    let path = path_params.into_inner();
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let system_update =
-            nexus.system_update_fetch_by_version(&opctx, &path.version).await?;
-        Ok(HttpResponseOk(system_update.into()))
-    };
-    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
-}
-
-/// View system update component tree
+/// Upload a TUF repository
 #[endpoint {
-    method = GET,
-    path = "/v1/system/update/updates/{version}/components",
+    method = PUT,
+    path = "/v1/system/update/repository",
     tags = ["system/update"],
     unpublished = true,
 }]
-async fn system_update_components_list(
+async fn system_update_put_repository(
     rqctx: RequestContext<Arc<ServerContext>>,
-    path_params: Path<params::SystemUpdatePath>,
-) -> Result<HttpResponseOk<ResultsPage<views::ComponentUpdate>>, HttpError> {
+    query: Query<params::UpdatesPutRepositoryParams>,
+    body: StreamingBody,
+) -> Result<HttpResponseOk<TufRepoInsertResponse>, HttpError> {
     let apictx = rqctx.context();
     let nexus = &apictx.nexus;
-    let path = path_params.into_inner();
     let handler = async {
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let components = nexus
-            .system_update_list_components(&opctx, &path.version)
-            .await?
-            .into_iter()
-            .map(|i| i.into())
-            .collect();
-        Ok(HttpResponseOk(ResultsPage { items: components, next_page: None }))
+        let query = query.into_inner();
+        let body = body.into_stream();
+        let update =
+            nexus.updates_put_repository(&opctx, body, query.file_name).await?;
+        Ok(HttpResponseOk(update))
     };
     apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
 }
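For context, here is one way a client might drive the new upload endpoint, streaming a repository zip instead of buffering it. This is a hypothetical sketch assuming `reqwest` (with its `stream` feature), `tokio`, `tokio-util`, and `anyhow`; only the path and the `file_name` query parameter come from this patch, and base URL and auth handling are placeholders:

```rust
use anyhow::Result;

/// Stream a TUF repo archive to Nexus (sketch; base URL, auth, and error
/// handling are assumptions, not part of this patch).
async fn upload_repo(base_url: &str, zip_path: &str) -> Result<()> {
    let file = tokio::fs::File::open(zip_path).await?;
    // Chunk the file into a body stream rather than holding ~GiB in memory.
    let stream = tokio_util::io::ReaderStream::new(file);
    let response = reqwest::Client::new()
        .put(format!("{base_url}/v1/system/update/repository"))
        .query(&[("file_name", zip_path)])
        .body(reqwest::Body::wrap_stream(stream))
        .send()
        .await?
        .error_for_status()?;
    println!("repository accepted: HTTP {}", response.status());
    Ok(())
}
```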
-/// Start system update
+/// Get the description of a repository by system version.
 #[endpoint {
-    method = POST,
-    path = "/v1/system/update/start",
+    method = GET,
+    path = "/v1/system/update/repository/{system_version}",
     tags = ["system/update"],
     unpublished = true,
 }]
-async fn system_update_start(
-    rqctx: RequestContext<Arc<ServerContext>>,
-    // The use of the request body here instead of a path param is deliberate.
-    // Unlike instance start (which uses a path param), update start is about
-    // modifying the state of the system rather than the state of the resource
-    // (instance there, system update here) identified by the param. This
-    // approach also gives us symmetry with the /stop endpoint.
-    update: TypedBody<params::SystemUpdateStart>,
-) -> Result<HttpResponseAccepted<views::UpdateDeployment>, HttpError> {
-    let apictx = rqctx.context();
-    let _nexus = &apictx.nexus;
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
-
-        // inverse situation to stop: we only want to actually start an update
-        // if there isn't one already in progress.
-
-        // 1. check that there is no update in progress
-        //    a. if there is one, this should probably 409
-        // 2. kick off the update start saga, which
-        //    a. tells the update system to get going
-        //    b. creates an update deployment
-
-        // similar question for stop: do we return the deployment directly, or a
-        // special StartUpdateResult that includes a deployment ID iff an update
-        // was actually started
-
-        Ok(HttpResponseAccepted(views::UpdateDeployment {
-            identity: AssetIdentityMetadata {
-                id: Uuid::new_v4(),
-                time_created: Utc::now(),
-                time_modified: Utc::now(),
-            },
-            version: update.into_inner().version,
-            status: views::UpdateStatus::Updating,
-        }))
-    };
-    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
-}
-
-/// Stop system update
-///
-/// If there is no update in progress, do nothing.
-#[endpoint {
-    method = POST,
-    path = "/v1/system/update/stop",
-    tags = ["system/update"],
-    unpublished = true,
-}]
-async fn system_update_stop(
-    rqctx: RequestContext<Arc<ServerContext>>,
-) -> Result<HttpResponseUpdatedNoContent, HttpError> {
-    let apictx = rqctx.context();
-    let _nexus = &apictx.nexus;
-    let handler = async {
-        let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        opctx.authorize(authz::Action::Modify, &authz::FLEET).await?;
-
-        // TODO: Implement stopping an update. Should probably be a saga.
-
-        // Ask update subsystem if it's doing anything. If so, tell it to stop.
-        // This could be done in a single call to the updater if the latter can
-        // respond to a stop command differently depending on whether it did
-        // anything or not.
-
-        // If we did in fact stop a running update, update the status on the
-        // latest update deployment in the DB to `stopped` and respond with that
-        // deployment. If we do nothing, what should we return? Maybe instead of
-        // responding with the deployment, this endpoint gets its own
-        // `StopUpdateResult` response view that says whether it was a noop, and
-        // if it wasn't, includes the ID of the stopped deployment, which allows
-        // the client to fetch it if it actually wants it.
-
-        Ok(HttpResponseUpdatedNoContent())
-    };
-    apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await
-}
-
-/// List all update deployments
-#[endpoint {
-    method = GET,
-    path = "/v1/system/update/deployments",
-    tags = ["system/update"],
-    unpublished = true,
-}]
-async fn update_deployments_list(
+async fn system_update_get_repository(
     rqctx: RequestContext<Arc<ServerContext>>,
-    query_params: Query<PaginatedById>,
-) -> Result<HttpResponseOk<ResultsPage<views::UpdateDeployment>>, HttpError> {
+    path_params: Path<params::UpdatesGetRepositoryParams>,
+) -> Result<HttpResponseOk<TufRepoGetResponse>, HttpError> {
     let apictx = rqctx.context();
     let nexus = &apictx.nexus;
-    let query = query_params.into_inner();
-    let pagparams = data_page_params_for(&rqctx, &query)?;
     let handler = async {
         let opctx = crate::context::op_context_for_external_api(&rqctx).await?;
-        let updates = nexus
-            .update_deployments_list_by_id(&opctx, &pagparams)
-            .await?
- .into_iter() - .map(|u| u.into()) - .collect(); - Ok(HttpResponseOk(ScanById::results_page( - &query, - updates, - &|_, u: &views::UpdateDeployment| u.identity.id, - )?)) + let params = path_params.into_inner(); + let description = + nexus.updates_get_repository(&opctx, params.system_version).await?; + Ok(HttpResponseOk(TufRepoGetResponse { + description: description.into_external(), + })) }; apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await } -/// Fetch a system update deployment -#[endpoint { - method = GET, - path = "/v1/system/update/deployments/{id}", - tags = ["system/update"], - unpublished = true, -}] -async fn update_deployment_view( - rqctx: RequestContext>, - path_params: Path, -) -> Result, HttpError> { - let apictx = rqctx.context(); - let nexus = &apictx.nexus; - let path = path_params.into_inner(); - let id = &path.id; - let handler = async { - let opctx = crate::context::op_context_for_external_api(&rqctx).await?; - let deployment = - nexus.update_deployment_fetch_by_id(&opctx, id).await?; - Ok(HttpResponseOk(deployment.into())) - }; - apictx.external_latencies.instrument_dropshot_handler(&rqctx, handler).await -} // Silo users /// List users diff --git a/nexus/src/internal_api/http_entrypoints.rs b/nexus/src/internal_api/http_entrypoints.rs index 63578e360a..58038cb37a 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -40,7 +40,7 @@ use omicron_common::api::external::Error; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::ProducerEndpoint; use omicron_common::api::internal::nexus::SledInstanceState; -use omicron_common::api::internal::nexus::UpdateArtifactId; +use omicron_common::update::ArtifactId; use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; use schemars::JsonSchema; @@ -438,15 +438,16 @@ async fn cpapi_metrics_collect( }] async fn cpapi_artifact_download( request_context: RequestContext>, - path_params: Path, + path_params: Path, ) -> Result, HttpError> { let context = request_context.context(); let nexus = &context.nexus; let opctx = crate::context::op_context_for_internal_api(&request_context).await; // TODO: return 404 if the error we get here says that the record isn't found - let body = - nexus.download_artifact(&opctx, path_params.into_inner()).await?; + let body = nexus + .updates_download_artifact(&opctx, path_params.into_inner()) + .await?; Ok(HttpResponseOk(Body::from(body).into())) } diff --git a/nexus/src/lib.rs b/nexus/src/lib.rs index 01aca36e1d..e1392440a1 100644 --- a/nexus/src/lib.rs +++ b/nexus/src/lib.rs @@ -20,7 +20,6 @@ pub mod external_api; // Public for testing mod internal_api; mod populate; mod saga_interface; -mod updates; // public for testing pub use app::test_interfaces::TestInterfaces; pub use app::Nexus; diff --git a/nexus/src/updates.rs b/nexus/src/updates.rs deleted file mode 100644 index 2f57868acc..0000000000 --- a/nexus/src/updates.rs +++ /dev/null @@ -1,74 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use buf_list::BufList; -use futures::TryStreamExt; -use nexus_db_queries::db; -use omicron_common::update::ArtifactsDocument; -use std::convert::TryInto; - -pub(crate) async fn read_artifacts( - trusted_root: &[u8], - mut base_url: String, -) -> Result< - Vec, - Box, -> { - if !base_url.ends_with('/') { - base_url.push('/'); - } - - let repository = tough::RepositoryLoader::new( - &trusted_root, - format!("{}metadata/", base_url).parse()?, - format!("{}targets/", base_url).parse()?, - ) - .load() - .await?; - - let artifact_document = - match repository.read_target(&"artifacts.json".parse()?).await? { - Some(target) => target.try_collect::().await?, - None => return Err("artifacts.json missing".into()), - }; - let artifacts: ArtifactsDocument = - serde_json::from_reader(buf_list::Cursor::new(&artifact_document))?; - - let valid_until = repository - .root() - .signed - .expires - .min(repository.snapshot().signed.expires) - .min(repository.targets().signed.expires) - .min(repository.timestamp().signed.expires); - - let mut v = Vec::new(); - for artifact in artifacts.artifacts { - // Skip any artifacts where we don't recognize its kind or the target - // name isn't in the repository - let target = - repository.targets().signed.targets.get(&artifact.target.parse()?); - let (kind, target) = match (artifact.kind.to_known(), target) { - (Some(kind), Some(target)) => (kind, target), - _ => break, - }; - - v.push(db::model::UpdateArtifact { - name: artifact.name, - version: db::model::SemverVersion(artifact.version), - kind: db::model::KnownArtifactKind(kind), - targets_role_version: repository - .targets() - .signed - .version - .get() - .try_into()?, - valid_until, - target_name: artifact.target, - target_sha256: hex::encode(&target.hashes.sha256), - target_length: target.length.try_into()?, - }); - } - Ok(v) -} diff --git a/nexus/test-utils/Cargo.toml b/nexus/test-utils/Cargo.toml index 4a7924770e..5605f33f75 100644 --- a/nexus/test-utils/Cargo.toml +++ b/nexus/test-utils/Cargo.toml @@ -36,6 +36,7 @@ serde_json.workspace = true serde_urlencoded.workspace = true slog.workspace = true tokio.workspace = true +tokio-util.workspace = true trust-dns-resolver.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/nexus/test-utils/src/http_testing.rs b/nexus/test-utils/src/http_testing.rs index bf5370a925..ae62218c93 100644 --- a/nexus/test-utils/src/http_testing.rs +++ b/nexus/test-utils/src/http_testing.rs @@ -7,6 +7,7 @@ use anyhow::anyhow; use anyhow::ensure; use anyhow::Context; +use camino::Utf8Path; use dropshot::test_util::ClientTestContext; use dropshot::ResultsPage; use headers::authorization::Credentials; @@ -147,6 +148,35 @@ impl<'a> RequestBuilder<'a> { self } + /// Set the outgoing request body to the contents of a file. + /// + /// A handle to the file will be kept open until the request is completed. + /// + /// If `path` is `None`, the request body will be empty. + pub fn body_file(mut self, path: Option<&Utf8Path>) -> Self { + match path { + Some(path) => { + // Turn the file into a stream. (Opening the file with + // std::fs::File::open means that this method doesn't have to + // be async.) 
+ let file = std::fs::File::open(path).with_context(|| { + format!("failed to open request body file at {path}") + }); + match file { + Ok(file) => { + let stream = tokio_util::io::ReaderStream::new( + tokio::fs::File::from_std(file), + ); + self.body = hyper::Body::wrap_stream(stream); + } + Err(error) => self.error = Some(error), + } + } + None => self.body = hyper::Body::empty(), + }; + self + } + /// Set the outgoing request body using URL encoding /// and set the content type appropriately /// diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 4f606f2bff..c721fe3606 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -32,7 +32,6 @@ use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::RouteDestination; use omicron_common::api::external::RouteTarget; -use omicron_common::api::external::SemverVersion; use omicron_common::api::external::VpcFirewallRuleUpdateParams; use omicron_test_utils::certificates::CertificateChain; use once_cell::sync::Lazy; @@ -708,13 +707,6 @@ pub static DEMO_SSHKEY_CREATE: Lazy = pub static DEMO_SPECIFIC_SSHKEY_URL: Lazy = Lazy::new(|| format!("{}/{}", DEMO_SSHKEYS_URL, *DEMO_SSHKEY_NAME)); -// System update - -pub static DEMO_SYSTEM_UPDATE_PARAMS: Lazy = - Lazy::new(|| params::SystemUpdatePath { - version: SemverVersion::new(1, 0, 0), - }); - // Project Floating IPs pub static DEMO_FLOAT_IP_NAME: Lazy = Lazy::new(|| "float-ip".parse().unwrap()); @@ -1920,81 +1912,22 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { /* Updates */ VerifyEndpoint { - url: "/v1/system/update/refresh", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Post( - serde_json::Value::Null - )], - }, - - VerifyEndpoint { - url: "/v1/system/update/version", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/components", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/updates", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - // TODO: make system update endpoints work instead of expecting 404 - - VerifyEndpoint { - url: "/v1/system/update/updates/1.0.0", + url: "/v1/system/update/repository?file_name=demo-repo.zip", visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/updates/1.0.0/components", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/start", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Post( - serde_json::to_value(&*DEMO_SYSTEM_UPDATE_PARAMS).unwrap() - )], - }, - - VerifyEndpoint { - url: "/v1/system/update/stop", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Post( - serde_json::Value::Null + allowed_methods: vec![AllowedMethod::Put( + // In reality this is the contents of a zip file. 
+ serde_json::Value::Null, )], }, VerifyEndpoint { - url: "/v1/system/update/deployments", + url: "/v1/system/update/repository/1.0.0", visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::Get], - }, - - VerifyEndpoint { - url: "/v1/system/update/deployments/120bbb6f-660a-440c-8cb7-199be202ddff", - visibility: Visibility::Public, - unprivileged_access: UnprivilegedAccess::None, - allowed_methods: vec![AllowedMethod::GetNonexistent], + // The update system is disabled, which causes a 500 error even for + // privileged users. That is captured by GetUnimplemented. + allowed_methods: vec![AllowedMethod::GetUnimplemented], }, /* Metrics */ diff --git a/nexus/tests/integration_tests/mod.rs b/nexus/tests/integration_tests/mod.rs index 6cb99b9e45..4b68a6c4f2 100644 --- a/nexus/tests/integration_tests/mod.rs +++ b/nexus/tests/integration_tests/mod.rs @@ -40,7 +40,6 @@ mod sp_updater; mod ssh_keys; mod subnet_allocation; mod switch_port; -mod system_updates; mod unauthorized; mod unauthorized_coverage; mod updates; diff --git a/nexus/tests/integration_tests/system_updates.rs b/nexus/tests/integration_tests/system_updates.rs deleted file mode 100644 index aa00caac29..0000000000 --- a/nexus/tests/integration_tests/system_updates.rs +++ /dev/null @@ -1,219 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -use dropshot::ResultsPage; -use http::{method::Method, StatusCode}; -use nexus_db_queries::context::OpContext; -use nexus_test_utils::http_testing::{AuthnMode, NexusRequest}; -use nexus_test_utils_macros::nexus_test; -use nexus_types::external_api::{ - params, shared::UpdateableComponentType, views, -}; -use omicron_common::api::external::SemverVersion; - -type ControlPlaneTestContext = - nexus_test_utils::ControlPlaneTestContext; - -// This file could be combined with ./updates.rs, but there's a lot going on in -// there that has nothing to do with testing the API endpoints. We could come up -// with more descriptive names. - -/// Because there are no create endpoints for these resources, we need to call -/// the `nexus` functions directly. 
-async fn populate_db(cptestctx: &ControlPlaneTestContext) { - let nexus = &cptestctx.server.apictx().nexus; - let opctx = OpContext::for_tests( - cptestctx.logctx.log.new(o!()), - cptestctx.server.apictx().nexus.datastore().clone(), - ); - - // system updates have to exist first - let create_su = - params::SystemUpdateCreate { version: SemverVersion::new(0, 2, 0) }; - nexus - .upsert_system_update(&opctx, create_su) - .await - .expect("Failed to create system update"); - let create_su = - params::SystemUpdateCreate { version: SemverVersion::new(1, 0, 1) }; - nexus - .upsert_system_update(&opctx, create_su) - .await - .expect("Failed to create system update"); - - nexus - .create_updateable_component( - &opctx, - params::UpdateableComponentCreate { - version: SemverVersion::new(0, 4, 1), - system_version: SemverVersion::new(0, 2, 0), - component_type: UpdateableComponentType::BootloaderForSp, - device_id: "look-a-device".to_string(), - }, - ) - .await - .expect("failed to create updateable component"); - - nexus - .create_updateable_component( - &opctx, - params::UpdateableComponentCreate { - version: SemverVersion::new(0, 4, 1), - system_version: SemverVersion::new(1, 0, 1), - component_type: UpdateableComponentType::HubrisForGimletSp, - device_id: "another-device".to_string(), - }, - ) - .await - .expect("failed to create updateable component"); -} - -#[nexus_test] -async fn test_system_version(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - // Initially the endpoint 500s because there are no updateable components. - // This is the desired behavior because those are populated by rack startup - // before the external API starts, so it really is a problem if we can hit - // this endpoint without any data backing it. - // - // Because this data is now populated at rack init, this doesn't work as a - // test. If we really wanted to test it, we would have to run the tests - // without that bit of setup. 
- // - // NexusRequest::expect_failure( - // &client, - // StatusCode::INTERNAL_SERVER_ERROR, - // Method::GET, - // "/v1/system/update/version", - // ) - // .authn_as(AuthnMode::PrivilegedUser) - // .execute() - // .await - // .expect("Failed to 500 with no system version data"); - - // create two updateable components - populate_db(&cptestctx).await; - - let version = - NexusRequest::object_get(&client, "/v1/system/update/version") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() - .await; - - assert_eq!( - version, - views::SystemVersion { - version_range: views::VersionRange { - low: SemverVersion::new(0, 2, 0), - high: SemverVersion::new(2, 0, 0), - }, - status: views::UpdateStatus::Updating, - } - ); -} - -#[nexus_test] -async fn test_list_updates(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - let updates = - NexusRequest::object_get(&client, &"/v1/system/update/updates") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::>() - .await; - - assert_eq!(updates.items.len(), 3); -} - -#[nexus_test] -async fn test_list_components(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - let component_updates = - NexusRequest::object_get(&client, &"/v1/system/update/components") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::>() - .await; - - assert_eq!(component_updates.items.len(), 9); -} - -#[nexus_test] -async fn test_get_update(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - // existing update works - let update = - NexusRequest::object_get(&client, &"/v1/system/update/updates/1.0.0") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() - .await; - - assert_eq!(update.version, SemverVersion::new(1, 0, 0)); - - // non-existent update 404s - NexusRequest::expect_failure( - client, - StatusCode::NOT_FOUND, - Method::GET, - "/v1/system/update/updates/1.0.1", - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("Failed to 404 on non-existent update"); -} - -#[nexus_test] -async fn test_list_update_components(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - // listing components of an existing update works - let components = NexusRequest::object_get( - &client, - &"/v1/system/update/updates/1.0.0/components", - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::>() - .await; - - assert_eq!(components.items.len(), 9); - - // non existent 404s - NexusRequest::expect_failure( - client, - StatusCode::NOT_FOUND, - Method::GET, - "/v1/system/update/updates/1.0.1/components", - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .expect("Failed to 404 on components of nonexistent system update"); -} - -#[nexus_test] -async fn test_update_deployments(cptestctx: &ControlPlaneTestContext) { - let client = &cptestctx.external_client; - - let deployments = - NexusRequest::object_get(&client, &"/v1/system/update/deployments") - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::>() - .await; - - assert_eq!(deployments.items.len(), 2); - - let first_dep = deployments.items.get(0).unwrap(); - - let dep_id = first_dep.identity.id.to_string(); - let dep_url = format!("/v1/system/update/deployments/{}", dep_id); - let deployment = NexusRequest::object_get(&client, &dep_url) - .authn_as(AuthnMode::PrivilegedUser) - .execute_and_parse_unwrap::() - .await; - - assert_eq!(deployment.version, first_dep.version); -} diff --git 
a/nexus/tests/integration_tests/updates.rs b/nexus/tests/integration_tests/updates.rs index 418e12e001..e830348103 100644 --- a/nexus/tests/integration_tests/updates.rs +++ b/nexus/tests/integration_tests/updates.rs @@ -7,69 +7,49 @@ // - test that an unknown artifact returns 404, not 500 // - tests around target names and artifact names that contain dangerous paths like `../` -use async_trait::async_trait; -use camino_tempfile::Utf8TempDir; -use chrono::{Duration, Utc}; +use anyhow::{ensure, Context, Result}; +use camino::Utf8Path; +use camino_tempfile::{Builder, Utf8TempDir, Utf8TempPath}; +use clap::Parser; use dropshot::test_util::LogContext; -use dropshot::{ - endpoint, ApiDescription, HttpError, HttpServerStarter, Path, - RequestContext, -}; -use http::{Method, Response, StatusCode}; -use hyper::Body; +use http::{Method, StatusCode}; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils::{load_test_config, test_setup, test_setup_with_config}; +use omicron_common::api::external::{ + SemverVersion, TufRepoGetResponse, TufRepoInsertResponse, + TufRepoInsertStatus, +}; use omicron_common::api::internal::nexus::KnownArtifactKind; use omicron_common::nexus_config::UpdatesConfig; -use omicron_common::update::{Artifact, ArtifactKind, ArtifactsDocument}; use omicron_sled_agent::sim; -use ring::pkcs8::Document; -use ring::rand::{SecureRandom, SystemRandom}; -use ring::signature::Ed25519KeyPair; -use schemars::JsonSchema; +use pretty_assertions::assert_eq; use serde::Deserialize; -use std::collections::HashMap; -use std::convert::TryInto; -use std::fmt::{self, Debug}; +use std::fmt::Debug; use std::fs::File; use std::io::Write; -use std::num::NonZeroU64; -use std::path::PathBuf; -use tempfile::{NamedTempFile, TempDir}; -use tough::editor::signed::{PathExists, SignedRole}; -use tough::editor::RepositoryEditor; -use tough::key_source::KeySource; -use tough::schema::{KeyHolder, RoleKeys, RoleType, Root}; -use tough::sign::Sign; +use tufaceous_lib::assemble::{DeserializedManifest, ManifestTweak}; -const UPDATE_COMPONENT: &'static str = "omicron-test-component"; +const FAKE_MANIFEST_PATH: &'static str = "../tufaceous/manifests/fake.toml"; -#[tokio::test] -async fn test_update_end_to_end() { +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_update_uninitialized() -> Result<()> { let mut config = load_test_config(); - let logctx = LogContext::new("test_update_end_to_end", &config.pkg.log); + let logctx = LogContext::new("test_update_uninitialized", &config.pkg.log); + + // Build a fake TUF repo + let temp_dir = Utf8TempDir::new()?; + let archive_path = temp_dir.path().join("archive.zip"); + + let args = tufaceous::Args::try_parse_from([ + "tufaceous", + "assemble", + FAKE_MANIFEST_PATH, + archive_path.as_str(), + ]) + .context("error parsing args")?; + + args.exec(&logctx.log).await.context("error executing assemble command")?; - // build the TUF repo - let rng = SystemRandom::new(); - let tuf_repo = new_tuf_repo(&rng).await; - slog::info!(logctx.log, "TUF repo created at {}", tuf_repo.path()); - - // serve it over HTTP - let dropshot_config = Default::default(); - let mut api = ApiDescription::new(); - api.register(static_content).unwrap(); - let context = FileServerContext { base: tuf_repo.path().to_owned().into() }; - let server = - HttpServerStarter::new(&dropshot_config, api, context, &logctx.log) - .unwrap() - .start(); - let local_addr = server.local_addr(); - - // stand up the test environment - config.pkg.updates = 
Some(UpdatesConfig { - trusted_root: tuf_repo.path().join("metadata").join("1.root.json"), - default_base_url: format!("http://{}/", local_addr), - }); let cptestctx = test_setup_with_config::( "test_update_end_to_end", &mut config, @@ -79,212 +59,304 @@ async fn test_update_end_to_end() { .await; let client = &cptestctx.external_client; - // call /v1/system/update/refresh on nexus - // - download and verify the repo - // - return 204 Non Content - // - tells sled agent to do the thing - NexusRequest::new( - RequestBuilder::new(client, Method::POST, "/v1/system/update/refresh") - .expect_status(Some(StatusCode::NO_CONTENT)), - ) - .authn_as(AuthnMode::PrivilegedUser) - .execute() - .await - .unwrap(); + // Attempt to upload the repository to Nexus. This should fail with a 500 + // error because the updates system is not configured. + { + make_upload_request( + client, + &archive_path, + StatusCode::INTERNAL_SERVER_ERROR, + ) + .execute() + .await + .context("repository upload should have failed with 500 error")?; + } - let artifact_path = cptestctx.sled_agent_storage.path(); - let component_path = artifact_path.join(UPDATE_COMPONENT); - // check sled agent did the thing - assert_eq!(tokio::fs::read(component_path).await.unwrap(), TARGET_CONTENTS); + // Attempt to fetch a repository description from Nexus. This should also + // fail with a 500 error. + { + make_get_request( + client, + "1.0.0".parse().unwrap(), + StatusCode::INTERNAL_SERVER_ERROR, + ) + .execute() + .await + .context("repository fetch should have failed with 500 error")?; + } - server.close().await.expect("failed to shut down dropshot server"); cptestctx.teardown().await; logctx.cleanup_successful(); + + Ok(()) } -// =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= +#[tokio::test(flavor = "multi_thread", worker_threads = 2)] +async fn test_update_end_to_end() -> Result<()> { + let mut config = load_test_config(); + config.pkg.updates = Some(UpdatesConfig { + // XXX: This is currently not used by the update system, but + // trusted_root will become meaningful in the future. + trusted_root: "does-not-exist.json".into(), + }); + let logctx = LogContext::new("test_update_end_to_end", &config.pkg.log); -struct FileServerContext { - base: PathBuf, -} + // Build a fake TUF repo + let temp_dir = Utf8TempDir::new()?; + let archive_path = temp_dir.path().join("archive.zip"); -#[derive(Deserialize, JsonSchema)] -struct AllPath { - path: Vec, -} + let args = tufaceous::Args::try_parse_from([ + "tufaceous", + "assemble", + FAKE_MANIFEST_PATH, + archive_path.as_str(), + ]) + .context("error parsing args")?; -#[endpoint(method = GET, path = "/{path:.*}", unpublished = true)] -async fn static_content( - rqctx: RequestContext, - path: Path, -) -> Result, HttpError> { - // NOTE: this is a particularly brief and bad implementation of this to keep the test shorter. - // see https://github.com/oxidecomputer/dropshot/blob/main/dropshot/examples/file_server.rs for - // something more robust! - let mut fs_path = rqctx.context().base.clone(); - for component in path.into_inner().path { - fs_path.push(component); - } - let body = tokio::fs::read(fs_path).await.map_err(|e| { - // tough 0.15+ depend on ENOENT being translated into 404. - if e.kind() == std::io::ErrorKind::NotFound { - HttpError::for_not_found(None, e.to_string()) - } else { - HttpError::for_bad_request(None, e.to_string()) - } - })?; - Ok(Response::builder().status(StatusCode::OK).body(body.into())?) 
-} + args.exec(&logctx.log).await.context("error executing assemble command")?; -// =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= + let cptestctx = test_setup_with_config::( + "test_update_end_to_end", + &mut config, + sim::SimMode::Explicit, + None, + ) + .await; + let client = &cptestctx.external_client; -const TARGET_CONTENTS: &[u8] = b"hello world".as_slice(); - -async fn new_tuf_repo(rng: &(dyn SecureRandom + Sync)) -> Utf8TempDir { - let version = - NonZeroU64::new(Utc::now().timestamp().try_into().unwrap()).unwrap(); - let expires = Utc::now() + Duration::minutes(5); - - // create the key - let key_data = Ed25519KeyPair::generate_pkcs8(rng).unwrap(); - let key = Ed25519KeyPair::from_pkcs8(key_data.as_ref()).unwrap(); - let tuf_key = key.tuf_key(); - let key_id = tuf_key.key_id().unwrap(); - - // create the root role - let mut root = Root { - spec_version: "1.0.0".to_string(), - consistent_snapshot: true, - version: NonZeroU64::new(1).unwrap(), - expires, - keys: HashMap::new(), - roles: HashMap::new(), - _extra: HashMap::new(), + // Upload the repository to Nexus. + let mut initial_description = { + let response = + make_upload_request(client, &archive_path, StatusCode::OK) + .execute() + .await + .context("error uploading repository")?; + + let response = + serde_json::from_slice::(&response.body) + .context("error deserializing response body")?; + assert_eq!(response.status, TufRepoInsertStatus::Inserted); + response.recorded }; - root.keys.insert(key_id.clone(), tuf_key); - for role in [ - RoleType::Root, - RoleType::Snapshot, - RoleType::Targets, - RoleType::Timestamp, - ] { - root.roles.insert( - role, - RoleKeys { - keyids: vec![key_id.clone()], - threshold: NonZeroU64::new(1).unwrap(), - _extra: HashMap::new(), - }, - ); - } - - let signing_keys = - vec![Box::new(KeyKeySource(key_data)) as Box]; - // self-sign the root role - let signed_root = SignedRole::new( - root.clone(), - &KeyHolder::Root(root), - &signing_keys, - rng, - ) - .await - .unwrap(); + // Upload the repository to Nexus again. This should return a 200 with an + // `AlreadyExists` status. + let mut reupload_description = { + let response = + make_upload_request(client, &archive_path, StatusCode::OK) + .execute() + .await + .context("error uploading repository a second time")?; + + let response = + serde_json::from_slice::(&response.body) + .context("error deserializing response body")?; + assert_eq!(response.status, TufRepoInsertStatus::AlreadyExists); + response.recorded + }; - // TODO(iliana): there's no way to create a `RepositoryEditor` without having the root.json on - // disk. this is really unergonomic. 
write and upstream a fix - let mut root_tmp = NamedTempFile::new().unwrap(); - root_tmp.as_file_mut().write_all(signed_root.buffer()).unwrap(); - let mut editor = RepositoryEditor::new(&root_tmp).await.unwrap(); - root_tmp.close().unwrap(); - - editor - .targets_version(version) - .unwrap() - .targets_expires(expires) - .unwrap() - .snapshot_version(version) - .snapshot_expires(expires) - .timestamp_version(version) - .timestamp_expires(expires); - let (targets_dir, target_names) = generate_targets(); - for target in target_names { - editor.add_target_path(targets_dir.path().join(target)).await.unwrap(); - } + initial_description.sort_artifacts(); + reupload_description.sort_artifacts(); - let signed_repo = editor.sign(&signing_keys).await.unwrap(); + assert_eq!( + initial_description, reupload_description, + "initial description matches reupload" + ); - let repo = Utf8TempDir::new().unwrap(); - signed_repo.write(repo.path().join("metadata")).await.unwrap(); - signed_repo - .copy_targets( - targets_dir, - repo.path().join("targets"), - PathExists::Fail, + // Now get the repository that was just uploaded. + let mut get_description = { + let response = make_get_request( + client, + "1.0.0".parse().unwrap(), // this is the system version of the fake manifest + StatusCode::OK, ) + .execute() .await - .unwrap(); - - repo -} + .context("error fetching repository")?; -// Returns a temporary directory of targets and the list of filenames in it. -fn generate_targets() -> (TempDir, Vec<&'static str>) { - let dir = TempDir::new().unwrap(); - - // The update artifact. This will someday be a tarball of some variety. - std::fs::write( - dir.path().join(format!("{UPDATE_COMPONENT}-1")), - TARGET_CONTENTS, - ) - .unwrap(); - - // artifacts.json, which describes all available artifacts. - let artifacts = ArtifactsDocument { - system_version: "1.0.0".parse().unwrap(), - artifacts: vec![Artifact { - name: UPDATE_COMPONENT.into(), - version: "0.0.0".parse().unwrap(), - kind: ArtifactKind::from_known(KnownArtifactKind::ControlPlane), - target: format!("{UPDATE_COMPONENT}-1"), - }], + let response = + serde_json::from_slice::(&response.body) + .context("error deserializing response body")?; + response.description }; - let f = File::create(dir.path().join("artifacts.json")).unwrap(); - serde_json::to_writer_pretty(f, &artifacts).unwrap(); - (dir, vec!["omicron-test-component-1", "artifacts.json"]) -} + get_description.sort_artifacts(); -// =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= + assert_eq!( + initial_description, get_description, + "initial description matches fetched description" + ); -// Wrapper struct so that we can use an in-memory key as a key source. -// TODO(iliana): this should just be in tough with a lot less hacks -struct KeyKeySource(Document); + // TODO: attempt to download extracted artifacts. -impl Debug for KeyKeySource { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - f.debug_struct("KeyKeySource").finish() + // Upload a new repository with the same system version but a different + // version for one of the components. This will produce a different hash, + // which should return an error. 
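+    // (The artifact version is recorded in the repo's artifacts.json, so
+    // changing it changes the archive bytes and therefore the zip's SHA256,
+    // while the system version stays 1.0.0. Since system version 1.0.0 is
+    // already bound to the original hash, the upload below should conflict.)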
+ { + let tweaks = &[ManifestTweak::ArtifactVersion { + kind: KnownArtifactKind::GimletSp, + version: "2.0.0".parse().unwrap(), + }]; + let archive_path = + make_tweaked_archive(&logctx.log, &temp_dir, tweaks).await?; + + let response = make_upload_request( + client, + &archive_path, + StatusCode::CONFLICT, + ) + .execute() + .await + .context( + "error uploading repository with different artifact version \ + but same system version", + )?; + assert_error_message_contains( + &response.body, + "Uploaded repository with system version 1.0.0 has SHA256 hash", + )?; } -} -#[async_trait] -impl KeySource for KeyKeySource { - async fn as_sign( - &self, - ) -> Result, Box> + // Upload a new repository with a different system version and different + // contents (but same version) for an artifact. { - // this is a really ugly hack, because tough doesn't `impl Sign for &'a T where T: Sign`. - // awslabs/tough#446 - Ok(Box::new(Ed25519KeyPair::from_pkcs8(self.0.as_ref()).unwrap())) + let tweaks = &[ + ManifestTweak::SystemVersion("2.0.0".parse().unwrap()), + ManifestTweak::ArtifactContents { + kind: KnownArtifactKind::ControlPlane, + size_delta: 1024, + }, + ]; + let archive_path = + make_tweaked_archive(&logctx.log, &temp_dir, tweaks).await?; + + let response = + make_upload_request(client, &archive_path, StatusCode::CONFLICT) + .execute() + .await + .context( + "error uploading repository with artifact \ + containing different hash for same version", + )?; + assert_error_message_contains( + &response.body, + "Uploaded artifacts don't match existing artifacts with same IDs:", + )?; } - async fn write( - &self, - _value: &str, - _key_id_hex: &str, - ) -> Result<(), Box> { - unimplemented!(); + // Upload a new repository with a different system version but no other + // changes. This should be accepted. + { + let tweaks = &[ManifestTweak::SystemVersion("2.0.0".parse().unwrap())]; + let archive_path = + make_tweaked_archive(&logctx.log, &temp_dir, tweaks).await?; + + let response = + make_upload_request(client, &archive_path, StatusCode::OK) + .execute() + .await + .context("error uploading repository with different system version (should succeed)")?; + + let response = + serde_json::from_slice::(&response.body) + .context("error deserializing response body")?; + assert_eq!(response.status, TufRepoInsertStatus::Inserted); } + + cptestctx.teardown().await; + logctx.cleanup_successful(); + + Ok(()) +} + +async fn make_tweaked_archive( + log: &slog::Logger, + temp_dir: &Utf8TempDir, + tweaks: &[ManifestTweak], +) -> anyhow::Result { + let manifest = DeserializedManifest::tweaked_fake(tweaks); + let manifest_path = temp_dir.path().join("fake2.toml"); + let mut manifest_file = + File::create(&manifest_path).context("error creating manifest file")?; + let manifest_to_toml = manifest.to_toml()?; + manifest_file.write_all(manifest_to_toml.as_bytes())?; + + let archive_path = Builder::new() + .prefix("archive") + .suffix(".zip") + .tempfile_in(temp_dir.path()) + .context("error creating temp file for tweaked archive")? 
+ .into_temp_path(); + + let args = tufaceous::Args::try_parse_from([ + "tufaceous", + "assemble", + manifest_path.as_str(), + archive_path.as_str(), + ]) + .context("error parsing args")?; + + args.exec(log).await.context("error executing assemble command")?; + + Ok(archive_path) +} + +fn make_upload_request<'a>( + client: &'a dropshot::test_util::ClientTestContext, + archive_path: &'a Utf8Path, + expected_status: StatusCode, +) -> NexusRequest<'a> { + let file_name = + archive_path.file_name().expect("archive_path must have a file name"); + let request = NexusRequest::new( + RequestBuilder::new( + client, + Method::PUT, + &format!("/v1/system/update/repository?file_name={}", file_name), + ) + .body_file(Some(archive_path)) + .expect_status(Some(expected_status)), + ) + .authn_as(AuthnMode::PrivilegedUser); + request +} + +fn make_get_request( + client: &dropshot::test_util::ClientTestContext, + system_version: SemverVersion, + expected_status: StatusCode, +) -> NexusRequest<'_> { + let request = NexusRequest::new( + RequestBuilder::new( + client, + Method::GET, + &format!("/v1/system/update/repository/{system_version}"), + ) + .expect_status(Some(expected_status)), + ) + .authn_as(AuthnMode::PrivilegedUser); + request +} + +#[derive(Debug, Deserialize)] +struct ErrorBody { + message: String, +} + +// XXX: maybe replace this with a more detailed error code +fn assert_error_message_contains( + body: &[u8], + needle: &str, +) -> anyhow::Result<()> { + let body: ErrorBody = + serde_json::from_slice(body).context("body is not valid JSON")?; + ensure!( + body.message.contains(needle), + "expected body to contain {:?}, but it was {:?}", + needle, + body + ); + Ok(()) } // =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= =^..^= diff --git a/nexus/tests/output/unexpected-authz-endpoints.txt b/nexus/tests/output/unexpected-authz-endpoints.txt index 1cd87a75e5..e8bb60224a 100644 --- a/nexus/tests/output/unexpected-authz-endpoints.txt +++ b/nexus/tests/output/unexpected-authz-endpoints.txt @@ -9,13 +9,5 @@ POST "/v1/vpc-router-routes?project=demo-project&vpc=demo-vpc&router=demo-vpc- GET "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" PUT "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" DELETE "/v1/vpc-router-routes/demo-router-route?project=demo-project&vpc=demo-vpc&router=demo-vpc-router" -POST "/v1/system/update/refresh" -GET "/v1/system/update/version" -GET "/v1/system/update/components" -GET "/v1/system/update/updates" -GET "/v1/system/update/updates/1.0.0" -GET "/v1/system/update/updates/1.0.0/components" -POST "/v1/system/update/start" -POST "/v1/system/update/stop" -GET "/v1/system/update/deployments" -GET "/v1/system/update/deployments/120bbb6f-660a-440c-8cb7-199be202ddff" +PUT "/v1/system/update/repository?file_name=demo-repo.zip" +GET "/v1/system/update/repository/1.0.0" diff --git a/nexus/types/src/external_api/params.rs b/nexus/types/src/external_api/params.rs index 62c8224461..c32dae4df9 100644 --- a/nexus/types/src/external_api/params.rs +++ b/nexus/types/src/external_api/params.rs @@ -1960,32 +1960,17 @@ pub struct ResourceMetrics { // SYSTEM UPDATE +/// Parameters for PUT requests for `/v1/system/update/repository`. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SystemUpdatePath { - pub version: SemverVersion, +pub struct UpdatesPutRepositoryParams { + /// The name of the uploaded file. 
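+    ///
+    /// Recorded for debugging only (it is stored as `tuf_repo.file_name` in
+    /// the schema added by this change), e.g. `demo-repo.zip`.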
+ pub file_name: String, } -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SystemUpdateStart { - pub version: SemverVersion, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SystemUpdateCreate { - pub version: SemverVersion, -} +/// Parameters for GET requests for `/v1/system/update/repository`. -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct ComponentUpdateCreate { - pub version: SemverVersion, - pub component_type: shared::UpdateableComponentType, - pub system_update_id: Uuid, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct UpdateableComponentCreate { - pub version: SemverVersion, +#[derive(Clone, Debug, Deserialize, JsonSchema)] +pub struct UpdatesGetRepositoryParams { + /// The version to get. pub system_version: SemverVersion, - pub component_type: shared::UpdateableComponentType, - pub device_id: String, } diff --git a/nexus/types/src/external_api/views.rs b/nexus/types/src/external_api/views.rs index 5e31be7af8..45cfe8e267 100644 --- a/nexus/types/src/external_api/views.rs +++ b/nexus/types/src/external_api/views.rs @@ -13,7 +13,7 @@ use chrono::DateTime; use chrono::Utc; use omicron_common::api::external::{ ByteCount, Digest, Error, IdentityMetadata, InstanceState, Ipv4Net, - Ipv6Net, Name, ObjectIdentity, RoleName, SemverVersion, SimpleIdentity, + Ipv6Net, Name, ObjectIdentity, RoleName, SimpleIdentity, }; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; @@ -608,65 +608,6 @@ pub enum DeviceAccessTokenType { Bearer, } -// SYSTEM UPDATES - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] -pub struct VersionRange { - pub low: SemverVersion, - pub high: SemverVersion, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] -#[serde(tag = "status", rename_all = "snake_case")] -pub enum UpdateStatus { - Updating, - Steady, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] -pub struct SystemVersion { - pub version_range: VersionRange, - pub status: UpdateStatus, - // TODO: time_released? time_last_applied? 
I got a fever and the only - // prescription is more timestamps -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct SystemUpdate { - #[serde(flatten)] - pub identity: AssetIdentityMetadata, - pub version: SemverVersion, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct ComponentUpdate { - #[serde(flatten)] - pub identity: AssetIdentityMetadata, - - pub component_type: shared::UpdateableComponentType, - pub version: SemverVersion, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct UpdateableComponent { - #[serde(flatten)] - pub identity: AssetIdentityMetadata, - - pub device_id: String, - pub component_type: shared::UpdateableComponentType, - pub version: SemverVersion, - pub system_version: SemverVersion, - pub status: UpdateStatus, -} - -#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] -pub struct UpdateDeployment { - #[serde(flatten)] - pub identity: AssetIdentityMetadata, - pub version: SemverVersion, - pub status: UpdateStatus, -} - // SYSTEM HEALTH #[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize, JsonSchema)] diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index b5cbb25c66..2a047068ee 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -18,10 +18,10 @@ { "in": "path", "name": "kind", - "description": "The kind of update artifact this is.", + "description": "The kind of artifact this is.", "required": true, "schema": { - "$ref": "#/components/schemas/KnownArtifactKind" + "type": "string" } }, { @@ -6534,21 +6534,6 @@ "ZpoolPutResponse": { "type": "object" }, - "KnownArtifactKind": { - "description": "Kinds of update artifacts, as used by Nexus to determine what updates are available and by sled-agent to determine how to apply an update when asked.", - "type": "string", - "enum": [ - "gimlet_sp", - "gimlet_rot", - "host", - "trampoline", - "control_plane", - "psc_sp", - "psc_rot", - "switch_sp", - "switch_rot" - ] - }, "SemverVersion": { "type": "string", "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" diff --git a/schema/crdb/27.0.0/up01.sql b/schema/crdb/27.0.0/up01.sql new file mode 100644 index 0000000000..5b7fb4df93 --- /dev/null +++ b/schema/crdb/27.0.0/up01.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.update_deployment; diff --git a/schema/crdb/27.0.0/up02.sql b/schema/crdb/27.0.0/up02.sql new file mode 100644 index 0000000000..a6ab82583d --- /dev/null +++ b/schema/crdb/27.0.0/up02.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.updateable_component; diff --git a/schema/crdb/27.0.0/up03.sql b/schema/crdb/27.0.0/up03.sql new file mode 100644 index 0000000000..8a9b89bd5c --- /dev/null +++ b/schema/crdb/27.0.0/up03.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.system_update_component_update; diff --git a/schema/crdb/27.0.0/up04.sql b/schema/crdb/27.0.0/up04.sql new file mode 100644 index 0000000000..9fb8d61a1e --- /dev/null +++ b/schema/crdb/27.0.0/up04.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.component_update; diff --git a/schema/crdb/27.0.0/up05.sql b/schema/crdb/27.0.0/up05.sql new file mode 100644 index 0000000000..bb76e717ab --- /dev/null +++ b/schema/crdb/27.0.0/up05.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.updateable_component_type; diff --git a/schema/crdb/27.0.0/up06.sql b/schema/crdb/27.0.0/up06.sql new file mode 
100644 index 0000000000..a68d6595bb --- /dev/null +++ b/schema/crdb/27.0.0/up06.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.system_update; diff --git a/schema/crdb/27.0.0/up07.sql b/schema/crdb/27.0.0/up07.sql new file mode 100644 index 0000000000..ddcbbbb8fd --- /dev/null +++ b/schema/crdb/27.0.0/up07.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS omicron.public.update_artifact; diff --git a/schema/crdb/27.0.0/up08.sql b/schema/crdb/27.0.0/up08.sql new file mode 100644 index 0000000000..75a15dc817 --- /dev/null +++ b/schema/crdb/27.0.0/up08.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.update_artifact_kind; diff --git a/schema/crdb/27.0.0/up09.sql b/schema/crdb/27.0.0/up09.sql new file mode 100644 index 0000000000..984aff57de --- /dev/null +++ b/schema/crdb/27.0.0/up09.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.update_status; diff --git a/schema/crdb/27.0.0/up10.sql b/schema/crdb/27.0.0/up10.sql new file mode 100644 index 0000000000..ddb13ca1c0 --- /dev/null +++ b/schema/crdb/27.0.0/up10.sql @@ -0,0 +1,33 @@ +-- Describes a single uploaded TUF repo. +-- +-- Identified by both a random uuid and its SHA256 hash. The hash could be the +-- primary key, but it seems unnecessarily large and unwieldy. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo ( + id UUID PRIMARY KEY, + time_created TIMESTAMPTZ NOT NULL, + + sha256 STRING(64) NOT NULL, + + -- The version of the targets.json role that was used to generate the repo. + targets_role_version INT NOT NULL, + + -- The valid_until time for the repo. + valid_until TIMESTAMPTZ NOT NULL, + + -- The system version described in the TUF repo. + -- + -- This is the "true" primary key, but is not treated as such in the + -- database because we may want to change this format in the future. + -- Re-doing primary keys is annoying. + -- + -- Because the system version is embedded in the repo's artifacts.json, + -- each system version is associated with exactly one checksum. + system_version STRING(64) NOT NULL, + + -- For debugging only: + -- Filename provided by the user. + file_name TEXT NOT NULL, + + CONSTRAINT unique_checksum UNIQUE (sha256), + CONSTRAINT unique_system_version UNIQUE (system_version) +); diff --git a/schema/crdb/27.0.0/up11.sql b/schema/crdb/27.0.0/up11.sql new file mode 100644 index 0000000000..e0e36a51d7 --- /dev/null +++ b/schema/crdb/27.0.0/up11.sql @@ -0,0 +1,23 @@ +-- Describes an individual artifact from an uploaded TUF repo. +-- +-- In the future, this may also be used to describe artifacts that are fetched +-- from a remote TUF repo, but that requires some additional design work. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_artifact ( + name STRING(63) NOT NULL, + version STRING(63) NOT NULL, + -- This used to be an enum but is now a string, because it can represent + -- artifact kinds currently unknown to a particular version of Nexus as + -- well. + kind STRING(63) NOT NULL, + + -- The time this artifact was first recorded. + time_created TIMESTAMPTZ NOT NULL, + + -- The SHA256 hash of the artifact, typically obtained from the TUF + -- targets.json (and validated at extract time). + sha256 STRING(64) NOT NULL, + -- The length of the artifact, in bytes. + artifact_size INT8 NOT NULL, + + PRIMARY KEY (name, version, kind) +); diff --git a/schema/crdb/27.0.0/up12.sql b/schema/crdb/27.0.0/up12.sql new file mode 100644 index 0000000000..9c1ffb0de4 --- /dev/null +++ b/schema/crdb/27.0.0/up12.sql @@ -0,0 +1,21 @@ +-- Reflects that a particular artifact was provided by a particular TUF repo. 
+-- This is a many-many mapping. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo_artifact ( + tuf_repo_id UUID NOT NULL, + tuf_artifact_name STRING(63) NOT NULL, + tuf_artifact_version STRING(63) NOT NULL, + tuf_artifact_kind STRING(63) NOT NULL, + + /* + For the primary key, this definition uses the natural key rather than a + smaller surrogate key (UUID). That's because with CockroachDB the most + important factor in selecting a primary key is the ability to distribute + well. In this case, the first element of the primary key is the tuf_repo_id, + which is a random UUID. + + For more, see https://www.cockroachlabs.com/blog/how-to-choose-a-primary-key/. + */ + PRIMARY KEY ( + tuf_repo_id, tuf_artifact_name, tuf_artifact_version, tuf_artifact_kind + ) +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 79a43d3c89..c91bb669a9 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -1955,184 +1955,84 @@ CREATE INDEX IF NOT EXISTS lookup_console_by_silo_user ON omicron.public.console /*******************************************************************/ -CREATE TYPE IF NOT EXISTS omicron.public.update_artifact_kind AS ENUM ( - -- Sled artifacts - 'gimlet_sp', - 'gimlet_rot', - 'host', - 'trampoline', - 'control_plane', - - -- PSC artifacts - 'psc_sp', - 'psc_rot', - - -- Switch artifacts - 'switch_sp', - 'switch_rot' -); - -CREATE TABLE IF NOT EXISTS omicron.public.update_artifact ( - name STRING(63) NOT NULL, - version STRING(63) NOT NULL, - kind omicron.public.update_artifact_kind NOT NULL, - - /* the version of the targets.json role this came from */ - targets_role_version INT NOT NULL, - - /* when the metadata this artifact was cached from expires */ - valid_until TIMESTAMPTZ NOT NULL, - - /* data about the target from the targets.json role */ - target_name STRING(512) NOT NULL, - target_sha256 STRING(64) NOT NULL, - target_length INT NOT NULL, - - PRIMARY KEY (name, version, kind) -); - -/* This index is used to quickly find outdated artifacts. */ -CREATE INDEX IF NOT EXISTS lookup_artifact_by_targets_role_version ON omicron.public.update_artifact ( - targets_role_version -); - -/* - * System updates - */ -CREATE TABLE IF NOT EXISTS omicron.public.system_update ( - /* Identity metadata (asset) */ - id UUID PRIMARY KEY, - time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - - -- Because the version is unique, it could be the PK, but that would make - -- this resource different from every other resource for little benefit. - - -- Unique semver version - version STRING(64) NOT NULL -- TODO: length -); - -CREATE UNIQUE INDEX IF NOT EXISTS lookup_update_by_version ON omicron.public.system_update ( - version -); - - -CREATE TYPE IF NOT EXISTS omicron.public.updateable_component_type AS ENUM ( - 'bootloader_for_rot', - 'bootloader_for_sp', - 'bootloader_for_host_proc', - 'hubris_for_psc_rot', - 'hubris_for_psc_sp', - 'hubris_for_sidecar_rot', - 'hubris_for_sidecar_sp', - 'hubris_for_gimlet_rot', - 'hubris_for_gimlet_sp', - 'helios_host_phase_1', - 'helios_host_phase_2', - 'host_omicron' -); - -/* - * Component updates. Associated with at least one system_update through - * system_update_component_update. - */ -CREATE TABLE IF NOT EXISTS omicron.public.component_update ( - /* Identity metadata (asset) */ +-- Describes a single uploaded TUF repo. +-- +-- Identified by both a random uuid and its SHA256 hash. The hash could be the +-- primary key, but it seems unnecessarily large and unwieldy. 
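+-- (Illustrative only, not part of the schema: the unique constraints on
+-- sha256 and system_version defined below make either lookup single-row,
+-- e.g.
+--
+--   SELECT id FROM omicron.public.tuf_repo WHERE system_version = '1.0.0';
+--
+-- with '1.0.0' standing in for any uploaded system version.)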
+CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo ( id UUID PRIMARY KEY, time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - -- On component updates there's no device ID because the update can apply to - -- multiple instances of a given device kind + sha256 STRING(64) NOT NULL, - -- The *system* update version associated with this version (this is confusing, will rename) - version STRING(64) NOT NULL, -- TODO: length - -- TODO: add component update version to component_update + -- The version of the targets.json role that was used to generate the repo. + targets_role_version INT NOT NULL, - component_type omicron.public.updateable_component_type NOT NULL -); + -- The valid_until time for the repo. + valid_until TIMESTAMPTZ NOT NULL, --- version is unique per component type -CREATE UNIQUE INDEX IF NOT EXISTS lookup_component_by_type_and_version ON omicron.public.component_update ( - component_type, version -); + -- The system version described in the TUF repo. + -- + -- This is the "true" primary key, but is not treated as such in the + -- database because we may want to change this format in the future. + -- Re-doing primary keys is annoying. + -- + -- Because the system version is embedded in the repo's artifacts.json, + -- each system version is associated with exactly one checksum. + system_version STRING(64) NOT NULL, -/* - * Associate system updates with component updates. Not done with a - * system_update_id field on component_update because the same component update - * may be part of more than one system update. - */ -CREATE TABLE IF NOT EXISTS omicron.public.system_update_component_update ( - system_update_id UUID NOT NULL, - component_update_id UUID NOT NULL, + -- For debugging only: + -- Filename provided by the user. + file_name TEXT NOT NULL, - PRIMARY KEY (system_update_id, component_update_id) + CONSTRAINT unique_checksum UNIQUE (sha256), + CONSTRAINT unique_system_version UNIQUE (system_version) ); --- For now, the plan is to treat stopped, failed, completed as sub-cases of --- "steady" described by a "reason". But reason is not implemented yet. --- Obviously this could be a boolean, but boolean status fields never stay --- boolean for long. -CREATE TYPE IF NOT EXISTS omicron.public.update_status AS ENUM ( - 'updating', - 'steady' -); +-- Describes an individual artifact from an uploaded TUF repo. +-- +-- In the future, this may also be used to describe artifacts that are fetched +-- from a remote TUF repo, but that requires some additional design work. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_artifact ( + name STRING(63) NOT NULL, + version STRING(63) NOT NULL, + -- This used to be an enum but is now a string, because it can represent + -- artifact kinds currently unknown to a particular version of Nexus as + -- well. + kind STRING(63) NOT NULL, -/* - * Updateable components and their update status - */ -CREATE TABLE IF NOT EXISTS omicron.public.updateable_component ( - /* Identity metadata (asset) */ - id UUID PRIMARY KEY, + -- The time this artifact was first recorded. time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - - -- Free-form string that comes from the device - device_id STRING(40) NOT NULL, - - component_type omicron.public.updateable_component_type NOT NULL, - - -- The semver version of this component's own software - version STRING(64) NOT NULL, -- TODO: length - -- The version of the system update this component's software came from. 
- -- This may need to be nullable if we are registering components before we - -- know about system versions at all - system_version STRING(64) NOT NULL, -- TODO: length + -- The SHA256 hash of the artifact, typically obtained from the TUF + -- targets.json (and validated at extract time). + sha256 STRING(64) NOT NULL, + -- The length of the artifact, in bytes. + artifact_size INT8 NOT NULL, - status omicron.public.update_status NOT NULL - -- TODO: status reason for updateable_component -); - --- can't have two components of the same type with the same device ID -CREATE UNIQUE INDEX IF NOT EXISTS lookup_component_by_type_and_device ON omicron.public.updateable_component ( - component_type, device_id -); - -CREATE INDEX IF NOT EXISTS lookup_component_by_system_version ON omicron.public.updateable_component ( - system_version + PRIMARY KEY (name, version, kind) ); -/* - * System updates - */ -CREATE TABLE IF NOT EXISTS omicron.public.update_deployment ( - /* Identity metadata (asset) */ - id UUID PRIMARY KEY, - time_created TIMESTAMPTZ NOT NULL, - time_modified TIMESTAMPTZ NOT NULL, - - -- semver version of corresponding system update - -- TODO: this makes sense while version is the PK of system_update, but - -- if/when I change that back to ID, this needs to be the ID too - version STRING(64) NOT NULL, +-- Reflects that a particular artifact was provided by a particular TUF repo. +-- This is a many-many mapping. +CREATE TABLE IF NOT EXISTS omicron.public.tuf_repo_artifact ( + tuf_repo_id UUID NOT NULL, + tuf_artifact_name STRING(63) NOT NULL, + tuf_artifact_version STRING(63) NOT NULL, + tuf_artifact_kind STRING(63) NOT NULL, - status omicron.public.update_status NOT NULL - -- TODO: status reason for update_deployment -); - -CREATE INDEX IF NOT EXISTS lookup_deployment_by_creation on omicron.public.update_deployment ( - time_created + /* + For the primary key, this definition uses the natural key rather than a + smaller surrogate key (UUID). That's because with CockroachDB the most + important factor in selecting a primary key is the ability to distribute + well. In this case, the first element of the primary key is the tuf_repo_id, + which is a random UUID. + + For more, see https://www.cockroachlabs.com/blog/how-to-choose-a-primary-key/. + */ + PRIMARY KEY ( + tuf_repo_id, tuf_artifact_name, tuf_artifact_version, tuf_artifact_kind + ) ); /*******************************************************************/ @@ -3296,7 +3196,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '26.0.0', NULL) + ( TRUE, NOW(), NOW(), '27.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/sled-agent/src/updates.rs b/sled-agent/src/updates.rs index 6144fd9171..13a1ec7623 100644 --- a/sled-agent/src/updates.rs +++ b/sled-agent/src/updates.rs @@ -127,7 +127,7 @@ impl UpdateManager { let response = nexus .cpapi_artifact_download( - nexus_client::types::KnownArtifactKind::ControlPlane, + &KnownArtifactKind::ControlPlane.to_string(), &artifact.name, &artifact.version.clone().into(), ) diff --git a/tufaceous-lib/src/assemble/manifest.rs b/tufaceous-lib/src/assemble/manifest.rs index 3974aa76b2..8825327c1d 100644 --- a/tufaceous-lib/src/assemble/manifest.rs +++ b/tufaceous-lib/src/assemble/manifest.rs @@ -343,10 +343,66 @@ impl DeserializedManifest { .context("error deserializing manifest") } + pub fn to_toml(&self) -> Result { + toml::to_string(self).context("error serializing manifest to TOML") + } + + /// For fake manifests, applies a set of changes to them. 
+ /// + /// Intended for testing. + pub fn apply_tweaks(&mut self, tweaks: &[ManifestTweak]) -> Result<()> { + for tweak in tweaks { + match tweak { + ManifestTweak::SystemVersion(version) => { + self.system_version = version.clone(); + } + ManifestTweak::ArtifactVersion { kind, version } => { + let entries = + self.artifacts.get_mut(kind).with_context(|| { + format!( + "manifest does not have artifact kind \ + {kind}", + ) + })?; + for entry in entries { + entry.version = version.clone(); + } + } + ManifestTweak::ArtifactContents { kind, size_delta } => { + let entries = + self.artifacts.get_mut(kind).with_context(|| { + format!( + "manifest does not have artifact kind \ + {kind}", + ) + })?; + + for entry in entries { + entry.source.apply_size_delta(*size_delta)?; + } + } + } + } + + Ok(()) + } + /// Returns the fake manifest. pub fn fake() -> Self { Self::from_str(FAKE_MANIFEST_TOML).unwrap() } + + /// Returns a version of the fake manifest with a set of changes applied. + /// + /// This is primarily intended for testing. + pub fn tweaked_fake(tweaks: &[ManifestTweak]) -> Self { + let mut manifest = Self::fake(); + manifest + .apply_tweaks(tweaks) + .expect("builtin fake manifest should accept all tweaks"); + + manifest + } } #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] @@ -380,6 +436,39 @@ pub enum DeserializedArtifactSource { }, } +impl DeserializedArtifactSource { + fn apply_size_delta(&mut self, size_delta: i64) -> Result<()> { + match self { + DeserializedArtifactSource::File { .. } => { + bail!("cannot apply size delta to `file` source") + } + DeserializedArtifactSource::Fake { size } => { + *size = (*size).saturating_add_signed(size_delta); + Ok(()) + } + DeserializedArtifactSource::CompositeHost { phase_1, phase_2 } => { + phase_1.apply_size_delta(size_delta)?; + phase_2.apply_size_delta(size_delta)?; + Ok(()) + } + DeserializedArtifactSource::CompositeRot { + archive_a, + archive_b, + } => { + archive_a.apply_size_delta(size_delta)?; + archive_b.apply_size_delta(size_delta)?; + Ok(()) + } + DeserializedArtifactSource::CompositeControlPlane { zones } => { + for zone in zones { + zone.apply_size_delta(size_delta)?; + } + Ok(()) + } + } + } +} + #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] #[serde(tag = "kind", rename_all = "snake_case")] pub enum DeserializedFileArtifactSource { @@ -416,6 +505,18 @@ impl DeserializedFileArtifactSource { let entry = CompositeEntry { data: &data, mtime_source }; f(entry) } + + fn apply_size_delta(&mut self, size_delta: i64) -> Result<()> { + match self { + DeserializedFileArtifactSource::File { .. } => { + bail!("cannot apply size delta to `file` source") + } + DeserializedFileArtifactSource::Fake { size } => { + *size = (*size).saturating_add_signed(size_delta); + Ok(()) + } + } + } } #[derive(Clone, Debug, PartialEq, Eq, Deserialize, Serialize)] @@ -459,6 +560,30 @@ impl DeserializedControlPlaneZoneSource { let entry = CompositeEntry { data: &data, mtime_source }; f(name, entry) } + + fn apply_size_delta(&mut self, size_delta: i64) -> Result<()> { + match self { + DeserializedControlPlaneZoneSource::File { .. } => { + bail!("cannot apply size delta to `file` source") + } + DeserializedControlPlaneZoneSource::Fake { size, .. } => { + (*size) = (*size).saturating_add_signed(size_delta); + Ok(()) + } + } + } +} +/// A change to apply to a manifest. +#[derive(Clone, Debug)] +pub enum ManifestTweak { + /// Update the system version. + SystemVersion(SemverVersion), + + /// Update the versions for this artifact. 
+    ArtifactVersion { kind: KnownArtifactKind, version: SemverVersion },
+
+    /// Update the contents of this artifact (only supports changing the size).
+    ArtifactContents { kind: KnownArtifactKind, size_delta: i64 },
 }
 
 fn deserialize_byte_size<'de, D>(deserializer: D) -> Result<u64, D::Error>
diff --git a/update-common/Cargo.toml b/update-common/Cargo.toml
index cc2ee86232..37542baa8f 100644
--- a/update-common/Cargo.toml
+++ b/update-common/Cargo.toml
@@ -9,6 +9,7 @@ anyhow.workspace = true
 bytes.workspace = true
 camino.workspace = true
 camino-tempfile.workspace = true
+chrono.workspace = true
 debug-ignore.workspace = true
 display-error-chain.workspace = true
 dropshot.workspace = true
diff --git a/update-common/src/artifacts/artifacts_with_plan.rs b/update-common/src/artifacts/artifacts_with_plan.rs
index 9b579af29a..c2be69e82e 100644
--- a/update-common/src/artifacts/artifacts_with_plan.rs
+++ b/update-common/src/artifacts/artifacts_with_plan.rs
@@ -4,19 +4,28 @@
 use super::ExtractedArtifactDataHandle;
 use super::UpdatePlan;
+use super::UpdatePlanBuildOutput;
 use super::UpdatePlanBuilder;
 use crate::errors::RepositoryError;
 use anyhow::anyhow;
+use bytes::Bytes;
 use camino_tempfile::Utf8TempDir;
 use debug_ignore::DebugIgnore;
+use dropshot::HttpError;
+use futures::Stream;
+use futures::TryStreamExt;
+use omicron_common::api::external::TufRepoDescription;
+use omicron_common::api::external::TufRepoMeta;
 use omicron_common::update::ArtifactHash;
 use omicron_common::update::ArtifactHashId;
 use omicron_common::update::ArtifactId;
+use sha2::{Digest, Sha256};
 use slog::info;
 use slog::Logger;
 use std::collections::BTreeMap;
 use std::collections::HashMap;
 use std::io;
+use tokio::io::AsyncWriteExt;
 use tough::TargetName;
 use tufaceous_lib::ArchiveExtractor;
 use tufaceous_lib::OmicronRepo;
@@ -24,6 +33,9 @@ use tufaceous_lib::OmicronRepo;
 /// A collection of artifacts along with an update plan using those artifacts.
 #[derive(Debug)]
 pub struct ArtifactsWithPlan {
+    // A description of this repository.
+    description: TufRepoDescription,
+
     // Map of top-level artifact IDs (present in the TUF repo) to the actual
     // artifacts we're serving (e.g., a top-level RoT artifact will map to two
     // artifact hashes: one for each of the A and B images).
@@ -51,8 +63,65 @@ pub struct ArtifactsWithPlan {
 }
 
 impl ArtifactsWithPlan {
+    /// Creates a new `ArtifactsWithPlan` from the given stream of `Bytes`.
+    ///
+    /// This method reads the stream representing a TUF repo, and writes it to
+    /// a temporary file. Afterwards, it builds an `ArtifactsWithPlan` from the
+    /// contents of that file.
+    pub async fn from_stream(
+        body: impl Stream<Item = Result<Bytes, HttpError>> + Send,
+        file_name: Option<String>,
+        log: &Logger,
+    ) -> Result<Self, RepositoryError> {
+        // Create a temporary file to store the incoming archive.
+        let tempfile = tokio::task::spawn_blocking(|| {
+            camino_tempfile::tempfile().map_err(RepositoryError::TempFileCreate)
+        })
+        .await
+        .unwrap()?;
+        let mut tempfile =
+            tokio::io::BufWriter::new(tokio::fs::File::from_std(tempfile));
+
+        let mut body = std::pin::pin!(body);
+
+        // Stream the uploaded body into our tempfile.
+        let mut hasher = Sha256::new();
+        while let Some(bytes) = body
+            .try_next()
+            .await
+            .map_err(RepositoryError::ReadChunkFromStream)?
+        {
+            hasher.update(&bytes);
+            tempfile
+                .write_all(&bytes)
+                .await
+                .map_err(RepositoryError::TempFileWrite)?;
+        }
+
+        let repo_hash = ArtifactHash(hasher.finalize().into());
+
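+        // (Hashing each chunk as it is written means the repo's SHA256 is
+        // computed in the same pass as the copy to disk, so the archive never
+        // has to be re-read just to produce the checksum recorded in
+        // `TufRepoMeta::hash` below.)
+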
+        // Flush writes. We don't need to seek back to the beginning of the file
+        // because extracting the repository will do its own seeking as a part of
+        // unzipping this repo.
+        tempfile.flush().await.map_err(RepositoryError::TempFileFlush)?;
+
+        let tempfile = tempfile.into_inner().into_std().await;
+
+        let artifacts_with_plan = Self::from_zip(
+            io::BufReader::new(tempfile),
+            file_name,
+            repo_hash,
+            log,
+        )
+        .await?;
+
+        Ok(artifacts_with_plan)
+    }
+
     pub async fn from_zip<T>(
         zip_data: T,
+        file_name: Option<String>,
+        repo_hash: ArtifactHash,
         log: &Logger,
     ) -> Result<Self, RepositoryError>
     where
@@ -102,7 +171,7 @@ impl ArtifactsWithPlan {
         // `dir`, but we'll also unpack nested artifacts like the RoT dual A/B
         // archives.
         let mut builder =
-            UpdatePlanBuilder::new(artifacts.system_version, log)?;
+            UpdatePlanBuilder::new(artifacts.system_version.clone(), log)?;
 
         // Make a pass through each artifact in the repo. For each artifact, we
         // do one of the following:
@@ -124,9 +193,7 @@
         // priority - copying small SP artifacts is neglible compared to the
         // work we do to unpack host OS images.
 
-        let mut by_id = BTreeMap::new();
-        let mut by_hash = HashMap::new();
-        for artifact in artifacts.artifacts {
+        for artifact in &artifacts.artifacts {
             let target_name = TargetName::try_from(artifact.target.as_str())
                 .map_err(|error| RepositoryError::LocateTarget {
                     target: artifact.target.clone(),
@@ -167,21 +234,44 @@
             })?;
 
             builder
-                .add_artifact(
-                    artifact.into_id(),
-                    artifact_hash,
-                    stream,
-                    &mut by_id,
-                    &mut by_hash,
-                )
+                .add_artifact(artifact.clone().into_id(), artifact_hash, stream)
                 .await?;
         }
 
         // Ensure we know how to apply updates from this set of artifacts; we'll
         // remember the plan we create.
-        let artifacts = builder.build()?;
+        let UpdatePlanBuildOutput { plan, by_id, by_hash, artifacts_meta } =
+            builder.build()?;
 
-        Ok(Self { by_id, by_hash: by_hash.into(), plan: artifacts })
+        let tuf_repository = repository.repo();
+
+        let file_name = file_name.unwrap_or_else(|| {
+            // Just pick a reasonable-sounding file name if we don't have one.
+            format!("system-update-v{}.zip", artifacts.system_version)
+        });
+
+        let repo_meta = TufRepoMeta {
+            hash: repo_hash,
+            targets_role_version: tuf_repository.targets().signed.version.get(),
+            valid_until: tuf_repository
+                .root()
+                .signed
+                .expires
+                .min(tuf_repository.snapshot().signed.expires)
+                .min(tuf_repository.targets().signed.expires)
+                .min(tuf_repository.timestamp().signed.expires),
+            system_version: artifacts.system_version,
+            file_name,
+        };
+        let description =
+            TufRepoDescription { repo: repo_meta, artifacts: artifacts_meta };
+
+        Ok(Self { description, by_id, by_hash: by_hash.into(), plan })
+    }
+
+    /// Returns the `TufRepoDescription` corresponding to this TUF repo.
+    pub fn description(&self) -> &TufRepoDescription {
+        &self.description
     }
 
     pub fn by_id(&self) -> &BTreeMap<ArtifactId, Vec<ArtifactHashId>> {
@@ -233,13 +323,14 @@ where
 mod tests {
     use super::*;
     use anyhow::{Context, Result};
+    use camino::Utf8Path;
     use camino_tempfile::Utf8TempDir;
     use clap::Parser;
     use omicron_common::{
         api::internal::nexus::KnownArtifactKind, update::ArtifactKind,
     };
     use omicron_test_utils::dev::test_setup_log;
-    use std::collections::BTreeSet;
+    use std::{collections::BTreeSet, time::Duration};
 
     /// Test that `ArtifactsWithPlan` can extract the fake repository generated
     /// by tufaceous.
@@ -253,29 +344,22 @@
         let archive_path = temp_dir.path().join("archive.zip");
 
         // Create the archive.
-        let args = tufaceous::Args::try_parse_from([
-            "tufaceous",
-            "assemble",
-            "../tufaceous/manifests/fake.toml",
-            archive_path.as_str(),
-        ])
-        .context("error parsing args")?;
-
-        args.exec(&logctx.log)
-            .await
-            .context("error executing assemble command")?;
+        create_fake_archive(&logctx.log, &archive_path).await?;
 
         // Now check that it can be read by the archive extractor.
-        let zip_bytes = std::fs::File::open(&archive_path)
-            .context("error opening archive.zip")?;
-        let plan = ArtifactsWithPlan::from_zip(zip_bytes, &logctx.log)
-            .await
-            .context("error reading archive.zip")?;
+        let plan =
+            build_artifacts_with_plan(&logctx.log, &archive_path).await?;
 
         // Check that all known artifact kinds are present in the map.
         let by_id_kinds: BTreeSet<_> =
             plan.by_id().keys().map(|id| id.kind.clone()).collect();
         let by_hash_kinds: BTreeSet<_> =
             plan.by_hash().keys().map(|id| id.kind.clone()).collect();
+        let artifact_meta_kinds: BTreeSet<_> = plan
+            .description
+            .artifacts
+            .iter()
+            .map(|meta| meta.id.kind.clone())
+            .collect();
 
         // `by_id` should contain one entry for every `KnownArtifactKind`...
         let mut expected_kinds: BTreeSet<_> =
@@ -315,6 +399,10 @@
             expected_kinds, by_hash_kinds,
             "expected kinds match by_hash kinds"
         );
+        assert_eq!(
+            expected_kinds, artifact_meta_kinds,
+            "expected kinds match artifact_meta kinds"
+        );
 
         // Every value present in `by_id` should also be a key in `by_hash`.
         for (id, hash_ids) in plan.by_id() {
@@ -327,8 +415,81 @@
             }
         }
 
+        //
+
+        logctx.cleanup_successful();
+
+        Ok(())
+    }
+
+    /// Test that the archive generated by running `tufaceous assemble` twice
+    /// has the same artifacts and hashes.
+    #[tokio::test(flavor = "multi_thread", worker_threads = 2)]
+    async fn test_fake_archive_idempotent() -> Result<()> {
+        let logctx = test_setup_log("test_fake_archive_idempotent");
+        let temp_dir = Utf8TempDir::new()?;
+        let archive_path = temp_dir.path().join("archive1.zip");
+
+        // Create the archive and build a plan from it.
+        create_fake_archive(&logctx.log, &archive_path).await?;
+        let mut plan1 =
+            build_artifacts_with_plan(&logctx.log, &archive_path).await?;
+
+        // Add a 2-second delay to ensure that, if we bake in any second-based
+        // timestamps, they end up different from those in the first archive.
+        tokio::time::sleep(Duration::from_secs(2)).await;
+
+        let archive2_path = temp_dir.path().join("archive2.zip");
+        create_fake_archive(&logctx.log, &archive2_path).await?;
+        let mut plan2 =
+            build_artifacts_with_plan(&logctx.log, &archive2_path).await?;
+
+        // At the moment, the repo .zip itself doesn't match because it bakes
+        // in timestamps. However, the artifacts inside should match exactly.
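+        // (sort_artifacts puts both artifact lists into a canonical order, so
+        // the assertion below compares the sets of artifacts rather than the
+        // order tufaceous happened to emit them in.)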
+ plan1.description.sort_artifacts(); + plan2.description.sort_artifacts(); + + assert_eq!( + plan1.description.artifacts, plan2.description.artifacts, + "artifacts match" + ); + logctx.cleanup_successful(); Ok(()) } + + async fn create_fake_archive( + log: &slog::Logger, + archive_path: &Utf8Path, + ) -> Result<()> { + let args = tufaceous::Args::try_parse_from([ + "tufaceous", + "assemble", + "../tufaceous/manifests/fake.toml", + archive_path.as_str(), + ]) + .context("error parsing args")?; + + args.exec(log).await.context("error executing assemble command")?; + + Ok(()) + } + + async fn build_artifacts_with_plan( + log: &slog::Logger, + archive_path: &Utf8Path, + ) -> Result { + let zip_bytes = std::fs::File::open(&archive_path) + .context("error opening archive.zip")?; + // We could also compute the hash from the file here, but the repo hash + // doesn't matter for the test. + let repo_hash = ArtifactHash([0u8; 32]); + let plan = ArtifactsWithPlan::from_zip(zip_bytes, None, repo_hash, log) + .await + .with_context(|| format!("error reading {archive_path}"))?; + + Ok(plan) + } } diff --git a/update-common/src/artifacts/extracted_artifacts.rs b/update-common/src/artifacts/extracted_artifacts.rs index 06e0e5ec65..5ac4a3a395 100644 --- a/update-common/src/artifacts/extracted_artifacts.rs +++ b/update-common/src/artifacts/extracted_artifacts.rs @@ -106,7 +106,7 @@ pub struct ExtractedArtifacts { impl ExtractedArtifacts { pub fn new(log: &Logger) -> Result { let tempdir = camino_tempfile::Builder::new() - .prefix("wicketd-update-artifacts.") + .prefix("update-artifacts.") .tempdir() .map_err(RepositoryError::TempDirCreate)?; info!( @@ -189,7 +189,7 @@ impl ExtractedArtifacts { &self, ) -> Result { let file = NamedUtf8TempFile::new_in(self.tempdir.path()).map_err( - |error| RepositoryError::TempFileCreate { + |error| RepositoryError::NamedTempFileCreate { path: self.tempdir.path().to_owned(), error, }, diff --git a/update-common/src/artifacts/update_plan.rs b/update-common/src/artifacts/update_plan.rs index 7704d5fe8a..c5b171d648 100644 --- a/update-common/src/artifacts/update_plan.rs +++ b/update-common/src/artifacts/update_plan.rs @@ -2,7 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -//! Constructor for the `UpdatePlan` wicketd uses to drive sled mupdates. +//! Constructor for the `UpdatePlan` wicketd and Nexus use to drive sled +//! mupdates. //! //! This is a "plan" in name only: it is a strict list of which artifacts to //! apply to which components; the ordering and application of the plan lives @@ -20,6 +21,7 @@ use futures::StreamExt; use futures::TryStreamExt; use hubtools::RawHubrisArchive; use omicron_common::api::external::SemverVersion; +use omicron_common::api::external::TufArtifactMeta; use omicron_common::api::internal::nexus::KnownArtifactKind; use omicron_common::update::ArtifactHash; use omicron_common::update::ArtifactHashId; @@ -107,6 +109,11 @@ pub struct UpdatePlanBuilder<'a> { host_phase_2_hash: Option, control_plane_hash: Option, + // The by_id and by_hash maps, and metadata, used in `ArtifactsWithPlan`. 
+ by_id: BTreeMap>, + by_hash: HashMap, + artifacts_meta: Vec, + // extra fields we use to build the plan extracted_artifacts: ExtractedArtifacts, log: &'a Logger, @@ -135,30 +142,27 @@ impl<'a> UpdatePlanBuilder<'a> { host_phase_2_hash: None, control_plane_hash: None, + by_id: BTreeMap::new(), + by_hash: HashMap::new(), + artifacts_meta: Vec::new(), + extracted_artifacts, log, }) } + /// Adds an artifact with these contents to the by_id and by_hash maps. pub async fn add_artifact( &mut self, artifact_id: ArtifactId, artifact_hash: ArtifactHash, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { // If we don't know this artifact kind, we'll still serve it up by hash, // but we don't do any further processing on it. let Some(artifact_kind) = artifact_id.kind.to_known() else { return self - .add_unknown_artifact( - artifact_id, - artifact_hash, - stream, - by_id, - by_hash, - ) + .add_unknown_artifact(artifact_id, artifact_hash, stream) .await; }; @@ -175,39 +179,25 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_kind, artifact_hash, stream, - by_id, - by_hash, ) .await } KnownArtifactKind::GimletRot | KnownArtifactKind::PscRot | KnownArtifactKind::SwitchRot => { - self.add_rot_artifact( - artifact_id, - artifact_kind, - stream, - by_id, - by_hash, - ) - .await + self.add_rot_artifact(artifact_id, artifact_kind, stream).await } KnownArtifactKind::Host => { - self.add_host_artifact(artifact_id, stream, by_id, by_hash) + self.add_host_artifact(artifact_id, stream) + } + KnownArtifactKind::Trampoline => { + self.add_trampoline_artifact(artifact_id, stream) } - KnownArtifactKind::Trampoline => self.add_trampoline_artifact( - artifact_id, - stream, - by_id, - by_hash, - ), KnownArtifactKind::ControlPlane => { self.add_control_plane_artifact( artifact_id, artifact_hash, stream, - by_id, - by_hash, ) .await } @@ -220,8 +210,6 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_kind: KnownArtifactKind, artifact_hash: ArtifactHash, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { let sp_map = match artifact_kind { KnownArtifactKind::GimletSp => &mut self.gimlet_sp, @@ -276,10 +264,8 @@ impl<'a> UpdatePlanBuilder<'a> { data: data.clone(), }); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, data, artifact_kind.into(), self.log, @@ -293,8 +279,6 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_id: ArtifactId, artifact_kind: KnownArtifactKind, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { let (rot_a, rot_a_kind, rot_b, rot_b_kind) = match artifact_kind { KnownArtifactKind::GimletRot => ( @@ -353,18 +337,14 @@ impl<'a> UpdatePlanBuilder<'a> { rot_a.push(ArtifactIdData { id: rot_a_id, data: rot_a_data.clone() }); rot_b.push(ArtifactIdData { id: rot_b_id, data: rot_b_data.clone() }); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id.clone(), - by_id, - by_hash, rot_a_data, rot_a_kind, self.log, )?; - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, rot_b_data, rot_b_kind, self.log, @@ -377,8 +357,6 @@ impl<'a> UpdatePlanBuilder<'a> { &mut self, artifact_id: ArtifactId, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { if self.host_phase_1.is_some() || self.host_phase_2_hash.is_some() { return Err(RepositoryError::DuplicateArtifactKind( @@ -407,18 
+385,14 @@ impl<'a> UpdatePlanBuilder<'a> { Some(ArtifactIdData { id: phase_1_id, data: phase_1_data.clone() }); self.host_phase_2_hash = Some(phase_2_data.hash()); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id.clone(), - by_id, - by_hash, phase_1_data, ArtifactKind::HOST_PHASE_1, self.log, )?; - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, phase_2_data, ArtifactKind::HOST_PHASE_2, self.log, @@ -431,8 +405,6 @@ impl<'a> UpdatePlanBuilder<'a> { &mut self, artifact_id: ArtifactId, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { if self.trampoline_phase_1.is_some() || self.trampoline_phase_2.is_some() @@ -470,18 +442,14 @@ impl<'a> UpdatePlanBuilder<'a> { self.trampoline_phase_2 = Some(ArtifactIdData { id: phase_2_id, data: phase_2_data.clone() }); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id.clone(), - by_id, - by_hash, phase_1_data, ArtifactKind::TRAMPOLINE_PHASE_1, self.log, )?; - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, phase_2_data, ArtifactKind::TRAMPOLINE_PHASE_2, self.log, @@ -495,8 +463,6 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_id: ArtifactId, artifact_hash: ArtifactHash, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { if self.control_plane_hash.is_some() { return Err(RepositoryError::DuplicateArtifactKind( @@ -516,10 +482,8 @@ impl<'a> UpdatePlanBuilder<'a> { self.control_plane_hash = Some(data.hash()); - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, data, KnownArtifactKind::ControlPlane.into(), self.log, @@ -533,8 +497,6 @@ impl<'a> UpdatePlanBuilder<'a> { artifact_id: ArtifactId, artifact_hash: ArtifactHash, stream: impl Stream> + Send, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, ) -> Result<(), RepositoryError> { let artifact_kind = artifact_id.kind.clone(); let artifact_hash_id = @@ -543,10 +505,8 @@ impl<'a> UpdatePlanBuilder<'a> { let data = self.extracted_artifacts.store(artifact_hash_id, stream).await?; - record_extracted_artifact( + self.record_extracted_artifact( artifact_id, - by_id, - by_hash, data, artifact_kind, self.log, @@ -660,7 +620,62 @@ impl<'a> UpdatePlanBuilder<'a> { Ok((image1, image2)) } - pub fn build(self) -> Result { + // Record an artifact in `by_id` and `by_hash`, or fail if either already has an + // entry for this id/hash. 
+ fn record_extracted_artifact( + &mut self, + tuf_repo_artifact_id: ArtifactId, + data: ExtractedArtifactDataHandle, + data_kind: ArtifactKind, + log: &Logger, + ) -> Result<(), RepositoryError> { + use std::collections::hash_map::Entry; + + let artifact_hash_id = + ArtifactHashId { kind: data_kind.clone(), hash: data.hash() }; + + let by_hash_slot = match self.by_hash.entry(artifact_hash_id) { + Entry::Occupied(slot) => { + return Err(RepositoryError::DuplicateHashEntry( + slot.key().clone(), + )); + } + Entry::Vacant(slot) => slot, + }; + + info!( + log, "added artifact"; + "name" => %tuf_repo_artifact_id.name, + "kind" => %by_hash_slot.key().kind, + "version" => %tuf_repo_artifact_id.version, + "hash" => %by_hash_slot.key().hash, + "length" => data.file_size(), + ); + + self.by_id + .entry(tuf_repo_artifact_id.clone()) + .or_default() + .push(by_hash_slot.key().clone()); + + // In the artifacts_meta document, use the expanded artifact ID + // (artifact kind = data_kind, and name and version from + // tuf_repo_artifact_id). + let artifacts_meta_id = ArtifactId { + name: tuf_repo_artifact_id.name, + version: tuf_repo_artifact_id.version, + kind: data_kind, + }; + self.artifacts_meta.push(TufArtifactMeta { + id: artifacts_meta_id, + hash: data.hash(), + size: data.file_size() as u64, + }); + by_hash_slot.insert(data); + + Ok(()) + } + + pub fn build(self) -> Result { // Ensure our multi-board-supporting kinds have at least one board // present. for (kind, no_artifacts) in [ @@ -738,7 +753,7 @@ impl<'a> UpdatePlanBuilder<'a> { } } - Ok(UpdatePlan { + let plan = UpdatePlan { system_version: self.system_version, gimlet_sp: self.gimlet_sp, // checked above gimlet_rot_a: self.gimlet_rot_a, // checked above @@ -770,10 +785,24 @@ impl<'a> UpdatePlanBuilder<'a> { KnownArtifactKind::ControlPlane, ), )?, + }; + Ok(UpdatePlanBuildOutput { + plan, + by_id: self.by_id, + by_hash: self.by_hash, + artifacts_meta: self.artifacts_meta, }) } } +/// The output of [`UpdatePlanBuilder::build`]. +pub struct UpdatePlanBuildOutput { + pub plan: UpdatePlan, + pub by_id: BTreeMap>, + pub by_hash: HashMap, + pub artifacts_meta: Vec, +} + // This function takes and returns `id` to avoid an unnecessary clone; `id` will // be present in either the Ok tuple or the error. fn read_hubris_board_from_archive( @@ -807,48 +836,6 @@ fn read_hubris_board_from_archive( Ok((id, Board(board.to_string()))) } -// Record an artifact in `by_id` and `by_hash`, or fail if either already has an -// entry for this id/hash. 
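`record_extracted_artifact` above is now the single point that keeps `by_id` and `by_hash` consistent: a duplicate (kind, hash) pair is rejected, while one repo-level ID may fan out to several extracted artifacts. A hedged sketch of that double-index bookkeeping, with deliberately simplified key and value types (the real maps hold `ArtifactHashId` keys and extracted-data handles):

```rust
// Sketch of the two-map invariant: one extracted blob per (kind, hash), and
// one repo-level ID mapping to possibly several extracted artifacts. The
// tuple key types are simplifications for illustration.
use std::collections::{BTreeMap, HashMap};

type Id = (String, String);       // (name, version) stand-in for ArtifactId
type HashId = (String, [u8; 32]); // (kind, hash) stand-in for ArtifactHashId

fn record(
    by_id: &mut BTreeMap<Id, Vec<HashId>>,
    by_hash: &mut HashMap<HashId, Vec<u8>>,
    id: Id,
    hash_id: HashId,
    data: Vec<u8>,
) -> Result<(), String> {
    // A duplicate (kind, hash) pair is a repository bug, not something to
    // silently overwrite.
    if by_hash.contains_key(&hash_id) {
        return Err(format!("duplicate hash entry for kind {}", hash_id.0));
    }
    // e.g. an RoT archive contributes both its A and B images under one ID.
    by_id.entry(id).or_default().push(hash_id.clone());
    by_hash.insert(hash_id, data);
    Ok(())
}
```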
-fn record_extracted_artifact( - tuf_repo_artifact_id: ArtifactId, - by_id: &mut BTreeMap>, - by_hash: &mut HashMap, - data: ExtractedArtifactDataHandle, - data_kind: ArtifactKind, - log: &Logger, -) -> Result<(), RepositoryError> { - use std::collections::hash_map::Entry; - - let artifact_hash_id = - ArtifactHashId { kind: data_kind, hash: data.hash() }; - - let by_hash_slot = match by_hash.entry(artifact_hash_id) { - Entry::Occupied(slot) => { - return Err(RepositoryError::DuplicateHashEntry( - slot.key().clone(), - )); - } - Entry::Vacant(slot) => slot, - }; - - info!( - log, "added artifact"; - "name" => %tuf_repo_artifact_id.name, - "kind" => %by_hash_slot.key().kind, - "version" => %tuf_repo_artifact_id.version, - "hash" => %by_hash_slot.key().hash, - "length" => data.file_size(), - ); - - by_id - .entry(tuf_repo_artifact_id) - .or_default() - .push(by_hash_slot.key().clone()); - by_hash_slot.insert(data); - - Ok(()) -} - #[cfg(test)] mod tests { use std::collections::BTreeSet; @@ -962,13 +949,11 @@ mod tests { let logctx = test_setup_log("test_update_plan_from_artifacts"); - let mut by_id = BTreeMap::new(); - let mut by_hash = HashMap::new(); let mut plan_builder = UpdatePlanBuilder::new("0.0.0".parse().unwrap(), &logctx.log) .unwrap(); - // Add a couple artifacts with kinds wicketd doesn't understand; it + // Add a couple artifacts with kinds wicketd/nexus don't understand; it // should still ingest and serve them. let mut expected_unknown_artifacts = BTreeSet::new(); @@ -986,8 +971,6 @@ mod tests { id, hash, futures::stream::iter([Ok(Bytes::from(data))]), - &mut by_id, - &mut by_hash, ) .await .unwrap(); @@ -1009,8 +992,6 @@ mod tests { id, hash, futures::stream::iter([Ok(Bytes::from(data))]), - &mut by_id, - &mut by_hash, ) .await .unwrap(); @@ -1038,8 +1019,6 @@ mod tests { id, hash, futures::stream::iter([Ok(Bytes::from(data))]), - &mut by_id, - &mut by_hash, ) .await .unwrap(); @@ -1067,8 +1046,6 @@ mod tests { id, hash, futures::stream::iter([Ok(data.clone())]), - &mut by_id, - &mut by_hash, ) .await .unwrap(); @@ -1095,14 +1072,13 @@ mod tests { id, hash, futures::stream::iter([Ok(data.clone())]), - &mut by_id, - &mut by_hash, ) .await .unwrap(); } - let plan = plan_builder.build().unwrap(); + let UpdatePlanBuildOutput { plan, by_id, .. } = + plan_builder.build().unwrap(); assert_eq!(plan.gimlet_sp.len(), 2); assert_eq!(plan.psc_sp.len(), 2); diff --git a/update-common/src/errors.rs b/update-common/src/errors.rs index 5fba43b944..4d992e70b2 100644 --- a/update-common/src/errors.rs +++ b/update-common/src/errors.rs @@ -21,8 +21,20 @@ pub enum RepositoryError { #[error("error creating temporary directory")] TempDirCreate(#[source] std::io::Error), + #[error("error creating temporary file")] + TempFileCreate(#[source] std::io::Error), + + #[error("error reading chunk off of input stream")] + ReadChunkFromStream(#[source] HttpError), + + #[error("error writing to temporary file")] + TempFileWrite(#[source] std::io::Error), + + #[error("error flushing temporary file")] + TempFileFlush(#[source] std::io::Error), + #[error("error creating temporary file in {path}")] - TempFileCreate { + NamedTempFileCreate { path: Utf8PathBuf, #[source] error: std::io::Error, @@ -138,10 +150,21 @@ impl RepositoryError { // Errors we had that are unrelated to the contents of a repository // uploaded by a client. RepositoryError::TempDirCreate(_) - | RepositoryError::TempFileCreate { .. 
} => { + | RepositoryError::TempFileCreate(_) + | RepositoryError::TempFileWrite(_) + | RepositoryError::TempFileFlush(_) + | RepositoryError::NamedTempFileCreate { .. } => { HttpError::for_unavail(None, message) } + // This error is bubbled up. + RepositoryError::ReadChunkFromStream(error) => HttpError { + status_code: error.status_code, + error_code: error.error_code.clone(), + external_message: error.external_message.clone(), + internal_message: error.internal_message.clone(), + }, + // Errors that are definitely caused by bad repository contents. RepositoryError::DuplicateArtifactKind(_) | RepositoryError::LocateTarget { .. } diff --git a/wicketd/src/artifacts/store.rs b/wicketd/src/artifacts/store.rs index a5f24993a8..01543432a2 100644 --- a/wicketd/src/artifacts/store.rs +++ b/wicketd/src/artifacts/store.rs @@ -3,11 +3,9 @@ // file, You can obtain one at https://mozilla.org/MPL/2.0/. use crate::http_entrypoints::InstallableArtifacts; -use dropshot::HttpError; use omicron_common::api::external::SemverVersion; use omicron_common::update::ArtifactHashId; use slog::Logger; -use std::io; use std::sync::Arc; use std::sync::Mutex; use update_common::artifacts::ArtifactsWithPlan; @@ -32,22 +30,12 @@ impl WicketdArtifactStore { Self { log, artifacts_with_plan: Default::default() } } - pub(crate) async fn put_repository( + pub(crate) fn set_artifacts_with_plan( &self, - data: T, - ) -> Result<(), HttpError> - where - T: io::Read + io::Seek + Send + 'static, - { - slog::debug!(self.log, "adding repository"); - - let log = self.log.clone(); - let new_artifacts = ArtifactsWithPlan::from_zip(data, &log) - .await - .map_err(|error| error.to_http_error())?; - self.replace(new_artifacts); - - Ok(()) + artifacts_with_plan: ArtifactsWithPlan, + ) { + slog::debug!(self.log, "setting artifacts_with_plan"); + self.replace(artifacts_with_plan); } pub(crate) fn system_version_and_artifact_ids( diff --git a/wicketd/src/http_entrypoints.rs b/wicketd/src/http_entrypoints.rs index dbd3e31072..9c1740679f 100644 --- a/wicketd/src/http_entrypoints.rs +++ b/wicketd/src/http_entrypoints.rs @@ -25,7 +25,6 @@ use dropshot::Path; use dropshot::RequestContext; use dropshot::StreamingBody; use dropshot::TypedBody; -use futures::TryStreamExt; use gateway_client::types::IgnitionCommand; use gateway_client::types::SpIdentifier; use gateway_client::types::SpType; @@ -44,11 +43,9 @@ use sled_hardware::Baseboard; use slog::o; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::io; use std::net::IpAddr; use std::net::Ipv6Addr; use std::time::Duration; -use tokio::io::AsyncWriteExt; use wicket_common::rack_setup::PutRssUserConfigInsensitive; use wicket_common::update_events::EventReport; use wicket_common::WICKETD_TIMEOUT; @@ -570,44 +567,7 @@ async fn put_repository( ) -> Result { let rqctx = rqctx.context(); - // Create a temporary file to store the incoming archive. - let tempfile = tokio::task::spawn_blocking(|| { - camino_tempfile::tempfile().map_err(|err| { - HttpError::for_unavail( - None, - format!("failed to create temp file: {err}"), - ) - }) - }) - .await - .unwrap()?; - let mut tempfile = - tokio::io::BufWriter::new(tokio::fs::File::from_std(tempfile)); - - let mut body = std::pin::pin!(body.into_stream()); - - // Stream the uploaded body into our tempfile. - while let Some(bytes) = body.try_next().await? { - tempfile.write_all(&bytes).await.map_err(|err| { - HttpError::for_unavail( - None, - format!("failed to write to temp file: {err}"), - ) - })?; - } - - // Flush writes. 
We don't need to seek back to the beginning of the file - // because extracting the repository will do its own seeking as a part of - // unzipping this repo. - tempfile.flush().await.map_err(|err| { - HttpError::for_unavail( - None, - format!("failed to flush temp file: {err}"), - ) - })?; - - let tempfile = tempfile.into_inner().into_std().await; - rqctx.update_tracker.put_repository(io::BufReader::new(tempfile)).await?; + rqctx.update_tracker.put_repository(body.into_stream()).await?; Ok(HttpResponseUpdatedNoContent()) } diff --git a/wicketd/src/update_tracker.rs b/wicketd/src/update_tracker.rs index 823a7964de..eec3ee5868 100644 --- a/wicketd/src/update_tracker.rs +++ b/wicketd/src/update_tracker.rs @@ -18,8 +18,10 @@ use anyhow::bail; use anyhow::ensure; use anyhow::Context; use base64::Engine; +use bytes::Bytes; use display_error_chain::DisplayErrorChain; use dropshot::HttpError; +use futures::Stream; use futures::TryFutureExt; use gateway_client::types::HostPhase2Progress; use gateway_client::types::HostPhase2RecoveryImageId; @@ -48,7 +50,6 @@ use slog::Logger; use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::collections::BTreeSet; -use std::io; use std::net::SocketAddrV6; use std::sync::atomic::AtomicBool; use std::sync::Arc; @@ -64,6 +65,7 @@ use tokio::sync::Mutex; use tokio::task::JoinHandle; use tokio_util::io::StreamReader; use update_common::artifacts::ArtifactIdData; +use update_common::artifacts::ArtifactsWithPlan; use update_common::artifacts::UpdatePlan; use update_engine::events::ProgressUnits; use update_engine::AbortHandle; @@ -342,15 +344,21 @@ impl UpdateTracker { } /// Updates the repository stored inside the update tracker. - pub(crate) async fn put_repository( + pub(crate) async fn put_repository( &self, - data: T, - ) -> Result<(), HttpError> - where - T: io::Read + io::Seek + Send + 'static, - { + stream: impl Stream> + Send + 'static, + ) -> Result<(), HttpError> { + // Build the ArtifactsWithPlan from the stream. + let artifacts_with_plan = ArtifactsWithPlan::from_stream( + stream, + // We don't have a good file name here because file contents are + // uploaded over stdin, so let ArtifactsWithPlan pick the name. + None, &self.log, + ) + .await + .map_err(|error| error.to_http_error())?; let mut update_data = self.sp_update_data.lock().await; - update_data.put_repository(data).await + update_data.set_artifacts_with_plan(artifacts_with_plan).await } /// Gets a list of artifacts stored in the update repository. @@ -725,10 +733,10 @@ impl UpdateTrackerData { } } - async fn put_repository(&mut self, data: T) -> Result<(), HttpError> - where - T: io::Read + io::Seek + Send + 'static, - { + async fn set_artifacts_with_plan( + &mut self, + artifacts_with_plan: ArtifactsWithPlan, + ) -> Result<(), HttpError> { // Are there any updates currently running? If so, then reject the new // repository. let running_sps = self @@ -745,8 +753,8 @@ impl UpdateTrackerData { )); } - // Put the repository into the artifact store. - self.artifact_store.put_repository(data).await?; + // Set the new artifacts_with_plan. + self.artifact_store.set_artifacts_with_plan(artifacts_with_plan); // Reset all running data: a new repository means starting afresh. 
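With the tempfile handling moved out of the endpoint, `put_repository` now just forwards the request body's byte stream. A self-contained sketch of the drain-then-seek flow that `from_stream` encapsulates, using an in-memory `Cursor` in place of the patch's tempfile:

```rust
// Illustrative only: collect an upload stream into a seekable buffer before
// zip extraction. The in-memory Cursor stands in for the real tempfile.
use std::io::Cursor;

use bytes::Bytes;
use futures::{Stream, StreamExt};

async fn drain_to_seekable<S, E>(mut stream: S) -> Result<Cursor<Vec<u8>>, E>
where
    S: Stream<Item = Result<Bytes, E>> + Unpin,
{
    let mut buf = Vec::new();
    while let Some(chunk) = stream.next().await {
        // Surface each chunk's error as-is, analogous to the new
        // `ReadChunkFromStream` variant wrapping per-chunk failures.
        buf.extend_from_slice(&chunk?);
    }
    // No explicit rewind is needed: extraction does its own seeking.
    Ok(Cursor::new(buf))
}
```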
self.sp_update_data.clear(); From f7ad3153a175b253d73a34f2597266c32280d153 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 05:31:12 +0000 Subject: [PATCH 30/49] Update taiki-e/install-action digest to 9f9bf5e (#4893) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`cf2d7f1` -> `9f9bf5e`](https://togithub.com/taiki-e/install-action/compare/cf2d7f1...9f9bf5e) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. â™» **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 06da0395a1..46d09c0940 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@cf2d7f1118304815479579570ad3ec572fe94523 # v2 + uses: taiki-e/install-action@9f9bf5e8df111848fb25b8a97a361d8963025899 # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 2634aad9147972cc3e046663903b0404d121609e Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 08:15:59 +0000 Subject: [PATCH 31/49] Update Rust crate rustyline to v13 (#4897) --- Cargo.lock | 48 +++++++++++++++++++++++------------------------- Cargo.toml | 2 +- 2 files changed, 24 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c2f3e1a949..837d42fbc2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1003,13 +1003,11 @@ checksum = "cd7cc57abe963c6d3b9d8be5b06ba7c8957a930305ca90304f24ef040aa6f961" [[package]] name = "clipboard-win" -version = "4.5.0" +version = "5.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7191c27c2357d9b7ef96baac1773290d4ca63b24205b82a3fd8a0637afcf0362" +checksum = "c57002a5d9be777c1ef967e33674dac9ebd310d8893e4e3437b14d5f0f6372cc" dependencies = [ "error-code", - "str-buf", - "winapi", ] [[package]] @@ -2161,13 +2159,9 @@ dependencies = [ [[package]] name = "error-code" -version = "2.3.1" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64f18991e7bf11e7ffee451b5318b5c1a73c52d0d0ada6e5a3017c8c1ced6a21" -dependencies = [ - "libc", - "str-buf", -] +checksum = "281e452d3bad4005426416cdba5ccfd4f5c1280e10099e21db27f7c1c28347fc" [[package]] name = "expectorate" @@ -2215,6 +2209,17 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "fd-lock" +version = "4.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e5768da2206272c81ef0b5e951a41862938a6070da63bcea197899942d3b947" +dependencies = [ + "cfg-if", + 
"rustix 0.38.30", + "windows-sys 0.52.0", +] + [[package]] name = "ff" version = "0.13.0" @@ -4452,11 +4457,11 @@ dependencies = [ [[package]] name = "nix" -version = "0.26.4" +version = "0.27.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "598beaf3cc6fdd9a5dfb1630c2800c7acd31df7aaf0f565796fba2b53ca1af1b" +checksum = "2eb04e9c688eff1c89d72b407f168cf79bb9e867a9d3323ed6c01519eb9cc053" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.4.0", "cfg-if", "libc", ] @@ -6759,7 +6764,7 @@ checksum = "68f4e89a0f80909b3ca4bca9759ed37e4bfddb6f5d2ffb1b4ceb2b1638a3e1eb" dependencies = [ "chrono", "crossterm", - "fd-lock", + "fd-lock 3.0.13", "itertools 0.12.0", "nu-ansi-term", "serde", @@ -7337,21 +7342,20 @@ dependencies = [ [[package]] name = "rustyline" -version = "12.0.0" +version = "13.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "994eca4bca05c87e86e15d90fc7a91d1be64b4482b38cb2d27474568fe7c9db9" +checksum = "02a2d683a4ac90aeef5b1013933f6d977bd37d51ff3f4dad829d4931a7e6be86" dependencies = [ "bitflags 2.4.0", "cfg-if", "clipboard-win", - "fd-lock", + "fd-lock 4.0.2", "home", "libc", "log", "memchr", - "nix 0.26.4", + "nix 0.27.1", "radix_trie", - "scopeguard", "unicode-segmentation", "unicode-width", "utf8parse", @@ -8347,12 +8351,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "str-buf" -version = "1.0.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e08d8363704e6c71fc928674353e6b7c23dcea9d82d7012c8faf2a3a025f8d0" - [[package]] name = "string_cache" version = "0.8.7" diff --git a/Cargo.toml b/Cargo.toml index ed54ae8c6a..ba328fe612 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -324,7 +324,7 @@ rstest = "0.18.2" rustfmt-wrapper = "0.2" rustls = "0.22.2" rustls-pemfile = "2.0.0" -rustyline = "12.0.0" +rustyline = "13.0.0" samael = { version = "0.0.14", features = ["xmlsec"] } schemars = "0.8.16" secrecy = "0.8.0" From 98ab7e2bbbfc83f592ad6a3c0ca8afc48a81c9a9 Mon Sep 17 00:00:00 2001 From: bnaecker Date: Thu, 25 Jan 2024 10:52:27 -0800 Subject: [PATCH 32/49] Update progenitor from v0.4.0 -> v0.5.0 (#4874) --- Cargo.lock | 152 +++++++++--------- clients/dns-service-client/src/lib.rs | 6 +- common/src/api/external/error.rs | 49 ++---- .../app/sagas/switch_port_settings_apply.rs | 2 +- .../app/sagas/switch_port_settings_clear.rs | 2 +- openapi/bootstrap-agent.json | 4 +- openapi/nexus-internal.json | 23 +-- openapi/sled-agent.json | 7 +- openapi/wicketd.json | 22 ++- schema/rss-service-plan-v2.json | 4 + schema/rss-sled-plan.json | 4 +- sled-agent/src/instance.rs | 6 +- sled-agent/src/sim/http_entrypoints_pantry.rs | 9 ++ wicketd/src/preflight_check/uplink.rs | 9 +- workspace-hack/Cargo.toml | 8 +- 15 files changed, 161 insertions(+), 146 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 837d42fbc2..7ea3d2b96d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -174,7 +174,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -275,7 +275,7 @@ checksum = "5fd55a5ba1179988837d24ab4c7cc8ed6efdeff578ede0416b4225a5fca35bd0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -297,7 +297,7 @@ checksum = "16e62a023e7c117e27523144c5d2459f4397fcc3cab0085af8e2224f643a0193" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -308,7 +308,7 @@ checksum = "c980ee35e870bd1a4d2c8294d4c04d0499e67bca1e4b5cefcc693c2fa00caea9" dependencies = [ "proc-macro2", "quote", - "syn 
2.0.46", + "syn 2.0.48", ] [[package]] @@ -359,7 +359,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -496,7 +496,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.46", + "syn 2.0.48", "which", ] @@ -992,7 +992,7 @@ dependencies = [ "heck 0.4.1", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1416,7 +1416,7 @@ checksum = "83fdaf97f4804dcebfa5862639bc9ce4121e82140bec2a987ac5140294865b5b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1464,7 +1464,7 @@ dependencies = [ "proc-macro2", "quote", "strsim 0.10.0", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1486,7 +1486,7 @@ checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" dependencies = [ "darling_core 0.20.3", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1518,7 +1518,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1570,7 +1570,7 @@ dependencies = [ "proc-macro-error", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1603,7 +1603,7 @@ checksum = "5fe87ce4529967e0ba1dcf8450bab64d97dfd5010a6256187ffe2e43e6f0e049" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1623,7 +1623,7 @@ checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1710,7 +1710,7 @@ dependencies = [ "diesel_table_macro_syntax", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1719,7 +1719,7 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc5557efc453706fed5e4fa85006fe9817c224c3f480a34c7e5959fd700921c5" dependencies = [ - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -1962,7 +1962,7 @@ dependencies = [ "quote", "serde", "serde_tokenstream 0.2.0", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -2324,7 +2324,7 @@ checksum = "1a5c6c585bc94aaf2c7b51dd4c2ba22680844aba4c687be581871a6f518c5742" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -2441,7 +2441,7 @@ checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -3665,7 +3665,7 @@ version = "0.1.0" source = "git+https://github.com/oxidecomputer/opte?rev=1d29ef60a18179babfb44f0f7a3c2fe71034a2c1#1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" dependencies = [ "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -4077,7 +4077,7 @@ dependencies = [ "cfg-if", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -4404,7 +4404,7 @@ version = "0.1.0" dependencies = [ "omicron-workspace-hack", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -4557,7 +4557,7 @@ checksum = "9e6a0fd4f737c707bd9086cc16c925f294943eb62eb71499e9fd4cf71f8b9f4e" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -5283,7 +5283,7 @@ dependencies = [ "string_cache", "subtle", "syn 1.0.109", - "syn 2.0.46", + "syn 2.0.48", "time", "time-macros", "tokio", @@ -5397,7 +5397,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -5682,7 +5682,7 @@ dependencies = [ "omicron-workspace-hack", "proc-macro2", "quote", - "syn 
2.0.46", + "syn 2.0.48", ] [[package]] @@ -5834,7 +5834,7 @@ dependencies = [ "regex", "regex-syntax 0.7.5", "structmeta", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -5980,7 +5980,7 @@ dependencies = [ "pest_meta", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -6050,7 +6050,7 @@ checksum = "4359fd9c9171ec6e8c62926d6faaf553a8dc3f64e1507e76da7911b4f6a04405" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -6299,7 +6299,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -6347,17 +6347,17 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.74" +version = "1.0.78" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2de98502f212cfcea8d0bb305bd0f49d7ebdd75b64ba0a68f937d888f4e0d6db" +checksum = "e2422ad645d89c99f8f3e6b88a9fdeca7fabeac836b1002371c4367c8f984aae" dependencies = [ "unicode-ident", ] [[package]] name = "progenitor" -version = "0.4.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" dependencies = [ "progenitor-client", "progenitor-impl", @@ -6367,8 +6367,8 @@ dependencies = [ [[package]] name = "progenitor-client" -version = "0.4.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" dependencies = [ "bytes", "futures-core", @@ -6381,8 +6381,8 @@ dependencies = [ [[package]] name = "progenitor-impl" -version = "0.4.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" dependencies = [ "getopts", "heck 0.4.1", @@ -6395,7 +6395,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "syn 2.0.46", + "syn 2.0.48", "thiserror", "typify", "unicode-ident", @@ -6403,8 +6403,8 @@ dependencies = [ [[package]] name = "progenitor-macro" -version = "0.4.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#9339b57628e1e76b1d7131ef93a6c0db2ab0a762" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" dependencies = [ "openapiv3", "proc-macro2", @@ -6415,7 +6415,7 @@ dependencies = [ "serde_json", "serde_tokenstream 0.2.0", "serde_yaml", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -6793,7 +6793,7 @@ checksum = "7f7473c2cfcf90008193dd0e3e16599455cb601a9fce322b5bb55de799664925" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -7040,7 +7040,7 @@ dependencies = [ "regex", "relative-path", "rustc_version 0.4.0", - "syn 2.0.46", + "syn 2.0.48", "unicode-ident", ] @@ -7601,7 +7601,7 @@ checksum = "46fe8f8603d81ba86327b23a2e9cdf49e1255fb94a4c5f297f6ee0547178ea2c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -7662,7 +7662,7 @@ checksum = "8725e1dfadb3a50f7e5ce0b1a540466f6ed3fe7a0fca2ac2b8b831d31316bd00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", 
+ "syn 2.0.48", ] [[package]] @@ -7694,7 +7694,7 @@ dependencies = [ "proc-macro2", "quote", "serde", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -7735,7 +7735,7 @@ dependencies = [ "darling 0.20.3", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8049,7 +8049,7 @@ source = "git+https://github.com/oxidecomputer/slog-error-chain?branch=main#15f6 dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8304,7 +8304,7 @@ checksum = "01b2e185515564f15375f593fb966b5718bc624ba77fe49fa4616ad619690554" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8405,7 +8405,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta-derive", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8416,7 +8416,7 @@ checksum = "a60bcaff7397072dca0017d1db428e30d5002e00b6847703e2e42005c95fbe00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8475,7 +8475,7 @@ dependencies = [ "proc-macro2", "quote", "rustversion", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8523,9 +8523,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.46" +version = "2.0.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89456b690ff72fddcecf231caedbe615c59480c93358a93dfae7fc29e3ebbf0e" +checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" dependencies = [ "proc-macro2", "quote", @@ -8707,7 +8707,7 @@ dependencies = [ "proc-macro2", "quote", "structmeta", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8732,22 +8732,22 @@ dependencies = [ [[package]] name = "thiserror" -version = "1.0.49" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4" +checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.49" +version = "1.0.56" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc" +checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -8934,7 +8934,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -9201,7 +9201,7 @@ checksum = "5f4f31f56159e98206da9efd823404b79b6ef3143b4a7ab76e67b1751b25a4ab" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -9427,8 +9427,8 @@ checksum = "497961ef93d974e23eb6f433eb5fe1b7930b659f06d12dec6fc44a8f554c0bba" [[package]] name = "typify" -version = "0.0.14" -source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" +version = "0.0.15" +source = "git+https://github.com/oxidecomputer/typify#1f97f167923f001818d461b1286f8a5242abf8b1" dependencies = [ "typify-impl", "typify-macro", @@ -9436,8 +9436,8 @@ dependencies = [ [[package]] name = "typify-impl" -version = "0.0.14" -source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" +version = "0.0.15" +source = "git+https://github.com/oxidecomputer/typify#1f97f167923f001818d461b1286f8a5242abf8b1" dependencies = [ "heck 0.4.1", "log", @@ -9446,15 +9446,15 @@ dependencies = [ "regress", "schemars", "serde_json", - "syn 
2.0.46", + "syn 2.0.48", "thiserror", "unicode-ident", ] [[package]] name = "typify-macro" -version = "0.0.14" -source = "git+https://github.com/oxidecomputer/typify#c9d6453fc3cf69726d539925b838b267f886cb53" +version = "0.0.15" +source = "git+https://github.com/oxidecomputer/typify#1f97f167923f001818d461b1286f8a5242abf8b1" dependencies = [ "proc-macro2", "quote", @@ -9462,7 +9462,7 @@ dependencies = [ "serde", "serde_json", "serde_tokenstream 0.2.0", - "syn 2.0.46", + "syn 2.0.48", "typify-impl", ] @@ -9852,7 +9852,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", "wasm-bindgen-shared", ] @@ -9886,7 +9886,7 @@ checksum = "54681b18a46765f095758388f2d0cf16eb8d4169b639ab575a8f5693af210c7b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -10429,7 +10429,7 @@ checksum = "56097d5b91d711293a42be9289403896b68654625021732067eac7a4ca388a1f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -10440,7 +10440,7 @@ checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] @@ -10460,7 +10460,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.46", + "syn 2.0.48", ] [[package]] diff --git a/clients/dns-service-client/src/lib.rs b/clients/dns-service-client/src/lib.rs index 931e68322f..e437f1a7f6 100644 --- a/clients/dns-service-client/src/lib.rs +++ b/clients/dns-service-client/src/lib.rs @@ -29,8 +29,10 @@ pub fn is_retryable(error: &DnsConfigError) -> bool { let response_value = match error { DnsConfigError::CommunicationError(_) => return true, DnsConfigError::InvalidRequest(_) - | DnsConfigError::InvalidResponsePayload(_) - | DnsConfigError::UnexpectedResponse(_) => return false, + | DnsConfigError::InvalidResponsePayload(_, _) + | DnsConfigError::UnexpectedResponse(_) + | DnsConfigError::InvalidUpgrade(_) + | DnsConfigError::ResponseBodyError(_) => return false, DnsConfigError::ErrorResponse(response_value) => response_value, }; diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index 2661db7bb6..a3876fcac3 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -487,20 +487,19 @@ pub trait ClientError: std::fmt::Debug { impl From> for Error { fn from(e: progenitor::progenitor_client::Error) -> Self { match e { - // This error indicates that the inputs were not valid for this API - // call. It's reflective of either a client-side programming error. - progenitor::progenitor_client::Error::InvalidRequest(msg) => { - Error::internal_error(&format!("InvalidRequest: {}", msg)) + // For most error variants, we delegate to the display impl for the + // Progenitor error type, but we pick apart an error response more + // carefully. 
+ progenitor::progenitor_client::Error::InvalidRequest(_) + | progenitor::progenitor_client::Error::CommunicationError(_) + | progenitor::progenitor_client::Error::InvalidResponsePayload( + .., + ) + | progenitor::progenitor_client::Error::UnexpectedResponse(_) + | progenitor::progenitor_client::Error::InvalidUpgrade(_) + | progenitor::progenitor_client::Error::ResponseBodyError(_) => { + Error::internal_error(&e.to_string()) } - - // This error indicates a problem with the request to the remote - // service that did not result in an HTTP response code, but rather - // pertained to local (i.e. client-side) encoding or network - // communication. - progenitor::progenitor_client::Error::CommunicationError(ee) => { - Error::internal_error(&format!("CommunicationError: {}", ee)) - } - // This error represents an expected error from the remote service. progenitor::progenitor_client::Error::ErrorResponse(rv) => { let message = rv.message(); @@ -515,30 +514,6 @@ impl From> for Error { _ => Error::internal_error(&message), } } - - // This error indicates that the body returned by the client didn't - // match what was documented in the OpenAPI description for the - // service. This could only happen for us in the case of a severe - // logic/encoding bug in the remote service or due to a failure of - // our version constraints (i.e. that the call was to a newer - // service with an incompatible response). - progenitor::progenitor_client::Error::InvalidResponsePayload( - ee, - ) => Error::internal_error(&format!( - "InvalidResponsePayload: {}", - ee, - )), - - // This error indicates that the client generated a response code - // that was not described in the OpenAPI description for the - // service; this could be a success or failure response, but either - // way it indicates a logic or version error as above. - progenitor::progenitor_client::Error::UnexpectedResponse(r) => { - Error::internal_error(&format!( - "UnexpectedResponse: status code {}", - r.status(), - )) - } } } } diff --git a/nexus/src/app/sagas/switch_port_settings_apply.rs b/nexus/src/app/sagas/switch_port_settings_apply.rs index 979ec54afd..9d0573f6b0 100644 --- a/nexus/src/app/sagas/switch_port_settings_apply.rs +++ b/nexus/src/app/sagas/switch_port_settings_apply.rs @@ -307,7 +307,7 @@ async fn spa_undo_ensure_switch_port_settings( let log = sagactx.user_data().log(); let port_id: PortId = PortId::from_str(¶ms.switch_port_name) - .map_err(|e| external::Error::internal_error(e))?; + .map_err(|e| external::Error::internal_error(e.to_string().as_str()))?; let orig_port_settings_id = sagactx .lookup::>("original_switch_port_settings_id") diff --git a/nexus/src/app/sagas/switch_port_settings_clear.rs b/nexus/src/app/sagas/switch_port_settings_clear.rs index ff79de8e8e..15290dd75b 100644 --- a/nexus/src/app/sagas/switch_port_settings_clear.rs +++ b/nexus/src/app/sagas/switch_port_settings_clear.rs @@ -187,7 +187,7 @@ async fn spa_undo_clear_switch_port_settings( let log = sagactx.user_data().log(); let port_id: PortId = PortId::from_str(¶ms.port_name) - .map_err(|e| external::Error::internal_error(e))?; + .map_err(|e| external::Error::internal_error(e.to_string().as_str()))?; let orig_port_settings_id = sagactx .lookup::>("original_switch_port_settings_id") diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json index 2a7ff43202..6fd83cef47 100644 --- a/openapi/bootstrap-agent.json +++ b/openapi/bootstrap-agent.json @@ -355,6 +355,7 @@ ] }, "Certificate": { + "description": "Certificate\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"cert\", \"key\" ], \"properties\": { \"cert\": { \"type\": \"string\" }, \"key\": { \"type\": \"string\" } } } ```
", "type": "object", "properties": { "cert": { @@ -903,6 +904,7 @@ "format": "uuid" }, "RecoverySiloConfig": { + "description": "RecoverySiloConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"silo_name\", \"user_name\", \"user_password_hash\" ], \"properties\": { \"silo_name\": { \"$ref\": \"#/components/schemas/Name\" }, \"user_name\": { \"$ref\": \"#/components/schemas/UserId\" }, \"user_password_hash\": { \"$ref\": \"#/components/schemas/NewPasswordHash\" } } } ```
", "type": "object", "properties": { "silo_name": { @@ -967,7 +969,7 @@ ] }, "UserId": { - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", "type": "string" } }, diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 2a047068ee..8b0807d52c 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3218,6 +3218,7 @@ ] }, "DnsConfigParams": { + "description": "DnsConfigParams\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"generation\", \"time_created\", \"zones\" ], \"properties\": { \"generation\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"time_created\": { \"type\": \"string\", \"format\": \"date-time\" }, \"zones\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsConfigZone\" } } } } ```
", "type": "object", "properties": { "generation": { @@ -3243,6 +3244,7 @@ ] }, "DnsConfigZone": { + "description": "DnsConfigZone\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"records\", \"zone_name\" ], \"properties\": { \"records\": { \"type\": \"object\", \"additionalProperties\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsRecord\" } } }, \"zone_name\": { \"type\": \"string\" } } } ```
", "type": "object", "properties": { "records": { @@ -3264,6 +3266,7 @@ ] }, "DnsRecord": { + "description": "DnsRecord\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv4\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"A\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"AAAA\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"$ref\": \"#/components/schemas/Srv\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"SRV\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -4189,6 +4192,7 @@ ] }, "IpNet": { + "description": "IpNet\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"title\": \"v4\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Ipv4Net\" } ] }, { \"title\": \"v6\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Ipv6Net\" } ] } ] } ```
", "anyOf": [ { "$ref": "#/components/schemas/Ipv4Net" @@ -4286,7 +4290,7 @@ ] }, "Ipv4Net": { - "description": "An IPv4 subnet, including prefix and subnet mask", + "description": "An IPv4 subnet, including prefix and subnet mask\n\n
JSON schema\n\n```json { \"title\": \"An IPv4 subnet\", \"description\": \"An IPv4 subnet, including prefix and subnet mask\", \"examples\": [ \"192.168.1.0/24\" ], \"type\": \"string\", \"pattern\": \"^(([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])\\\\.){3}([0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5])/([0-9]|1[0-9]|2[0-9]|3[0-2])$\" } ```
", "type": "string" }, "Ipv4Network": { @@ -4312,7 +4316,7 @@ ] }, "Ipv6Net": { - "description": "An IPv6 subnet, including prefix and subnet mask", + "description": "An IPv6 subnet, including prefix and subnet mask\n\n
JSON schema\n\n```json { \"title\": \"An IPv6 subnet\", \"description\": \"An IPv6 subnet, including prefix and subnet mask\", \"examples\": [ \"fd12:3456::/64\" ], \"type\": \"string\", \"pattern\": \"^([fF][dD])[0-9a-fA-F]{2}:(([0-9a-fA-F]{1,4}:){6}[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){1,6}:)([0-9a-fA-F]{1,4})?\\\\/([0-9]|[1-9][0-9]|1[0-1][0-9]|12[0-8])$\" } ```
", "type": "string" }, "Ipv6Network": { @@ -4654,7 +4658,7 @@ "maxLength": 63 }, "NetworkInterface": { - "description": "Information required to construct a virtual network interface", + "description": "Information required to construct a virtual network interface\n\n
JSON schema\n\n```json { \"description\": \"Information required to construct a virtual network interface\", \"type\": \"object\", \"required\": [ \"id\", \"ip\", \"kind\", \"mac\", \"name\", \"primary\", \"slot\", \"subnet\", \"vni\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"ip\": { \"type\": \"string\", \"format\": \"ip\" }, \"kind\": { \"$ref\": \"#/components/schemas/NetworkInterfaceKind\" }, \"mac\": { \"$ref\": \"#/components/schemas/MacAddr\" }, \"name\": { \"$ref\": \"#/components/schemas/Name\" }, \"primary\": { \"type\": \"boolean\" }, \"slot\": { \"type\": \"integer\", \"format\": \"uint8\", \"minimum\": 0.0 }, \"subnet\": { \"$ref\": \"#/components/schemas/IpNet\" }, \"vni\": { \"$ref\": \"#/components/schemas/Vni\" } } } ```
", "type": "object", "properties": { "id": { @@ -4702,7 +4706,7 @@ ] }, "NetworkInterfaceKind": { - "description": "The type of network interface", + "description": "The type of network interface\n\n
JSON schema\n\n```json { \"description\": \"The type of network interface\", \"oneOf\": [ { \"description\": \"A vNIC attached to a guest instance\", \"type\": \"object\", \"required\": [ \"id\", \"type\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"instance\" ] } } }, { \"description\": \"A vNIC associated with an internal service\", \"type\": \"object\", \"required\": [ \"id\", \"type\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"service\" ] } } } ] } ```
", "oneOf": [ { "description": "A vNIC attached to a guest instance", @@ -4756,7 +4760,7 @@ "type": "string" }, "OmicronZoneConfig": { - "description": "Describes one Omicron-managed zone running on a sled", + "description": "Describes one Omicron-managed zone running on a sled\n\n
JSON schema\n\n```json { \"description\": \"Describes one Omicron-managed zone running on a sled\", \"type\": \"object\", \"required\": [ \"id\", \"underlay_address\", \"zone_type\" ], \"properties\": { \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"underlay_address\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"zone_type\": { \"$ref\": \"#/components/schemas/OmicronZoneType\" } } } ```
", "type": "object", "properties": { "id": { @@ -4778,7 +4782,7 @@ ] }, "OmicronZoneDataset": { - "description": "Describes a persistent ZFS dataset associated with an Omicron zone", + "description": "Describes a persistent ZFS dataset associated with an Omicron zone\n\n
JSON schema\n\n```json { \"description\": \"Describes a persistent ZFS dataset associated with an Omicron zone\", \"type\": \"object\", \"required\": [ \"pool_name\" ], \"properties\": { \"pool_name\": { \"$ref\": \"#/components/schemas/ZpoolName\" } } } ```
", "type": "object", "properties": { "pool_name": { @@ -4790,7 +4794,7 @@ ] }, "OmicronZoneType": { - "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration", + "description": "Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration\n\n
JSON schema\n\n```json { \"description\": \"Describes what kind of zone this is (i.e., what component is running in it) as well as any type-specific configuration\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"address\", \"dns_servers\", \"nic\", \"ntp_servers\", \"snat_cfg\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dns_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"domain\": { \"type\": [ \"string\", \"null\" ] }, \"nic\": { \"description\": \"The service vNIC providing outbound connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"ntp_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } }, \"snat_cfg\": { \"description\": \"The SNAT configuration for outbound connections.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/SourceNatConfig\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"boundary_ntp\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"clickhouse\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"clickhouse_keeper\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"cockroach_db\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dataset\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"crucible\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"crucible_pantry\" ] } } }, { \"type\": \"object\", \"required\": [ \"dataset\", \"dns_address\", \"http_address\", \"nic\", \"type\" ], \"properties\": { \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"dns_address\": { \"description\": \"The address at which the external DNS server is reachable.\", \"type\": \"string\" }, \"http_address\": { \"description\": \"The address at which the external DNS server API is reachable.\", \"type\": \"string\" }, \"nic\": { \"description\": \"The service vNIC providing external connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"external_dns\" ] } } }, { \"type\": \"object\", \"required\": [ \"dataset\", \"dns_address\", \"gz_address\", \"gz_address_index\", \"http_address\", \"type\" ], \"properties\": { \"dataset\": { \"$ref\": \"#/components/schemas/OmicronZoneDataset\" }, \"dns_address\": { \"type\": \"string\" }, \"gz_address\": { \"description\": \"The addresses in the global zone which should be created\\n\\nFor the DNS service, which exists outside the sleds's typical subnet - adding an address in the GZ is necessary to allow inter-zone traffic routing.\", \"type\": \"string\", \"format\": 
\"ipv6\" }, \"gz_address_index\": { \"description\": \"The address is also identified with an auxiliary bit of information to ensure that the created global zone address can have a unique name.\", \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"http_address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"internal_dns\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"dns_servers\", \"ntp_servers\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"dns_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"domain\": { \"type\": [ \"string\", \"null\" ] }, \"ntp_servers\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } }, \"type\": { \"type\": \"string\", \"enum\": [ \"internal_ntp\" ] } } }, { \"type\": \"object\", \"required\": [ \"external_dns_servers\", \"external_ip\", \"external_tls\", \"internal_address\", \"nic\", \"type\" ], \"properties\": { \"external_dns_servers\": { \"description\": \"External DNS servers Nexus can use to resolve external hosts.\", \"type\": \"array\", \"items\": { \"type\": \"string\", \"format\": \"ip\" } }, \"external_ip\": { \"description\": \"The address at which the external nexus server is reachable.\", \"type\": \"string\", \"format\": \"ip\" }, \"external_tls\": { \"description\": \"Whether Nexus's external endpoint should use TLS\", \"type\": \"boolean\" }, \"internal_address\": { \"description\": \"The address at which the internal nexus server is reachable.\", \"type\": \"string\" }, \"nic\": { \"description\": \"The service vNIC providing external connectivity using OPTE.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/NetworkInterface\" } ] }, \"type\": { \"type\": \"string\", \"enum\": [ \"nexus\" ] } } }, { \"type\": \"object\", \"required\": [ \"address\", \"type\" ], \"properties\": { \"address\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"oximeter\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -5135,7 +5139,7 @@ ] }, "OmicronZonesConfig": { - "description": "Describes the set of Omicron-managed zones running on a sled", + "description": "Describes the set of Omicron-managed zones running on a sled\n\n
JSON schema\n\n```json { \"description\": \"Describes the set of Omicron-managed zones running on a sled\", \"type\": \"object\", \"required\": [ \"generation\", \"zones\" ], \"properties\": { \"generation\": { \"description\": \"generation number of this configuration\\n\\nThis generation number is owned by the control plane (i.e., RSS or Nexus, depending on whether RSS-to-Nexus handoff has happened). It should not be bumped within Sled Agent.\\n\\nSled Agent rejects attempts to set the configuration to a generation older than the one it's currently running.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/Generation\" } ] }, \"zones\": { \"description\": \"list of running zones\", \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/OmicronZoneConfig\" } } } } ```
", "type": "object", "properties": { "generation": { @@ -6386,6 +6390,7 @@ ] }, "Srv": { + "description": "Srv\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"port\", \"prio\", \"target\", \"weight\" ], \"properties\": { \"port\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"prio\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"target\": { \"type\": \"string\" }, \"weight\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 } } } ```
", "type": "object", "properties": { "port": { @@ -6499,7 +6504,7 @@ "minimum": 0 }, "ZpoolName": { - "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique", + "description": "Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique\n\n
JSON schema\n\n```json { \"title\": \"The name of a Zpool\", \"description\": \"Zpool names are of the format ox{i,p}_. They are either Internal or External, and should be unique\", \"type\": \"string\", \"pattern\": \"^ox[ip]_[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$\" } ```
", "type": "string" }, "ZpoolPutRequest": { diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 3e3f6abec6..7b9a3efcda 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -2645,6 +2645,7 @@ ] }, "CrucibleOpts": { + "description": "CrucibleOpts\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"id\", \"lossy\", \"read_only\", \"target\" ], \"properties\": { \"cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"control\": { \"type\": [ \"string\", \"null\" ] }, \"flush_timeout\": { \"type\": [ \"number\", \"null\" ], \"format\": \"float\" }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"key\": { \"type\": [ \"string\", \"null\" ] }, \"key_pem\": { \"type\": [ \"string\", \"null\" ] }, \"lossy\": { \"type\": \"boolean\" }, \"read_only\": { \"type\": \"boolean\" }, \"root_cert_pem\": { \"type\": [ \"string\", \"null\" ] }, \"target\": { \"type\": \"array\", \"items\": { \"type\": \"string\" } } } } ```
", "type": "object", "properties": { "cert_pem": { @@ -3410,6 +3411,7 @@ ] }, "DiskRequest": { + "description": "DiskRequest\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"device\", \"name\", \"read_only\", \"slot\", \"volume_construction_request\" ], \"properties\": { \"device\": { \"type\": \"string\" }, \"name\": { \"type\": \"string\" }, \"read_only\": { \"type\": \"boolean\" }, \"slot\": { \"$ref\": \"#/components/schemas/Slot\" }, \"volume_construction_request\": { \"$ref\": \"#/components/schemas/VolumeConstructionRequest\" } } } ```
", "type": "object", "properties": { "device": { @@ -6332,7 +6334,7 @@ ] }, "SledRole": { - "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.", + "description": "Describes the role of the sled within the rack.\n\nNote that this may change if the sled is physically moved within the rack.\n\n
JSON schema\n\n```json { \"description\": \"Describes the role of the sled within the rack.\\n\\nNote that this may change if the sled is physically moved within the rack.\", \"oneOf\": [ { \"description\": \"The sled is a general compute sled.\", \"type\": \"string\", \"enum\": [ \"gimlet\" ] }, { \"description\": \"The sled is attached to the network switch, and has additional responsibilities.\", \"type\": \"string\", \"enum\": [ \"scrimlet\" ] } ] } ```
", "oneOf": [ { "description": "The sled is a general compute sled.", @@ -6351,7 +6353,7 @@ ] }, "Slot": { - "description": "A stable index which is translated by Propolis into a PCI BDF, visible to the guest.", + "description": "A stable index which is translated by Propolis into a PCI BDF, visible to the guest.\n\n
JSON schema\n\n```json { \"description\": \"A stable index which is translated by Propolis into a PCI BDF, visible to the guest.\", \"type\": \"integer\", \"format\": \"uint8\", \"minimum\": 0.0 } ```
", "type": "integer", "format": "uint8", "minimum": 0 @@ -6602,6 +6604,7 @@ "minimum": 0 }, "VolumeConstructionRequest": { + "description": "VolumeConstructionRequest\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"block_size\", \"id\", \"sub_volumes\", \"type\" ], \"properties\": { \"block_size\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"read_only_parent\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/VolumeConstructionRequest\" } ] }, \"sub_volumes\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/VolumeConstructionRequest\" } }, \"type\": { \"type\": \"string\", \"enum\": [ \"volume\" ] } } }, { \"type\": \"object\", \"required\": [ \"block_size\", \"id\", \"type\", \"url\" ], \"properties\": { \"block_size\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"url\" ] }, \"url\": { \"type\": \"string\" } } }, { \"type\": \"object\", \"required\": [ \"block_size\", \"blocks_per_extent\", \"extent_count\", \"gen\", \"opts\", \"type\" ], \"properties\": { \"block_size\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"blocks_per_extent\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"extent_count\": { \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"gen\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"opts\": { \"$ref\": \"#/components/schemas/CrucibleOpts\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"region\" ] } } }, { \"type\": \"object\", \"required\": [ \"block_size\", \"id\", \"path\", \"type\" ], \"properties\": { \"block_size\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"id\": { \"type\": \"string\", \"format\": \"uuid\" }, \"path\": { \"type\": \"string\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"file\" ] } } } ] } ```
", "oneOf": [ { "type": "object", diff --git a/openapi/wicketd.json b/openapi/wicketd.json index 804b2029c6..300e8412c3 100644 --- a/openapi/wicketd.json +++ b/openapi/wicketd.json @@ -1628,7 +1628,7 @@ ] }, "PowerState": { - "description": "See RFD 81.\n\nThis enum only lists power states the SP is able to control; higher power states are controlled by ignition.", + "description": "See RFD 81.\n\nThis enum only lists power states the SP is able to control; higher power states are controlled by ignition.\n\n
JSON schema\n\n```json { \"description\": \"See RFD 81.\\n\\nThis enum only lists power states the SP is able to control; higher power states are controlled by ignition.\", \"type\": \"string\", \"enum\": [ \"A0\", \"A1\", \"A2\" ] } ```
", "type": "string", "enum": [ "A0", @@ -2186,6 +2186,7 @@ ] }, "RackInitId": { + "description": "RackInitId\n\n
JSON schema\n\n```json { \"type\": \"string\", \"format\": \"uuid\" } ```
", "type": "string", "format": "uuid" }, @@ -2230,7 +2231,7 @@ ] }, "RackOperationStatus": { - "description": "Current status of any rack-level operation being performed by this bootstrap agent.", + "description": "Current status of any rack-level operation being performed by this bootstrap agent.\n\n
JSON schema\n\n```json { \"description\": \"Current status of any rack-level operation being performed by this bootstrap agent.\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initializing\" ] } } }, { \"description\": \"`id` will be none if the rack was already initialized on startup.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RackInitId\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackInitId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_panicked\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"resetting\" ] } } }, { \"description\": \"`reset_id` will be None if the rack is in an uninitialized-on-startup, or Some if it is in an uninitialized state due to a reset operation completing.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"reset_id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RackResetId\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"uninitialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/RackResetId\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_panicked\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2397,6 +2398,7 @@ ] }, "RackResetId": { + "description": "RackResetId\n\n
JSON schema\n\n```json { \"type\": \"string\", \"format\": \"uuid\" } ```
", "type": "string", "format": "uuid" }, @@ -2444,6 +2446,7 @@ ] }, "RotSlot": { + "description": "RotSlot\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"slot\" ], \"properties\": { \"slot\": { \"type\": \"string\", \"enum\": [ \"a\" ] } } }, { \"type\": \"object\", \"required\": [ \"slot\" ], \"properties\": { \"slot\": { \"type\": \"string\", \"enum\": [ \"b\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2476,6 +2479,7 @@ ] }, "RotState": { + "description": "RotState\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"active\", \"persistent_boot_preference\", \"state\" ], \"properties\": { \"active\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"pending_persistent_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] }, \"persistent_boot_preference\": { \"$ref\": \"#/components/schemas/RotSlot\" }, \"slot_a_sha3_256_digest\": { \"type\": [ \"string\", \"null\" ] }, \"slot_b_sha3_256_digest\": { \"type\": [ \"string\", \"null\" ] }, \"state\": { \"type\": \"string\", \"enum\": [ \"enabled\" ] }, \"transient_boot_preference\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/RotSlot\" } ] } } }, { \"type\": \"object\", \"required\": [ \"message\", \"state\" ], \"properties\": { \"message\": { \"type\": \"string\" }, \"state\": { \"type\": \"string\", \"enum\": [ \"communication_failed\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2570,6 +2574,7 @@ "pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$" }, "SpComponentCaboose": { + "description": "SpComponentCaboose\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"board\", \"git_commit\", \"name\", \"version\" ], \"properties\": { \"board\": { \"type\": \"string\" }, \"git_commit\": { \"type\": \"string\" }, \"name\": { \"type\": \"string\" }, \"version\": { \"type\": \"string\" } } } ```
", "type": "object", "properties": { "board": { @@ -2593,7 +2598,7 @@ ] }, "SpComponentInfo": { - "description": "Overview of a single SP component.", + "description": "Overview of a single SP component.\n\n
JSON schema\n\n```json { \"description\": \"Overview of a single SP component.\", \"type\": \"object\", \"required\": [ \"capabilities\", \"component\", \"description\", \"device\", \"presence\" ], \"properties\": { \"capabilities\": { \"description\": \"`capabilities` is a bitmask; interpret it via [`gateway_messages::DeviceCapabilities`].\", \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"component\": { \"description\": \"The unique identifier for this component.\", \"type\": \"string\" }, \"description\": { \"description\": \"A human-readable description of the component.\", \"type\": \"string\" }, \"device\": { \"description\": \"The name of the physical device.\", \"type\": \"string\" }, \"presence\": { \"description\": \"Whether or not the component is present, to the best of the SP's ability to judge.\", \"allOf\": [ { \"$ref\": \"#/components/schemas/SpComponentPresence\" } ] }, \"serial_number\": { \"description\": \"The component's serial number, if it has one.\", \"type\": [ \"string\", \"null\" ] } } } ```
", "type": "object", "properties": { "capabilities": { @@ -2637,7 +2642,7 @@ ] }, "SpComponentPresence": { - "description": "Description of the presence or absence of a component.\n\nThe presence of some components may vary based on the power state of the sled (e.g., components that time out or appear unavailable if the sled is in A2 may become present when the sled moves to A0).", + "description": "Description of the presence or absence of a component.\n\nThe presence of some components may vary based on the power state of the sled (e.g., components that time out or appear unavailable if the sled is in A2 may become present when the sled moves to A0).\n\n
JSON schema\n\n```json { \"description\": \"Description of the presence or absence of a component.\\n\\nThe presence of some components may vary based on the power state of the sled (e.g., components that time out or appear unavailable if the sled is in A2 may become present when the sled moves to A0).\", \"oneOf\": [ { \"description\": \"The component is present.\", \"type\": \"string\", \"enum\": [ \"present\" ] }, { \"description\": \"The component is not present.\", \"type\": \"string\", \"enum\": [ \"not_present\" ] }, { \"description\": \"The component is present but in a failed or faulty state.\", \"type\": \"string\", \"enum\": [ \"failed\" ] }, { \"description\": \"The SP is unable to determine the presence of the component.\", \"type\": \"string\", \"enum\": [ \"unavailable\" ] }, { \"description\": \"The SP's attempt to determine the presence of the component timed out.\", \"type\": \"string\", \"enum\": [ \"timeout\" ] }, { \"description\": \"The SP's attempt to determine the presence of the component failed.\", \"type\": \"string\", \"enum\": [ \"error\" ] } ] } ```
", "oneOf": [ { "description": "The component is present.", @@ -2684,6 +2689,7 @@ ] }, "SpIdentifier": { + "description": "SpIdentifier\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"slot\", \"type\" ], \"properties\": { \"slot\": { \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"type\": { \"$ref\": \"#/components/schemas/SpType\" } } } ```
", "type": "object", "properties": { "slot": { @@ -2701,7 +2707,7 @@ ] }, "SpIgnition": { - "description": "State of an ignition target.\n\nTODO: Ignition returns much more information than we're reporting here: do we want to expand this?", + "description": "State of an ignition target.\n\nTODO: Ignition returns much more information than we're reporting here: do we want to expand this?\n\n
JSON schema\n\n```json { \"description\": \"State of an ignition target.\\n\\nTODO: Ignition returns much more information than we're reporting here: do we want to expand this?\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"present\" ], \"properties\": { \"present\": { \"type\": \"string\", \"enum\": [ \"no\" ] } } }, { \"type\": \"object\", \"required\": [ \"ctrl_detect_0\", \"ctrl_detect_1\", \"flt_a2\", \"flt_a3\", \"flt_rot\", \"flt_sp\", \"id\", \"power\", \"present\" ], \"properties\": { \"ctrl_detect_0\": { \"type\": \"boolean\" }, \"ctrl_detect_1\": { \"type\": \"boolean\" }, \"flt_a2\": { \"type\": \"boolean\" }, \"flt_a3\": { \"type\": \"boolean\" }, \"flt_rot\": { \"type\": \"boolean\" }, \"flt_sp\": { \"type\": \"boolean\" }, \"id\": { \"$ref\": \"#/components/schemas/SpIgnitionSystemType\" }, \"power\": { \"type\": \"boolean\" }, \"present\": { \"type\": \"string\", \"enum\": [ \"yes\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2766,7 +2772,7 @@ ] }, "SpIgnitionSystemType": { - "description": "TODO: Do we want to bake in specific board names, or use raw u16 ID numbers?", + "description": "TODO: Do we want to bake in specific board names, or use raw u16 ID numbers?\n\n
JSON schema\n\n```json { \"description\": \"TODO: Do we want to bake in specific board names, or use raw u16 ID numbers?\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"system_type\" ], \"properties\": { \"system_type\": { \"type\": \"string\", \"enum\": [ \"gimlet\" ] } } }, { \"type\": \"object\", \"required\": [ \"system_type\" ], \"properties\": { \"system_type\": { \"type\": \"string\", \"enum\": [ \"sidecar\" ] } } }, { \"type\": \"object\", \"required\": [ \"system_type\" ], \"properties\": { \"system_type\": { \"type\": \"string\", \"enum\": [ \"psc\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"system_type\" ], \"properties\": { \"id\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"system_type\": { \"type\": \"string\", \"enum\": [ \"unknown\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -2892,6 +2898,7 @@ ] }, "SpState": { + "description": "SpState\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"base_mac_address\", \"hubris_archive_id\", \"model\", \"power_state\", \"revision\", \"rot\", \"serial_number\" ], \"properties\": { \"base_mac_address\": { \"type\": \"array\", \"items\": { \"type\": \"integer\", \"format\": \"uint8\", \"minimum\": 0.0 }, \"maxItems\": 6, \"minItems\": 6 }, \"hubris_archive_id\": { \"type\": \"string\" }, \"model\": { \"type\": \"string\" }, \"power_state\": { \"$ref\": \"#/components/schemas/PowerState\" }, \"revision\": { \"type\": \"integer\", \"format\": \"uint32\", \"minimum\": 0.0 }, \"rot\": { \"$ref\": \"#/components/schemas/RotState\" }, \"serial_number\": { \"type\": \"string\" } } } ```
", "type": "object", "properties": { "base_mac_address": { @@ -2936,6 +2943,7 @@ ] }, "SpType": { + "description": "SpType\n\n
JSON schema\n\n```json { \"type\": \"string\", \"enum\": [ \"sled\", \"power\", \"switch\" ] } ```
", "type": "string", "enum": [ "sled", @@ -4691,7 +4699,7 @@ ] }, "IgnitionCommand": { - "description": "Ignition command.", + "description": "Ignition command.\n\n
JSON schema\n\n```json { \"description\": \"Ignition command.\", \"type\": \"string\", \"enum\": [ \"power_on\", \"power_off\", \"power_reset\" ] } ```
", "type": "string", "enum": [ "power_on", diff --git a/schema/rss-service-plan-v2.json b/schema/rss-service-plan-v2.json index 62ce358938..10d8f8ab95 100644 --- a/schema/rss-service-plan-v2.json +++ b/schema/rss-service-plan-v2.json @@ -19,6 +19,7 @@ }, "definitions": { "DnsConfigParams": { + "description": "DnsConfigParams\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"generation\", \"time_created\", \"zones\" ], \"properties\": { \"generation\": { \"type\": \"integer\", \"format\": \"uint64\", \"minimum\": 0.0 }, \"time_created\": { \"type\": \"string\", \"format\": \"date-time\" }, \"zones\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsConfigZone\" } } } } ```
", "type": "object", "required": [ "generation", @@ -44,6 +45,7 @@ } }, "DnsConfigZone": { + "description": "DnsConfigZone\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"records\", \"zone_name\" ], \"properties\": { \"records\": { \"type\": \"object\", \"additionalProperties\": { \"type\": \"array\", \"items\": { \"$ref\": \"#/components/schemas/DnsRecord\" } } }, \"zone_name\": { \"type\": \"string\" } } } ```
", "type": "object", "required": [ "records", @@ -65,6 +67,7 @@ } }, "DnsRecord": { + "description": "DnsRecord\n\n
JSON schema\n\n```json { \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv4\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"A\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"type\": \"string\", \"format\": \"ipv6\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"AAAA\" ] } } }, { \"type\": \"object\", \"required\": [ \"data\", \"type\" ], \"properties\": { \"data\": { \"$ref\": \"#/components/schemas/Srv\" }, \"type\": { \"type\": \"string\", \"enum\": [ \"SRV\" ] } } } ] } ```
", "oneOf": [ { "type": "object", @@ -701,6 +704,7 @@ } }, "Srv": { + "description": "Srv\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"port\", \"prio\", \"target\", \"weight\" ], \"properties\": { \"port\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"prio\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 }, \"target\": { \"type\": \"string\" }, \"weight\": { \"type\": \"integer\", \"format\": \"uint16\", \"minimum\": 0.0 } } } ```
", "type": "object", "required": [ "port", diff --git a/schema/rss-sled-plan.json b/schema/rss-sled-plan.json index 0396ccc685..cbd73ed066 100644 --- a/schema/rss-sled-plan.json +++ b/schema/rss-sled-plan.json @@ -227,6 +227,7 @@ ] }, "Certificate": { + "description": "Certificate\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"cert\", \"key\" ], \"properties\": { \"cert\": { \"type\": \"string\" }, \"key\": { \"type\": \"string\" } } } ```
", "type": "object", "required": [ "cert", @@ -594,6 +595,7 @@ } }, "RecoverySiloConfig": { + "description": "RecoverySiloConfig\n\n
JSON schema\n\n```json { \"type\": \"object\", \"required\": [ \"silo_name\", \"user_name\", \"user_password_hash\" ], \"properties\": { \"silo_name\": { \"$ref\": \"#/components/schemas/Name\" }, \"user_name\": { \"$ref\": \"#/components/schemas/UserId\" }, \"user_password_hash\": { \"$ref\": \"#/components/schemas/NewPasswordHash\" } } } ```
", "type": "object", "required": [ "silo_name", @@ -718,7 +720,7 @@ ] }, "UserId": { - "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.", + "description": "Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\n\n
JSON schema\n\n```json { \"title\": \"A name unique within the parent collection\", \"description\": \"Names must begin with a lower case ASCII letter, be composed exclusively of lowercase ASCII, uppercase ASCII, numbers, and '-', and may not end with a '-'. Names cannot be a UUID though they may contain a UUID.\", \"type\": \"string\", \"maxLength\": 63, \"minLength\": 1, \"pattern\": \"^(?![0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$)^[a-z]([a-zA-Z0-9-]*[a-zA-Z0-9]+)?$\" } ```
", "type": "string" } } diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 3bbe0762f8..47e61cfe71 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -274,8 +274,10 @@ impl InstanceInner { )) } nexus_client::Error::InvalidRequest(_) - | nexus_client::Error::InvalidResponsePayload(_) - | nexus_client::Error::UnexpectedResponse(_) => { + | nexus_client::Error::InvalidResponsePayload(..) + | nexus_client::Error::UnexpectedResponse(_) + | nexus_client::Error::InvalidUpgrade(_) + | nexus_client::Error::ResponseBodyError(_) => { BackoffError::permanent(Error::Notification( err, )) diff --git a/sled-agent/src/sim/http_entrypoints_pantry.rs b/sled-agent/src/sim/http_entrypoints_pantry.rs index 8f572b46a0..49368f363a 100644 --- a/sled-agent/src/sim/http_entrypoints_pantry.rs +++ b/sled-agent/src/sim/http_entrypoints_pantry.rs @@ -365,6 +365,15 @@ mod tests { ); }; for (key, value) in map.iter() { + // We intentionally skip the "description" key, provided + // that the value is also a true String. This is mostly a + // one-off for the udpate to Progenitor 0.5.0, which caused + // this key to be added. But it's also pretty harmless, + // since it's not possible to get this key-value combination + // in a real JSON schema. + if key == "description" && value.is_string() { + continue; + } let new_path = format!("{path}/{key}"); let rhs_value = rhs_map.get(key).unwrap_or_else(|| { panic!("Real API JSON missing key: \"{new_path}\"") diff --git a/wicketd/src/preflight_check/uplink.rs b/wicketd/src/preflight_check/uplink.rs index 25411f17a5..47995f0c10 100644 --- a/wicketd/src/preflight_check/uplink.rs +++ b/wicketd/src/preflight_check/uplink.rs @@ -161,8 +161,11 @@ fn add_steps_for_single_local_uplink_preflight_check<'a>( |_cx| async { // Check that the port name is valid and that it has no links // configured already. 
- let port_id = PortId::from_str(&uplink.port) - .map_err(UplinkPreflightTerminalError::InvalidPortName)?; + let port_id = PortId::from_str(&uplink.port).map_err(|_| { + UplinkPreflightTerminalError::InvalidPortName( + uplink.port.clone(), + ) + })?; let links = dpd_client .link_list(&port_id) .await @@ -892,7 +895,7 @@ type DpdError = dpd_client::Error; #[derive(Debug, Error)] pub(crate) enum UplinkPreflightTerminalError { #[error("invalid port name: {0}")] - InvalidPortName(&'static str), + InvalidPortName(String), #[error("failed to connect to dpd to check for current configuration")] GetCurrentConfig(#[source] DpdError), #[error("uplink already configured - is rack already initialized?")] diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 25a72838a0..49b2489c40 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -81,7 +81,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.74" } +proc-macro2 = { version = "1.0.78" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.3" } @@ -101,7 +101,7 @@ spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.46", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.48", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } tokio = { version = "1.35.1", features = ["full", "test-util"] } tokio-postgres = { version = "0.7.10", features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } @@ -188,7 +188,7 @@ petgraph = { version = "0.6.4", features = ["serde-1"] } postgres-types = { version = "0.2.6", default-features = false, features = ["with-chrono-0_4", "with-serde_json-1", "with-uuid-1"] } ppv-lite86 = { version = "0.2.17", default-features = false, features = ["simd", "std"] } predicates = { version = "3.1.0" } -proc-macro2 = { version = "1.0.74" } +proc-macro2 = { version = "1.0.78" } rand = { version = "0.8.5" } rand_chacha = { version = "0.3.1", default-features = false, features = ["std"] } regex = { version = "1.10.3" } @@ -208,7 +208,7 @@ spin = { version = "0.9.8" } string_cache = { version = "0.8.7" } subtle = { version = "2.5.0" } syn-dff4ba8e3ae991db = { package = "syn", version = "1.0.109", features = ["extra-traits", "fold", "full", "visit"] } -syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.46", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } +syn-f595c2ba2a3f28df = { package = "syn", version = "2.0.48", features = ["extra-traits", "fold", "full", "visit", "visit-mut"] } time = { version = "0.3.27", features = ["formatting", "local-offset", "macros", "parsing"] } time-macros = { version = "0.2.13", default-features = false, features = ["formatting", "parsing"] } tokio = { version = "1.35.1", features = ["full", "test-util"] } From 4fef59923f1cde8a6e8671c347a2e5ad25fa7aa8 Mon Sep 17 
00:00:00 2001 From: bnaecker Date: Thu, 25 Jan 2024 13:15:09 -0800 Subject: [PATCH 33/49] Update to USDT 0.5.0 (#4898) - Update diesel-dtrace - Update usdt - Handle API change in how providers are named, since dunders are no longer translated to dashes. --- Cargo.lock | 245 ++++++++++++++++++++----- Cargo.toml | 4 +- gateway/src/lib.rs | 5 +- nexus/db-queries/src/lib.rs | 2 +- oximeter/db/src/client.rs | 2 +- sled-agent/src/bootstrap/server.rs | 4 +- tools/dtrace/aggregate-query-latency.d | 4 +- tools/dtrace/slowest-queries.d | 6 +- tools/dtrace/trace-db-queries.d | 6 +- workspace-hack/Cargo.toml | 10 +- 10 files changed, 223 insertions(+), 65 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 7ea3d2b96d..45d1d47199 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -62,7 +62,7 @@ dependencies = [ "getrandom 0.2.10", "once_cell", "version_check", - "zerocopy 0.7.31", + "zerocopy 0.7.32", ] [[package]] @@ -1691,12 +1691,12 @@ dependencies = [ [[package]] name = "diesel-dtrace" -version = "0.2.0" -source = "git+https://github.com/oxidecomputer/diesel-dtrace?branch=main#c1252df734b52b4e1243e0ca2bd5f00b17730408" +version = "0.3.0" +source = "git+https://github.com/oxidecomputer/diesel-dtrace?branch=main#62ef5ca0fe243a0929791bb9efbb7ed9c32c5368" dependencies = [ "diesel", "serde", - "usdt", + "usdt 0.5.0", "uuid", "version_check", ] @@ -1875,6 +1875,20 @@ dependencies = [ "zerocopy 0.3.0", ] +[[package]] +name = "dof" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "558e5396321b677a59d2c43b3cc3bc44683109c63ac49275f3bbbf41c0ecd002" +dependencies = [ + "goblin", + "pretty-hex 0.4.1", + "serde", + "serde_json", + "thiserror", + "zerocopy 0.7.32", +] + [[package]] name = "downcast" version = "0.11.0" @@ -1947,7 +1961,7 @@ dependencies = [ "tokio", "tokio-rustls 0.25.0", "toml 0.8.8", - "usdt", + "usdt 0.3.5", "uuid", "version_check", "waitgroup", @@ -1976,6 +1990,17 @@ dependencies = [ "thiserror", ] +[[package]] +name = "dtrace-parser" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71734e3eb68cd4df338d04dffdcc024f89eb0b238150cc95b826fbfad756452b" +dependencies = [ + "pest", + "pest_derive", + "thiserror", +] + [[package]] name = "dyn-clone" version = "1.0.13" @@ -2571,7 +2596,7 @@ dependencies = [ "thiserror", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git?branch=main)", "tokio", - "usdt", + "usdt 0.3.5", "uuid", "version_check", "zip", @@ -2672,6 +2697,17 @@ dependencies = [ "regex", ] +[[package]] +name = "goblin" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb07a4ffed2093b118a525b1d8f5204ae274faed5604537caf7135d0f18d9887" +dependencies = [ + "log", + "plain", + "scroll", +] + [[package]] name = "group" version = "0.13.0" @@ -3969,6 +4005,16 @@ version = "2.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f232d6ef707e1956a43342693d2a31e72989554d58299d7a88738cc95b0d35c" +[[package]] +name = "memmap" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b" +dependencies = [ + "libc", + "winapi", +] + [[package]] name = "memoffset" version = "0.7.1" @@ -4282,7 +4328,7 @@ dependencies = [ "term", "thiserror", "tokio", - "usdt", + "usdt 0.5.0", "uuid", ] @@ -4832,7 +4878,7 @@ dependencies = [ "signal-hook", "signal-hook-tokio", "slog", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-error-chain", 
"sp-sim", "subprocess", @@ -4941,7 +4987,7 @@ dependencies = [ "sled-agent-client", "slog", "slog-async", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-error-chain", "slog-term", "sp-sim", @@ -5135,7 +5181,7 @@ dependencies = [ "sled-storage", "slog", "slog-async", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-term", "smf", "static_assertions", @@ -5148,7 +5194,7 @@ dependencies = [ "tokio-stream", "tokio-util", "toml 0.8.8", - "usdt", + "usdt 0.5.0", "uuid", "zeroize", "zone", @@ -5184,7 +5230,7 @@ dependencies = [ "thiserror", "tokio", "tokio-postgres", - "usdt", + "usdt 0.5.0", "walkdir", ] @@ -5218,6 +5264,7 @@ dependencies = [ "der", "diesel", "digest", + "dof 0.3.0", "either", "elliptic-curve", "errno", @@ -5298,10 +5345,11 @@ dependencies = [ "trust-dns-proto", "unicode-bidi", "unicode-normalization", - "usdt", + "usdt 0.3.5", + "usdt-impl 0.5.0", "uuid", "yasna", - "zerocopy 0.7.31", + "zerocopy 0.7.32", "zeroize", "zip", ] @@ -5531,7 +5579,7 @@ dependencies = [ "poptrie", "serde", "smoltcp 0.11.0", - "zerocopy 0.7.31", + "zerocopy 0.7.32", ] [[package]] @@ -5601,7 +5649,7 @@ dependencies = [ "serde_json", "slog", "slog-async", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-term", "strum", "subprocess", @@ -5641,7 +5689,7 @@ dependencies = [ "serde_json", "slog", "slog-async", - "slog-dtrace", + "slog-dtrace 0.3.0", "slog-term", "sqlformat", "sqlparser", @@ -5650,7 +5698,7 @@ dependencies = [ "tempfile", "thiserror", "tokio", - "usdt", + "usdt 0.5.0", "uuid", ] @@ -5700,7 +5748,7 @@ dependencies = [ "schemars", "serde", "slog", - "slog-dtrace", + "slog-dtrace 0.3.0", "thiserror", "tokio", "uuid", @@ -5952,19 +6000,20 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pest" -version = "2.7.2" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1acb4a4365a13f749a93f1a094a7805e5cfa0955373a9de860d962eaa3a5fe5a" +checksum = "1f200d8d83c44a45b21764d1916299752ca035d15ecd46faca3e9a2a2bf6ad06" dependencies = [ + "memchr", "thiserror", "ucd-trie", ] [[package]] name = "pest_derive" -version = "2.7.2" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "666d00490d4ac815001da55838c500eafb0320019bbaa44444137c48b443a853" +checksum = "bcd6ab1236bbdb3a49027e920e693192ebfe8913f6d60e294de57463a493cfde" dependencies = [ "pest", "pest_generator", @@ -5972,9 +6021,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.7.2" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68ca01446f50dbda87c1786af8770d535423fa8a53aec03b8f4e3d7eb10e0929" +checksum = "2a31940305ffc96863a735bef7c7994a00b325a7138fdbc5bda0f1a0476d3275" dependencies = [ "pest", "pest_meta", @@ -5985,9 +6034,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.7.2" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56af0a30af74d0445c0bf6d9d051c979b516a1a5af790d251daee76005420a48" +checksum = "a7ff62f5259e53b78d1af898941cdcdccfae7385cf7d793a6e55de5d05bb4b7d" dependencies = [ "once_cell", "pest", @@ -6092,6 +6141,12 @@ version = "0.3.27" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "26072860ba924cbfa98ea39c8c19b4dd6a4a25423dbdf219c1eca91aa0cf6964" +[[package]] +name = "plain" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4596b6d070b27117e987119b4dac604f3c58cfb0b191112e24771b2faeac1a6" + [[package]] name = 
"platforms" version = "3.0.2" @@ -6461,7 +6516,7 @@ dependencies = [ "slog", "slog-async", "slog-bunyan", - "slog-dtrace", + "slog-dtrace 0.2.3", "slog-term", "thiserror", "tokio", @@ -7466,6 +7521,26 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scroll" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ab8598aa408498679922eff7fa985c25d58a90771bd6be794434c5277eab1a6" +dependencies = [ + "scroll_derive", +] + +[[package]] +name = "scroll_derive" +version = "0.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7f81c2fde025af7e69b1d1420531c8a8811ca898919db177141a85313b1cb932" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "sct" version = "0.7.0" @@ -8014,7 +8089,21 @@ dependencies = [ "serde", "serde_json", "slog", - "usdt", + "usdt 0.3.5", + "version_check", +] + +[[package]] +name = "slog-dtrace" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16c4003e4582bc29415fcbf94f53346c9c379d5dafac45d4bafaa39c7f0453ac" +dependencies = [ + "chrono", + "serde", + "serde_json", + "slog", + "usdt 0.5.0", "version_check", ] @@ -8216,7 +8305,7 @@ dependencies = [ "omicron-workspace-hack", "serde", "slog", - "slog-dtrace", + "slog-dtrace 0.3.0", "sprockets-rot", "thiserror", "tokio", @@ -9649,11 +9738,27 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b4c48f9e522b977bbe938a0d7c4d36633d267ba0155aaa253fb57d0531be0fb" dependencies = [ - "dtrace-parser", + "dtrace-parser 0.1.14", "serde", - "usdt-attr-macro", - "usdt-impl", - "usdt-macro", + "usdt-attr-macro 0.3.5", + "usdt-impl 0.3.5", + "usdt-macro 0.3.5", +] + +[[package]] +name = "usdt" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5bf5c47fb471a0bff3d7b17a250817bba8c6cc99b0492abaefe5b3bb99045f02" +dependencies = [ + "dof 0.3.0", + "dtrace-parser 0.2.0", + "goblin", + "memmap", + "serde", + "usdt-attr-macro 0.5.0", + "usdt-impl 0.5.0", + "usdt-macro 0.5.0", ] [[package]] @@ -9662,12 +9767,26 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "80e6ae4f982ae74dcbaa8eb17baf36ca0d464a3abc8a7172b3bd74c73e9505d6" dependencies = [ - "dtrace-parser", + "dtrace-parser 0.1.14", "proc-macro2", "quote", "serde_tokenstream 0.1.7", "syn 1.0.109", - "usdt-impl", + "usdt-impl 0.3.5", +] + +[[package]] +name = "usdt-attr-macro" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "025161fff40db24774e7757f75df74ecc47e93d7e11e0f6cdfc31b40eacfe136" +dependencies = [ + "dtrace-parser 0.2.0", + "proc-macro2", + "quote", + "serde_tokenstream 0.2.0", + "syn 2.0.48", + "usdt-impl 0.5.0", ] [[package]] @@ -9677,8 +9796,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f53b4ca0b33aae466dc47b30b98adc4f88454928837af8010b6ed02d18474cb1" dependencies = [ "byteorder", - "dof", - "dtrace-parser", + "dof 0.1.5", + "dtrace-parser 0.1.14", "libc", "proc-macro2", "quote", @@ -9690,18 +9809,52 @@ dependencies = [ "version_check", ] +[[package]] +name = "usdt-impl" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f925814e5942ebb87af2d9fcf4c3f8665e37903f741eb11f0fa2396c6ef5f7b1" +dependencies = [ + "byteorder", + "dof 
0.3.0", + "dtrace-parser 0.2.0", + "libc", + "proc-macro2", + "quote", + "serde", + "serde_json", + "syn 2.0.48", + "thiserror", + "thread-id", + "version_check", +] + [[package]] name = "usdt-macro" version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7cb093f9653dc91632621c754f9ed4ee25d14e46e0239b6ccaf74a6c0c2788bd" dependencies = [ - "dtrace-parser", + "dtrace-parser 0.1.14", "proc-macro2", "quote", "serde_tokenstream 0.1.7", "syn 1.0.109", - "usdt-impl", + "usdt-impl 0.3.5", +] + +[[package]] +name = "usdt-macro" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3ddd86f8f3abac0b7c87f59fe82446fc96a3854a413f176dd2797ed686b7af4c" +dependencies = [ + "dtrace-parser 0.2.0", + "proc-macro2", + "quote", + "serde_tokenstream 0.2.0", + "syn 2.0.48", + "usdt-impl 0.5.0", ] [[package]] @@ -10085,7 +10238,7 @@ dependencies = [ "sha2", "sled-hardware", "slog", - "slog-dtrace", + "slog-dtrace 0.3.0", "snafu", "subprocess", "tar", @@ -10402,12 +10555,12 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.7.31" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c4061bedbb353041c12f413700357bec76df2c7e2ca8e4df8bac24c6bf68e3d" +checksum = "74d4d3961e53fa4c9a25a8637fc2bfaf2595b3d3ae34875568a5cf64787716be" dependencies = [ "byteorder", - "zerocopy-derive 0.7.31", + "zerocopy-derive 0.7.32", ] [[package]] @@ -10434,9 +10587,9 @@ dependencies = [ [[package]] name = "zerocopy-derive" -version = "0.7.31" +version = "0.7.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3c129550b3e6de3fd0ba67ba5c81818f9805e58b8d7fee80a3a59d2c9fc601a" +checksum = "9ce1b18ccd8e73a9321186f97e46f9f04b778851177567b1975109d26a08d2a6" dependencies = [ "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index ba328fe612..5e94d82501 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -349,7 +349,7 @@ sled-hardware = { path = "sled-hardware" } sled-storage = { path = "sled-storage" } slog = { version = "2.7", features = [ "dynamic-keys", "max_level_trace", "release_max_level_debug" ] } slog-async = "2.8" -slog-dtrace = "0.2" +slog-dtrace = "0.3" slog-envlogger = "2.2" slog-error-chain = { git = "https://github.com/oxidecomputer/slog-error-chain", branch = "main", features = ["derive"] } slog-term = "2.9" @@ -400,7 +400,7 @@ tui-tree-widget = "0.16.0" unicode-width = "0.1.11" update-common = { path = "update-common" } update-engine = { path = "update-engine" } -usdt = "0.3" +usdt = "0.5.0" uuid = { version = "1.7.0", features = ["serde", "v4"] } walkdir = "2.4" wicket = { path = "wicket" } diff --git a/gateway/src/lib.rs b/gateway/src/lib.rs index 5aa833f6e2..1354f30a0a 100644 --- a/gateway/src/lib.rs +++ b/gateway/src/lib.rs @@ -331,9 +331,8 @@ pub async fn start_server( .map_err(|message| format!("initializing logger: {}", message))?, ); let log = slog::Logger::root(drain.fuse(), slog::o!(FileKv)); - if let slog_dtrace::ProbeRegistration::Failed(e) = registration { - let err = InlineErrorChain::new(&e); - error!(log, "failed to register DTrace probes"; &err); + if let slog_dtrace::ProbeRegistration::Failed(err) = registration { + error!(log, "failed to register DTrace probes"; "err" => &err); return Err(format!("failed to register DTrace probes: {err}")); } else { debug!(log, "registered DTrace probes"); diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs index 5d1927ebc7..60177990e8 100644 --- a/nexus/db-queries/src/lib.rs +++ 
b/nexus/db-queries/src/lib.rs @@ -19,7 +19,7 @@ extern crate newtype_derive; #[macro_use] extern crate diesel; -#[usdt::provider(provider = "nexus__db__queries")] +#[usdt::provider(provider = "nexus_db_queries")] mod probes { // Fires before we start a search over a range for a VNI. // diff --git a/oximeter/db/src/client.rs b/oximeter/db/src/client.rs index fc46a2c498..ca996dc894 100644 --- a/oximeter/db/src/client.rs +++ b/oximeter/db/src/client.rs @@ -50,7 +50,7 @@ use tokio::fs; use tokio::sync::Mutex; use uuid::Uuid; -#[usdt::provider(provider = "clickhouse__client")] +#[usdt::provider(provider = "clickhouse_client")] mod probes { fn query__start(_: &usdt::UniqueId, sql: &str) {} fn query__done(_: &usdt::UniqueId) {} diff --git a/sled-agent/src/bootstrap/server.rs b/sled-agent/src/bootstrap/server.rs index 1a9d36c86b..47a8019ac5 100644 --- a/sled-agent/src/bootstrap/server.rs +++ b/sled-agent/src/bootstrap/server.rs @@ -61,8 +61,8 @@ pub enum StartError { #[error("Failed to initialize logger")] InitLogger(#[source] io::Error), - #[error("Failed to register DTrace probes")] - RegisterDTraceProbes(#[source] usdt::Error), + #[error("Failed to register DTrace probes: {0}")] + RegisterDTraceProbes(String), #[error("Failed to find address objects for maghemite")] FindMaghemiteAddrObjs(#[source] underlay::Error), diff --git a/tools/dtrace/aggregate-query-latency.d b/tools/dtrace/aggregate-query-latency.d index b1899cd970..c0ed1751fd 100755 --- a/tools/dtrace/aggregate-query-latency.d +++ b/tools/dtrace/aggregate-query-latency.d @@ -7,14 +7,14 @@ dtrace:::BEGIN printf("Tracing database query latency by connection ID for nexus PID %d, use Ctrl-C to exit\n", $target); } -diesel-db$target:::query-start +diesel_db$target:::query-start { @total_queries = count(); this->conn_id = json(copyinstr(arg1), "ok"); self->ts[this->conn_id] = timestamp; } -diesel-db$target:::query-done +diesel_db$target:::query-done /self->ts[json(copyinstr(arg1), "ok")] != 0/ { this->conn_id = json(copyinstr(arg1), "ok"); diff --git a/tools/dtrace/slowest-queries.d b/tools/dtrace/slowest-queries.d index 40e43fa252..76e22de22f 100755 --- a/tools/dtrace/slowest-queries.d +++ b/tools/dtrace/slowest-queries.d @@ -9,7 +9,7 @@ dtrace:::BEGIN printf("Tracing slowest queries for nexus PID %d, use Ctrl-C to exit\n", $target); } -diesel-db$target:::query-start +diesel_db$target:::query-start { this->conn_id = json(copyinstr(arg1), "ok"); ts[this->conn_id] = timestamp; @@ -17,12 +17,12 @@ diesel-db$target:::query-start } -diesel-db$target:::query-done +diesel_db$target:::query-done { this->conn_id = json(copyinstr(arg1), "ok"); } -diesel-db$target:::query-done +diesel_db$target:::query-done /ts[this->conn_id]/ { this->latency = timestamp - ts[this->conn_id]; diff --git a/tools/dtrace/trace-db-queries.d b/tools/dtrace/trace-db-queries.d index 033d849084..69878b55ba 100755 --- a/tools/dtrace/trace-db-queries.d +++ b/tools/dtrace/trace-db-queries.d @@ -9,19 +9,19 @@ dtrace:::BEGIN printf("Tracing all database queries for nexus PID %d, use Ctrl-C to exit\n", $target); } -diesel-db$target:::query-start +diesel_db$target:::query-start { this->conn_id = json(copyinstr(arg1), "ok"); ts[this->conn_id] = timestamp; query[this->conn_id] = copyinstr(arg2); } -diesel-db$target:::query-done +diesel_db$target:::query-done { this->conn_id = json(copyinstr(arg1), "ok"); } -diesel-db$target:::query-done +diesel_db$target:::query-done /ts[this->conn_id]/ { this->latency = (timestamp - ts[this->conn_id]) / 1000; diff --git a/workspace-hack/Cargo.toml 
b/workspace-hack/Cargo.toml index 49b2489c40..cebd4cab36 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -114,9 +114,10 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } +usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } -zerocopy = { version = "0.7.31", features = ["derive", "simd"] } +zerocopy = { version = "0.7.32", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } @@ -222,20 +223,23 @@ trust-dns-proto = { version = "0.22.0" } unicode-bidi = { version = "0.3.13" } unicode-normalization = { version = "0.1.22" } usdt = { version = "0.3.5" } +usdt-impl = { version = "0.5.0", default-features = false, features = ["asm", "des"] } uuid = { version = "1.7.0", features = ["serde", "v4"] } yasna = { version = "0.5.2", features = ["bit-vec", "num-bigint", "std", "time"] } -zerocopy = { version = "0.7.31", features = ["derive", "simd"] } +zerocopy = { version = "0.7.32", features = ["derive", "simd"] } zeroize = { version = "1.7.0", features = ["std", "zeroize_derive"] } zip = { version = "0.6.6", default-features = false, features = ["bzip2", "deflate"] } [target.x86_64-unknown-linux-gnu.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +dof = { version = "0.3.0", default-features = false, features = ["des"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.x86_64-unknown-linux-gnu.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +dof = { version = "0.3.0", default-features = false, features = ["des"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } rustix = { version = "0.38.30", features = ["fs", "termios"] } @@ -270,6 +274,7 @@ rustix = { version = "0.38.30", features = ["fs", "termios"] } [target.x86_64-unknown-illumos.dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +dof = { version = "0.3.0", default-features = false, features = ["des"] } errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } @@ -279,6 +284,7 @@ toml_edit-cdcf2f9584511fe6 = { package = "toml_edit", version = "0.19.15", featu [target.x86_64-unknown-illumos.build-dependencies] bitflags-f595c2ba2a3f28df = { package = "bitflags", version = "2.4.0", default-features = false, features = ["std"] } +dof = { version = "0.3.0", default-features = false, features = ["des"] } errno = { version = "0.3.8", default-features = false, features = ["std"] } mio = { version = "0.8.9", features = ["net", "os-ext"] } once_cell = { version = "1.19.0", features = ["unstable"] } From 2b9c885074fee12c8e113e43b0669ee0b7765f60 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" 
<146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Thu, 25 Jan 2024 15:22:14 -0800 Subject: [PATCH 34/49] Update Rust crate owo-colors to v4 (#4896) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 45d1d47199..dd837b9891 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5541,9 +5541,9 @@ dependencies = [ [[package]] name = "owo-colors" -version = "3.5.0" +version = "4.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1b04fb49957986fdce4d6ee7a65027d55d4b6d2265e5848bbb507b58ccfdb6f" +checksum = "caff54706df99d2a78a5a4e3455ff45448d81ef1bb63c22cd14052ca0e993a3f" [[package]] name = "oxide-client" diff --git a/Cargo.toml b/Cargo.toml index 5e94d82501..47c412b9b5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -280,7 +280,7 @@ openssl-sys = "0.9" openssl-probe = "0.1.5" opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "1d29ef60a18179babfb44f0f7a3c2fe71034a2c1" } oso = "0.27" -owo-colors = "3.5.0" +owo-colors = "4.0.0" oximeter = { path = "oximeter/oximeter" } oximeter-client = { path = "clients/oximeter-client" } oximeter-db = { path = "oximeter/db/" } From 5b28d0cff0a7e43dd7dd813a6e4e03b58c07ba11 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 05:30:55 +0000 Subject: [PATCH 35/49] Update taiki-e/install-action digest to 1f501f0 (#4901) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`9f9bf5e` -> `1f501f0`](https://togithub.com/taiki-e/install-action/compare/9f9bf5e...1f501f0) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index 46d09c0940..d4a4a4750c 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@9f9bf5e8df111848fb25b8a97a361d8963025899 # v2 + uses: taiki-e/install-action@1f501f091c4240a626be17b7496626f8f0cf979a # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From 4c15cc0d3fded2844eb8178556daf30a59f778e3 Mon Sep 17 00:00:00 2001 From: "oxide-reflector-bot[bot]" <130185838+oxide-reflector-bot[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 10:21:34 +0000 Subject: [PATCH 36/49] Update maghemite to d12bdf8 (#4627) Updated maghemite to commit d12bdf8. 
--------- Co-authored-by: reflector[bot] <130185838+reflector[bot]@users.noreply.github.com> --- package-manifest.toml | 12 ++++++------ tools/maghemite_ddm_openapi_version | 2 +- tools/maghemite_mg_openapi_version | 2 +- tools/maghemite_mgd_checksums | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/package-manifest.toml b/package-manifest.toml index 36e43157f9..f574f1ff5d 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -446,10 +446,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//maghemite.sha256.txt -source.sha256 = "1cf9cb514d11275d93c4e4760500539a778f23039374508ca07528fcaf0ba3f8" +source.sha256 = "442ef3a927ce2f2a401b631daa3c67a708fbbed83a839552a6fbcadd68120783" output.type = "tarball" [package.mg-ddm] @@ -462,10 +462,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). -source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "a9b959b4287ac2ec7b45ed99ccd00e1f134b8e3d501099cd669cee5de9525ae3" +source.sha256 = "81a766a88fab3fe7cb7fb6698ec02d05224320500b7a4421bbea9f4123127fba" output.type = "zone" output.intermediate_only = true @@ -477,10 +477,10 @@ source.repo = "maghemite" # `tools/maghemite_openapi_version`. Failing to do so will cause a failure when # building `ddm-admin-client` (which will instruct you to update # `tools/maghemite_openapi_version`). 
-source.commit = "869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +source.commit = "d12bdf89b9058065789cd00c8704e4ce0a352342" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/maghemite/image//mg-ddm.sha256.txt -source.sha256 = "ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" +source.sha256 = "bc887e08e3d052d8440983a2d6186cd1d92a52345504092f64f4de2e5335a75d" output.type = "zone" output.intermediate_only = true diff --git a/tools/maghemite_ddm_openapi_version b/tools/maghemite_ddm_openapi_version index be8772b7e6..a103b117e8 100644 --- a/tools/maghemite_ddm_openapi_version +++ b/tools/maghemite_ddm_openapi_version @@ -1,2 +1,2 @@ -COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +COMMIT="d12bdf89b9058065789cd00c8704e4ce0a352342" SHA2="0b0dbc2f8bbc5d2d9be92d64c4865f8f9335355aae62f7de9f67f81dfb3f1803" diff --git a/tools/maghemite_mg_openapi_version b/tools/maghemite_mg_openapi_version index 6bf1999c61..6981c98070 100644 --- a/tools/maghemite_mg_openapi_version +++ b/tools/maghemite_mg_openapi_version @@ -1,2 +1,2 @@ -COMMIT="869cf802efcbd2f0edbb614ed92caa3e3164c1fc" +COMMIT="d12bdf89b9058065789cd00c8704e4ce0a352342" SHA2="7618511f905d26394ef7c552339dd78835ce36a6def0d85b05b6d1e363a5e7b4" diff --git a/tools/maghemite_mgd_checksums b/tools/maghemite_mgd_checksums index b5fe84b662..8d0efb473a 100644 --- a/tools/maghemite_mgd_checksums +++ b/tools/maghemite_mgd_checksums @@ -1,2 +1,2 @@ -CIDL_SHA256="ab882fbeab54987645492872e67f3351f8d14629a041465cc845ac8583a7002b" +CIDL_SHA256="bc887e08e3d052d8440983a2d6186cd1d92a52345504092f64f4de2e5335a75d" MGD_LINUX_SHA256="93331c1001e3aa506a8c1b83346abba1995e489910bff2c94a86730b96617a34" \ No newline at end of file From d5dace65fa093174a7502e0f6a3dde4ccabe6337 Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 26 Jan 2024 09:54:23 -0600 Subject: [PATCH 37/49] Fix IP pools data migration (#4903) Closes #4875 ## Problem After the IP pools migrations on the dogfood rack, the `default` pool was not marked `is_default=true` for the `oxide` silo when it should have been. ## Diagnosis When checking for silo-scoped default pools overriding a fleet-scoped default, I neglected to require that the silo-scoped defaults in question were non-deleted. This means that if there was a deleted pool with `silo_id=` and `is_default=true`, that would be considered an overriding default and leave us with `is_default=false` on the `default` pool. Well, I can't check `silo_id` and `is_default` on the pools because those columns have been dropped, but there is a deleted pool called `oxide-default` that says in the description it was meant as the default pool for only the `oxide` silo. ``` oot@[fd00:1122:3344:105::3]:32221/omicron> select * from omicron.public.ip_pool; id | name | description | time_created | time_modified | time_deleted | rcgen ---------------------------------------+--------------------+--------------------------------+-------------------------------+-------------------------------+-------------------------------+-------- 1efa49a2-3f3a-43ab-97ac-d38658069c39 | oxide-default | oxide silo-only pool - default | 2023-08-31 05:33:00.11079+00 | 2023-08-31 05:33:00.11079+00 | 2023-08-31 06:03:22.426488+00 | 1 ``` I think we can be pretty confident this is what got us. ## Fix Add `AND time_deleted IS NULL` to the subquery. ## Mitigation in existing systems Already done. 
## Mitigation in existing systems

Already done. Dogfood is the only long-running system where the bad
migration ran, and all I had to do there was use the API to set
`is_default=true` for the (`default` pool, `oxide` silo) link.
---
 nexus/tests/integration_tests/schema.rs | 90 ++++++++++++-------------
 schema/crdb/23.0.0/up4.sql              |  7 +-
 2 files changed, 47 insertions(+), 50 deletions(-)

diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs
index c3ba02d5ce..2d496fcd8e 100644
--- a/nexus/tests/integration_tests/schema.rs
+++ b/nexus/tests/integration_tests/schema.rs
@@ -951,9 +951,11 @@ const SILO1: Uuid = Uuid::from_u128(0x111151F0_5c3d_4647_83b0_8f3515da7be1);
 const SILO2: Uuid = Uuid::from_u128(0x222251F0_5c3d_4647_83b0_8f3515da7be1);

 // "6001" -> "Pool"
+const POOL0: Uuid = Uuid::from_u128(0x00006001_5c3d_4647_83b0_8f3515da7be1);
 const POOL1: Uuid = Uuid::from_u128(0x11116001_5c3d_4647_83b0_8f3515da7be1);
 const POOL2: Uuid = Uuid::from_u128(0x22226001_5c3d_4647_83b0_8f3515da7be1);
 const POOL3: Uuid = Uuid::from_u128(0x33336001_5c3d_4647_83b0_8f3515da7be1);
+const POOL4: Uuid = Uuid::from_u128(0x44446001_5c3d_4647_83b0_8f3515da7be1);

 // "513D" -> "Sled"
 const SLED1: Uuid = Uuid::from_u128(0x1111513d_5c3d_4647_83b0_8f3515da7be1);
@@ -975,9 +977,11 @@ fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> {
         // no corresponding silo.
         client.batch_execute(&format!("INSERT INTO ip_pool
             (id, name, description, time_created, time_modified, time_deleted, rcgen, silo_id, is_default)
         VALUES
+            ('{POOL0}', 'pool2', '', now(), now(), now(), 1, '{SILO2}', true),
             ('{POOL1}', 'pool1', '', now(), now(), NULL, 1, '{SILO1}', true),
             ('{POOL2}', 'pool2', '', now(), now(), NULL, 1, '{SILO2}', false),
-            ('{POOL3}', 'pool3', '', now(), now(), NULL, 1, null, true);
+            ('{POOL3}', 'pool3', '', now(), now(), NULL, 1, null, true),
+            ('{POOL4}', 'pool4', '', now(), now(), NULL, 1, null, false);
         ")).await.expect("Failed to create IP Pool");
     })
 }
@@ -992,56 +996,46 @@ fn after_23_0_0(client: &Client) -> BoxFuture<'_, ()> {
             .expect("Failed to query ip pool resource");
         let ip_pool_resources = process_rows(&rows);

-        assert_eq!(ip_pool_resources.len(), 4);
+        assert_eq!(ip_pool_resources.len(), 6);
+
+        fn assert_row(
+            row: &Vec<ColumnValue>,
+            ip_pool_id: Uuid,
+            silo_id: Uuid,
+            is_default: bool,
+        ) {
+            let type_silo = SqlEnum::from(("ip_pool_resource_type", "silo"));
+            assert_eq!(
+                row,
+                &vec![
+                    ColumnValue::new("ip_pool_id", ip_pool_id),
+                    ColumnValue::new("resource_type", type_silo),
+                    ColumnValue::new("resource_id", silo_id),
+                    ColumnValue::new("is_default", is_default),
+                ],
+            );
+        }

-        let type_silo = SqlEnum::from(("ip_pool_resource_type", "silo"));
+        // pool1 was default on silo1, so gets an entry in the join table
+        // reflecting that
+        assert_row(&ip_pool_resources[0].values, POOL1, SILO1, true);

-        // pool1, which referenced silo1 in the "ip_pool" table, has a newly
-        // created resource.
-        //
-        // The same relationship is true for pool2 / silo2.
-        assert_eq!(
-            ip_pool_resources[0].values,
-            vec![
-                ColumnValue::new("ip_pool_id", POOL1),
-                ColumnValue::new("resource_type", type_silo.clone()),
-                ColumnValue::new("resource_id", SILO1),
-                ColumnValue::new("is_default", true),
-            ],
-        );
-        assert_eq!(
-            ip_pool_resources[1].values,
-            vec![
-                ColumnValue::new("ip_pool_id", POOL2),
-                ColumnValue::new("resource_type", type_silo.clone()),
-                ColumnValue::new("resource_id", SILO2),
-                ColumnValue::new("is_default", false),
-            ],
-        );
+        // pool2 was NOT default on silo2, so its entry in the join table
+        // has is_default = false
+        assert_row(&ip_pool_resources[1].values, POOL2, SILO2, false);

-        // pool3 did not previously have a corresponding silo, so now it's associated
-        // with both silos as a new resource in each.
-        //
-        // Additionally, silo1 already had a default pool (pool1), but silo2 did
-        // not have one. As a result, pool3 becomes the new default pool for silo2.
-        assert_eq!(
-            ip_pool_resources[2].values,
-            vec![
-                ColumnValue::new("ip_pool_id", POOL3),
-                ColumnValue::new("resource_type", type_silo.clone()),
-                ColumnValue::new("resource_id", SILO1),
-                ColumnValue::new("is_default", false),
-            ],
-        );
-        assert_eq!(
-            ip_pool_resources[3].values,
-            vec![
-                ColumnValue::new("ip_pool_id", POOL3),
-                ColumnValue::new("resource_type", type_silo.clone()),
-                ColumnValue::new("resource_id", SILO2),
-                ColumnValue::new("is_default", true),
-            ],
-        );
+        // fleet-scoped pools are a little more complicated
+
+        // pool3 was a fleet-level default, so now it's associated with both
+        // silos. silo1 had its own default pool as well (pool1), so pool3
+        // cannot also be default for silo1. silo2 did not have its own default,
+        // so pool3 is default for silo2.
+        assert_row(&ip_pool_resources[2].values, POOL3, SILO1, false);
+        assert_row(&ip_pool_resources[3].values, POOL3, SILO2, true);
+
+        // fleet-level pool that was not default becomes non-default on all silos
+        assert_row(&ip_pool_resources[4].values, POOL4, SILO1, false);
+        assert_row(&ip_pool_resources[5].values, POOL4, SILO2, false);
     })
 }
diff --git a/schema/crdb/23.0.0/up4.sql b/schema/crdb/23.0.0/up4.sql
index 8fb43f9cf1..2235d0aa01 100644
--- a/schema/crdb/23.0.0/up4.sql
+++ b/schema/crdb/23.0.0/up4.sql
@@ -23,8 +23,11 @@ SELECT
     -- AND NOT EXISTS here causes is_default to be false in row 1 if there is a
     -- conflicting silo default pool. row 2 is inserted in up5.
     p.is_default AND NOT EXISTS (
-        SELECT 1 FROM omicron.public.ip_pool
-        WHERE silo_id = s.id AND is_default
+        SELECT 1
+        FROM omicron.public.ip_pool p0
+        WHERE p0.silo_id = s.id
+        AND p0.is_default
+        AND p0.time_deleted IS NULL
     )
 FROM omicron.public.ip_pool AS p
 -- cross join means we are looking at the cartesian product of all fleet-scoped

From a3a9844994d2c7742918d0815c2242aa0fc8a925 Mon Sep 17 00:00:00 2001
From: David Crespo
Date: Fri, 26 Jan 2024 15:17:13 -0600
Subject: [PATCH 38/49] Better error message when there's no default IP pool
 (#4880)

Closes #4864

This is a bad error message to get when the problem is that there is no
default IP pool configured for your current silo:

```
not found: ip-pool with id "Default pool for current silo"
```

"Default pool for current silo" is not an id, so why would we call it
one? This is better:

```
not found: default IP pool for current silo
```

This PR is just making that possible.
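To make the new shape concrete, here is a simplified, self-contained
sketch of the rendering (the real enum in
`common/src/api/external/error.rs` also carries `ById(Uuid)` and serde
derives, trimmed here for brevity):

```rust
// Trimmed-down model of LookupType, enough to show the message change.
enum LookupType {
    ByName(String),
    ByCompositeId(String),
    /// Object selected by criteria that would be confusing to call an ID.
    ByOther(String),
}

fn not_found_message(type_name: &str, lt: &LookupType) -> String {
    let detail = match lt {
        LookupType::ByName(n) => format!("{} with name \"{}\"", type_name, n),
        LookupType::ByCompositeId(id) => {
            format!("{} with id \"{}\"", type_name, id)
        }
        // ByOther carries the whole human-readable phrase, so neither the
        // type name nor the word "id" appears in the output.
        LookupType::ByOther(msg) => msg.clone(),
    };
    format!("not found: {}", detail)
}

fn main() {
    let lt =
        LookupType::ByOther("default IP pool for current silo".to_string());
    assert_eq!(
        not_found_message("ip-pool", &lt),
        "not found: default IP pool for current silo"
    );
}
```

The HTTP layer then only has to prepend "not found: ", as the diff below
shows.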
--- common/src/api/external/error.rs | 27 +++++++++---------- .../src/authz/policy_test/resource_builder.rs | 4 +-- nexus/db-queries/src/db/datastore/ip_pool.rs | 11 ++------ nexus/tests/integration_tests/instances.rs | 3 +-- 4 files changed, 18 insertions(+), 27 deletions(-) diff --git a/common/src/api/external/error.rs b/common/src/api/external/error.rs index a3876fcac3..d2e062f2e1 100644 --- a/common/src/api/external/error.rs +++ b/common/src/api/external/error.rs @@ -144,11 +144,11 @@ pub enum LookupType { ByName(String), /// a specific id was requested ById(Uuid), - /// a session token was requested - BySessionToken(String), /// a specific id was requested with some composite type /// (caller summarizes it) ByCompositeId(String), + /// object selected by criteria that would be confusing to call an ID + ByOther(String), } impl LookupType { @@ -359,23 +359,22 @@ impl From for HttpError { fn from(error: Error) -> HttpError { match error { Error::ObjectNotFound { type_name: t, lookup_type: lt } => { - // TODO-cleanup is there a better way to express this? - let (lookup_field, lookup_value) = match lt { - LookupType::ByName(name) => ("name", name), - LookupType::ById(id) => ("id", id.to_string()), - LookupType::ByCompositeId(label) => ("id", label), - LookupType::BySessionToken(token) => { - ("session token", token) + let message = match lt { + LookupType::ByName(name) => { + format!("{} with name \"{}\"", t, name) } + LookupType::ById(id) => { + format!("{} with id \"{}\"", t, id) + } + LookupType::ByCompositeId(label) => { + format!("{} with id \"{}\"", t, label) + } + LookupType::ByOther(msg) => msg, }; - let message = format!( - "not found: {} with {} \"{}\"", - t, lookup_field, lookup_value - ); HttpError::for_client_error( Some(String::from("ObjectNotFound")), http::StatusCode::NOT_FOUND, - message, + format!("not found: {}", message), ) } diff --git a/nexus/db-queries/src/authz/policy_test/resource_builder.rs b/nexus/db-queries/src/authz/policy_test/resource_builder.rs index dc18b2e47f..59cb283a95 100644 --- a/nexus/db-queries/src/authz/policy_test/resource_builder.rs +++ b/nexus/db-queries/src/authz/policy_test/resource_builder.rs @@ -92,7 +92,7 @@ impl<'a> ResourceBuilder<'a> { // (e.g., "fleet"). 
                 resource.resource_type().to_string().to_lowercase()
             }
-            LookupType::BySessionToken(_) | LookupType::ByCompositeId(_) => {
+            LookupType::ByCompositeId(_) | LookupType::ByOther(_) => {
                 panic!("test resources must be given names");
             }
         };
@@ -212,7 +212,7 @@ where
             LookupType::ByName(name) => format!("{:?}", name),
             LookupType::ById(id) => format!("id {:?}", id.to_string()),
             LookupType::ByCompositeId(id) => format!("id {:?}", id),
-            LookupType::BySessionToken(_) => {
+            LookupType::ByOther(_) => {
                 unimplemented!()
             }
         };
diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs
index 6d3a95af7d..d316d1adb7 100644
--- a/nexus/db-queries/src/db/datastore/ip_pool.rs
+++ b/nexus/db-queries/src/db/datastore/ip_pool.rs
@@ -134,12 +134,8 @@ impl DataStore {
         //     .authorize(authz::Action::ListChildren, &authz::IP_POOL_LIST)
         //     .await?;

-        // join ip_pool to ip_pool_resource and filter
-
-        // used in both success and error outcomes
-        let lookup_type = LookupType::ByCompositeId(
-            "Default pool for current silo".to_string(),
-        );
+        let lookup_type =
+            LookupType::ByOther("default IP pool for current silo".to_string());

         ip_pool::table
             .inner_join(ip_pool_resource::table)
@@ -161,9 +157,6 @@ impl DataStore {
             )
             .await
             .map_err(|e| {
-                // janky to do this manually, but this is an unusual kind of
-                // lookup in that it is by (silo_id, is_default=true), which is
-                // arguably a composite ID.
                 public_error_from_diesel_lookup(
                     e,
                     ResourceType::IpPool,
diff --git a/nexus/tests/integration_tests/instances.rs b/nexus/tests/integration_tests/instances.rs
index 8d97df6cda..57b731c692 100644
--- a/nexus/tests/integration_tests/instances.rs
+++ b/nexus/tests/integration_tests/instances.rs
@@ -3841,8 +3841,7 @@ async fn test_instance_ephemeral_ip_no_default_pool_error(
     let url = format!("/v1/instances?project={}", PROJECT_NAME);
     let error =
         object_create_error(client, &url, &body, StatusCode::NOT_FOUND).await;
-    let msg = "not found: ip-pool with id \"Default pool for current silo\""
-        .to_string();
+    let msg = "not found: default IP pool for current silo".to_string();
     assert_eq!(error.message, msg);

     // same deal if you specify a pool that doesn't exist

From 80cc00105e82d83cc7e2658dc079382e2a238bd9 Mon Sep 17 00:00:00 2001
From: John Gallagher
Date: Fri, 26 Jan 2024 16:44:14 -0500
Subject: [PATCH 39/49] Serialize blueprints in the database (#4899)

This replaces the in-memory blueprint storage added as a placeholder in
#4804 with cockroachdb-backed tables. Both the tables and related
queries are _heavily_ derived from the similar tables in the inventory
system (particularly serializing omicron zones and their related
properties). The tables are effectively identical as of this PR, but we
opted to keep them separate because we expect them to diverge some over
time (e.g., inventory might start collecting additional per-zone
properties that don't exist for blueprints, such as uptime).

The big exception to "basically the same as inventory" is the
`bp_target` table which tracks the current (and past) target blueprint.
Inserting into this table has some subtleties, and we use a CTE to
check and enforce the invariants. This is the first diesel/CTE I've
written; it's based on other similar CTEs in Nexus, but I'd still
appreciate a particularly careful look there.

Fixes #4793.
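One storage detail worth flagging before the diff: a `Blueprint` tracks
the set of zones in service, but most zones in most blueprints are in
service, so the database stores the (usually small, often empty)
complement in `bp_omicron_zones_not_in_service`. A minimal sketch of
that inversion, with stand-in types (the real code keys zones by `Uuid`
and wraps the rows in diesel-insertable structs):

```rust
use std::collections::BTreeSet;

// Stand-in for a zone id; the real tables use Uuid.
type ZoneId = u64;

/// Rows to persist: every zone the blueprint knows about that is NOT in
/// the (typically much larger) in-service set.
fn zones_not_in_service(
    all_zones: &[ZoneId],
    zones_in_service: &BTreeSet<ZoneId>,
) -> Vec<ZoneId> {
    all_zones
        .iter()
        .copied()
        .filter(|id| !zones_in_service.contains(id))
        .collect()
}

fn main() {
    let all = [1, 2, 3, 4];
    let in_service: BTreeSet<ZoneId> = [1, 2, 4].into_iter().collect();
    // Only zone 3 lands in the "not in service" table.
    assert_eq!(zones_not_in_service(&all, &in_service), vec![3]);
}
```

Loading a blueprint reverses the inversion: start from every zone in the
blueprint and subtract the stored complement.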
--- Cargo.lock | 2 + dev-tools/omdb/src/bin/omdb/nexus.rs | 2 +- nexus/db-model/src/deployment.rs | 263 +++ nexus/db-model/src/inventory.rs | 448 +---- nexus/db-model/src/lib.rs | 3 + nexus/db-model/src/omicron_zone_config.rs | 456 +++++ nexus/db-model/src/schema.rs | 85 +- nexus/db-queries/Cargo.toml | 2 + .../db-queries/src/db/datastore/deployment.rs | 1583 +++++++++++++++++ nexus/db-queries/src/db/datastore/mod.rs | 1 + nexus/deployment/Cargo.toml | 2 +- nexus/deployment/src/blueprint_builder.rs | 23 +- nexus/inventory/src/builder.rs | 26 +- nexus/inventory/src/lib.rs | 2 + nexus/src/app/deployment.rs | 165 +- nexus/src/app/mod.rs | 5 - nexus/src/internal_api/http_entrypoints.rs | 50 +- nexus/types/src/deployment.rs | 37 +- openapi/nexus-internal.json | 46 +- schema/crdb/28.0.0/up1.sql | 7 + schema/crdb/28.0.0/up2.sql | 6 + schema/crdb/28.0.0/up3.sql | 31 + schema/crdb/28.0.0/up4.sql | 13 + schema/crdb/28.0.0/up5.sql | 6 + schema/crdb/28.0.0/up6.sql | 6 + schema/crdb/dbinit.sql | 206 ++- 26 files changed, 2884 insertions(+), 592 deletions(-) create mode 100644 nexus/db-model/src/deployment.rs create mode 100644 nexus/db-model/src/omicron_zone_config.rs create mode 100644 nexus/db-queries/src/db/datastore/deployment.rs create mode 100644 schema/crdb/28.0.0/up1.sql create mode 100644 schema/crdb/28.0.0/up2.sql create mode 100644 schema/crdb/28.0.0/up3.sql create mode 100644 schema/crdb/28.0.0/up4.sql create mode 100644 schema/crdb/28.0.0/up5.sql create mode 100644 schema/crdb/28.0.0/up6.sql diff --git a/Cargo.lock b/Cargo.lock index dd837b9891..b2815d9a1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4283,12 +4283,14 @@ dependencies = [ "http 0.2.11", "hyper 0.14.27", "hyper-rustls 0.26.0", + "illumos-utils", "internal-dns", "ipnetwork", "itertools 0.12.0", "macaddr", "newtype_derive", "nexus-db-model", + "nexus-deployment", "nexus-inventory", "nexus-test-utils", "nexus-types", diff --git a/dev-tools/omdb/src/bin/omdb/nexus.rs b/dev-tools/omdb/src/bin/omdb/nexus.rs index fef069d536..ea89923caa 100644 --- a/dev-tools/omdb/src/bin/omdb/nexus.rs +++ b/dev-tools/omdb/src/bin/omdb/nexus.rs @@ -866,7 +866,7 @@ async fn cmd_nexus_blueprints_target_show( .await .context("fetching target blueprint")?; println!("target blueprint: {}", target.target_id); - println!("set at: {}", target.time_set); + println!("made target at: {}", target.time_made_target); println!("enabled: {}", target.enabled); Ok(()) } diff --git a/nexus/db-model/src/deployment.rs b/nexus/db-model/src/deployment.rs new file mode 100644 index 0000000000..34fe08d78c --- /dev/null +++ b/nexus/db-model/src/deployment.rs @@ -0,0 +1,263 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types for representing the deployed software and configuration in the +//! database + +use crate::inventory::ZoneType; +use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic}; +use crate::schema::{ + blueprint, bp_omicron_zone, bp_omicron_zone_nic, + bp_omicron_zones_not_in_service, bp_sled_omicron_zones, bp_target, +}; +use crate::{ipv6, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8}; +use chrono::{DateTime, Utc}; +use ipnetwork::IpNetwork; +use nexus_types::deployment::BlueprintTarget; +use uuid::Uuid; + +/// See [`nexus_types::deployment::Blueprint`]. 
+#[derive(Queryable, Insertable, Clone, Debug, Selectable)]
+#[diesel(table_name = blueprint)]
+pub struct Blueprint {
+    pub id: Uuid,
+    pub parent_blueprint_id: Option<Uuid>,
+    pub time_created: DateTime<Utc>,
+    pub creator: String,
+    pub comment: String,
+}
+
+impl From<&'_ nexus_types::deployment::Blueprint> for Blueprint {
+    fn from(bp: &'_ nexus_types::deployment::Blueprint) -> Self {
+        Self {
+            id: bp.id,
+            parent_blueprint_id: bp.parent_blueprint_id,
+            time_created: bp.time_created,
+            creator: bp.creator.clone(),
+            comment: bp.comment.clone(),
+        }
+    }
+}
+
+impl From<Blueprint> for nexus_types::deployment::BlueprintMetadata {
+    fn from(value: Blueprint) -> Self {
+        Self {
+            id: value.id,
+            parent_blueprint_id: value.parent_blueprint_id,
+            time_created: value.time_created,
+            creator: value.creator,
+            comment: value.comment,
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::BlueprintTarget`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_target)]
+pub struct BpTarget {
+    pub version: SqlU32,
+    pub blueprint_id: Uuid,
+    pub enabled: bool,
+    pub time_made_target: DateTime<Utc>,
+}
+
+impl BpTarget {
+    pub fn new(version: u32, target: BlueprintTarget) -> Self {
+        Self {
+            version: version.into(),
+            blueprint_id: target.target_id,
+            enabled: target.enabled,
+            time_made_target: target.time_made_target,
+        }
+    }
+}
+
+impl From<BpTarget> for nexus_types::deployment::BlueprintTarget {
+    fn from(value: BpTarget) -> Self {
+        Self {
+            target_id: value.blueprint_id,
+            enabled: value.enabled,
+            time_made_target: value.time_made_target,
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::OmicronZonesConfig`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)]
+#[diesel(table_name = bp_sled_omicron_zones)]
+pub struct BpSledOmicronZones {
+    pub blueprint_id: Uuid,
+    pub sled_id: Uuid,
+    pub generation: Generation,
+}
+
+impl BpSledOmicronZones {
+    pub fn new(
+        blueprint_id: Uuid,
+        sled_id: Uuid,
+        zones_config: &nexus_types::deployment::OmicronZonesConfig,
+    ) -> Self {
+        Self {
+            blueprint_id,
+            sled_id,
+            generation: Generation(zones_config.generation),
+        }
+    }
+}
+
+/// See [`nexus_types::deployment::OmicronZoneConfig`].
+#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = bp_omicron_zone)] +pub struct BpOmicronZone { + pub blueprint_id: Uuid, + pub sled_id: Uuid, + pub id: Uuid, + pub underlay_address: ipv6::Ipv6Addr, + pub zone_type: ZoneType, + pub primary_service_ip: ipv6::Ipv6Addr, + pub primary_service_port: SqlU16, + pub second_service_ip: Option, + pub second_service_port: Option, + pub dataset_zpool_name: Option, + pub bp_nic_id: Option, + pub dns_gz_address: Option, + pub dns_gz_address_index: Option, + pub ntp_ntp_servers: Option>, + pub ntp_dns_servers: Option>, + pub ntp_domain: Option, + pub nexus_external_tls: Option, + pub nexus_external_dns_servers: Option>, + pub snat_ip: Option, + pub snat_first_port: Option, + pub snat_last_port: Option, +} + +impl BpOmicronZone { + pub fn new( + blueprint_id: Uuid, + sled_id: Uuid, + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> Result { + let zone = OmicronZone::new(sled_id, zone)?; + Ok(Self { + blueprint_id, + sled_id: zone.sled_id, + id: zone.id, + underlay_address: zone.underlay_address, + zone_type: zone.zone_type, + primary_service_ip: zone.primary_service_ip, + primary_service_port: zone.primary_service_port, + second_service_ip: zone.second_service_ip, + second_service_port: zone.second_service_port, + dataset_zpool_name: zone.dataset_zpool_name, + bp_nic_id: zone.nic_id, + dns_gz_address: zone.dns_gz_address, + dns_gz_address_index: zone.dns_gz_address_index, + ntp_ntp_servers: zone.ntp_ntp_servers, + ntp_dns_servers: zone.ntp_dns_servers, + ntp_domain: zone.ntp_domain, + nexus_external_tls: zone.nexus_external_tls, + nexus_external_dns_servers: zone.nexus_external_dns_servers, + snat_ip: zone.snat_ip, + snat_first_port: zone.snat_first_port, + snat_last_port: zone.snat_last_port, + }) + } + + pub fn into_omicron_zone_config( + self, + nic_row: Option, + ) -> Result { + let zone = OmicronZone { + sled_id: self.sled_id, + id: self.id, + underlay_address: self.underlay_address, + zone_type: self.zone_type, + primary_service_ip: self.primary_service_ip, + primary_service_port: self.primary_service_port, + second_service_ip: self.second_service_ip, + second_service_port: self.second_service_port, + dataset_zpool_name: self.dataset_zpool_name, + nic_id: self.bp_nic_id, + dns_gz_address: self.dns_gz_address, + dns_gz_address_index: self.dns_gz_address_index, + ntp_ntp_servers: self.ntp_ntp_servers, + ntp_dns_servers: self.ntp_dns_servers, + ntp_domain: self.ntp_domain, + nexus_external_tls: self.nexus_external_tls, + nexus_external_dns_servers: self.nexus_external_dns_servers, + snat_ip: self.snat_ip, + snat_first_port: self.snat_first_port, + snat_last_port: self.snat_last_port, + }; + zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from)) + } +} + +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = bp_omicron_zone_nic)] +pub struct BpOmicronZoneNic { + blueprint_id: Uuid, + pub id: Uuid, + name: Name, + ip: IpNetwork, + mac: MacAddr, + subnet: IpNetwork, + vni: SqlU32, + is_primary: bool, + slot: SqlU8, +} + +impl From for OmicronZoneNic { + fn from(value: BpOmicronZoneNic) -> Self { + OmicronZoneNic { + id: value.id, + name: value.name, + ip: value.ip, + mac: value.mac, + subnet: value.subnet, + vni: value.vni, + is_primary: value.is_primary, + slot: value.slot, + } + } +} + +impl BpOmicronZoneNic { + pub fn new( + blueprint_id: Uuid, + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> Result, anyhow::Error> { + let zone_nic = OmicronZoneNic::new(zone)?; + 
Ok(zone_nic.map(|nic| Self { + blueprint_id, + id: nic.id, + name: nic.name, + ip: nic.ip, + mac: nic.mac, + subnet: nic.subnet, + vni: nic.vni, + is_primary: nic.is_primary, + slot: nic.slot, + })) + } + + pub fn into_network_interface_for_zone( + self, + zone_id: Uuid, + ) -> Result { + let zone_nic = OmicronZoneNic::from(self); + zone_nic.into_network_interface_for_zone(zone_id) + } +} + +/// Nexus wants to think in terms of "zones in service", but since most zones of +/// most blueprints are in service, we store the zones NOT in service in the +/// database. We handle that inversion internally in the db-queries layer. +#[derive(Queryable, Clone, Debug, Selectable, Insertable)] +#[diesel(table_name = bp_omicron_zones_not_in_service)] +pub struct BpOmicronZoneNotInService { + pub blueprint_id: Uuid, + pub bp_omicron_zone_id: Uuid, +} diff --git a/nexus/db-model/src/inventory.rs b/nexus/db-model/src/inventory.rs index 17d74be0aa..d8314f97b8 100644 --- a/nexus/db-model/src/inventory.rs +++ b/nexus/db-model/src/inventory.rs @@ -4,6 +4,7 @@ //! Types for representing the hardware/software inventory in the database +use crate::omicron_zone_config::{OmicronZone, OmicronZoneNic}; use crate::schema::{ hw_baseboard_id, inv_caboose, inv_collection, inv_collection_error, inv_omicron_zone, inv_omicron_zone_nic, inv_root_of_trust, @@ -14,8 +15,7 @@ use crate::{ impl_enum_type, ipv6, ByteCount, Generation, MacAddr, Name, SqlU16, SqlU32, SqlU8, }; -use anyhow::{anyhow, ensure}; -use anyhow::{bail, Context}; +use anyhow::anyhow; use chrono::DateTime; use chrono::Utc; use diesel::backend::Backend; @@ -26,10 +26,8 @@ use diesel::serialize::ToSql; use diesel::{serialize, sql_types}; use ipnetwork::IpNetwork; use nexus_types::inventory::{ - BaseboardId, Caboose, Collection, OmicronZoneType, PowerState, RotPage, - RotSlot, + BaseboardId, Caboose, Collection, PowerState, RotPage, RotSlot, }; -use std::net::SocketAddrV6; use uuid::Uuid; // See [`nexus_types::inventory::PowerState`]. 
@@ -750,165 +748,29 @@ impl InvOmicronZone { sled_id: Uuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result { - let id = zone.id; - let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address); - let mut nic_id = None; - let mut dns_gz_address = None; - let mut dns_gz_address_index = None; - let mut ntp_ntp_servers = None; - let mut ntp_dns_servers = None; - let mut ntp_ntp_domain = None; - let mut nexus_external_tls = None; - let mut nexus_external_dns_servers = None; - let mut snat_ip = None; - let mut snat_first_port = None; - let mut snat_last_port = None; - let mut second_service_ip = None; - let mut second_service_port = None; - - let (zone_type, primary_service_sockaddr_str, dataset) = match &zone - .zone_type - { - OmicronZoneType::BoundaryNtp { - address, - ntp_servers, - dns_servers, - domain, - nic, - snat_cfg, - } => { - ntp_ntp_servers = Some(ntp_servers.clone()); - ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain = domain.clone(); - snat_ip = Some(IpNetwork::from(snat_cfg.ip)); - snat_first_port = Some(SqlU16::from(snat_cfg.first_port)); - snat_last_port = Some(SqlU16::from(snat_cfg.last_port)); - nic_id = Some(nic.id); - (ZoneType::BoundaryNtp, address, None) - } - OmicronZoneType::Clickhouse { address, dataset } => { - (ZoneType::Clickhouse, address, Some(dataset)) - } - OmicronZoneType::ClickhouseKeeper { address, dataset } => { - (ZoneType::ClickhouseKeeper, address, Some(dataset)) - } - OmicronZoneType::CockroachDb { address, dataset } => { - (ZoneType::CockroachDb, address, Some(dataset)) - } - OmicronZoneType::Crucible { address, dataset } => { - (ZoneType::Crucible, address, Some(dataset)) - } - OmicronZoneType::CruciblePantry { address } => { - (ZoneType::CruciblePantry, address, None) - } - OmicronZoneType::ExternalDns { - dataset, - http_address, - dns_address, - nic, - } => { - nic_id = Some(nic.id); - let sockaddr = dns_address - .parse::() - .with_context(|| { - format!( - "parsing address for external DNS server {:?}", - dns_address - ) - })?; - second_service_ip = Some(sockaddr.ip()); - second_service_port = Some(SqlU16::from(sockaddr.port())); - (ZoneType::ExternalDns, http_address, Some(dataset)) - } - OmicronZoneType::InternalDns { - dataset, - http_address, - dns_address, - gz_address, - gz_address_index, - } => { - dns_gz_address = Some(ipv6::Ipv6Addr::from(gz_address)); - dns_gz_address_index = Some(SqlU32::from(*gz_address_index)); - let sockaddr = dns_address - .parse::() - .with_context(|| { - format!( - "parsing address for internal DNS server {:?}", - dns_address - ) - })?; - second_service_ip = Some(sockaddr.ip()); - second_service_port = Some(SqlU16::from(sockaddr.port())); - (ZoneType::InternalDns, http_address, Some(dataset)) - } - OmicronZoneType::InternalNtp { - address, - ntp_servers, - dns_servers, - domain, - } => { - ntp_ntp_servers = Some(ntp_servers.clone()); - ntp_dns_servers = Some(dns_servers.clone()); - ntp_ntp_domain = domain.clone(); - (ZoneType::InternalNtp, address, None) - } - OmicronZoneType::Nexus { - internal_address, - external_ip, - nic, - external_tls, - external_dns_servers, - } => { - nic_id = Some(nic.id); - nexus_external_tls = Some(*external_tls); - nexus_external_dns_servers = Some(external_dns_servers.clone()); - second_service_ip = Some(*external_ip); - (ZoneType::Nexus, internal_address, None) - } - OmicronZoneType::Oximeter { address } => { - (ZoneType::Oximeter, address, None) - } - }; - - let dataset_zpool_name = - dataset.map(|d| d.pool_name.as_str().to_string()); - let 
primary_service_sockaddr = primary_service_sockaddr_str - .parse::() - .with_context(|| { - format!( - "parsing socket address for primary IP {:?}", - primary_service_sockaddr_str - ) - })?; - let (primary_service_ip, primary_service_port) = ( - ipv6::Ipv6Addr::from(*primary_service_sockaddr.ip()), - SqlU16::from(primary_service_sockaddr.port()), - ); - - Ok(InvOmicronZone { + let zone = OmicronZone::new(sled_id, zone)?; + Ok(Self { inv_collection_id, - sled_id, - id, - underlay_address, - zone_type, - primary_service_ip, - primary_service_port, - second_service_ip: second_service_ip.map(IpNetwork::from), - second_service_port, - dataset_zpool_name, - nic_id, - dns_gz_address, - dns_gz_address_index, - ntp_ntp_servers, - ntp_dns_servers: ntp_dns_servers - .map(|list| list.into_iter().map(IpNetwork::from).collect()), - ntp_domain: ntp_ntp_domain, - nexus_external_tls, - nexus_external_dns_servers: nexus_external_dns_servers - .map(|list| list.into_iter().map(IpNetwork::from).collect()), - snat_ip, - snat_first_port, - snat_last_port, + sled_id: zone.sled_id, + id: zone.id, + underlay_address: zone.underlay_address, + zone_type: zone.zone_type, + primary_service_ip: zone.primary_service_ip, + primary_service_port: zone.primary_service_port, + second_service_ip: zone.second_service_ip, + second_service_port: zone.second_service_port, + dataset_zpool_name: zone.dataset_zpool_name, + nic_id: zone.nic_id, + dns_gz_address: zone.dns_gz_address, + dns_gz_address_index: zone.dns_gz_address_index, + ntp_ntp_servers: zone.ntp_ntp_servers, + ntp_dns_servers: zone.ntp_dns_servers, + ntp_domain: zone.ntp_domain, + nexus_external_tls: zone.nexus_external_tls, + nexus_external_dns_servers: zone.nexus_external_dns_servers, + snat_ip: zone.snat_ip, + snat_first_port: zone.snat_first_port, + snat_last_port: zone.snat_last_port, }) } @@ -916,169 +778,29 @@ impl InvOmicronZone { self, nic_row: Option, ) -> Result { - let address = SocketAddrV6::new( - std::net::Ipv6Addr::from(self.primary_service_ip), - *self.primary_service_port, - 0, - 0, - ) - .to_string(); - - // Assemble a value that we can use to extract the NIC _if necessary_ - // and report an error if it was needed but not found. - // - // Any error here should be impossible. By the time we get here, the - // caller should have provided `nic_row` iff there's a corresponding - // `nic_id` in this row, and the ids should match up. And whoever - // created this row ought to have provided a nic_id iff this type of - // zone needs a NIC. This last issue is not under our control, though, - // so we definitely want to handle that as an operational error. The - // others could arguably be programmer errors (i.e., we could `assert`), - // but it seems excessive to crash here. - // - // Note that we immediately return for any of the caller errors here. - // For the other error, we will return only later, if some code path - // below tries to use `nic` when it's not present. - let nic = match (self.nic_id, nic_row) { - (Some(expected_id), Some(nic_row)) => { - ensure!(expected_id == nic_row.id, "caller provided wrong NIC"); - Ok(nic_row.into_network_interface_for_zone(self.id)?) - } - (None, None) => Err(anyhow!( - "expected zone to have an associated NIC, but it doesn't" - )), - (Some(_), None) => bail!("caller provided no NIC"), - (None, Some(_)) => bail!("caller unexpectedly provided a NIC"), - }; - - // Similarly, assemble a value that we can use to extract the dataset, - // if necessary. We only return this error if code below tries to use - // this value. 
- let dataset = self - .dataset_zpool_name - .map(|zpool_name| -> Result<_, anyhow::Error> { - Ok(nexus_types::inventory::OmicronZoneDataset { - pool_name: zpool_name.parse().map_err(|e| { - anyhow!("parsing zpool name {:?}: {}", zpool_name, e) - })?, - }) - }) - .transpose()? - .ok_or_else(|| anyhow!("expected dataset zpool name, found none")); - - // Do the same for the DNS server address. - let dns_address = - match (self.second_service_ip, self.second_service_port) { - (Some(dns_ip), Some(dns_port)) => { - Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port) - .to_string()) - } - _ => Err(anyhow!( - "expected second service IP and port, \ - found one missing" - )), - }; - - // Do the same for NTP zone properties. - let ntp_dns_servers = self - .ntp_dns_servers - .ok_or_else(|| anyhow!("expected list of DNS servers, found null")) - .map(|list| { - list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect() - }); - let ntp_ntp_servers = - self.ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp_servers")); - - let zone_type = match self.zone_type { - ZoneType::BoundaryNtp => { - let snat_cfg = match ( - self.snat_ip, - self.snat_first_port, - self.snat_last_port, - ) { - (Some(ip), Some(first_port), Some(last_port)) => { - nexus_types::inventory::SourceNatConfig { - ip: ip.ip(), - first_port: *first_port, - last_port: *last_port, - } - } - _ => bail!( - "expected non-NULL snat properties, \ - found at least one NULL" - ), - }; - OmicronZoneType::BoundaryNtp { - address, - dns_servers: ntp_dns_servers?, - domain: self.ntp_domain, - nic: nic?, - ntp_servers: ntp_ntp_servers?, - snat_cfg, - } - } - ZoneType::Clickhouse => { - OmicronZoneType::Clickhouse { address, dataset: dataset? } - } - ZoneType::ClickhouseKeeper => { - OmicronZoneType::ClickhouseKeeper { address, dataset: dataset? } - } - ZoneType::CockroachDb => { - OmicronZoneType::CockroachDb { address, dataset: dataset? } - } - ZoneType::Crucible => { - OmicronZoneType::Crucible { address, dataset: dataset? } - } - ZoneType::CruciblePantry => { - OmicronZoneType::CruciblePantry { address } - } - ZoneType::ExternalDns => OmicronZoneType::ExternalDns { - dataset: dataset?, - dns_address: dns_address?, - http_address: address, - nic: nic?, - }, - ZoneType::InternalDns => OmicronZoneType::InternalDns { - dataset: dataset?, - dns_address: dns_address?, - http_address: address, - gz_address: *self.dns_gz_address.ok_or_else(|| { - anyhow!("expected dns_gz_address, found none") - })?, - gz_address_index: *self.dns_gz_address_index.ok_or_else( - || anyhow!("expected dns_gz_address_index, found none"), - )?, - }, - ZoneType::InternalNtp => OmicronZoneType::InternalNtp { - address, - dns_servers: ntp_dns_servers?, - domain: self.ntp_domain, - ntp_servers: ntp_ntp_servers?, - }, - ZoneType::Nexus => OmicronZoneType::Nexus { - internal_address: address, - nic: nic?, - external_tls: self - .nexus_external_tls - .ok_or_else(|| anyhow!("expected 'external_tls'"))?, - external_ip: self - .second_service_ip - .ok_or_else(|| anyhow!("expected second service IP"))? - .ip(), - external_dns_servers: self - .nexus_external_dns_servers - .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))? 
- .into_iter() - .map(|i| i.ip()) - .collect(), - }, - ZoneType::Oximeter => OmicronZoneType::Oximeter { address }, - }; - Ok(nexus_types::inventory::OmicronZoneConfig { + let zone = OmicronZone { + sled_id: self.sled_id, id: self.id, - underlay_address: std::net::Ipv6Addr::from(self.underlay_address), - zone_type, - }) + underlay_address: self.underlay_address, + zone_type: self.zone_type, + primary_service_ip: self.primary_service_ip, + primary_service_port: self.primary_service_port, + second_service_ip: self.second_service_ip, + second_service_port: self.second_service_port, + dataset_zpool_name: self.dataset_zpool_name, + nic_id: self.nic_id, + dns_gz_address: self.dns_gz_address, + dns_gz_address_index: self.dns_gz_address_index, + ntp_ntp_servers: self.ntp_ntp_servers, + ntp_dns_servers: self.ntp_dns_servers, + ntp_domain: self.ntp_domain, + nexus_external_tls: self.nexus_external_tls, + nexus_external_dns_servers: self.nexus_external_dns_servers, + snat_ip: self.snat_ip, + snat_first_port: self.snat_first_port, + snat_last_port: self.snat_last_port, + }; + zone.into_omicron_zone_config(nic_row.map(OmicronZoneNic::from)) } } @@ -1096,63 +818,45 @@ pub struct InvOmicronZoneNic { slot: SqlU8, } +impl From for OmicronZoneNic { + fn from(value: InvOmicronZoneNic) -> Self { + OmicronZoneNic { + id: value.id, + name: value.name, + ip: value.ip, + mac: value.mac, + subnet: value.subnet, + vni: value.vni, + is_primary: value.is_primary, + slot: value.slot, + } + } +} + impl InvOmicronZoneNic { pub fn new( inv_collection_id: Uuid, zone: &nexus_types::inventory::OmicronZoneConfig, ) -> Result, anyhow::Error> { - match &zone.zone_type { - OmicronZoneType::ExternalDns { nic, .. } - | OmicronZoneType::BoundaryNtp { nic, .. } - | OmicronZoneType::Nexus { nic, .. } => { - // We do not bother storing the NIC's kind and associated id - // because it should be inferrable from the other information - // that we have. Verify that here. 
- ensure!( - matches!( - nic.kind, - nexus_types::inventory::NetworkInterfaceKind::Service( - id - ) if id == zone.id - ), - "expected zone's NIC kind to be \"service\" and the \ - id to match the zone's id ({})", - zone.id - ); - - Ok(Some(InvOmicronZoneNic { - inv_collection_id, - id: nic.id, - name: Name::from(nic.name.clone()), - ip: IpNetwork::from(nic.ip), - mac: MacAddr::from(nic.mac), - subnet: IpNetwork::from(nic.subnet.clone()), - vni: SqlU32::from(u32::from(nic.vni)), - is_primary: nic.primary, - slot: SqlU8::from(nic.slot), - })) - } - _ => Ok(None), - } + let zone_nic = OmicronZoneNic::new(zone)?; + Ok(zone_nic.map(|nic| Self { + inv_collection_id, + id: nic.id, + name: nic.name, + ip: nic.ip, + mac: nic.mac, + subnet: nic.subnet, + vni: nic.vni, + is_primary: nic.is_primary, + slot: nic.slot, + })) } pub fn into_network_interface_for_zone( self, zone_id: Uuid, ) -> Result { - Ok(nexus_types::inventory::NetworkInterface { - id: self.id, - ip: self.ip.ip(), - kind: nexus_types::inventory::NetworkInterfaceKind::Service( - zone_id, - ), - mac: *self.mac, - name: self.name.into(), - primary: self.is_primary, - slot: *self.slot, - vni: omicron_common::api::external::Vni::try_from(*self.vni) - .context("parsing VNI")?, - subnet: self.subnet.into(), - }) + let zone_nic = OmicronZoneNic::from(self); + zone_nic.into_network_interface_for_zone(zone_id) } } diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 5c0a68c253..7fa95822a7 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -52,7 +52,9 @@ mod switch_port; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. +mod deployment; mod ipv4_nat_entry; +mod omicron_zone_config; pub mod queries; mod quota; mod rack; @@ -114,6 +116,7 @@ pub use console_session::*; pub use dataset::*; pub use dataset_kind::*; pub use db_metadata::*; +pub use deployment::*; pub use device_auth::*; pub use digest::*; pub use disk::*; diff --git a/nexus/db-model/src/omicron_zone_config.rs b/nexus/db-model/src/omicron_zone_config.rs new file mode 100644 index 0000000000..f4726ccd92 --- /dev/null +++ b/nexus/db-model/src/omicron_zone_config.rs @@ -0,0 +1,456 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Types for sharing nontrivial conversions between various `OmicronZoneConfig` +//! database serializations and the corresponding Nexus/sled-agent type +//! +//! Both inventory and deployment have nearly-identical tables to serialize +//! `OmicronZoneConfigs` that are collected or generated, respectively. We +//! expect those tables to diverge over time (e.g., inventory may start +//! collecting extra metadata like uptime). This module provides conversion +//! helpers for the parts of those tables that are common between the two. 
+ +use std::net::SocketAddrV6; + +use crate::inventory::ZoneType; +use crate::{ipv6, MacAddr, Name, SqlU16, SqlU32, SqlU8}; +use anyhow::{anyhow, bail, ensure, Context}; +use ipnetwork::IpNetwork; +use nexus_types::inventory::OmicronZoneType; +use uuid::Uuid; + +#[derive(Debug)] +pub(crate) struct OmicronZone { + pub(crate) sled_id: Uuid, + pub(crate) id: Uuid, + pub(crate) underlay_address: ipv6::Ipv6Addr, + pub(crate) zone_type: ZoneType, + pub(crate) primary_service_ip: ipv6::Ipv6Addr, + pub(crate) primary_service_port: SqlU16, + pub(crate) second_service_ip: Option, + pub(crate) second_service_port: Option, + pub(crate) dataset_zpool_name: Option, + pub(crate) nic_id: Option, + pub(crate) dns_gz_address: Option, + pub(crate) dns_gz_address_index: Option, + pub(crate) ntp_ntp_servers: Option>, + pub(crate) ntp_dns_servers: Option>, + pub(crate) ntp_domain: Option, + pub(crate) nexus_external_tls: Option, + pub(crate) nexus_external_dns_servers: Option>, + pub(crate) snat_ip: Option, + pub(crate) snat_first_port: Option, + pub(crate) snat_last_port: Option, +} + +impl OmicronZone { + pub(crate) fn new( + sled_id: Uuid, + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> anyhow::Result { + let id = zone.id; + let underlay_address = ipv6::Ipv6Addr::from(zone.underlay_address); + let mut nic_id = None; + let mut dns_gz_address = None; + let mut dns_gz_address_index = None; + let mut ntp_ntp_servers = None; + let mut ntp_dns_servers = None; + let mut ntp_ntp_domain = None; + let mut nexus_external_tls = None; + let mut nexus_external_dns_servers = None; + let mut snat_ip = None; + let mut snat_first_port = None; + let mut snat_last_port = None; + let mut second_service_ip = None; + let mut second_service_port = None; + + let (zone_type, primary_service_sockaddr_str, dataset) = match &zone + .zone_type + { + OmicronZoneType::BoundaryNtp { + address, + ntp_servers, + dns_servers, + domain, + nic, + snat_cfg, + } => { + ntp_ntp_servers = Some(ntp_servers.clone()); + ntp_dns_servers = Some(dns_servers.clone()); + ntp_ntp_domain = domain.clone(); + snat_ip = Some(IpNetwork::from(snat_cfg.ip)); + snat_first_port = Some(SqlU16::from(snat_cfg.first_port)); + snat_last_port = Some(SqlU16::from(snat_cfg.last_port)); + nic_id = Some(nic.id); + (ZoneType::BoundaryNtp, address, None) + } + OmicronZoneType::Clickhouse { address, dataset } => { + (ZoneType::Clickhouse, address, Some(dataset)) + } + OmicronZoneType::ClickhouseKeeper { address, dataset } => { + (ZoneType::ClickhouseKeeper, address, Some(dataset)) + } + OmicronZoneType::CockroachDb { address, dataset } => { + (ZoneType::CockroachDb, address, Some(dataset)) + } + OmicronZoneType::Crucible { address, dataset } => { + (ZoneType::Crucible, address, Some(dataset)) + } + OmicronZoneType::CruciblePantry { address } => { + (ZoneType::CruciblePantry, address, None) + } + OmicronZoneType::ExternalDns { + dataset, + http_address, + dns_address, + nic, + } => { + nic_id = Some(nic.id); + let sockaddr = dns_address + .parse::() + .with_context(|| { + format!( + "parsing address for external DNS server {:?}", + dns_address + ) + })?; + second_service_ip = Some(sockaddr.ip()); + second_service_port = Some(SqlU16::from(sockaddr.port())); + (ZoneType::ExternalDns, http_address, Some(dataset)) + } + OmicronZoneType::InternalDns { + dataset, + http_address, + dns_address, + gz_address, + gz_address_index, + } => { + dns_gz_address = Some(ipv6::Ipv6Addr::from(gz_address)); + dns_gz_address_index = Some(SqlU32::from(*gz_address_index)); + let sockaddr = 
dns_address + .parse::() + .with_context(|| { + format!( + "parsing address for internal DNS server {:?}", + dns_address + ) + })?; + second_service_ip = Some(sockaddr.ip()); + second_service_port = Some(SqlU16::from(sockaddr.port())); + (ZoneType::InternalDns, http_address, Some(dataset)) + } + OmicronZoneType::InternalNtp { + address, + ntp_servers, + dns_servers, + domain, + } => { + ntp_ntp_servers = Some(ntp_servers.clone()); + ntp_dns_servers = Some(dns_servers.clone()); + ntp_ntp_domain = domain.clone(); + (ZoneType::InternalNtp, address, None) + } + OmicronZoneType::Nexus { + internal_address, + external_ip, + nic, + external_tls, + external_dns_servers, + } => { + nic_id = Some(nic.id); + nexus_external_tls = Some(*external_tls); + nexus_external_dns_servers = Some(external_dns_servers.clone()); + second_service_ip = Some(*external_ip); + (ZoneType::Nexus, internal_address, None) + } + OmicronZoneType::Oximeter { address } => { + (ZoneType::Oximeter, address, None) + } + }; + + let dataset_zpool_name = + dataset.map(|d| d.pool_name.as_str().to_string()); + let primary_service_sockaddr = primary_service_sockaddr_str + .parse::() + .with_context(|| { + format!( + "parsing socket address for primary IP {:?}", + primary_service_sockaddr_str + ) + })?; + let (primary_service_ip, primary_service_port) = ( + ipv6::Ipv6Addr::from(*primary_service_sockaddr.ip()), + SqlU16::from(primary_service_sockaddr.port()), + ); + + Ok(Self { + sled_id, + id, + underlay_address, + zone_type, + primary_service_ip, + primary_service_port, + second_service_ip: second_service_ip.map(IpNetwork::from), + second_service_port, + dataset_zpool_name, + nic_id, + dns_gz_address, + dns_gz_address_index, + ntp_ntp_servers, + ntp_dns_servers: ntp_dns_servers + .map(|list| list.into_iter().map(IpNetwork::from).collect()), + ntp_domain: ntp_ntp_domain, + nexus_external_tls, + nexus_external_dns_servers: nexus_external_dns_servers + .map(|list| list.into_iter().map(IpNetwork::from).collect()), + snat_ip, + snat_first_port, + snat_last_port, + }) + } + + pub(crate) fn into_omicron_zone_config( + self, + nic_row: Option, + ) -> anyhow::Result { + let address = SocketAddrV6::new( + std::net::Ipv6Addr::from(self.primary_service_ip), + *self.primary_service_port, + 0, + 0, + ) + .to_string(); + + // Assemble a value that we can use to extract the NIC _if necessary_ + // and report an error if it was needed but not found. + // + // Any error here should be impossible. By the time we get here, the + // caller should have provided `nic_row` iff there's a corresponding + // `nic_id` in this row, and the ids should match up. And whoever + // created this row ought to have provided a nic_id iff this type of + // zone needs a NIC. This last issue is not under our control, though, + // so we definitely want to handle that as an operational error. The + // others could arguably be programmer errors (i.e., we could `assert`), + // but it seems excessive to crash here. + // + // Note that we immediately return for any of the caller errors here. + // For the other error, we will return only later, if some code path + // below tries to use `nic` when it's not present. + let nic = match (self.nic_id, nic_row) { + (Some(expected_id), Some(nic_row)) => { + ensure!(expected_id == nic_row.id, "caller provided wrong NIC"); + Ok(nic_row.into_network_interface_for_zone(self.id)?) + } + // We don't expect and don't have a NIC. This is reasonable, so we + // don't `bail!` like we do in the next two cases, but we also + // _don't have a NIC_. 
Put an error into `nic`, and then if we land + // in a zone below that expects one, we'll fail then. + (None, None) => Err(anyhow!( + "expected zone to have an associated NIC, but it doesn't" + )), + (Some(_), None) => bail!("caller provided no NIC"), + (None, Some(_)) => bail!("caller unexpectedly provided a NIC"), + }; + + // Similarly, assemble a value that we can use to extract the dataset, + // if necessary. We only return this error if code below tries to use + // this value. + let dataset = self + .dataset_zpool_name + .map(|zpool_name| -> Result<_, anyhow::Error> { + Ok(nexus_types::inventory::OmicronZoneDataset { + pool_name: zpool_name.parse().map_err(|e| { + anyhow!("parsing zpool name {:?}: {}", zpool_name, e) + })?, + }) + }) + .transpose()? + .ok_or_else(|| anyhow!("expected dataset zpool name, found none")); + + // Do the same for the DNS server address. + let dns_address = + match (self.second_service_ip, self.second_service_port) { + (Some(dns_ip), Some(dns_port)) => { + Ok(std::net::SocketAddr::new(dns_ip.ip(), *dns_port) + .to_string()) + } + _ => Err(anyhow!( + "expected second service IP and port, \ + found one missing" + )), + }; + + // Do the same for NTP zone properties. + let ntp_dns_servers = self + .ntp_dns_servers + .ok_or_else(|| anyhow!("expected list of DNS servers, found null")) + .map(|list| { + list.into_iter().map(|ipnetwork| ipnetwork.ip()).collect() + }); + let ntp_ntp_servers = + self.ntp_ntp_servers.ok_or_else(|| anyhow!("expected ntp_servers")); + + let zone_type = match self.zone_type { + ZoneType::BoundaryNtp => { + let snat_cfg = match ( + self.snat_ip, + self.snat_first_port, + self.snat_last_port, + ) { + (Some(ip), Some(first_port), Some(last_port)) => { + nexus_types::inventory::SourceNatConfig { + ip: ip.ip(), + first_port: *first_port, + last_port: *last_port, + } + } + _ => bail!( + "expected non-NULL snat properties, \ + found at least one NULL" + ), + }; + OmicronZoneType::BoundaryNtp { + address, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + nic: nic?, + ntp_servers: ntp_ntp_servers?, + snat_cfg, + } + } + ZoneType::Clickhouse => { + OmicronZoneType::Clickhouse { address, dataset: dataset? } + } + ZoneType::ClickhouseKeeper => { + OmicronZoneType::ClickhouseKeeper { address, dataset: dataset? } + } + ZoneType::CockroachDb => { + OmicronZoneType::CockroachDb { address, dataset: dataset? } + } + ZoneType::Crucible => { + OmicronZoneType::Crucible { address, dataset: dataset? } + } + ZoneType::CruciblePantry => { + OmicronZoneType::CruciblePantry { address } + } + ZoneType::ExternalDns => OmicronZoneType::ExternalDns { + dataset: dataset?, + dns_address: dns_address?, + http_address: address, + nic: nic?, + }, + ZoneType::InternalDns => OmicronZoneType::InternalDns { + dataset: dataset?, + dns_address: dns_address?, + http_address: address, + gz_address: *self.dns_gz_address.ok_or_else(|| { + anyhow!("expected dns_gz_address, found none") + })?, + gz_address_index: *self.dns_gz_address_index.ok_or_else( + || anyhow!("expected dns_gz_address_index, found none"), + )?, + }, + ZoneType::InternalNtp => OmicronZoneType::InternalNtp { + address, + dns_servers: ntp_dns_servers?, + domain: self.ntp_domain, + ntp_servers: ntp_ntp_servers?, + }, + ZoneType::Nexus => OmicronZoneType::Nexus { + internal_address: address, + nic: nic?, + external_tls: self + .nexus_external_tls + .ok_or_else(|| anyhow!("expected 'external_tls'"))?, + external_ip: self + .second_service_ip + .ok_or_else(|| anyhow!("expected second service IP"))? 
+ .ip(), + external_dns_servers: self + .nexus_external_dns_servers + .ok_or_else(|| anyhow!("expected 'external_dns_servers'"))? + .into_iter() + .map(|i| i.ip()) + .collect(), + }, + ZoneType::Oximeter => OmicronZoneType::Oximeter { address }, + }; + Ok(nexus_types::inventory::OmicronZoneConfig { + id: self.id, + underlay_address: std::net::Ipv6Addr::from(self.underlay_address), + zone_type, + }) + } +} + +#[derive(Debug)] +pub(crate) struct OmicronZoneNic { + pub(crate) id: Uuid, + pub(crate) name: Name, + pub(crate) ip: IpNetwork, + pub(crate) mac: MacAddr, + pub(crate) subnet: IpNetwork, + pub(crate) vni: SqlU32, + pub(crate) is_primary: bool, + pub(crate) slot: SqlU8, +} + +impl OmicronZoneNic { + pub(crate) fn new( + zone: &nexus_types::inventory::OmicronZoneConfig, + ) -> anyhow::Result> { + match &zone.zone_type { + OmicronZoneType::ExternalDns { nic, .. } + | OmicronZoneType::BoundaryNtp { nic, .. } + | OmicronZoneType::Nexus { nic, .. } => { + // We do not bother storing the NIC's kind and associated id + // because it should be inferrable from the other information + // that we have. Verify that here. + ensure!( + matches!( + nic.kind, + nexus_types::inventory::NetworkInterfaceKind::Service( + id + ) if id == zone.id + ), + "expected zone's NIC kind to be \"service\" and the \ + id to match the zone's id ({})", + zone.id + ); + + Ok(Some(Self { + id: nic.id, + name: Name::from(nic.name.clone()), + ip: IpNetwork::from(nic.ip), + mac: MacAddr::from(nic.mac), + subnet: IpNetwork::from(nic.subnet.clone()), + vni: SqlU32::from(u32::from(nic.vni)), + is_primary: nic.primary, + slot: SqlU8::from(nic.slot), + })) + } + _ => Ok(None), + } + } + + pub(crate) fn into_network_interface_for_zone( + self, + zone_id: Uuid, + ) -> anyhow::Result { + Ok(nexus_types::inventory::NetworkInterface { + id: self.id, + ip: self.ip.ip(), + kind: nexus_types::inventory::NetworkInterfaceKind::Service( + zone_id, + ), + mac: *self.mac, + name: self.name.into(), + primary: self.is_primary, + slot: *self.slot, + vni: omicron_common::api::external::Vni::try_from(*self.vni) + .context("parsing VNI")?, + subnet: self.subnet.into(), + }) + } +} diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index eb71a12f04..ddb5ba8e03 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion; /// /// This should be updated whenever the schema is changed. For more details, /// refer to: schema/crdb/README.adoc -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(27, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(28, 0, 0); table! { disk (id) { @@ -1388,6 +1388,89 @@ table! { } } +/* blueprints */ + +table! { + blueprint (id) { + id -> Uuid, + + parent_blueprint_id -> Nullable, + + time_created -> Timestamptz, + creator -> Text, + comment -> Text, + } +} + +table! { + bp_target (version) { + version -> Int8, + + blueprint_id -> Uuid, + + enabled -> Bool, + time_made_target -> Timestamptz, + } +} + +table! { + bp_sled_omicron_zones (blueprint_id, sled_id) { + blueprint_id -> Uuid, + sled_id -> Uuid, + + generation -> Int8, + } +} + +table! 
{ + bp_omicron_zone (blueprint_id, id) { + blueprint_id -> Uuid, + sled_id -> Uuid, + + id -> Uuid, + underlay_address -> Inet, + zone_type -> crate::ZoneTypeEnum, + + primary_service_ip -> Inet, + primary_service_port -> Int4, + second_service_ip -> Nullable, + second_service_port -> Nullable, + dataset_zpool_name -> Nullable, + bp_nic_id -> Nullable, + dns_gz_address -> Nullable, + dns_gz_address_index -> Nullable, + ntp_ntp_servers -> Nullable>, + ntp_dns_servers -> Nullable>, + ntp_domain -> Nullable, + nexus_external_tls -> Nullable, + nexus_external_dns_servers -> Nullable>, + snat_ip -> Nullable, + snat_first_port -> Nullable, + snat_last_port -> Nullable, + } +} + +table! { + bp_omicron_zone_nic (blueprint_id, id) { + blueprint_id -> Uuid, + id -> Uuid, + name -> Text, + ip -> Inet, + mac -> Int8, + subnet -> Inet, + vni -> Int8, + is_primary -> Bool, + slot -> Int2, + } +} + +table! { + bp_omicron_zones_not_in_service (blueprint_id, bp_omicron_zone_id) { + blueprint_id -> Uuid, + bp_omicron_zone_id -> Uuid, + } +} + table! { bootstore_keys (key, generation) { key -> Text, diff --git a/nexus/db-queries/Cargo.toml b/nexus/db-queries/Cargo.toml index 3240c54f3f..9cdcc88e6a 100644 --- a/nexus/db-queries/Cargo.toml +++ b/nexus/db-queries/Cargo.toml @@ -64,8 +64,10 @@ camino-tempfile.workspace = true expectorate.workspace = true hyper-rustls.workspace = true gateway-client.workspace = true +illumos-utils.workspace = true internal-dns.workspace = true itertools.workspace = true +nexus-deployment.workspace = true nexus-inventory.workspace = true nexus-test-utils.workspace = true omicron-sled-agent.workspace = true diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs new file mode 100644 index 0000000000..72adb1d3df --- /dev/null +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -0,0 +1,1583 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+
+use super::DataStore;
+use crate::authz;
+use crate::authz::ApiResource;
+use crate::context::OpContext;
+use crate::db;
+use crate::db::error::public_error_from_diesel;
+use crate::db::error::ErrorHandler;
+use crate::db::pagination::paginated;
+use crate::db::pagination::Paginator;
+use crate::db::DbConnection;
+use crate::db::TransactionError;
+use anyhow::Context;
+use async_bb8_diesel::AsyncConnection;
+use async_bb8_diesel::AsyncRunQueryDsl;
+use chrono::DateTime;
+use chrono::Utc;
+use diesel::expression::SelectableHelper;
+use diesel::pg::Pg;
+use diesel::query_builder::AstPass;
+use diesel::query_builder::QueryFragment;
+use diesel::query_builder::QueryId;
+use diesel::result::DatabaseErrorKind;
+use diesel::result::Error as DieselError;
+use diesel::sql_types;
+use diesel::Column;
+use diesel::ExpressionMethods;
+use diesel::OptionalExtension;
+use diesel::QueryDsl;
+use diesel::RunQueryDsl;
+use nexus_db_model::Blueprint as DbBlueprint;
+use nexus_db_model::BpOmicronZone;
+use nexus_db_model::BpOmicronZoneNic;
+use nexus_db_model::BpOmicronZoneNotInService;
+use nexus_db_model::BpSledOmicronZones;
+use nexus_db_model::BpTarget;
+use nexus_types::deployment::Blueprint;
+use nexus_types::deployment::BlueprintMetadata;
+use nexus_types::deployment::BlueprintTarget;
+use nexus_types::deployment::OmicronZonesConfig;
+use omicron_common::api::external::DataPageParams;
+use omicron_common::api::external::Error;
+use omicron_common::api::external::ListResultVec;
+use omicron_common::api::external::LookupType;
+use omicron_common::api::external::ResourceType;
+use omicron_common::bail_unless;
+use std::collections::BTreeMap;
+use std::collections::BTreeSet;
+use std::num::NonZeroU32;
+use uuid::Uuid;
+
+/// "limit" used in SQL queries that paginate through all sleds, omicron
+/// zones, etc.
+///
+/// While we always load an entire blueprint in one operation, we use a
+/// [`Paginator`] to guard against single queries returning an unchecked number
+/// of rows.
+// unsafe: `new_unchecked` is only unsound if the argument is 0.
+const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) };
+
+impl DataStore {
+    /// List blueprints
+    pub async fn blueprints_list(
+        &self,
+        opctx: &OpContext,
+        pagparams: &DataPageParams<'_, Uuid>,
+    ) -> ListResultVec<BlueprintMetadata> {
+        use db::schema::blueprint;
+
+        opctx
+            .authorize(authz::Action::ListChildren, &authz::BLUEPRINT_CONFIG)
+            .await?;
+
+        let blueprints = paginated(blueprint::table, blueprint::id, pagparams)
+            .select(DbBlueprint::as_select())
+            .get_results_async(&*self.pool_connection_authorized(opctx).await?)
+            .await
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(blueprints.into_iter().map(BlueprintMetadata::from).collect())
+    }
+
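Throughout this new datastore module, reads of the blueprint child tables use the `Paginator` pattern that the `SQL_BATCH_SIZE` comment above describes. As a reviewer's aside, the shape is roughly the following; `load_page` here is a hypothetical stand-in for the diesel query against whichever table is being read, so this is a schematic sketch rather than compilable code:

```rust
// Page through a table in batches of SQL_BATCH_SIZE rather than
// issuing one unbounded query (sketch; `load_page` is hypothetical).
let mut paginator = Paginator::new(SQL_BATCH_SIZE);
while let Some(p) = paginator.next() {
    // Fetch at most one batch, starting after the last key seen.
    let batch = load_page(&p.current_pagparams()).await?;
    // Hand the batch back so the paginator can record the marker for
    // the next page (here, keyed on each row's `id`).
    paginator = p.found_batch(&batch, &|row| row.id);
    for row in batch {
        // ...accumulate rows into the in-memory blueprint...
    }
}
```

The `blueprint_read` implementation below instantiates this loop once per child table.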
+    /// Store a complete blueprint into the database
+    pub async fn blueprint_insert(
+        &self,
+        opctx: &OpContext,
+        blueprint: &Blueprint,
+    ) -> Result<(), Error> {
+        opctx
+            .authorize(authz::Action::Modify, &authz::BLUEPRINT_CONFIG)
+            .await?;
+
+        // In the database, the blueprint is represented essentially as a tree
+        // rooted at a `blueprint` row.  Other nodes in the tree point
+        // back at the `blueprint` via `blueprint_id`.
+        //
+        // It's helpful to assemble some values before entering the transaction
+        // so that we can produce the `Error` type that we want here.
+        let row_blueprint = DbBlueprint::from(blueprint);
+        let blueprint_id = row_blueprint.id;
+        let sled_omicron_zones = blueprint
+            .omicron_zones
+            .iter()
+            .map(|(sled_id, zones_config)| {
+                BpSledOmicronZones::new(blueprint_id, *sled_id, zones_config)
+            })
+            .collect::<Vec<_>>();
+        let omicron_zones = blueprint
+            .omicron_zones
+            .iter()
+            .flat_map(|(sled_id, zones_config)| {
+                zones_config.zones.iter().map(|zone| {
+                    BpOmicronZone::new(blueprint_id, *sled_id, zone)
+                        .map_err(|e| Error::internal_error(&format!("{:#}", e)))
+                })
+            })
+            .collect::<Result<Vec<_>, Error>>()?;
+        let omicron_zone_nics = blueprint
+            .omicron_zones
+            .values()
+            .flat_map(|zones_config| {
+                zones_config.zones.iter().filter_map(|zone| {
+                    BpOmicronZoneNic::new(blueprint_id, zone)
+                        .with_context(|| format!("zone {:?}", zone.id))
+                        .map_err(|e| Error::internal_error(&format!("{:#}", e)))
+                        .transpose()
+                })
+            })
+            .collect::<Result<Vec<_>, _>>()?;
+
+        // `Blueprint` stores a set of zones in service, but in the database we
+        // store the set of zones NOT in service (which we expect to be much
+        // smaller, often empty).  Build that inverted set here.
+        let omicron_zones_not_in_service = {
+            let mut zones_not_in_service = Vec::new();
+            for zone in &omicron_zones {
+                if !blueprint.zones_in_service.contains(&zone.id) {
+                    zones_not_in_service.push(BpOmicronZoneNotInService {
+                        blueprint_id,
+                        bp_omicron_zone_id: zone.id,
+                    });
+                }
+            }
+            zones_not_in_service
+        };
+
+        // This implementation inserts all records associated with the
+        // blueprint in one transaction.  This is required: we don't want
+        // any planner or executor to see a half-inserted blueprint, nor do we
+        // want to leave a partial blueprint around if we crash.  However, it
+        // does mean this is likely to be a big transaction and if that becomes
+        // a problem we could break this up as long as we address those
+        // problems.
+        //
+        // The SQL here is written so that it doesn't have to be an
+        // *interactive* transaction.  That is, it should in principle be
+        // possible to generate all this SQL up front and send it as one big
+        // batch rather than making a bunch of round-trips to the database.
+        // We'd do that if we had an interface for doing that with bound
+        // parameters, etc.  See oxidecomputer/omicron#973.
+        let pool = self.pool_connection_authorized(opctx).await?;
+        pool.transaction_async(|conn| async move {
+            // Insert the row for the blueprint.
+            {
+                use db::schema::blueprint::dsl;
+                let _: usize = diesel::insert_into(dsl::blueprint)
+                    .values(row_blueprint)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            // Insert all the Omicron zones for this blueprint.
+            {
+                use db::schema::bp_sled_omicron_zones::dsl as sled_zones;
+                let _ = diesel::insert_into(sled_zones::bp_sled_omicron_zones)
+                    .values(sled_omicron_zones)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            {
+                use db::schema::bp_omicron_zone::dsl as omicron_zone;
+                let _ = diesel::insert_into(omicron_zone::bp_omicron_zone)
+                    .values(omicron_zones)
+                    .execute_async(&conn)
+                    .await?;
+            }
+
+            {
+                use db::schema::bp_omicron_zone_nic::dsl as omicron_zone_nic;
+                let _ =
+                    diesel::insert_into(omicron_zone_nic::bp_omicron_zone_nic)
+                        .values(omicron_zone_nics)
+                        .execute_async(&conn)
+                        .await?;
+            }
+
+            {
+                use db::schema::bp_omicron_zones_not_in_service::dsl;
+                let _ =
+                    diesel::insert_into(dsl::bp_omicron_zones_not_in_service)
+                        .values(omicron_zones_not_in_service)
+                        .execute_async(&conn)
+                        .await?;
+            }
+
+            Ok(())
+        })
+        .await
+        .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        info!(
+            &opctx.log,
+            "inserted blueprint";
+            "blueprint_id" => %blueprint.id,
+        );
+
+        Ok(())
+    }
+
+    /// Read a complete blueprint from the database
+    pub async fn blueprint_read(
+        &self,
+        opctx: &OpContext,
+        authz_blueprint: &authz::Blueprint,
+    ) -> Result<Blueprint, Error> {
+        opctx.authorize(authz::Action::Read, authz_blueprint).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
+        let blueprint_id = authz_blueprint.id();
+
+        // Read the metadata from the primary blueprint row, and ensure that it
+        // exists.
+        let (parent_blueprint_id, time_created, creator, comment) = {
+            use db::schema::blueprint::dsl;
+
+            let Some(blueprint) = dsl::blueprint
+                .filter(dsl::id.eq(blueprint_id))
+                .select(DbBlueprint::as_select())
+                .get_result_async(&*conn)
+                .await
+                .optional()
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?
+            else {
+                return Err(authz_blueprint.not_found());
+            };
+
+            (
+                blueprint.parent_blueprint_id,
+                blueprint.time_created,
+                blueprint.creator,
+                blueprint.comment,
+            )
+        };
+
+        // Read this blueprint's `bp_sled_omicron_zones` rows, which describe
+        // the `OmicronZonesConfig` generation number for each sled that is a
+        // part of this blueprint.  Construct the BTreeMap we ultimately need,
+        // but all the `zones` vecs will be empty until our next query below.
+        let mut omicron_zones: BTreeMap<Uuid, OmicronZonesConfig> = {
+            use db::schema::bp_sled_omicron_zones::dsl;
+
+            let mut omicron_zones = BTreeMap::new();
+            let mut paginator = Paginator::new(SQL_BATCH_SIZE);
+            while let Some(p) = paginator.next() {
+                let batch = paginated(
+                    dsl::bp_sled_omicron_zones,
+                    dsl::sled_id,
+                    &p.current_pagparams(),
+                )
+                .filter(dsl::blueprint_id.eq(blueprint_id))
+                .select(BpSledOmicronZones::as_select())
+                .load_async(&*conn)
+                .await
+                .map_err(|e| {
+                    public_error_from_diesel(e, ErrorHandler::Server)
+                })?;
+
+                paginator = p.found_batch(&batch, &|s| s.sled_id);
+
+                for s in batch {
+                    let old = omicron_zones.insert(
+                        s.sled_id,
+                        OmicronZonesConfig {
+                            generation: *s.generation,
+                            zones: Vec::new(),
+                        },
+                    );
+                    bail_unless!(
+                        old.is_none(),
+                        "found duplicate sled ID in bp_sled_omicron_zones: {}",
+                        s.sled_id
+                    );
+                }
+            }
+
+            omicron_zones
+        };
+
+        // Assemble a mutable map of all the NICs found, by NIC id.  As we
+        // match these up with the corresponding zone below, we'll remove items
+        // from this map.  That way we can tell if the same NIC was used twice
+        // or not used at all.
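As an editorial aside, the drain-and-check bookkeeping that this comment describes reduces to a small self-contained sketch; the types below are illustrative placeholders, not the patch's row types:

```rust
use std::collections::BTreeMap;

// Drain a lookup map while matching rows against it. A missing entry
// means a dangling reference; leftovers mean an orphaned row. Either
// one indicates an inconsistency between the two tables.
fn match_nics(
    mut nics: BTreeMap<u32, String>,
    zone_nic_ids: &[u32],
) -> Result<(), String> {
    for id in zone_nic_ids {
        nics.remove(id)
            .ok_or_else(|| format!("zone references unknown NIC {id}"))?;
    }
    if !nics.is_empty() {
        return Err(format!("{} NICs referenced by no zone", nics.len()));
    }
    Ok(())
}
```

The query that follows builds the real map this way, and the `bail_unless!` checks at the end of `blueprint_read` report any leftovers.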
+ let mut omicron_zone_nics = { + use db::schema::bp_omicron_zone_nic::dsl; + + let mut omicron_zone_nics = BTreeMap::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_omicron_zone_nic, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZoneNic::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|n| n.id); + + for n in batch { + let nic_id = n.id; + let old = omicron_zone_nics.insert(nic_id, n); + bail_unless!( + old.is_none(), + "found duplicate NIC ID in bp_omicron_zone_nic: {}", + nic_id, + ); + } + } + + omicron_zone_nics + }; + + // Load the list of not-in-service zones. Similar to NICs, we'll use a + // mutable set of zone IDs so we can tell if a zone we expected to be + // inactive wasn't present in the blueprint at all. + let mut omicron_zones_not_in_service = { + use db::schema::bp_omicron_zones_not_in_service::dsl; + + let mut omicron_zones_not_in_service = BTreeSet::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = paginated( + dsl::bp_omicron_zones_not_in_service, + dsl::bp_omicron_zone_id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZoneNotInService::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|z| z.bp_omicron_zone_id); + + for z in batch { + let inserted = omicron_zones_not_in_service + .insert(z.bp_omicron_zone_id); + bail_unless!( + inserted, + "found duplicate zone ID in \ + bp_omicron_zones_not_in_service: {}", + z.bp_omicron_zone_id, + ); + } + } + + omicron_zones_not_in_service + }; + + // Create the in-memory list of zones _in_ service, which we'll + // calculate below as we load zones. (Any zone that isn't present in + // `omicron_zones_not_in_service` is considered in service.) + let mut zones_in_service = BTreeSet::new(); + + // Load all the zones for each sled. + { + use db::schema::bp_omicron_zone::dsl; + + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + // `paginated` implicitly orders by our `id`, which is also + // handy for testing: the zones are always consistently ordered + let batch = paginated( + dsl::bp_omicron_zone, + dsl::id, + &p.current_pagparams(), + ) + .filter(dsl::blueprint_id.eq(blueprint_id)) + .select(BpOmicronZone::as_select()) + .load_async(&*conn) + .await + .map_err(|e| { + public_error_from_diesel(e, ErrorHandler::Server) + })?; + + paginator = p.found_batch(&batch, &|z| z.id); + + for z in batch { + let nic_row = z + .bp_nic_id + .map(|id| { + // This error means that we found a row in + // bp_omicron_zone that references a NIC by id but + // there's no corresponding row in + // bp_omicron_zone_nic with that id. This should be + // impossible and reflects either a bug or database + // corruption. + omicron_zone_nics.remove(&id).ok_or_else(|| { + Error::internal_error(&format!( + "zone {:?}: expected to find NIC {:?}, \ + but didn't", + z.id, z.bp_nic_id + )) + }) + }) + .transpose()?; + let sled_zones = + omicron_zones.get_mut(&z.sled_id).ok_or_else(|| { + // This error means that we found a row in + // bp_omicron_zone with no associated record in + // bp_sled_omicron_zones. 
This should be + // impossible and reflects either a bug or database + // corruption. + Error::internal_error(&format!( + "zone {:?}: unknown sled: {:?}", + z.id, z.sled_id + )) + })?; + let zone_id = z.id; + let zone = z + .into_omicron_zone_config(nic_row) + .with_context(|| { + format!("zone {:?}: parse from database", zone_id) + }) + .map_err(|e| { + Error::internal_error(&format!( + "{:#}", + e.to_string() + )) + })?; + sled_zones.zones.push(zone); + + // If we can remove `zone_id` from + // `omicron_zones_not_in_service`, then the zone is not in + // service. Otherwise, add it to the list of in-service + // zones. + if !omicron_zones_not_in_service.remove(&zone_id) { + zones_in_service.insert(zone_id); + } + } + } + } + + bail_unless!( + omicron_zone_nics.is_empty(), + "found extra Omicron zone NICs: {:?}", + omicron_zone_nics.keys() + ); + bail_unless!( + omicron_zones_not_in_service.is_empty(), + "found extra Omicron zones not in service: {:?}", + omicron_zones_not_in_service, + ); + + Ok(Blueprint { + id: blueprint_id, + omicron_zones, + zones_in_service, + parent_blueprint_id, + time_created, + creator, + comment, + }) + } + + /// Delete a blueprint from the database + pub async fn blueprint_delete( + &self, + opctx: &OpContext, + authz_blueprint: &authz::Blueprint, + ) -> Result<(), Error> { + opctx.authorize(authz::Action::Delete, authz_blueprint).await?; + let blueprint_id = authz_blueprint.id(); + + // As with inserting a whole blueprint, we remove it in one big + // transaction. Similar considerations apply. We could + // break it up if these transactions become too big. But we'd need a + // way to stop other clients from discovering a collection after we + // start removing it and we'd also need to make sure we didn't leak a + // collection if we crash while deleting it. + let conn = self.pool_connection_authorized(opctx).await?; + + let ( + nblueprints, + nsled_agent_zones, + nzones, + nnics, + nzones_not_in_service, + ) = conn + .transaction_async(|conn| async move { + // Ensure that blueprint we're about to delete is not the + // current target. + let current_target = + self.blueprint_current_target_only(&conn).await?; + if let Some(current_target) = current_target { + if current_target.target_id == blueprint_id { + return Err(TransactionError::CustomError( + Error::conflict(format!( + "blueprint {blueprint_id} is the \ + current target and cannot be deleted", + )), + )); + } + } + + // Remove the record describing the blueprint itself. + let nblueprints = { + use db::schema::blueprint::dsl; + diesel::delete( + dsl::blueprint.filter(dsl::id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + + // Bail out if this blueprint didn't exist; there won't be + // references to it in any of the remaining tables either, since + // deletion always goes through this transaction. + if nblueprints == 0 { + return Err(TransactionError::CustomError( + authz_blueprint.not_found(), + )); + } + + // Remove rows associated with Omicron zones + let nsled_agent_zones = { + use db::schema::bp_sled_omicron_zones::dsl; + diesel::delete( + dsl::bp_sled_omicron_zones + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? + }; + + let nzones = { + use db::schema::bp_omicron_zone::dsl; + diesel::delete( + dsl::bp_omicron_zone + .filter(dsl::blueprint_id.eq(blueprint_id)), + ) + .execute_async(&conn) + .await? 
+            };
+
+            let nnics = {
+                use db::schema::bp_omicron_zone_nic::dsl;
+                diesel::delete(
+                    dsl::bp_omicron_zone_nic
+                        .filter(dsl::blueprint_id.eq(blueprint_id)),
+                )
+                .execute_async(&conn)
+                .await?
+            };
+
+            let nzones_not_in_service = {
+                use db::schema::bp_omicron_zones_not_in_service::dsl;
+                diesel::delete(
+                    dsl::bp_omicron_zones_not_in_service
+                        .filter(dsl::blueprint_id.eq(blueprint_id)),
+                )
+                .execute_async(&conn)
+                .await?
+            };
+
+            Ok((
+                nblueprints,
+                nsled_agent_zones,
+                nzones,
+                nnics,
+                nzones_not_in_service,
+            ))
+        })
+        .await
+        .map_err(|error| match error {
+            TransactionError::CustomError(e) => e,
+            TransactionError::Database(e) => {
+                public_error_from_diesel(e, ErrorHandler::Server)
+            }
+        })?;
+
+        info!(&opctx.log, "removed blueprint";
+            "blueprint_id" => blueprint_id.to_string(),
+            "nblueprints" => nblueprints,
+            "nsled_agent_zones" => nsled_agent_zones,
+            "nzones" => nzones,
+            "nnics" => nnics,
+            "nzones_not_in_service" => nzones_not_in_service,
+        );
+
+        Ok(())
+    }
+
+    /// Set the current target blueprint
+    ///
+    /// In order to become the target blueprint, `target`'s parent blueprint
+    /// must be the current target
+    pub async fn blueprint_target_set_current(
+        &self,
+        opctx: &OpContext,
+        target: BlueprintTarget,
+    ) -> Result<(), Error> {
+        opctx
+            .authorize(authz::Action::Modify, &authz::BLUEPRINT_CONFIG)
+            .await?;
+
+        let query = InsertTargetQuery {
+            target_id: target.target_id,
+            enabled: target.enabled,
+            time_made_target: target.time_made_target,
+        };
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+
+        query
+            .execute_async(&*conn)
+            .await
+            .map_err(|e| Error::from(query.decode_error(e)))?;
+
+        Ok(())
+    }
+
+    /// Get the current target blueprint, if one exists
+    ///
+    /// Returns both the metadata about the target and the full blueprint
+    /// contents.  If you only need the target metadata, use
+    /// `blueprint_target_get_current` instead.
+    pub async fn blueprint_target_get_current_full(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<Option<(BlueprintTarget, Blueprint)>, Error> {
+        opctx.authorize(authz::Action::Read, &authz::BLUEPRINT_CONFIG).await?;
+
+        let conn = self.pool_connection_authorized(opctx).await?;
+        let Some(target) = self.blueprint_current_target_only(&conn).await?
+        else {
+            return Ok(None);
+        };
+
+        // The blueprint for the current target cannot be deleted while it is
+        // the current target, but it's possible someone else (a) made a new
+        // blueprint the target and (b) deleted the blueprint pointed to by our
+        // `target` between the above query and the below query.  In such a
+        // case, this query will fail with an "unknown blueprint ID" error.
+        // This should be rare in practice.
+        let authz_blueprint = authz_blueprint_from_id(target.target_id);
+        let blueprint = self.blueprint_read(opctx, &authz_blueprint).await?;
+
+        Ok(Some((target, blueprint)))
+    }
+
+    /// Get the current target blueprint, if one exists
+    pub async fn blueprint_target_get_current(
+        &self,
+        opctx: &OpContext,
+    ) -> Result<Option<BlueprintTarget>, Error> {
+        opctx.authorize(authz::Action::Read, &authz::BLUEPRINT_CONFIG).await?;
+        let conn = self.pool_connection_authorized(opctx).await?;
+        self.blueprint_current_target_only(&conn).await
+    }
+
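Taken together, these methods give callers a simple set-then-read flow. A sketch based on the tests later in this file (names and error handling simplified; `now_db_precision` comes from `nexus_inventory`):

```rust
// Make `blueprint` the new current target, then read the target back
// along with its full contents (sketch mirroring the tests below).
let target = BlueprintTarget {
    target_id: blueprint.id,
    enabled: true,
    time_made_target: now_db_precision(),
};
datastore.blueprint_target_set_current(&opctx, target).await?;

let (read_target, full_blueprint) = datastore
    .blueprint_target_get_current_full(&opctx)
    .await?
    .expect("target was just set");
assert_eq!(read_target.target_id, full_blueprint.id);
```

Setting a target whose parent is not the current target fails with `InsertTargetError::ParentNotTarget`, which is enforced by the insert query defined next.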
+    // Helper to fetch the current blueprint target (without fetching the
+    // entire blueprint for that target).
+    //
+    // Caller is responsible for checking authz for this operation.
+    async fn blueprint_current_target_only(
+        &self,
+        conn: &async_bb8_diesel::Connection<DbConnection>,
+    ) -> Result<Option<BlueprintTarget>, Error> {
+        use db::schema::bp_target::dsl;
+
+        let current_target = dsl::bp_target
+            .order_by(dsl::version.desc())
+            .first_async::<BpTarget>(conn)
+            .await
+            .optional()
+            .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?;
+
+        Ok(current_target.map(BlueprintTarget::from))
+    }
+}
+
+// Helper to create an `authz::Blueprint` for a specific blueprint ID
+fn authz_blueprint_from_id(blueprint_id: Uuid) -> authz::Blueprint {
+    authz::Blueprint::new(
+        authz::FLEET,
+        blueprint_id,
+        LookupType::ById(blueprint_id),
+    )
+}
+
+/// Errors related to inserting a target blueprint
+#[derive(Debug)]
+enum InsertTargetError {
+    /// The requested target blueprint ID does not exist in the blueprint table.
+    NoSuchBlueprint(Uuid),
+    /// The requested target blueprint's parent does not match the current
+    /// target.
+    ParentNotTarget(Uuid),
+    /// Any other error
+    Other(DieselError),
+}
+
+impl From<InsertTargetError> for Error {
+    fn from(value: InsertTargetError) -> Self {
+        match value {
+            InsertTargetError::NoSuchBlueprint(id) => {
+                Error::not_found_by_id(ResourceType::Blueprint, &id)
+            }
+            InsertTargetError::ParentNotTarget(id) => {
+                Error::invalid_request(format!(
+                    "Blueprint {id}'s parent blueprint is not the current \
+                    target blueprint"
+                ))
+            }
+            InsertTargetError::Other(e) => {
+                public_error_from_diesel(e, ErrorHandler::Server)
+            }
+        }
+    }
+}
+
+/// Query to insert a new current target blueprint.
+///
+/// The `bp_target` table's primary key is the `version` field, and we enforce
+/// the following invariants:
+///
+/// * The first "current target" blueprint is assigned version 1.
+/// * In order to be inserted as the first current target blueprint, a
+///   blueprint must have a parent_blueprint_id of NULL.
+/// * After the first, any subsequent blueprint can only be assigned as the
+///   current target if its parent_blueprint_id is the current target blueprint.
+/// * When inserting a new child blueprint as the current target, it is assigned
+///   a version of 1 + its parent's version.
+///
+/// The result of this is a linear history of blueprints, where each target is a
+/// direct child of the previous current target.  Enforcing the above has some
+/// subtleties (particularly around handling the "first blueprint with no
+/// parent" case).  These are expanded on below through inline comments on the
+/// query we generate:
+///
+/// ```sql
+/// WITH
+///     -- Subquery to fetch the current target (i.e., the row with the max
+///     -- version in `bp_target`).
+///     current_target AS (
+///         SELECT
+///             "version" AS version,
+///             "blueprint_id" AS blueprint_id
+///         FROM "bp_target"
+///         ORDER BY "version" DESC
+///         LIMIT 1
+///     ),
+///
+///     -- Error checking subquery: This uses similar tricks as elsewhere in
+///     -- this crate to `CAST(... AS UUID)` with non-UUID values that result
+///     -- in runtime errors in specific cases, allowing us to give accurate
+///     -- error messages.
+///     --
+///     -- These checks are not required for correct behavior by the insert
+///     -- below.  If we removed them, the insert would insert 0 rows if
+///     -- these checks would have failed.  But they make it easier to report
+///     -- specific problems to our caller.
+///     --
+///     -- The specific cases we check here are noted below.
+///     check_validity AS MATERIALIZED (
+///         SELECT CAST(IF(
+///             -- Return `no-such-blueprint` if the ID we're being told to
+///             -- set as the target doesn't exist in the blueprint table.
+///             (SELECT "id" FROM "blueprint" WHERE "id" = <new_target_id>) IS NULL,
+///             'no-such-blueprint',
+///             IF(
+///                 -- Check for whether our new target's parent matches our
+///                 -- current target.  There are two cases here: The first is
+///                 -- the common case (i.e., the new target has a parent: does
+///                 -- it match the current target ID?).  The second is the
+///                 -- bootstrapping check: if we're trying to insert a new
+///                 -- target that does not have a parent, we should not have a
+///                 -- current target at all.
+///                 --
+///                 -- If either of these cases fails, we return
+///                 -- `parent-not-target`.
+///                 (
+///                     SELECT "parent_blueprint_id" FROM "blueprint", current_target
+///                     WHERE
+///                         "id" = <new_target_id>
+///                         AND current_target.blueprint_id = "parent_blueprint_id"
+///                 ) IS NOT NULL
+///                 OR
+///                 (
+///                     SELECT 1 FROM "blueprint"
+///                     WHERE
+///                         "id" = <new_target_id>
+///                         AND "parent_blueprint_id" IS NULL
+///                         AND NOT EXISTS (SELECT version FROM current_target)
+///                 ) = 1,
+///                 <new_target_id>,
+///                 'parent-not-target'
+///             )
+///         ) AS UUID)
+///     ),
+///
+///     -- Determine the new version number to use: either 1 if this is the
+///     -- first blueprint being made the current target, or 1 higher than
+///     -- the previous target's version.
+///     --
+///     -- The final clauses of each of these WHERE clauses repeat the
+///     -- checks performed above in `check_validity`, and will cause this
+///     -- subquery to return no rows if we should not allow the new
+///     -- target to be set.
+///     new_target AS (
+///         SELECT 1 AS new_version FROM "blueprint"
+///         WHERE
+///             "id" = <new_target_id>
+///             AND "parent_blueprint_id" IS NULL
+///             AND NOT EXISTS (SELECT version FROM current_target)
+///         UNION
+///         SELECT current_target.version + 1 FROM current_target, "blueprint"
+///         WHERE
+///             "id" = <new_target_id>
+///             AND "parent_blueprint_id" IS NOT NULL
+///             AND "parent_blueprint_id" = current_target.blueprint_id
+///     )
+///
+///     -- Perform the actual insertion.
+///     INSERT INTO "bp_target"(
+///         "version","blueprint_id","enabled","time_made_target"
+///     )
+///     SELECT
+///         new_target.new_version,
+///         <new_target_id>,
+///         <enabled>,
+///         <time_made_target>
+///     FROM new_target
+/// ```
+#[derive(Debug, Clone, Copy)]
+struct InsertTargetQuery {
+    target_id: Uuid,
+    enabled: bool,
+    time_made_target: DateTime<Utc>,
+}
+
+// Uncastable sentinel used to detect an attempt to make a blueprint the
+// target when it does not exist in the blueprint table.
+const NO_SUCH_BLUEPRINT_SENTINEL: &str = "no-such-blueprint";
+
+// Uncastable sentinel used to detect an attempt to make a blueprint the
+// target when its parent_blueprint_id is not the current target.
+const PARENT_NOT_TARGET_SENTINEL: &str = "parent-not-target";
+
+// Error messages generated from the above sentinel values.
+const NO_SUCH_BLUEPRINT_ERROR_MESSAGE: &str = + "could not parse \"no-such-blueprint\" as type uuid: \ + uuid: incorrect UUID length: no-such-blueprint"; +const PARENT_NOT_TARGET_ERROR_MESSAGE: &str = + "could not parse \"parent-not-target\" as type uuid: \ + uuid: incorrect UUID length: parent-not-target"; + +impl InsertTargetQuery { + fn decode_error(&self, err: DieselError) -> InsertTargetError { + match err { + DieselError::DatabaseError(DatabaseErrorKind::Unknown, info) + if info.message() == NO_SUCH_BLUEPRINT_ERROR_MESSAGE => + { + InsertTargetError::NoSuchBlueprint(self.target_id) + } + DieselError::DatabaseError(DatabaseErrorKind::Unknown, info) + if info.message() == PARENT_NOT_TARGET_ERROR_MESSAGE => + { + InsertTargetError::ParentNotTarget(self.target_id) + } + other => InsertTargetError::Other(other), + } + } +} + +impl QueryId for InsertTargetQuery { + type QueryId = (); + const HAS_STATIC_QUERY_ID: bool = false; +} + +impl QueryFragment for InsertTargetQuery { + fn walk_ast<'a>( + &'a self, + mut out: AstPass<'_, 'a, Pg>, + ) -> diesel::QueryResult<()> { + use crate::db::schema::blueprint::dsl as bp_dsl; + use crate::db::schema::bp_target::dsl; + + type FromClause = + diesel::internal::table_macro::StaticQueryFragmentInstance; + type BpTargetFromClause = FromClause; + type BlueprintFromClause = FromClause; + const BP_TARGET_FROM_CLAUSE: BpTargetFromClause = + BpTargetFromClause::new(); + const BLUEPRINT_FROM_CLAUSE: BlueprintFromClause = + BlueprintFromClause::new(); + + out.push_sql("WITH "); + + out.push_sql("current_target AS (SELECT "); + out.push_identifier(dsl::version::NAME)?; + out.push_sql(" AS version,"); + out.push_identifier(dsl::blueprint_id::NAME)?; + out.push_sql(" AS blueprint_id FROM "); + BP_TARGET_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" ORDER BY "); + out.push_identifier(dsl::version::NAME)?; + out.push_sql(" DESC LIMIT 1),"); + + out.push_sql( + "check_validity AS MATERIALIZED ( \ + SELECT \ + CAST( \ + IF( \ + (SELECT ", + ); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" FROM "); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(") IS NULL, "); + out.push_bind_param::( + &NO_SUCH_BLUEPRINT_SENTINEL, + )?; + out.push_sql( + ", \ + IF( \ + (SELECT ", + ); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql(" FROM "); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(", current_target WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(" AND current_target.blueprint_id = "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql( + " ) IS NOT NULL \ + OR \ + (SELECT 1 FROM ", + ); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(" AND "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql( + " IS NULL \ + AND NOT EXISTS ( \ + SELECT version FROM current_target) \ + ) = 1, ", + ); + out.push_bind_param::(&self.target_id)?; + out.push_sql(", "); + out.push_bind_param::( + &PARENT_NOT_TARGET_SENTINEL, + )?; + out.push_sql( + " ) \ + ) \ + AS UUID) \ + ), ", + ); + + out.push_sql("new_target AS (SELECT 1 AS new_version FROM "); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; 
+ out.push_sql(" WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(" AND "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql( + " IS NULL \ + AND NOT EXISTS \ + (SELECT version FROM current_target) \ + UNION \ + SELECT current_target.version + 1 FROM \ + current_target, ", + ); + BLUEPRINT_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql(" WHERE "); + out.push_identifier(bp_dsl::id::NAME)?; + out.push_sql(" = "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(" AND "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql(" IS NOT NULL AND "); + out.push_identifier(bp_dsl::parent_blueprint_id::NAME)?; + out.push_sql(" = current_target.blueprint_id) "); + + out.push_sql("INSERT INTO "); + BP_TARGET_FROM_CLAUSE.walk_ast(out.reborrow())?; + out.push_sql("("); + out.push_identifier(dsl::version::NAME)?; + out.push_sql(","); + out.push_identifier(dsl::blueprint_id::NAME)?; + out.push_sql(","); + out.push_identifier(dsl::enabled::NAME)?; + out.push_sql(","); + out.push_identifier(dsl::time_made_target::NAME)?; + out.push_sql(") SELECT new_target.new_version, "); + out.push_bind_param::(&self.target_id)?; + out.push_sql(","); + out.push_bind_param::(&self.enabled)?; + out.push_sql(","); + out.push_bind_param::>( + &self.time_made_target, + )?; + out.push_sql(" FROM new_target"); + + Ok(()) + } +} + +impl RunQueryDsl for InsertTargetQuery {} + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::datastore_test; + use nexus_deployment::blueprint_builder::BlueprintBuilder; + use nexus_deployment::blueprint_builder::Ensure; + use nexus_inventory::now_db_precision; + use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::Policy; + use nexus_types::deployment::SledResources; + use nexus_types::inventory::Collection; + use omicron_common::address::Ipv6Subnet; + use omicron_test_utils::dev; + use rand::thread_rng; + use rand::Rng; + use std::mem; + use std::net::Ipv6Addr; + + static EMPTY_POLICY: Policy = Policy { sleds: BTreeMap::new() }; + + // This is a not-super-future-maintainer-friendly helper to check that all + // the subtables related to blueprints have been pruned of a specific + // blueprint ID. If additional blueprint tables are added in the future, + // this function will silently ignore them unless they're manually added. + async fn ensure_blueprint_fully_deleted( + datastore: &DataStore, + blueprint_id: Uuid, + ) { + let conn = datastore.pool_connection_for_tests().await.unwrap(); + + macro_rules! query_count { + ($table:ident, $blueprint_id_col:ident) => {{ + use db::schema::$table::dsl; + let result = dsl::$table + .filter(dsl::$blueprint_id_col.eq(blueprint_id)) + .count() + .get_result_async(&*conn) + .await; + (stringify!($table), result) + }}; + } + + for (table_name, result) in [ + query_count!(blueprint, id), + query_count!(bp_omicron_zone, blueprint_id), + query_count!(bp_omicron_zone_nic, blueprint_id), + query_count!(bp_omicron_zones_not_in_service, blueprint_id), + ] { + let count: i64 = result.unwrap(); + assert_eq!( + count, 0, + "nonzero row count for blueprint \ + {blueprint_id} in table {table_name}" + ); + } + } + + // Create a fake set of `SledResources`, either with a subnet matching + // `ip` or with an arbitrary one. 
+ fn fake_sled_resources(ip: Option) -> SledResources { + use illumos_utils::zpool::ZpoolName; + let zpools = (0..4) + .map(|_| { + let name = ZpoolName::new_external(Uuid::new_v4()).to_string(); + name.parse().unwrap() + }) + .collect(); + let ip = ip.unwrap_or_else(|| thread_rng().gen::().into()); + SledResources { zpools, subnet: Ipv6Subnet::new(ip) } + } + + // Create a `Policy` that contains all the sleds found in `collection` + fn policy_from_collection(collection: &Collection) -> Policy { + Policy { + sleds: collection + .sled_agents + .iter() + .map(|(sled_id, agent)| { + // `Collection` doesn't currently hold zpool names, so + // we'll construct fake resources for each sled. + ( + *sled_id, + fake_sled_resources(Some( + *agent.sled_agent_address.ip(), + )), + ) + }) + .collect(), + } + } + + fn representative() -> (Collection, Policy, Blueprint) { + // We'll start with a representative collection... + let mut collection = + nexus_inventory::examples::representative().builder.build(); + + // ...and then mutate it such that the omicron zones it reports match + // the sled agent IDs it reports. Steal the sled agent info and drop the + // fake sled-agent IDs: + let mut empty_map = BTreeMap::new(); + mem::swap(&mut empty_map, &mut collection.sled_agents); + let mut sled_agents = empty_map.into_values().collect::>(); + + // Now reinsert them with IDs pulled from the omicron zones. This + // assumes we have more fake sled agents than omicron zones, which is + // currently true for the representative collection. + for &sled_id in collection.omicron_zones.keys() { + let some_sled_agent = sled_agents.pop().expect( + "fewer representative sled agents than \ + representative omicron zones sleds", + ); + collection.sled_agents.insert(sled_id, some_sled_agent); + } + + let policy = policy_from_collection(&collection); + let blueprint = BlueprintBuilder::build_initial_from_collection( + &collection, + &policy, + "test", + ) + .unwrap(); + + (collection, policy, blueprint) + } + + async fn blueprint_list_all_ids( + opctx: &OpContext, + datastore: &DataStore, + ) -> Vec { + datastore + .blueprints_list(opctx, &DataPageParams::max_page()) + .await + .unwrap() + .into_iter() + .map(|bp| bp.id) + .collect() + } + + #[tokio::test] + async fn test_empty_blueprint() { + // Setup + let logctx = dev::test_setup_log("inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create an empty collection and a blueprint from it + let collection = + nexus_inventory::CollectionBuilder::new("test").build(); + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &EMPTY_POLICY, + "test", + ) + .unwrap(); + let authz_blueprint = authz_blueprint_from_id(blueprint1.id); + + // Trying to read it from the database should fail with the relevant + // "not found" error. + let err = datastore + .blueprint_read(&opctx, &authz_blueprint) + .await + .unwrap_err(); + assert_eq!(err, authz_blueprint.not_found()); + + // Write it to the database and read it back. + datastore + .blueprint_insert(&opctx, &blueprint1) + .await + .expect("failed to insert blueprint"); + let blueprint_read = datastore + .blueprint_read(&opctx, &authz_blueprint) + .await + .expect("failed to read collection back"); + assert_eq!(blueprint1, blueprint_read); + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + [blueprint1.id] + ); + + // There ought to be no sleds or zones in service, and no parent + // blueprint. 
+ assert_eq!(blueprint1.omicron_zones.len(), 0); + assert_eq!(blueprint1.zones_in_service.len(), 0); + assert_eq!(blueprint1.parent_blueprint_id, None); + + // Trying to insert the same blueprint again should fail. + let err = + datastore.blueprint_insert(&opctx, &blueprint1).await.unwrap_err(); + assert!(err.to_string().contains("duplicate key")); + + // Delete the blueprint and ensure it's really gone. + datastore.blueprint_delete(&opctx, &authz_blueprint).await.unwrap(); + ensure_blueprint_fully_deleted(&datastore, blueprint1.id).await; + assert_eq!(blueprint_list_all_ids(&opctx, &datastore).await, []); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_representative_blueprint() { + // Setup + let logctx = dev::test_setup_log("inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Create a cohesive representative collection/policy/blueprint + let (collection, mut policy, blueprint1) = representative(); + let authz_blueprint1 = authz_blueprint_from_id(blueprint1.id); + + // Write it to the database and read it back. + datastore + .blueprint_insert(&opctx, &blueprint1) + .await + .expect("failed to insert blueprint"); + let blueprint_read = datastore + .blueprint_read(&opctx, &authz_blueprint1) + .await + .expect("failed to read collection back"); + assert_eq!(blueprint1, blueprint_read); + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + [blueprint1.id] + ); + + // Check the number of blueprint elements against our collection. + assert_eq!(blueprint1.omicron_zones.len(), policy.sleds.len()); + assert_eq!( + blueprint1.omicron_zones.len(), + collection.omicron_zones.len() + ); + assert_eq!( + blueprint1.all_omicron_zones().count(), + collection.all_omicron_zones().count() + ); + // All zones should be in service. + assert_eq!( + blueprint1.zones_in_service.len(), + blueprint1.all_omicron_zones().count() + ); + assert_eq!(blueprint1.parent_blueprint_id, None); + + // Set blueprint1 as the current target, and ensure that we cannot + // delete it (as the current target cannot be deleted). + let bp1_target = BlueprintTarget { + target_id: blueprint1.id, + enabled: true, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp1_target, blueprint1.clone())) + ); + let err = datastore + .blueprint_delete(&opctx, &authz_blueprint1) + .await + .unwrap_err(); + assert!( + err.to_string().contains(&format!( + "blueprint {} is the current target and cannot be deleted", + blueprint1.id + )), + "unexpected error: {err}" + ); + + // Add a new sled to `policy`. + let new_sled_id = Uuid::new_v4(); + policy.sleds.insert(new_sled_id, fake_sled_resources(None)); + let new_sled_zpools = &policy.sleds.get(&new_sled_id).unwrap().zpools; + + // Create a builder for a child blueprint. + let mut builder = + BlueprintBuilder::new_based_on(&blueprint1, &policy, "test"); + + // Add zones to our new sled. 
+ assert_eq!( + builder.sled_ensure_zone_ntp(new_sled_id).unwrap(), + Ensure::Added + ); + for zpool_name in new_sled_zpools { + assert_eq!( + builder + .sled_ensure_zone_crucible(new_sled_id, zpool_name.clone()) + .unwrap(), + Ensure::Added + ); + } + let num_new_sled_zones = 1 + new_sled_zpools.len(); + + let blueprint2 = builder.build(); + let authz_blueprint2 = authz_blueprint_from_id(blueprint2.id); + + // Check that we added the new sled and its zones. + assert_eq!( + blueprint1.omicron_zones.len() + 1, + blueprint2.omicron_zones.len() + ); + assert_eq!( + blueprint1.all_omicron_zones().count() + num_new_sled_zones, + blueprint2.all_omicron_zones().count() + ); + + // All zones should be in service. + assert_eq!( + blueprint2.zones_in_service.len(), + blueprint2.all_omicron_zones().count() + ); + assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); + + // Check that we can write it to the DB and read it back. + datastore + .blueprint_insert(&opctx, &blueprint2) + .await + .expect("failed to insert blueprint"); + let blueprint_read = datastore + .blueprint_read(&opctx, &authz_blueprint2) + .await + .expect("failed to read collection back"); + println!("diff: {}", blueprint2.diff(&blueprint_read)); + assert_eq!(blueprint2, blueprint_read); + { + let mut expected_ids = [blueprint1.id, blueprint2.id]; + expected_ids.sort(); + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + expected_ids + ); + } + + // Set blueprint2 as the current target and ensure that means we can not + // delete it. + let bp2_target = BlueprintTarget { + target_id: blueprint2.id, + enabled: true, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp2_target, blueprint2.clone())) + ); + let err = datastore + .blueprint_delete(&opctx, &authz_blueprint2) + .await + .unwrap_err(); + assert!( + err.to_string().contains(&format!( + "blueprint {} is the current target and cannot be deleted", + blueprint2.id + )), + "unexpected error: {err}" + ); + + // Now that blueprint2 is the target, we should be able to delete + // blueprint1. + datastore.blueprint_delete(&opctx, &authz_blueprint1).await.unwrap(); + ensure_blueprint_fully_deleted(&datastore, blueprint1.id).await; + assert_eq!( + blueprint_list_all_ids(&opctx, &datastore).await, + [blueprint2.id] + ); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_set_target() { + // Setup + let logctx = dev::test_setup_log("inventory_insert"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // Trying to insert a target that doesn't reference a blueprint should + // fail with a relevant error message. + let nonexistent_blueprint_id = Uuid::new_v4(); + let err = datastore + .blueprint_target_set_current( + &opctx, + BlueprintTarget { + target_id: nonexistent_blueprint_id, + enabled: true, + time_made_target: now_db_precision(), + }, + ) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::NoSuchBlueprint( + nonexistent_blueprint_id + )) + ); + + // There should be no current target still. 
+ assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + None + ); + + // Create three blueprints: + // * `blueprint1` has no parent + // * `blueprint2` and `blueprint3` both have `blueprint1` as parent + let collection = + nexus_inventory::CollectionBuilder::new("test").build(); + let blueprint1 = BlueprintBuilder::build_initial_from_collection( + &collection, + &EMPTY_POLICY, + "test1", + ) + .unwrap(); + let blueprint2 = + BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test2") + .build(); + let blueprint3 = + BlueprintBuilder::new_based_on(&blueprint1, &EMPTY_POLICY, "test3") + .build(); + assert_eq!(blueprint1.parent_blueprint_id, None); + assert_eq!(blueprint2.parent_blueprint_id, Some(blueprint1.id)); + assert_eq!(blueprint3.parent_blueprint_id, Some(blueprint1.id)); + + // Insert all three into the blueprint table. + datastore.blueprint_insert(&opctx, &blueprint1).await.unwrap(); + datastore.blueprint_insert(&opctx, &blueprint2).await.unwrap(); + datastore.blueprint_insert(&opctx, &blueprint3).await.unwrap(); + + let bp1_target = BlueprintTarget { + target_id: blueprint1.id, + enabled: true, + time_made_target: now_db_precision(), + }; + let bp2_target = BlueprintTarget { + target_id: blueprint2.id, + enabled: true, + time_made_target: now_db_precision(), + }; + let bp3_target = BlueprintTarget { + target_id: blueprint3.id, + enabled: true, + time_made_target: now_db_precision(), + }; + + // Attempting to make blueprint2 the current target should fail because + // it has a non-NULL parent_blueprint_id, but there is no current target + // (i.e., only a blueprint with no parent can be made the current + // target). + let err = datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint2.id)) + ); + + // There should be no current target still. + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + None + ); + + // We should be able to insert blueprint1, which has no parent (matching + // the currently-empty `bp_target` table's lack of a target). + datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp1_target, blueprint1.clone())) + ); + + // Now that blueprint1 is the current target, we should be able to + // insert blueprint2 or blueprint3. WLOG, pick blueprint3. + datastore + .blueprint_target_set_current(&opctx, bp3_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp3_target, blueprint3.clone())) + ); + + // Now that blueprint3 is the target, trying to insert blueprint1 or + // blueprint2 should fail, because neither of their parents (NULL and + // blueprint1, respectively) match the current target. + let err = datastore + .blueprint_target_set_current(&opctx, bp1_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint1.id)) + ); + let err = datastore + .blueprint_target_set_current(&opctx, bp2_target) + .await + .unwrap_err(); + assert_eq!( + err, + Error::from(InsertTargetError::ParentNotTarget(blueprint2.id)) + ); + + // Create a child of blueprint3, and ensure when we set it as the target + // with enabled=false, that status is serialized. 
+ let blueprint4 = + BlueprintBuilder::new_based_on(&blueprint3, &EMPTY_POLICY, "test3") + .build(); + assert_eq!(blueprint4.parent_blueprint_id, Some(blueprint3.id)); + datastore.blueprint_insert(&opctx, &blueprint4).await.unwrap(); + let bp4_target = BlueprintTarget { + target_id: blueprint4.id, + enabled: false, + time_made_target: now_db_precision(), + }; + datastore + .blueprint_target_set_current(&opctx, bp4_target) + .await + .unwrap(); + assert_eq!( + datastore.blueprint_target_get_current_full(&opctx).await.unwrap(), + Some((bp4_target, blueprint4)) + ); + + // Clean up. + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 78a7aeda87..96832b25bf 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -54,6 +54,7 @@ mod certificate; mod console_session; mod dataset; mod db_metadata; +mod deployment; mod device_auth; mod disk; mod dns; diff --git a/nexus/deployment/Cargo.toml b/nexus/deployment/Cargo.toml index b166f947bf..115dec98a5 100644 --- a/nexus/deployment/Cargo.toml +++ b/nexus/deployment/Cargo.toml @@ -9,6 +9,7 @@ chrono.workspace = true internal-dns.workspace = true ipnet.workspace = true ipnetwork.workspace = true +nexus-inventory.workspace = true nexus-types.workspace = true omicron-common.workspace = true slog.workspace = true @@ -18,6 +19,5 @@ uuid.workspace = true omicron-workspace-hack.workspace = true [dev-dependencies] -nexus-inventory.workspace = true omicron-test-utils.workspace = true sled-agent-client.workspace = true diff --git a/nexus/deployment/src/blueprint_builder.rs b/nexus/deployment/src/blueprint_builder.rs index 689e2d8e2c..ac2fe70e6b 100644 --- a/nexus/deployment/src/blueprint_builder.rs +++ b/nexus/deployment/src/blueprint_builder.rs @@ -9,6 +9,7 @@ use anyhow::anyhow; use internal_dns::config::Host; use internal_dns::config::ZoneVariant; use ipnet::IpAdd; +use nexus_inventory::now_db_precision; use nexus_types::deployment::Blueprint; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; @@ -94,7 +95,7 @@ impl<'a> BlueprintBuilder<'a> { .sleds .keys() .map(|sled_id| { - let zones = collection + let mut zones = collection .omicron_zones .get(sled_id) .map(|z| z.zones.clone()) @@ -118,6 +119,11 @@ impl<'a> BlueprintBuilder<'a> { sled_id )) })?; + + // This is not strictly necessary. But for testing, it's + // helpful for things to be in sorted order. + zones.zones.sort_by_key(|zone| zone.id); + Ok((*sled_id, zones)) }) .collect::>()?; @@ -125,10 +131,10 @@ impl<'a> BlueprintBuilder<'a> { collection.all_omicron_zones().map(|z| z.id).collect(); Ok(Blueprint { id: Uuid::new_v4(), - omicron_zones: omicron_zones, + omicron_zones, zones_in_service, parent_blueprint_id: None, - time_created: chrono::Utc::now(), + time_created: now_db_precision(), creator: creator.to_owned(), comment: format!("from collection {}", collection.id), }) @@ -162,7 +168,7 @@ impl<'a> BlueprintBuilder<'a> { .map(|sled_id| { // Start with self.omicron_zones, which contains entries for any // sled whose zones config is changing in this blueprint. - let zones = self + let mut zones = self .omicron_zones .remove(sled_id) // If it's not there, use the config from the parent @@ -180,15 +186,20 @@ impl<'a> BlueprintBuilder<'a> { generation: Generation::new(), zones: vec![], }); + + // This is not strictly necessary. But for testing, it's + // helpful for things to be in sorted order. 
+ zones.zones.sort_by_key(|zone| zone.id); + (*sled_id, zones) }) .collect(); Blueprint { id: Uuid::new_v4(), - omicron_zones: omicron_zones, + omicron_zones, zones_in_service: self.zones_in_service, parent_blueprint_id: Some(self.parent_blueprint.id), - time_created: chrono::Utc::now(), + time_created: now_db_precision(), creator: self.creator, comment: self.comments.join(", "), } diff --git a/nexus/inventory/src/builder.rs b/nexus/inventory/src/builder.rs index 62d338c1ee..08a905143c 100644 --- a/nexus/inventory/src/builder.rs +++ b/nexus/inventory/src/builder.rs @@ -96,7 +96,7 @@ impl CollectionBuilder { pub fn new(collector: &str) -> Self { CollectionBuilder { errors: vec![], - time_started: now(), + time_started: now_db_precision(), collector: collector.to_owned(), baseboards: BTreeSet::new(), cabooses: BTreeSet::new(), @@ -122,7 +122,7 @@ impl CollectionBuilder { id: Uuid::new_v4(), errors: self.errors.into_iter().map(|e| e.to_string()).collect(), time_started: self.time_started, - time_done: now(), + time_done: now_db_precision(), collector: self.collector, baseboards: self.baseboards, cabooses: self.cabooses, @@ -178,7 +178,7 @@ impl CollectionBuilder { // Separate the SP state into the SP-specific state and the RoT state, // if any. - let now = now(); + let now = now_db_precision(); let _ = self.sps.entry(baseboard.clone()).or_insert_with(|| { ServiceProcessor { time_collected: now, @@ -279,7 +279,7 @@ impl CollectionBuilder { if let Some(previous) = by_id.insert( baseboard.clone(), CabooseFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_owned(), caboose: sw_caboose.clone(), }, @@ -348,7 +348,7 @@ impl CollectionBuilder { if let Some(previous) = by_id.insert( baseboard.clone(), RotPageFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_owned(), page: sw_rot_page.clone(), }, @@ -456,7 +456,7 @@ impl CollectionBuilder { usable_hardware_threads: inventory.usable_hardware_threads, usable_physical_ram: inventory.usable_physical_ram, reservoir_size: inventory.reservoir_size, - time_collected: now(), + time_collected: now_db_precision(), sled_id, }; @@ -491,7 +491,7 @@ impl CollectionBuilder { self.omicron_zones.insert( sled_id, OmicronZonesFound { - time_collected: now(), + time_collected: now_db_precision(), source: source.to_string(), sled_id, zones, @@ -507,7 +507,7 @@ impl CollectionBuilder { /// This exists because the database doesn't store nanosecond-precision, so if /// we store nanosecond-precision timestamps, then DateTime conversion is lossy /// when round-tripping through the database. That's rather inconvenient. -fn now() -> DateTime { +pub fn now_db_precision() -> DateTime { let ts = Utc::now(); let nanosecs = ts.timestamp_subsec_nanos(); let micros = ts.timestamp_subsec_micros(); @@ -517,7 +517,7 @@ fn now() -> DateTime { #[cfg(test)] mod test { - use super::now; + use super::now_db_precision; use super::CollectionBuilder; use crate::examples::representative; use crate::examples::sp_state; @@ -541,10 +541,10 @@ mod test { // Verify the contents of an empty collection. #[test] fn test_empty() { - let time_before = now(); + let time_before = now_db_precision(); let builder = CollectionBuilder::new("test_empty"); let collection = builder.build(); - let time_after = now(); + let time_after = now_db_precision(); assert!(collection.errors.is_empty()); assert!(time_before <= collection.time_started); @@ -577,7 +577,7 @@ mod test { // a useful quick check. 
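Aside: the microsecond truncation that `now_db_precision` performs is easy to exercise standalone. A minimal, self-contained sketch of the same arithmetic (not part of the patch itself):

```rust
use chrono::{DateTime, Duration, Utc};

// Drop sub-microsecond precision so the timestamp survives a round
// trip through a database column with microsecond resolution.
fn truncate_to_micros(ts: DateTime<Utc>) -> DateTime<Utc> {
    let nanosecs = ts.timestamp_subsec_nanos();
    let micros = ts.timestamp_subsec_micros();
    ts - Duration::nanoseconds(i64::from(nanosecs - micros * 1000))
}

fn main() {
    let ts = truncate_to_micros(Utc::now());
    // After truncation the sub-second part is a whole number of micros.
    assert_eq!(ts.timestamp_subsec_nanos() % 1000, 0);
    println!("{ts}");
}
```

The `time_before`/`time_after` brackets in the tests below rely on this truncation being applied consistently on both sides of the comparison.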
#[test] fn test_basic() { - let time_before = now(); + let time_before = now_db_precision(); let Representative { builder, sleds: [sled1_bb, sled2_bb, sled3_bb, sled4_bb], @@ -587,7 +587,7 @@ mod test { [sled_agent_id_basic, sled_agent_id_extra, sled_agent_id_pc, sled_agent_id_unknown], } = representative(); let collection = builder.build(); - let time_after = now(); + let time_after = now_db_precision(); println!("{:#?}", collection); assert!(time_before <= collection.time_started); assert!(collection.time_started <= collection.time_done); diff --git a/nexus/inventory/src/lib.rs b/nexus/inventory/src/lib.rs index f11af8fede..6dee7bb7ec 100644 --- a/nexus/inventory/src/lib.rs +++ b/nexus/inventory/src/lib.rs @@ -27,6 +27,8 @@ pub use builder::CollectionBuilder; pub use builder::CollectorBug; pub use builder::InventoryError; +pub use builder::now_db_precision; + pub use collector::Collector; pub use sled_agent_enumerator::SledAgentEnumerator; diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index 9439cdc6d5..b9718a0367 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -5,13 +5,12 @@ //! Configuration of the deployment system use nexus_db_queries::authz; -use nexus_db_queries::authz::Action; -use nexus_db_queries::authz::ApiResource; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::pagination::Paginator; use nexus_deployment::blueprint_builder::BlueprintBuilder; use nexus_deployment::planner::Planner; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; use nexus_types::deployment::Policy; @@ -27,7 +26,6 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; -use omicron_common::api::external::ResourceType; use slog_error_chain::InlineErrorChain; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -47,28 +45,6 @@ const SQL_BATCH_SIZE: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; const SQL_LIMIT_INVENTORY: NonZeroU32 = unsafe { NonZeroU32::new_unchecked(1000) }; -/// Temporary in-memory store of blueprints -/// -/// Blueprints eventually need to be stored in the database. That will obviate -/// the need for this structure. -pub struct Blueprints { - all_blueprints: BTreeMap, - target: BlueprintTarget, -} - -impl Blueprints { - pub fn new() -> Blueprints { - Blueprints { - all_blueprints: BTreeMap::new(), - target: BlueprintTarget { - target_id: None, - enabled: false, - time_set: chrono::Utc::now(), - }, - } - } -} - /// Common structure for collecting information that the planner needs struct PlanningContext { policy: Policy, @@ -76,30 +52,14 @@ struct PlanningContext { } impl super::Nexus { - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. 
pub async fn blueprint_list( &self, opctx: &OpContext, pagparams: &DataPageParams<'_, Uuid>, - ) -> ListResultVec { - opctx.authorize(Action::ListChildren, &authz::BLUEPRINT_CONFIG).await?; - Ok(self - .blueprints - .lock() - .unwrap() - .all_blueprints - .values() - .filter_map(|f| match pagparams.marker { - None => Some(f.clone()), - Some(marker) if f.id > *marker => Some(f.clone()), - _ => None, - }) - .collect()) + ) -> ListResultVec { + self.db_datastore.blueprints_list(opctx, pagparams).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. pub async fn blueprint_view( &self, opctx: &OpContext, @@ -110,18 +70,9 @@ impl super::Nexus { blueprint_id, LookupType::ById(blueprint_id), ); - opctx.authorize(Action::Read, &blueprint).await?; - self.blueprints - .lock() - .unwrap() - .all_blueprints - .get(&blueprint_id) - .cloned() - .ok_or_else(|| blueprint.not_found()) + self.db_datastore.blueprint_read(opctx, &blueprint).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. pub async fn blueprint_delete( &self, opctx: &OpContext, @@ -132,90 +83,35 @@ impl super::Nexus { blueprint_id, LookupType::ById(blueprint_id), ); - opctx.authorize(Action::Delete, &blueprint).await?; - - let mut blueprints = self.blueprints.lock().unwrap(); - if let Some(target_id) = blueprints.target.target_id { - if target_id == blueprint_id { - return Err(Error::conflict(format!( - "blueprint {} is the current target and cannot be deleted", - blueprint_id - ))); - } - } - - if blueprints.all_blueprints.remove(&blueprint_id).is_none() { - return Err(blueprint.not_found()); - } - - Ok(()) + self.db_datastore.blueprint_delete(opctx, &blueprint).await } pub async fn blueprint_target_view( &self, opctx: &OpContext, - ) -> Result { - self.blueprint_target(opctx).await.map(|(target, _)| target) - } - - // This is a stand-in for a datastore function that fetches the current - // target information and the target blueprint's contents. This helper - // exists to combine the authz check with the lookup, which is what the - // datastore function will eventually do. - async fn blueprint_target( - &self, - opctx: &OpContext, - ) -> Result<(BlueprintTarget, Option), Error> { - opctx.authorize(Action::Read, &authz::BLUEPRINT_CONFIG).await?; - let blueprints = self.blueprints.lock().unwrap(); - Ok(( - blueprints.target.clone(), - blueprints.target.target_id.and_then(|target_id| { - blueprints.all_blueprints.get(&target_id).cloned() - }), - )) + ) -> Result, Error> { + self.db_datastore.blueprint_target_get_current(opctx).await } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. 
pub async fn blueprint_target_set( &self, opctx: &OpContext, params: BlueprintTargetSet, ) -> Result<BlueprintTarget, Error> { - opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; - let new_target_id = params.target_id; - let enabled = params.enabled; - let mut blueprints = self.blueprints.lock().unwrap(); - if let Some(blueprint) = blueprints.all_blueprints.get(&new_target_id) { - if blueprint.parent_blueprint_id != blueprints.target.target_id { - return Err(Error::conflict(&format!( - "blueprint {:?}: parent is {:?}, which is not the current \ - target {:?}", - new_target_id, - blueprint - .parent_blueprint_id - .map(|p| p.to_string()) - .unwrap_or_else(|| String::from("<none>")), - blueprints - .target - .target_id - .map(|p| p.to_string()) - .unwrap_or_else(|| String::from("<none>")), - ))); - } - blueprints.target = BlueprintTarget { - target_id: Some(new_target_id), - enabled, - time_set: chrono::Utc::now(), - }; + let new_target = BlueprintTarget { + target_id: params.target_id, + enabled: params.enabled, + time_made_target: chrono::Utc::now(), + }; + + self.db_datastore + .blueprint_target_set_current(opctx, new_target) + .await?; + + // When we add a background task executing the target blueprint, + // this is the point where we'd signal it to update its target. - // When we add a background task executing the target blueprint, - // this is the point where we'd signal it to update its target. - Ok(blueprints.target.clone()) - } else { - Err(Error::not_found_by_id(ResourceType::Blueprint, &new_target_id)) - } + Ok(new_target) } async fn blueprint_planning_context( @@ -286,20 +182,12 @@ impl super::Nexus { Ok(PlanningContext { creator, policy: Policy { sleds } }) } - // Once we store blueprints in the database, this function will likely just - // delegate to a corresponding datastore function. async fn blueprint_add( &self, opctx: &OpContext, - blueprint: Blueprint, + blueprint: &Blueprint, ) -> Result<(), Error> { - opctx.authorize(Action::Modify, &authz::BLUEPRINT_CONFIG).await?; - let mut blueprints = self.blueprints.lock().unwrap(); - assert!(blueprints - .all_blueprints - .insert(blueprint.id, blueprint) - .is_none()); - Ok(()) + self.db_datastore.blueprint_insert(opctx, blueprint).await } pub async fn blueprint_generate_from_collection( @@ -329,7 +217,7 @@ impl super::Nexus { )) })?; - self.blueprint_add(&opctx, blueprint.clone()).await?; + self.blueprint_add(&opctx, &blueprint).await?; Ok(blueprint) } @@ -337,8 +225,9 @@ impl super::Nexus { &self, opctx: &OpContext, ) -> CreateResult<Blueprint> { - let (_, maybe_parent) = self.blueprint_target(opctx).await?; - let Some(parent_blueprint) = maybe_parent else { + let maybe_target = + self.db_datastore.blueprint_target_get_current_full(opctx).await?; + let Some((_, parent_blueprint)) = maybe_target else { return Err(Error::conflict( "cannot regenerate blueprint without existing target", )); @@ -358,7 +247,7 @@ impl super::Nexus { )) })?; - self.blueprint_add(&opctx, blueprint.clone()).await?; + self.blueprint_add(&opctx, &blueprint).await?; Ok(blueprint) } } diff --git a/nexus/src/app/mod.rs index d6ad7c98ea..bf8522452a 100644 --- a/nexus/src/app/mod.rs +++ b/nexus/src/app/mod.rs @@ -183,10 +183,6 @@ pub struct Nexus { /// Default Crucible region allocation strategy default_region_allocation_strategy: RegionAllocationStrategy, - - /// information about blueprints (deployment configurations) - // This will go away once these are stored in the database. 
- blueprints: std::sync::Mutex<deployment::Blueprints>, } impl Nexus { @@ -419,7 +415,6 @@ impl Nexus { .pkg .default_region_allocation_strategy .clone(), - blueprints: std::sync::Mutex::new(deployment::Blueprints::new()), }; // TODO-cleanup all the extra Arcs here seems wrong diff --git a/nexus/src/internal_api/http_entrypoints.rs index 58038cb37a..0122d9b439 100644 --- a/nexus/src/internal_api/http_entrypoints.rs +++ b/nexus/src/internal_api/http_entrypoints.rs @@ -26,6 +26,8 @@ use dropshot::TypedBody; use hyper::Body; use nexus_db_model::Ipv4NatEntryView; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintMetadata; +use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; use nexus_types::internal_api::params::SwitchPutRequest; use nexus_types::internal_api::params::SwitchPutResponse; @@ -45,7 +47,6 @@ use oximeter::types::ProducerResults; use oximeter_producer::{collect, ProducerIdPathParams}; use schemars::JsonSchema; use serde::Deserialize; -use serde::Serialize; use std::collections::BTreeMap; use std::sync::Arc; use uuid::Uuid; @@ -620,7 +621,7 @@ async fn ipv4_nat_changeset( async fn blueprint_list( rqctx: RequestContext<Arc<ServerContext>>, query_params: Query<PaginatedById>, -) -> Result<HttpResponseOk<ResultsPage<Blueprint>>, HttpError> { +) -> Result<HttpResponseOk<ResultsPage<BlueprintMetadata>>, HttpError> { let apictx = rqctx.context(); let handler = async { let nexus = &apictx.nexus; @@ -631,7 +632,7 @@ async fn blueprint_list( Ok(HttpResponseOk(ScanById::results_page( &query, blueprints, - &|_, blueprint: &Blueprint| blueprint.id, + &|_, blueprint: &BlueprintMetadata| blueprint.id, )?)) }; @@ -680,35 +681,6 @@ async fn blueprint_delete( // Managing the current target blueprint -/// Describes what blueprint, if any, the system is currently working toward -#[derive(Debug, Serialize, JsonSchema)] -pub struct BlueprintTarget { - /// id of the blueprint that the system is trying to make real - pub target_id: Uuid, - /// policy: should the system actively work towards this blueprint - /// - /// This should generally be left enabled. 
- pub enabled: bool, - /// when this blueprint was made the target - pub time_set: chrono::DateTime<chrono::Utc>, -} - -impl TryFrom<nexus_types::deployment::BlueprintTarget> for BlueprintTarget { - type Error = Error; - - fn try_from( - value: nexus_types::deployment::BlueprintTarget, - ) -> Result<Self, Self::Error> { - Ok(BlueprintTarget { - target_id: value.target_id.ok_or_else(|| { - Error::conflict("no target blueprint has been configured") - })?, - enabled: value.enabled, - time_set: value.time_set, - }) - } -} - /// Fetches the current target blueprint, if any #[endpoint { method = GET, path = "/deployment/blueprints/target", }] async fn blueprint_target_view( rqctx: RequestContext<Arc<ServerContext>>, ) -> Result<HttpResponseOk<BlueprintTarget>, HttpError> { let apictx = rqctx.context(); let handler = async { let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let nexus = &apictx.nexus; - let target = nexus.blueprint_target_view(&opctx).await?; - Ok(HttpResponseOk(BlueprintTarget::try_from(target)?)) + let target = + nexus.blueprint_target_view(&opctx).await?.ok_or_else(|| { + Error::conflict("no target blueprint has been configured") + })?; + Ok(HttpResponseOk(target)) }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } @@ -741,11 +716,8 @@ async fn blueprint_target_set( let opctx = crate::context::op_context_for_internal_api(&rqctx).await; let nexus = &apictx.nexus; let target = target.into_inner(); - let result = nexus.blueprint_target_set(&opctx, target).await?; - Ok(HttpResponseOk( - BlueprintTarget::try_from(result) - .map_err(|e| Error::conflict(e.to_string()))?, - )) + let target = nexus.blueprint_target_set(&opctx, target).await?; + Ok(HttpResponseOk(target)) }; apictx.internal_latencies.instrument_dropshot_handler(&rqctx, handler).await } diff --git a/nexus/types/src/deployment.rs index 95404a2c17..3b4c3b3142 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -16,6 +16,7 @@ pub use crate::inventory::OmicronZoneConfig; pub use crate::inventory::OmicronZoneDataset; pub use crate::inventory::OmicronZoneType; pub use crate::inventory::OmicronZonesConfig; +pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; @@ -184,13 +185,39 @@ impl Blueprint { } } -/// Describes which blueprint the system is currently trying to make real -// This is analogous to the db model type until we have that. -#[derive(Debug, Clone)] +/// Describes high-level metadata about a blueprint +// These fields are a subset of [`Blueprint`], and include only the data we can +// quickly fetch from the main blueprint table (e.g., when listing all +// blueprints). +#[derive(Debug, Clone, Eq, PartialEq, JsonSchema, Serialize)] +pub struct BlueprintMetadata { + /// unique identifier for this blueprint + pub id: Uuid, + + /// which blueprint this blueprint is based on + pub parent_blueprint_id: Option<Uuid>, + + /// when this blueprint was generated (for debugging) + pub time_created: chrono::DateTime<chrono::Utc>, + /// identity of the component that generated the blueprint (for debugging) + /// This would generally be the Uuid of a Nexus instance. 
+ pub creator: String, + /// human-readable string describing why this blueprint was created + /// (for debugging) + pub comment: String, +} + +/// Describes what blueprint, if any, the system is currently working toward +#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, JsonSchema)] pub struct BlueprintTarget { - pub target_id: Option<Uuid>, + /// id of the blueprint that the system is trying to make real + pub target_id: Uuid, + /// policy: should the system actively work towards this blueprint + /// + /// This should generally be left enabled. pub enabled: bool, - pub time_set: chrono::DateTime<chrono::Utc>, + /// when this blueprint was made the target + pub time_made_target: chrono::DateTime<chrono::Utc>, } /// Specifies what blueprint, if any, the system should be working toward diff --git a/openapi/nexus-internal.json index 8b0807d52c..bc26736b37 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -164,7 +164,7 @@ "content": { "application/json": { "schema": { - "$ref": "#/components/schemas/BlueprintResultsPage" + "$ref": "#/components/schemas/BlueprintMetadataResultsPage" } } } @@ -2132,7 +2132,43 @@ "zones_in_service" ] }, - "BlueprintResultsPage": { + "BlueprintMetadata": { + "description": "Describes high-level metadata about a blueprint", + "type": "object", + "properties": { + "comment": { + "description": "human-readable string describing why this blueprint was created (for debugging)", + "type": "string" + }, + "creator": { + "description": "identity of the component that generated the blueprint (for debugging) This would generally be the Uuid of a Nexus instance.", + "type": "string" + }, + "id": { + "description": "unique identifier for this blueprint", + "type": "string", + "format": "uuid" + }, + "parent_blueprint_id": { + "nullable": true, + "description": "which blueprint this blueprint is based on", + "type": "string", + "format": "uuid" + }, + "time_created": { + "description": "when this blueprint was generated (for debugging)", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "comment", + "creator", + "id", + "time_created" + ] + }, + "BlueprintMetadataResultsPage": { "description": "A single page of results", "type": "object", "properties": { @@ -2140,7 +2176,7 @@ "description": "list of items on this page of results", "type": "array", "items": { - "$ref": "#/components/schemas/Blueprint" + "$ref": "#/components/schemas/BlueprintMetadata" } }, "next_page": { @@ -2166,7 +2202,7 @@ "type": "string", "format": "uuid" }, - "time_set": { + "time_made_target": { "description": "when this blueprint was made the target", "type": "string", "format": "date-time" @@ -2175,7 +2211,7 @@ "required": [ "enabled", "target_id", - "time_set" + "time_made_target" ] }, "BlueprintTargetSet": { diff --git a/schema/crdb/28.0.0/up1.sql b/schema/crdb/28.0.0/up1.sql new file mode 100644 index 0000000000..fda4e3ed5c --- /dev/null +++ b/schema/crdb/28.0.0/up1.sql @@ -0,0 +1,7 @@ +CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( + id UUID PRIMARY KEY, + parent_blueprint_id UUID, + time_created TIMESTAMPTZ NOT NULL, + creator TEXT NOT NULL, + comment TEXT NOT NULL +); diff --git a/schema/crdb/28.0.0/up2.sql b/schema/crdb/28.0.0/up2.sql new file mode 100644 index 0000000000..a51c1a31fa --- /dev/null +++ b/schema/crdb/28.0.0/up2.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_zones ( + blueprint_id UUID NOT NULL, + sled_id UUID NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (blueprint_id, sled_id) +); 
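As a sketch of how the datastore functions above map onto this schema — assuming the `blueprint` table from `up1.sql` and the `bp_target` table that `up6.sql` adds below, where the current target is simply the maximally-versioned row — the queries look roughly like this (illustrative only, not part of the patch):

```sql
-- List blueprint metadata, as blueprints_list would (paginated by id).
SELECT id, parent_blueprint_id, time_created, creator, comment
FROM omicron.public.blueprint
ORDER BY id;

-- Fetch the current target: the bp_target row with the largest version,
-- joined to its blueprint's metadata.
SELECT b.id, t.enabled, t.time_made_target
FROM omicron.public.bp_target AS t
JOIN omicron.public.blueprint AS b ON b.id = t.blueprint_id
ORDER BY t.version DESC
LIMIT 1;
```

Because every new target must name the previous target as its parent, walking `parent_blueprint_id` from that row reconstructs the linear target history described above.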
diff --git a/schema/crdb/28.0.0/up3.sql b/schema/crdb/28.0.0/up3.sql new file mode 100644 index 0000000000..55e09ca719 --- /dev/null +++ b/schema/crdb/28.0.0/up3.sql @@ -0,0 +1,31 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( + blueprint_id UUID NOT NULL, + sled_id UUID NOT NULL, + id UUID NOT NULL, + underlay_address INET NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + dataset_zpool_name TEXT, + bp_nic_id UUID, + dns_gz_address INET, + dns_gz_address_index INT8, + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + + PRIMARY KEY (blueprint_id, id) +); diff --git a/schema/crdb/28.0.0/up4.sql b/schema/crdb/28.0.0/up4.sql new file mode 100644 index 0000000000..beff4da802 --- /dev/null +++ b/schema/crdb/28.0.0/up4.sql @@ -0,0 +1,13 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( + blueprint_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + + PRIMARY KEY (blueprint_id, id) +); diff --git a/schema/crdb/28.0.0/up5.sql b/schema/crdb/28.0.0/up5.sql new file mode 100644 index 0000000000..72c34400a3 --- /dev/null +++ b/schema/crdb/28.0.0/up5.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zones_not_in_service ( + blueprint_id UUID NOT NULL, + bp_omicron_zone_id UUID NOT NULL, + + PRIMARY KEY (blueprint_id, bp_omicron_zone_id) +); diff --git a/schema/crdb/28.0.0/up6.sql b/schema/crdb/28.0.0/up6.sql new file mode 100644 index 0000000000..41e69ca3da --- /dev/null +++ b/schema/crdb/28.0.0/up6.sql @@ -0,0 +1,6 @@ +CREATE TABLE IF NOT EXISTS omicron.public.bp_target ( + version INT8 PRIMARY KEY, + blueprint_id UUID NOT NULL, + enabled BOOL NOT NULL, + time_made_target TIMESTAMPTZ NOT NULL +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index c91bb669a9..86d1340379 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -2954,8 +2954,8 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( -- service in them) primary_service_ip INET NOT NULL, primary_service_port INT4 - CHECK (primary_service_port BETWEEN 0 AND 65535) - NOT NULL, + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, -- The remaining properties may be NULL for different kinds of zones. The -- specific constraints are not enforced at the database layer, basically @@ -2967,7 +2967,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( second_service_ip INET, second_service_port INT4 CHECK (second_service_port IS NULL - OR second_service_port BETWEEN 0 AND 65535), + OR second_service_port BETWEEN 0 AND 65535), -- Zones may have an associated dataset. They're currently always on a U.2. 
-- The only thing we need to identify it here is the name of the zpool that @@ -2995,9 +2995,9 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone ( -- Source NAT configuration (currently used for boundary NTP only) snat_ip INET, snat_first_port INT4 - CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), snat_last_port INT4 - CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), PRIMARY KEY (inv_collection_id, id) ); @@ -3016,6 +3016,200 @@ CREATE TABLE IF NOT EXISTS omicron.public.inv_omicron_zone_nic ( PRIMARY KEY (inv_collection_id, id) ); +/* + * System-level blueprints + * + * See RFD 457 and 459 for context. + * + * A blueprint describes a potential system configuration. The primary table is + * the `blueprint` table, which stores only a small amount of metadata about the + * blueprint. The bulk of the information is stored in the `bp_*` tables below, + * each of which references back to `blueprint` by ID. + * + * `bp_target` describes the "target blueprints" of the system. Insertion must + * follow a strict set of rules: + * + * * The first target blueprint must have version=1, and must have no parent + * blueprint. + * * The Nth target blueprint must have version=N, and its parent blueprint must + * be the blueprint that was the target at version=N-1. + * + * The result is that the current target blueprint can always be found by + * looking at the maximally-versioned row in `bp_target`, and there is a linear + * history from that blueprint all the way back to the version=1 blueprint. We + * will eventually prune old blueprint targets, so it will not always be + * possible to view the entire history. + * + * `bp_sled_omicron_zones`, `bp_omicron_zone`, and `bp_omicron_zone_nic` are + * nearly identical to their `inv_*` counterparts, and record the + * `OmicronZonesConfig` for each sled. + * + * `bp_omicron_zones_not_in_service` stores a list of Omicron zones (present in + * `bp_omicron_zone`) that are NOT in service; e.g., should not appear in + * internal DNS. Nexus's in-memory `Blueprint` representation stores the set of + * zones that ARE in service. We invert that logic at this layer because we + * expect most blueprints to have a relatively large number of omicron zones, + * almost all of which will be in service. This is a minor and perhaps + * unnecessary optimization at the database layer, but it's also relatively + * simple and hidden by the relevant read and insert queries in + * `nexus-db-queries`. + */ + +-- list of all blueprints +CREATE TABLE IF NOT EXISTS omicron.public.blueprint ( + id UUID PRIMARY KEY, + + -- This is effectively a foreign key back to this table; however, it is + -- allowed to be NULL: the initial blueprint has no parent. Additionally, + -- it may be non-NULL but no longer reference a row in this table: once a + -- child blueprint has been created from a parent, it's possible for the + -- parent to be deleted. We do not NULL out this field on such a deletion, + -- so we can always see that there had been a particular parent even if it's + -- now gone. + parent_blueprint_id UUID, + + -- These fields are for debugging only. 
+ time_created TIMESTAMPTZ NOT NULL, + creator TEXT NOT NULL, + comment TEXT NOT NULL +); + +-- table describing both the current and historical target blueprints of the +-- system +CREATE TABLE IF NOT EXISTS omicron.public.bp_target ( + -- Monotonically increasing version for all bp_targets + version INT8 PRIMARY KEY, + + -- Effectively a foreign key into the `blueprint` table, but may reference a + -- blueprint that has been deleted (if this target is no longer the current + -- target: the current target must not be deleted). + blueprint_id UUID NOT NULL, + + -- Is this blueprint enabled? + -- + -- Currently, we have no code that acts on this value; however, it exists as + -- an escape hatch once we have automated blueprint planning and execution. + -- An operator can set the current blueprint to disabled, which should stop + -- planning and execution (presumably until a support case can address + -- whatever issue the update system is causing). + enabled BOOL NOT NULL, + + -- Timestamp for when this blueprint was made the current target + time_made_target TIMESTAMPTZ NOT NULL +); + +-- see inv_sled_omicron_zones, which is identical except it references a +-- collection whereas this table references a blueprint +CREATE TABLE IF NOT EXISTS omicron.public.bp_sled_omicron_zones ( + -- foreign key into `blueprint` table + blueprint_id UUID NOT NULL, + + sled_id UUID NOT NULL, + generation INT8 NOT NULL, + PRIMARY KEY (blueprint_id, sled_id) +); + +-- description of omicron zones specified in a blueprint +-- +-- This is currently identical to `inv_omicron_zone`, except that the foreign +-- keys reference other blueprint tables instead of inventory tables. We expect +-- their sameness to diverge over time as either inventory or blueprints (or +-- both) grow context-specific properties. +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone ( + -- foreign key into the `blueprint` table + blueprint_id UUID NOT NULL, + + -- unique id for this sled (should be a foreign key into the `sled` table, though + -- it's conceivable a blueprint could refer to a sled that no longer exists, + -- particularly if the blueprint is older than the current target) + sled_id UUID NOT NULL, + + -- unique id for this zone + id UUID NOT NULL, + underlay_address INET NOT NULL, + zone_type omicron.public.zone_type NOT NULL, + + -- SocketAddr of the "primary" service for this zone + -- (what this describes varies by zone type, but all zones have at least one + -- service in them) + primary_service_ip INET NOT NULL, + primary_service_port INT4 + CHECK (primary_service_port BETWEEN 0 AND 65535) + NOT NULL, + + -- The remaining properties may be NULL for different kinds of zones. The + -- specific constraints are not enforced at the database layer, basically + -- because it's really complicated to do that and it's not obvious that it's + -- worthwhile. + + -- Some zones have a second service. Like the primary one, the meaning of + -- this is zone-type-dependent. + second_service_ip INET, + second_service_port INT4 + CHECK (second_service_port IS NULL + OR second_service_port BETWEEN 0 AND 65535), + + -- Zones may have an associated dataset. They're currently always on a U.2. + -- The only thing we need to identify it here is the name of the zpool that + -- it's on. 
+ dataset_zpool_name TEXT, + + -- Zones with external IPs have an associated NIC and sockaddr for listening + -- (first is a foreign key into `bp_omicron_zone_nic`) + bp_nic_id UUID, + + -- Properties for internal DNS servers + -- address attached to this zone from outside the sled's subnet + dns_gz_address INET, + dns_gz_address_index INT8, + + -- Properties common to both kinds of NTP zones + ntp_ntp_servers TEXT[], + ntp_dns_servers INET[], + ntp_domain TEXT, + + -- Properties specific to Nexus zones + nexus_external_tls BOOLEAN, + nexus_external_dns_servers INET ARRAY, + + -- Source NAT configuration (currently used for boundary NTP only) + snat_ip INET, + snat_first_port INT4 + CHECK (snat_first_port IS NULL OR snat_first_port BETWEEN 0 AND 65535), + snat_last_port INT4 + CHECK (snat_last_port IS NULL OR snat_last_port BETWEEN 0 AND 65535), + + PRIMARY KEY (blueprint_id, id) +); + +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zone_nic ( + blueprint_id UUID NOT NULL, + id UUID NOT NULL, + name TEXT NOT NULL, + ip INET NOT NULL, + mac INT8 NOT NULL, + subnet INET NOT NULL, + vni INT8 NOT NULL, + is_primary BOOLEAN NOT NULL, + slot INT2 NOT NULL, + + PRIMARY KEY (blueprint_id, id) +); + +-- list of omicron zones that are considered NOT in-service for a blueprint +-- +-- In Rust code, we generally want to deal with "zones in service", which means +-- they should appear in DNS. However, almost all zones in almost all blueprints +-- will be in service, so we can induce considerably less database work by +-- storing the zones _not_ in service. Our DB wrapper layer handles this +-- inversion, so the rest of our Rust code can ignore it. +CREATE TABLE IF NOT EXISTS omicron.public.bp_omicron_zones_not_in_service ( + blueprint_id UUID NOT NULL, + bp_omicron_zone_id UUID NOT NULL, + + PRIMARY KEY (blueprint_id, bp_omicron_zone_id) +); + /*******************************************************************/ /* @@ -3196,7 +3390,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '27.0.0', NULL) + ( TRUE, NOW(), NOW(), '28.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; From 5215d850768f8a79160220bfd6441959a8a04064 Mon Sep 17 00:00:00 2001 From: Levon Tarver <11586085+internet-diglett@users.noreply.github.com> Date: Fri, 26 Jan 2024 17:48:08 -0600 Subject: [PATCH 40/49] background task for service zone nat (#4857) Currently the logic for configuring NAT for service zones is deeply nested and crosses sled-agent http API boundaries. The cleanest way to deliver eventual consistency for service zone nat entries was to pull the zone information from inventory and use that to generate nat entries to reconcile against the `ipv4_nat_entry` table. This covers us in the following scenarios: ### RSS: * User provides configuration to RSS * RSS process ultimately creates a sled plan and service plan * Application of service plan by sled-agents creates zones * zone create makes direct calls to dendrite to configure NAT (it is the only way it can be done at this time) * eventually the Nexus zones are launched and handoff to Nexus is complete * inventory task is run, recording zone locations to db * service zone nat background task reads inventory from db and uses the data to generate records for `ipv4_nat_entry` table, then triggers dendrite sync. 
* sync is ultimately a noop because nat entries already exist in dendrite (dendrite operations are idempotent) ### Cold boot: * sled-agents create switch zones if they are managing a scrimlet, and subsequently create zones written to their ledgers. This may result in direct calls to dendrite. * Once nexus is back up, inventory will resume being collected * service zone nat background task will read inventory from db to reconcile entries in `ipv4_nat_entry` table and then trigger dendrite sync. * If nat is out of date on dendrite, it will be updated on trigger. ### Dendrite crash * If dendrite crashes and restarts, it will immediately contact Nexus for re-sync (pre-existing logic from earlier NAT RPW work) * service zone and instance nat entries are now present in rpw table, so all nat entries will be restored ### Migration / Relocation of service zone * New zone gets created on a sled in the rack. Direct call to dendrite will be made (it uses the same logic as pre-nexus to create zone). * Inventory task will record new location of service zone * Service zone nat background task will use inventory to update table, adding and removing the necessary nat entries and triggering a dendrite update Considerations --- Because this relies on data from the inventory task which runs on a periodic timer (600s), and because this task also runs on a periodic timer (30s), there may be some latency for picking up changes. A few potential avenues for improvement: * Plumb additional logic into service zone nat configuration that enables direct updates to the `ipv4_nat_entry` table once nexus is online. Of note, this would further bifurcate the logic of pre-nexus and post-nexus state management. At this moment, it seems that this is the most painful approach. An argument can be made that we ultimately should be lifting the nat configuration logic _out_ of the service zone creation instead. * Decrease the timer for the inventory task. This is the simplest change, however this would result in more frequent collection, increasing overhead. I do not know _how much_ this would increase overhead. Maybe it is negligible. * Plumb in the ability to trigger the inventory collection task for interesting control plane events. This would allow us to keep the _relatively_ infrequent timing intervals but allow us to refresh on-demand when needed. 
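In outline, the reconcile at the heart of this task is a set difference between the desired NAT entries (derived from inventory) and the currently-active rows. A minimal, self-contained sketch of that shape — `plan_sync` and the string entries are hypothetical stand-ins; the real implementation in the diff below works directly against `Ipv4NatValues` rows and soft-deletes via the RPW versioning columns:

```rust
// Compute what to insert and what to (soft-)delete, given the desired set
// from inventory and the active set from the database. Entries present in
// both sets are left untouched.
fn plan_sync<T: Clone + PartialEq>(
    desired: &[T],
    active: &[T],
) -> (Vec<T>, Vec<T>) {
    // entries wanted but not present must be inserted
    let to_add =
        desired.iter().filter(|e| !active.contains(e)).cloned().collect();
    // entries present but no longer wanted must be removed
    let to_delete =
        active.iter().filter(|e| !desired.contains(e)).cloned().collect();
    (to_add, to_delete)
}

fn main() {
    let desired = ["ntp:0-999", "nexus:0-65535"];
    let active = ["ntp:0-999", "stale:0-99"];
    let (add, del) = plan_sync(&desired, &active);
    assert_eq!(add, ["nexus:0-65535"]);
    assert_eq!(del, ["stale:0-99"]);
}
```

Reconciling by difference rather than wholesale replacement keeps version churn in the RPW table to a minimum: unchanged entries produce no writes, so dendrite only sees an update trigger when something actually moved.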
Related --- Closes #4650 Extracted from #4822 --- common/src/address.rs | 6 + common/src/nexus_config.rs | 16 + dev-tools/omdb/tests/env.out | 12 + dev-tools/omdb/tests/successes.out | 11 + docs/how-to-run.adoc | 102 +++-- nexus/db-model/src/ipv4_nat_entry.rs | 2 +- nexus/db-model/src/ipv4net.rs | 1 + nexus/db-model/src/ipv6net.rs | 1 + nexus/db-model/src/macaddr.rs | 1 + nexus/db-model/src/schema.rs | 2 +- nexus/db-model/src/vni.rs | 10 +- .../src/db/datastore/ipv4_nat_entry.rs | 210 ++++++++++ nexus/examples/config.toml | 1 + nexus/src/app/background/init.rs | 29 +- nexus/src/app/background/mod.rs | 1 + .../app/background/sync_service_zone_nat.rs | 362 ++++++++++++++++++ nexus/tests/config.test.toml | 1 + schema/crdb/29.0.0/up1.sql | 14 + schema/crdb/dbinit.sql | 17 +- smf/nexus/multi-sled/config-partial.toml | 1 + smf/nexus/single-sled/config-partial.toml | 1 + 21 files changed, 770 insertions(+), 31 deletions(-) create mode 100644 nexus/src/app/background/sync_service_zone_nat.rs create mode 100644 schema/crdb/29.0.0/up1.sql diff --git a/common/src/address.rs b/common/src/address.rs index 0c8df33868..65a6604daf 100644 --- a/common/src/address.rs +++ b/common/src/address.rs @@ -18,6 +18,12 @@ pub const AZ_PREFIX: u8 = 48; pub const RACK_PREFIX: u8 = 56; pub const SLED_PREFIX: u8 = 64; +/// maximum possible value for a tcp or udp port +pub const MAX_PORT: u16 = u16::MAX; + +/// minimum possible value for a tcp or udp port +pub const MIN_PORT: u16 = u16::MIN; + /// The amount of redundancy for internal DNS servers. /// /// Must be less than or equal to MAX_DNS_REDUNDANCY. diff --git a/common/src/nexus_config.rs b/common/src/nexus_config.rs index be4b05ffdf..dedd091d81 100644 --- a/common/src/nexus_config.rs +++ b/common/src/nexus_config.rs @@ -334,6 +334,8 @@ pub struct BackgroundTaskConfig { pub inventory: InventoryConfig, /// configuration for phantom disks task pub phantom_disks: PhantomDiskConfig, + /// configuration for service zone nat sync task + pub sync_service_zone_nat: SyncServiceZoneNatConfig, } #[serde_as] @@ -376,6 +378,14 @@ pub struct NatCleanupConfig { pub period_secs: Duration, } +#[serde_as] +#[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] +pub struct SyncServiceZoneNatConfig { + /// period (in seconds) for periodic activations of this background task + #[serde_as(as = "DurationSeconds")] + pub period_secs: Duration, +} + #[serde_as] #[derive(Clone, Debug, Deserialize, Eq, PartialEq, Serialize)] pub struct InventoryConfig { @@ -517,6 +527,7 @@ mod test { }; use crate::address::{Ipv6Subnet, RACK_PREFIX}; use crate::api::internal::shared::SwitchLocation; + use crate::nexus_config::SyncServiceZoneNatConfig; use camino::{Utf8Path, Utf8PathBuf}; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; @@ -665,6 +676,7 @@ mod test { inventory.nkeep = 11 inventory.disable = false phantom_disks.period_secs = 30 + sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] type = "random" seed = 0 @@ -769,6 +781,9 @@ mod test { phantom_disks: PhantomDiskConfig { period_secs: Duration::from_secs(30), }, + sync_service_zone_nat: SyncServiceZoneNatConfig { + period_secs: Duration::from_secs(30) + } }, default_region_allocation_strategy: crate::nexus_config::RegionAllocationStrategy::Random { @@ -827,6 +842,7 @@ mod test { inventory.nkeep = 3 inventory.disable = false phantom_disks.period_secs = 30 + sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] type = "random" "##, diff --git a/dev-tools/omdb/tests/env.out 
b/dev-tools/omdb/tests/env.out index c08f592852..8cca1b063a 100644 --- a/dev-tools/omdb/tests/env.out +++ b/dev-tools/omdb/tests/env.out @@ -70,6 +70,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT @@ -139,6 +143,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. @@ -195,6 +203,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: Nexus URL not specified. Will pick one from DNS. diff --git a/dev-tools/omdb/tests/successes.out b/dev-tools/omdb/tests/successes.out index 65520ab59c..f291bbb6a0 100644 --- a/dev-tools/omdb/tests/successes.out +++ b/dev-tools/omdb/tests/successes.out @@ -264,6 +264,10 @@ task: "phantom_disks" detects and un-deletes phantom disks +task: "service_zone_nat_tracker" + ensures service zone nat records are recorded in NAT RPW table + + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ @@ -369,6 +373,13 @@ task: "phantom_disks" number of phantom disks deleted: 0 number of phantom disk delete errors: 0 +task: "service_zone_nat_tracker" + configured period: every 30s + currently executing: no + last completed activation: iter 2, triggered by an explicit signal + started at (s ago) and ran for ms + last completion reported error: inventory collection is None + --------------------------------------------- stderr: note: using Nexus URL http://127.0.0.1:REDACTED_PORT/ diff --git a/docs/how-to-run.adoc b/docs/how-to-run.adoc index f6d780ad72..c1f78a0521 100644 --- a/docs/how-to-run.adoc +++ b/docs/how-to-run.adoc @@ -498,41 +498,93 @@ Follow the instructions to set up the https://github.com/oxidecomputer/oxide.rs[ oxide auth login --host http://192.168.1.21 ---- +=== Configure quotas for your silo + +Setting resource quotas is required before you can begin uploading images, provisioning instances, etc. +In this example we'll update the recovery silo so we can provision instances directly from it: + +[source, console] +---- +$ oxide api /v1/system/silos/recovery/quotas --method PUT --input - <>). +Here we will first create an ip pool for the recovery silo: [source,console] ----- -$ oxide ip-pool range add --pool default --first 192.168.1.31 --last 192.168.1.40 -success -IpPoolRange { - id: 4a61e65a-d96d-4c56-9cfd-dc1e44d9e99b, - ip_pool_id: 1b1289a7-cefe-4a7e-a8c9-d93330846301, - range: V4( - Ipv4Range { - first: 192.168.1.31, - last: 192.168.1.40, - }, - ), - time_created: 2023-08-02T16:31:43.679785Z, +--- +$ oxide api /v1/system/ip-pools --method POST --input - < CreateResult<usize> { + use db::schema::ipv4_nat_entry::dsl; + + let vni = nexus_db_model::Vni(Vni::SERVICES_VNI); + + // find all active nat entries with the services vni + let result: Vec<Ipv4NatEntry> = dsl::ipv4_nat_entry + .filter(dsl::vni.eq(vni)) + .filter(dsl::version_removed.is_null()) + .select(Ipv4NatEntry::as_select()) + .load_async(&*self.pool_connection_authorized(opctx).await?) 
+ .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; + + // determine what to keep and what to delete + let mut keep: Vec<_> = vec![]; + let mut delete: Vec<_> = vec![]; + + for db_entry in result.iter() { + let values = Ipv4NatValues { + external_address: db_entry.external_address, + first_port: db_entry.first_port, + last_port: db_entry.last_port, + sled_address: db_entry.sled_address, + vni: db_entry.vni, + mac: db_entry.mac, + }; + + if nat_entries.contains(&values) { + keep.push(values); + } else { + delete.push(db_entry) + } + } + + // delete entries that are not present in requested entries + for entry in delete { + if let Err(e) = self.ipv4_nat_delete(opctx, entry).await { + error!( + opctx.log, + "failed to delete service zone nat entry"; + "error" => ?e, + "entry" => ?entry, + ); + } + } + + // optimization: only attempt to add what is missing + let add = nat_entries.iter().filter(|entry| !keep.contains(entry)); + + let mut count = 0; + + // insert nat_entries + for entry in add { + if let Err(e) = + self.ensure_ipv4_nat_entry(opctx, entry.clone()).await + { + error!( + opctx.log, + "failed to ensure service zone nat entry"; + "error" => ?e, + "entry" => ?entry, + ); + continue; + } + count += 1; + } + + Ok(count) + } + pub async fn ipv4_nat_delete( &self, opctx: &OpContext, @@ -592,4 +678,128 @@ mod test { db.cleanup().await.unwrap(); logctx.cleanup_successful(); } + + // Test our ability to reconcile a set of service zone nat entries + #[tokio::test] + async fn ipv4_nat_sync_service_zones() { + let logctx = dev::test_setup_log("ipv4_nat_sync_service_zones"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // We should not have any NAT entries at this moment + let initial_state = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert!(initial_state.is_empty()); + assert_eq!( + datastore.ipv4_nat_current_version(&opctx).await.unwrap(), + 0 + ); + + // create two nat entries: + // 1. an entry should be deleted during the next sync + // 2. an entry that should be kept during the next sync + + let external_address = external::Ipv4Net( + ipnetwork::Ipv4Network::try_from("10.0.0.100").unwrap(), + ); + + let sled_address = external::Ipv6Net( + ipnetwork::Ipv6Network::try_from("fd00:1122:3344:104::1").unwrap(), + ); + + // Add a nat entry. + let nat1 = Ipv4NatValues { + external_address: external_address.into(), + first_port: 0.into(), + last_port: 999.into(), + sled_address: sled_address.into(), + vni: Vni(external::Vni::SERVICES_VNI), + mac: MacAddr( + external::MacAddr::from_str("A8:40:25:F5:EB:2A").unwrap(), + ), + }; + + let nat2 = Ipv4NatValues { + first_port: 1000.into(), + last_port: 1999.into(), + ..nat1 + }; + + datastore.ensure_ipv4_nat_entry(&opctx, nat1.clone()).await.unwrap(); + datastore.ensure_ipv4_nat_entry(&opctx, nat2.clone()).await.unwrap(); + + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 2); + + // sync two nat entries: + // 1. a nat entry that already exists + // 2. a nat entry that does not already exist + + let nat3 = Ipv4NatValues { + first_port: 2000.into(), + last_port: 2999.into(), + ..nat2 + }; + + datastore + .ipv4_nat_sync_service_zones(&opctx, &[nat2.clone(), nat3.clone()]) + .await + .unwrap(); + + // we should have three nat entries in the db + // 1. the old one that was deleted during the last sync + // 2. 
the old one that "survived" the last sync + // 3. a new one that was added during the last sync + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 3); + + // nat2 and nat3 should not be soft deleted + for request in [nat2.clone(), nat3.clone()] { + assert!(db_entries.iter().any(|entry| { + entry.first_port == request.first_port + && entry.last_port == request.last_port + && entry.time_deleted.is_none() + })); + } + + // nat1 should be soft deleted + assert!(db_entries.iter().any(|entry| { + entry.first_port == nat1.first_port + && entry.last_port == nat1.last_port + && entry.time_deleted.is_some() + && entry.version_removed.is_some() + })); + + // add nat1 back + // this simulates a zone leaving and then returning, i.e. when a sled gets restarted + datastore + .ipv4_nat_sync_service_zones( + &opctx, + &[nat1.clone(), nat2.clone(), nat3.clone()], + ) + .await + .unwrap(); + + // we should have four nat entries in the db + let db_entries = + datastore.ipv4_nat_list_since_version(&opctx, 0, 10).await.unwrap(); + + assert_eq!(db_entries.len(), 4); + + // there should be an active entry for nat1 again + assert!(db_entries.iter().any(|entry| { + entry.first_port == nat1.first_port + && entry.last_port == nat1.last_port + && entry.time_deleted.is_none() + && entry.version_removed.is_none() + })); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } } diff --git a/nexus/examples/config.toml b/nexus/examples/config.toml index f13ea721b8..dcab2d9da1 100644 --- a/nexus/examples/config.toml +++ b/nexus/examples/config.toml @@ -105,6 +105,7 @@ inventory.nkeep = 5 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # allocate region on 3 random distinct zpools, on 3 random distinct sleds. 
diff --git a/nexus/src/app/background/init.rs b/nexus/src/app/background/init.rs index d30d2162c4..49ac6d93e2 100644 --- a/nexus/src/app/background/init.rs +++ b/nexus/src/app/background/init.rs @@ -12,6 +12,7 @@ use super::external_endpoints; use super::inventory_collection; use super::nat_cleanup; use super::phantom_disks; +use super::sync_service_zone_nat::ServiceZoneNatTracker; use nexus_db_model::DnsGroup; use nexus_db_queries::context::OpContext; use nexus_db_queries::db::DataStore; @@ -56,6 +57,9 @@ pub struct BackgroundTasks { /// task handle for the task that detects phantom disks pub task_phantom_disks: common::TaskHandle, + + /// task handle for the service zone nat tracker + pub task_service_zone_nat_tracker: common::TaskHandle, } impl BackgroundTasks { @@ -106,6 +110,9 @@ impl BackgroundTasks { (task, watcher_channel) }; + let dpd_clients: Vec<_> = + dpd_clients.values().map(|client| client.clone()).collect(); + let nat_cleanup = { driver.register( "nat_v4_garbage_collector".to_string(), @@ -116,7 +123,7 @@ impl BackgroundTasks { config.nat_cleanup.period_secs, Box::new(nat_cleanup::Ipv4NatGarbageCollector::new( datastore.clone(), - dpd_clients.values().map(|client| client.clone()).collect(), + dpd_clients.clone(), )), opctx.child(BTreeMap::new()), vec![], @@ -149,7 +156,8 @@ impl BackgroundTasks { // Background task: phantom disk detection let task_phantom_disks = { - let detector = phantom_disks::PhantomDiskDetector::new(datastore); + let detector = + phantom_disks::PhantomDiskDetector::new(datastore.clone()); let task = driver.register( String::from("phantom_disks"), @@ -163,6 +171,22 @@ impl BackgroundTasks { task }; + let task_service_zone_nat_tracker = { + driver.register( + "service_zone_nat_tracker".to_string(), + String::from( + "ensures service zone nat records are recorded in NAT RPW table", + ), + config.sync_service_zone_nat.period_secs, + Box::new(ServiceZoneNatTracker::new( + datastore.clone(), + dpd_clients.clone(), + )), + opctx.child(BTreeMap::new()), + vec![], + ) + }; + BackgroundTasks { driver, task_internal_dns_config, @@ -174,6 +198,7 @@ impl BackgroundTasks { nat_cleanup, task_inventory_collection, task_phantom_disks, + task_service_zone_nat_tracker, } } diff --git a/nexus/src/app/background/mod.rs b/nexus/src/app/background/mod.rs index 70b20224d4..166fc2654b 100644 --- a/nexus/src/app/background/mod.rs +++ b/nexus/src/app/background/mod.rs @@ -14,6 +14,7 @@ mod inventory_collection; mod nat_cleanup; mod phantom_disks; mod status; +mod sync_service_zone_nat; pub use common::Driver; pub use common::TaskHandle; diff --git a/nexus/src/app/background/sync_service_zone_nat.rs b/nexus/src/app/background/sync_service_zone_nat.rs new file mode 100644 index 0000000000..8e75f97d7a --- /dev/null +++ b/nexus/src/app/background/sync_service_zone_nat.rs @@ -0,0 +1,362 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Background task for detecting changes to service zone locations and +//! 
updating the NAT RPW table accordingly + +use super::common::BackgroundTask; +use anyhow::Context; +use futures::future::BoxFuture; +use futures::FutureExt; +use nexus_db_model::Ipv4NatValues; +use nexus_db_queries::context::OpContext; +use nexus_db_queries::db::lookup::LookupPath; +use nexus_db_queries::db::DataStore; +use omicron_common::address::{MAX_PORT, MIN_PORT}; +use omicron_common::api::external; +use serde_json::json; +use sled_agent_client::types::OmicronZoneType; +use std::net::{IpAddr, SocketAddr}; +use std::num::NonZeroU32; +use std::sync::Arc; + +// Minimum number of boundary NTP zones that should be present in a valid +// set of service zone nat configurations. +const MIN_NTP_COUNT: usize = 1; + +// Minimum number of nexus zones that should be present in a valid +// set of service zone nat configurations. +const MIN_NEXUS_COUNT: usize = 1; + +// Minimum number of external DNS zones that should be present in a valid +// set of service zone nat configurations. +const MIN_EXTERNAL_DNS_COUNT: usize = 1; + +/// Background task that ensures service zones have nat entries +/// persisted in the NAT RPW table +pub struct ServiceZoneNatTracker { + datastore: Arc<DataStore>, + dpd_clients: Vec<Arc<dpd_client::Client>>, +} + +impl ServiceZoneNatTracker { + pub fn new( + datastore: Arc<DataStore>, + dpd_clients: Vec<Arc<dpd_client::Client>>, + ) -> Self { + Self { datastore, dpd_clients } + } +} + +impl BackgroundTask for ServiceZoneNatTracker { + fn activate<'a>( + &'a mut self, + opctx: &'a OpContext, + ) -> BoxFuture<'a, serde_json::Value> { + async { + let log = &opctx.log; + + // check inventory + let inventory = match self + .datastore + .inventory_get_latest_collection( + opctx, + NonZeroU32::new(u32::MAX).unwrap(), + ) + .await + { + Ok(inventory) => inventory, + Err(e) => { + error!( + &log, + "failed to collect inventory"; + "error" => format!("{:#}", e) + ); + return json!({ + "error": + format!( + "failed to collect inventory: \ + {:#}", + e + ) + }); + } + }; + + // generate set of Service Zone NAT entries + let collection = match inventory { + Some(c) => c, + // this could happen if we check the inventory table before the + // inventory job has finished running for the first time + None => { + warn!( + &log, + "inventory collection is None"; + ); + return json!({ + "error": "inventory collection is None" + }); + } + }; + + let mut ipv4_nat_values: Vec<Ipv4NatValues> = vec![]; + let mut ntp_count = 0; + let mut nexus_count = 0; + let mut dns_count = 0; + + for (sled_id, zones_found) in collection.omicron_zones { + let (_, sled) = match LookupPath::new(opctx, &self.datastore) + .sled_id(sled_id) + .fetch() + .await + .context("failed to look up sled") + { + Ok(result) => result, + Err(e) => { + error!( + &log, + "failed to lookup sled by id"; + "id" => ?sled_id, + "error" => ?e, + ); + continue; + } + }; + + let sled_address = external::Ipv6Net( + ipnetwork::Ipv6Network::new(*sled.ip, 128).unwrap(), + ); + + let zones_config: sled_agent_client::types::OmicronZonesConfig = + zones_found.zones; + let zones: Vec<sled_agent_client::types::OmicronZoneConfig> = + zones_config.zones; + + for zone in zones { + let zone_type: OmicronZoneType = zone.zone_type; + match zone_type { + OmicronZoneType::BoundaryNtp { + nic, snat_cfg, .. 
+ } => { + let external_ip = match snat_cfg.ip { + IpAddr::V4(addr) => addr, + IpAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + } + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: snat_cfg.first_port.into(), + last_port: snat_cfg.last_port.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + ntp_count += 1; + } + OmicronZoneType::Nexus { nic, external_ip, .. } => { + let external_ip = match external_ip { + IpAddr::V4(addr) => addr, + IpAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + } + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: MIN_PORT.into(), + last_port: MAX_PORT.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + nexus_count += 1; + }, + OmicronZoneType::ExternalDns { nic, dns_address, .. } => { + let socket_addr: SocketAddr = match dns_address.parse() { + Ok(value) => value, + Err(e) => { + error!( + &log, + "failed to parse value into socketaddr"; + "value" => dns_address, + "error" => ?e, + ); + continue; + } + }; + let external_ip = match socket_addr { + SocketAddr::V4(v4) => { + *v4.ip() + }, + SocketAddr::V6(_) => { + error!( + &log, + "ipv6 addresses for service zone nat not implemented"; + ); + continue; + }, + }; + + let external_address = + ipnetwork::Ipv4Network::new(external_ip, 32) + .unwrap(); + + let nat_value = Ipv4NatValues { + external_address: nexus_db_model::Ipv4Net( + omicron_common::api::external::Ipv4Net( + external_address, + ), + ), + first_port: MIN_PORT.into(), + last_port: MAX_PORT.into(), + sled_address: sled_address.into(), + vni: nexus_db_model::Vni(nic.vni), + mac: nexus_db_model::MacAddr(nic.mac), + }; + + // Append ipv4 nat entry + ipv4_nat_values.push(nat_value); + dns_count += 1; + }, + // we explictly list all cases instead of using a wildcard, + // that way if someone adds a new type to OmicronZoneType that + // requires NAT, they must come here to update this logic as + // well + OmicronZoneType::Clickhouse {..} => continue, + OmicronZoneType::ClickhouseKeeper {..} => continue, + OmicronZoneType::CockroachDb {..} => continue, + OmicronZoneType::Crucible {..} => continue, + OmicronZoneType::CruciblePantry {..} => continue, + OmicronZoneType::InternalNtp {..} => continue, + OmicronZoneType::InternalDns {..} => continue, + OmicronZoneType::Oximeter { ..} => continue, + } + } + } + + // if we make it this far this should not be empty: + // * nexus is running so we should at least have generated a nat value for it + // * nexus requies other services zones that require nat to come up first + if ipv4_nat_values.is_empty() { + error!( + &log, + "nexus is running but no service zone nat values could be generated from inventory"; + ); + return json!({ + "error": "nexus is running but no service zone nat values could be generated from 
inventory" + }); + } + + if dns_count < MIN_EXTERNAL_DNS_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of dns zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of dns zones" + }); + } + + if ntp_count < MIN_NTP_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of ntp zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of ntp zones" + + }); + } + + if nexus_count < MIN_NEXUS_COUNT { + error!( + &log, + "generated config for fewer than the minimum allowed number of nexus zones"; + ); + return json!({ + "error": "generated config for fewer than the minimum allowed number of nexus zones" + + }); + } + + // reconcile service zone nat entries + let result = match self.datastore.ipv4_nat_sync_service_zones(opctx, &ipv4_nat_values).await { + Ok(num) => num, + Err(e) => { + error!( + &log, + "failed to update service zone nat records"; + "error" => format!("{:#}", e) + ); + return json!({ + "error": + format!( + "failed to update service zone nat records: \ + {:#}", + e + ) + }); + }, + }; + + // notify dpd if we've added any new records + if result > 0 { + for client in &self.dpd_clients { + if let Err(e) = client.ipv4_nat_trigger_update().await { + error!( + &log, + "failed to trigger dpd rpw workflow"; + "error" => ?e + ); + }; + } + } + + let rv = serde_json::to_value(&result).unwrap_or_else(|error| { + json!({ + "error": + format!( + "failed to serialize final value: {:#}", + error + ) + }) + }); + + rv + } + .boxed() + } +} diff --git a/nexus/tests/config.test.toml b/nexus/tests/config.test.toml index a4436234f0..476b8fe6c8 100644 --- a/nexus/tests/config.test.toml +++ b/nexus/tests/config.test.toml @@ -99,6 +99,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # we only have one sled in the test environment, so we need to use the diff --git a/schema/crdb/29.0.0/up1.sql b/schema/crdb/29.0.0/up1.sql new file mode 100644 index 0000000000..a213380944 --- /dev/null +++ b/schema/crdb/29.0.0/up1.sql @@ -0,0 +1,14 @@ +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup_by_vni ON omicron.public.ipv4_nat_entry ( + vni +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + mac, + version_added, + version_removed, + time_created, + time_deleted +); diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index 86d1340379..6ff92acfa4 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -3383,6 +3383,21 @@ CREATE TABLE IF NOT EXISTS omicron.public.db_metadata ( ALTER TABLE omicron.public.switch_port_settings_link_config ADD COLUMN IF NOT EXISTS autoneg BOOL NOT NULL DEFAULT false; +CREATE INDEX IF NOT EXISTS ipv4_nat_lookup_by_vni ON omicron.public.ipv4_nat_entry ( + vni +) +STORING ( + external_address, + first_port, + last_port, + sled_address, + mac, + version_added, + version_removed, + time_created, + time_deleted +); + INSERT INTO omicron.public.db_metadata ( singleton, time_created, @@ -3390,7 +3405,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - ( TRUE, NOW(), NOW(), '28.0.0', NULL) + ( TRUE, NOW(), NOW(), '29.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/smf/nexus/multi-sled/config-partial.toml b/smf/nexus/multi-sled/config-partial.toml index d330f32ab6..d84bf8d4b0 100644 --- 
a/smf/nexus/multi-sled/config-partial.toml +++ b/smf/nexus/multi-sled/config-partial.toml @@ -47,6 +47,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # by default, allocate across 3 distinct sleds diff --git a/smf/nexus/single-sled/config-partial.toml b/smf/nexus/single-sled/config-partial.toml index cbd4851613..01206655f0 100644 --- a/smf/nexus/single-sled/config-partial.toml +++ b/smf/nexus/single-sled/config-partial.toml @@ -47,6 +47,7 @@ inventory.nkeep = 3 # Disable inventory collection altogether (for emergencies) inventory.disable = false phantom_disks.period_secs = 30 +sync_service_zone_nat.period_secs = 30 [default_region_allocation_strategy] # by default, allocate without requirement for distinct sleds. From c91421a5693fb05e5308a051567f866551805f10 Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Fri, 26 Jan 2024 17:50:56 -0800 Subject: [PATCH 41/49] Update Rust crate sqlparser to 0.43.1 (#4902) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b2815d9a1f..7d7a8422c2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8379,9 +8379,9 @@ dependencies = [ [[package]] name = "sqlparser" -version = "0.41.0" +version = "0.43.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5cc2c25a6c66789625ef164b4c7d2e548d627902280c13710d33da8222169964" +checksum = "f95c4bae5aba7cd30bd506f7140026ade63cff5afd778af8854026f9606bf5d4" dependencies = [ "log", "sqlparser_derive", diff --git a/Cargo.toml b/Cargo.toml index 47c412b9b5..1192806d15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -359,7 +359,7 @@ sp-sim = { path = "sp-sim" } sprockets-common = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-host = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } sprockets-rot = { git = "http://github.com/oxidecomputer/sprockets", rev = "77df31efa5619d0767ffc837ef7468101608aee9" } -sqlparser = { version = "0.41.0", features = [ "visitor" ] } +sqlparser = { version = "0.43.1", features = [ "visitor" ] } static_assertions = "1.1.0" # Please do not change the Steno version to a Git dependency. It makes it # harder than expected to make breaking changes (even if you specify a specific From bd0ac96aa0afc6d89e46b4673c1194cb4dce615c Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Sat, 27 Jan 2024 05:28:14 +0000 Subject: [PATCH 42/49] Update taiki-e/install-action digest to bee85d7 (#4908) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This PR contains the following updates: | Package | Type | Update | Change | |---|---|---|---| | [taiki-e/install-action](https://togithub.com/taiki-e/install-action) | action | digest | [`1f501f0` -> `bee85d7`](https://togithub.com/taiki-e/install-action/compare/1f501f0...bee85d7) | --- ### Configuration 📅 **Schedule**: Branch creation - "after 8pm,before 6am" in timezone America/Los_Angeles, Automerge - "after 8pm,before 6am" in timezone America/Los_Angeles. 🚦 **Automerge**: Enabled. ♻ **Rebasing**: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox. 
🔕 **Ignore**: Close this PR and you won't be reminded about this update again. --- - [ ] If you want to rebase/retry this PR, check this box --- This PR has been generated by [Renovate Bot](https://togithub.com/renovatebot/renovate). Co-authored-by: oxide-renovate[bot] <146848827+oxide-renovate[bot]@users.noreply.github.com> --- .github/workflows/hakari.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/hakari.yml b/.github/workflows/hakari.yml index d4a4a4750c..85aa0ab7f4 100644 --- a/.github/workflows/hakari.yml +++ b/.github/workflows/hakari.yml @@ -24,7 +24,7 @@ jobs: with: toolchain: stable - name: Install cargo-hakari - uses: taiki-e/install-action@1f501f091c4240a626be17b7496626f8f0cf979a # v2 + uses: taiki-e/install-action@bee85d7ea77c01f7a403c22ac2c802b431b093df # v2 with: tool: cargo-hakari - name: Check workspace-hack Cargo.toml is up-to-date From c2b90bc664ee957a3956a965b30f57413cde71e9 Mon Sep 17 00:00:00 2001 From: Nils Nieuwejaar Date: Sat, 27 Jan 2024 00:31:58 -0500 Subject: [PATCH 43/49] Update dendrite commit (#4907) Update progenitor to get clippy fix Preserve switch zone logs in CI --- .github/buildomat/jobs/deploy.sh | 1 + Cargo.lock | 8 ++++---- package-manifest.toml | 12 ++++++------ tools/dendrite_openapi_version | 4 ++-- tools/dendrite_stub_checksums | 6 +++--- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/.github/buildomat/jobs/deploy.sh b/.github/buildomat/jobs/deploy.sh index e69cfb0078..5e43ff7f7c 100755 --- a/.github/buildomat/jobs/deploy.sh +++ b/.github/buildomat/jobs/deploy.sh @@ -5,6 +5,7 @@ #: target = "lab-2.0-opte-0.28" #: output_rules = [ #: "%/var/svc/log/oxide-sled-agent:default.log*", +#: "%/zone/oxz_*/root/var/svc/log/oxide-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/oxide-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_*/root/var/svc/log/system-illumos-*.log*", #: "%/pool/ext/*/crypt/zone/oxz_ntp_*/root/var/log/chrony/*.log*", diff --git a/Cargo.lock b/Cargo.lock index 7d7a8422c2..5309fac767 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6414,7 +6414,7 @@ dependencies = [ [[package]] name = "progenitor" version = "0.5.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ "progenitor-client", "progenitor-impl", @@ -6425,7 +6425,7 @@ dependencies = [ [[package]] name = "progenitor-client" version = "0.5.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ "bytes", "futures-core", @@ -6439,7 +6439,7 @@ dependencies = [ [[package]] name = "progenitor-impl" version = "0.5.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ "getopts", "heck 0.4.1", @@ -6461,7 +6461,7 @@ dependencies = [ [[package]] name = "progenitor-macro" version = "0.5.0" -source = "git+https://github.com/oxidecomputer/progenitor?branch=main#2d3b9d0eb50a1907974c0b0ba7ee7893425b3e79" +source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ "openapiv3", "proc-macro2", diff --git 
a/package-manifest.toml b/package-manifest.toml index f574f1ff5d..b08457e46c 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -497,8 +497,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" -source.sha256 = "052d97370515189465e4e835edb4a2d7e1e0b55ace0230ba18f045a03d975e80" +source.commit = "fd159136c552d8b4ec4d49dd9bae7e38f6a636e6" +source.sha256 = "1e24598ba77dc00682cdf54fc370696ef5aa49ed510ab7f72fcc91d61d679e7b" output.type = "zone" output.intermediate_only = true @@ -522,8 +522,8 @@ only_for_targets.image = "standard" # 2. Copy the output zone image from dendrite/out to omicron/out source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" -source.sha256 = "3ebc1ee37c4d7a0657a78abbaad2fe81570da88128505bfdc4ea47e3e05c6277" +source.commit = "fd159136c552d8b4ec4d49dd9bae7e38f6a636e6" +source.sha256 = "720df8aff3aaa0f8a86ec606089ebf8b5068d7f3c243bd4c868b96ef72d13485" output.type = "zone" output.intermediate_only = true @@ -540,8 +540,8 @@ only_for_targets.image = "standard" # 2. Copy dendrite.tar.gz from dendrite/out to omicron/out/dendrite-softnpu.tar.gz source.type = "prebuilt" source.repo = "dendrite" -source.commit = "1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" -source.sha256 = "18079b2ce1003facb476e28499f2e31ebe092510ecd6c685fa1a91f1a34f2dda" +source.commit = "fd159136c552d8b4ec4d49dd9bae7e38f6a636e6" +source.sha256 = "5e34a10d9dca6c94f96075140d42b755dee1f5e6a3485fc239b12e12b89a30c5" output.type = "zone" output.intermediate_only = true diff --git a/tools/dendrite_openapi_version b/tools/dendrite_openapi_version index 6bda68c69d..56bcb2d9ff 100644 --- a/tools/dendrite_openapi_version +++ b/tools/dendrite_openapi_version @@ -1,2 +1,2 @@ -COMMIT="1c2f91a493c8b3c5fb7b853c570b2901ac3c22a7" -SHA2="07d115bfa8498a8015ca2a8447efeeac32e24aeb25baf3d5e2313216e11293c0" +COMMIT="fd159136c552d8b4ec4d49dd9bae7e38f6a636e6" +SHA2="e8f73a83d5c62f7efce998f821acc80a91b7995c95bd9ec2c228372829310099" diff --git a/tools/dendrite_stub_checksums b/tools/dendrite_stub_checksums index de183cb496..497ce5c010 100644 --- a/tools/dendrite_stub_checksums +++ b/tools/dendrite_stub_checksums @@ -1,3 +1,3 @@ -CIDL_SHA256_ILLUMOS="052d97370515189465e4e835edb4a2d7e1e0b55ace0230ba18f045a03d975e80" -CIDL_SHA256_LINUX_DPD="5c8bc252818897bc552a039f2423eb668d99e19ef54374644412c7aca533f94e" -CIDL_SHA256_LINUX_SWADM="9d549fc3ebaf392961404b50e802ccb5e81e41e779ecc46166d49e5fb44b524f" +CIDL_SHA256_ILLUMOS="1e24598ba77dc00682cdf54fc370696ef5aa49ed510ab7f72fcc91d61d679e7b" +CIDL_SHA256_LINUX_DPD="4fc43b53a048264664ede64805d4d179ec32d50cf9ab1aaa0fa4e17190e511a2" +CIDL_SHA256_LINUX_SWADM="0ab34a2063e68568aa064f7b71825a603d47b3e399f3e7f45976edb5d5283f0f"
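This bump pins the same dendrite rev in several places at once: three `source.commit` values in package-manifest.toml, the `COMMIT`/`SHA2` lines in tools/dendrite_openapi_version, and the matching digests in tools/dendrite_stub_checksums. A minimal sketch of a consistency check that would catch a missed spot, assuming only the literal formats visible in this diff; the check itself is hypothetical and not part of this PR:

```rust
use std::fs;

// Hypothetical consistency check, not part of this PR: the dendrite rev is
// pinned in package-manifest.toml (three stanzas) and again in the tools/
// version files; a missed spot leaves the pins out of sync.
fn main() -> std::io::Result<()> {
    // tools/dendrite_openapi_version holds a line like:
    //   COMMIT="fd159136c552d8b4ec4d49dd9bae7e38f6a636e6"
    let version = fs::read_to_string("tools/dendrite_openapi_version")?;
    let commit = version
        .lines()
        .find_map(|l| l.strip_prefix("COMMIT=\""))
        .and_then(|v| v.strip_suffix('"'))
        .expect("no COMMIT line in tools/dendrite_openapi_version");

    // Each dendrite stanza in package-manifest.toml pins the same rev via
    // `source.commit = "<sha>"`; count how many stanzas carry it.
    let manifest = fs::read_to_string("package-manifest.toml")?;
    let needle = format!("source.commit = \"{commit}\"");
    let pins = manifest.matches(needle.as_str()).count();
    assert_eq!(pins, 3, "dendrite stanzas disagree with pinned commit {commit}");
    Ok(())
}
```

From 3ee0afd751a4175dc0855c91b5f0dfb7c7c21dc9 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 29 Jan 2024 12:11:53 -0500 Subject: [PATCH 44/49] SP versions v1.0.6 (#4916) --- tools/hubris_checksums | 14 +++++++------- tools/hubris_version | 2 +- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tools/hubris_checksums b/tools/hubris_checksums index 478d8f192e..8dc282e500 100644 --- a/tools/hubris_checksums +++ b/tools/hubris_checksums @@ -1,7 +1,7 @@ -6567a0775d5f0b7ff09d97f149532a627222971eadd89ea0dac186c9a825846d build-gimlet-c-image-default-v1.0.5.zip -1190b27246d8c8c20837d957266ac9e90e32934841b9acc2990d2762a3b53a16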
build-gimlet-d-image-default-v1.0.5.zip -79e644ffbbd7195ff2699c90ee26f277edac40b385fc5bb8e7821a4611ad7c11 build-gimlet-e-image-default-v1.0.5.zip -bf83e0311e18fc716dd5a315106aa965d278c4f481892fe124bc376b2e23581e build-psc-b-image-default-v1.0.5.zip -0dd1de9c3d3c686e8a05525fbed48c6532b608b34c77214b7fe15a8f54b0f3cb build-psc-c-image-default-v1.0.5.zip -c024d5546288d0d953735b3a0221ee0e218cc27ed1e26eede5c91c9a8137c592 build-sidecar-b-image-default-v1.0.5.zip -de79320022718be94c81dc7d44b5229ce0956aff9c1ffa11e8c3ff8961af49bb build-sidecar-c-image-default-v1.0.5.zip +e1b3dc5c7da643b27c0dd5bf8e915d13661446e711bfdeb1d8274eed63fa5843 build-gimlet-c-image-default-v1.0.6.zip +3002444307047429531ef862435a034c64b89a698921bf19794ac97b777a2f95 build-gimlet-d-image-default-v1.0.6.zip +9e783bc92fb1c8a91f4b117241ed4c0ff2818f32f46c5193cdcdbbe02d56af9a build-gimlet-e-image-default-v1.0.6.zip +dece7d39f7fcd2f15dc62d91e94046b1f438a3e0fd2c804efd5f67e12ce0dd58 build-psc-b-image-default-v1.0.6.zip +7e94035b52f1dcb137b477750bf9e215d4fcd07fe95b2cfdbbc0d7fada79eb28 build-psc-c-image-default-v1.0.6.zip +ccf09dc7c9c2a946b89bcfafb391100504880fa395c9079dfb7a3b28635a4abb build-sidecar-b-image-default-v1.0.6.zip +b5d91c212f813dbdba06c1f5b098fd37fe6cb93fe33fd3c58325cb6504dc6d05 build-sidecar-c-image-default-v1.0.6.zip diff --git a/tools/hubris_version b/tools/hubris_version index 37e565d060..f2c1e74f2b 100644 --- a/tools/hubris_version +++ b/tools/hubris_version @@ -1 +1 @@ -TAGS=(gimlet-v1.0.5 psc-v1.0.5 sidecar-v1.0.5) +TAGS=(gimlet-v1.0.6 psc-v1.0.6 sidecar-v1.0.6) From 45df2e6ce738eba9a66e2f885bdc509d8932834b Mon Sep 17 00:00:00 2001 From: Alan Hanson Date: Mon, 29 Jan 2024 09:47:55 -0800 Subject: [PATCH 45/49] Update crucible and propolis versions (#4912) Crucible changes Remove a superfluous copy during write serialization (#1087) Update to progenitor v0.5.0, pull in required Omicron updates (#1115) Update usdt to v0.5.0 (#1116) Do not panic on reinitialize of a downstairs client. (#1114) Bump (tracing-)opentelemetry(-jaeger) (#1113) Make the Guest -> Upstairs queue fully async (#1086) Switch to per-block ownership (#1107) Handle timeout in the client IO task (#1109) Enforce buffer alignment (#1106) Block size buffers (#1105) New dtrace probes and a counter struct in the Upstairs. (#1104) Implement read decryption offloading (#1089) Remove Arc + Mutex from Buffer (#1094) Comment cleanup and rename of DsState::Repair -> Reconcile (#1102) do not panic the dynamometer for OOB writes (#1101) Allow dsc to start the downstairs in read-only mode. 
(#1098) Use the omicron-zone-package methods for topo sorting (#1099) Package with topological sorting (#1097) Fix clippy lints in dsc (#1095) Propolis changes: PHD: demote artifact store logs to DEBUG, enable DEBUG on CI (#626) PHD: fix missing newlines in serial.log (#622) PHD: fix run_shell_command with multiline commands (#621) PHD: fix `--artifact-directory` not doing anything (#618) Update h2 dependency Update Crucible (and Omicron) dependencies PHD: refactor guest serial console handling (#615) phd: add basic "migration-from-base" tests + machinery (#609) phd: Ensure min disk size fits read-only parents (#611) phd: automatically fetch `crucible-downstairs` from Buildomat (#604) Mitigate behavior from illumos#16183 PHD: add guest adapter for WS2022 (#607) phd: include error cause chain in failure output (#606) add QEMU pvpanic ISA device (#596) Add crucible-mem backend Make crucible opt parsing more terse in standalone Co-authored-by: Alan Hanson --- Cargo.lock | 164 +++++++++++++++++++++++++++--------------- Cargo.toml | 12 ++-- package-manifest.toml | 12 ++-- 3 files changed, 119 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5309fac767..a058462468 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -461,7 +461,7 @@ dependencies = [ [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "bhyve_api_sys", "libc", @@ -471,7 +471,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "libc", "strum", @@ -638,7 +638,7 @@ dependencies = [ "ipnetwork", "omicron-common", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", @@ -1294,13 +1294,13 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" +source = "git+https://github.com/oxidecomputer/crucible?rev=2d4bc11232d53f177c286383926fa5f8c1b2a938#2d4bc11232d53f177c286383926fa5f8c1b2a938" dependencies = [ "anyhow", "chrono", "crucible-workspace-hack", "percent-encoding", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "schemars", "serde", @@ -1310,13 +1310,13 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" +source = "git+https://github.com/oxidecomputer/crucible?rev=2d4bc11232d53f177c286383926fa5f8c1b2a938#2d4bc11232d53f177c286383926fa5f8c1b2a938" dependencies = [ "anyhow", "chrono", "crucible-workspace-hack", "percent-encoding", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "schemars", "serde", @@ -1327,7 +1327,7 @@ dependencies = [ 
[[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=e71b10d2f9f1fb52818b916bae83ba15a339548d#e71b10d2f9f1fb52818b916bae83ba15a339548d" +source = "git+https://github.com/oxidecomputer/crucible?rev=2d4bc11232d53f177c286383926fa5f8c1b2a938#2d4bc11232d53f177c286383926fa5f8c1b2a938" dependencies = [ "crucible-workspace-hack", "libc", @@ -1530,8 +1530,8 @@ dependencies = [ "omicron-common", "omicron-workspace-hack", "omicron-zone-package", - "progenitor", - "progenitor-client", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "quote", "reqwest", "rustfmt-wrapper", @@ -1852,7 +1852,7 @@ dependencies = [ "chrono", "http 0.2.11", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "schemars", "serde", @@ -1906,8 +1906,8 @@ dependencies = [ "ipnetwork", "omicron-workspace-hack", "omicron-zone-package", - "progenitor", - "progenitor-client", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "quote", "rand 0.8.5", "regress", @@ -2545,7 +2545,7 @@ dependencies = [ "chrono", "gateway-messages", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "rand 0.8.5", "reqwest", "schemars", @@ -3457,7 +3457,7 @@ version = "0.1.0" dependencies = [ "installinator-common", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", @@ -3537,7 +3537,7 @@ dependencies = [ "omicron-common", "omicron-test-utils", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "serde", "serde_json", @@ -4042,8 +4042,8 @@ dependencies = [ "omicron-common", "omicron-workspace-hack", "omicron-zone-package", - "progenitor", - "progenitor-client", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "quote", "reqwest", "rustfmt-wrapper", @@ -4212,7 +4212,7 @@ dependencies = [ "omicron-common", "omicron-passwords", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", @@ -4780,7 +4780,7 @@ dependencies = [ "omicron-workspace-hack", "once_cell", "parse-display", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "proptest", "rand 0.8.5", "regress", @@ -4880,7 +4880,7 @@ dependencies = [ "signal-hook", "signal-hook-tokio", "slog", - "slog-dtrace 0.3.0", + "slog-dtrace", "slog-error-chain", "sp-sim", "subprocess", @@ -4968,7 +4968,7 @@ dependencies = [ "petgraph", "pq-sys", "pretty_assertions", - "progenitor-client", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "propolis-client", "rand 0.8.5", "rcgen", @@ -4989,7 +4989,7 @@ dependencies = [ "sled-agent-client", "slog", "slog-async", - "slog-dtrace 0.3.0", + "slog-dtrace", "slog-error-chain", "slog-term", "sp-sim", @@ -5183,7 +5183,7 @@ dependencies = [ "sled-storage", "slog", "slog-async", - "slog-dtrace 0.3.0", + "slog-dtrace", 
"slog-term", "smf", "static_assertions", @@ -5558,7 +5558,7 @@ dependencies = [ "http 0.2.11", "hyper 0.14.27", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "rand 0.8.5", "regress", "reqwest", @@ -5614,7 +5614,7 @@ dependencies = [ "futures", "omicron-common", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "reqwest", "serde", "slog", @@ -5651,7 +5651,7 @@ dependencies = [ "serde_json", "slog", "slog-async", - "slog-dtrace 0.3.0", + "slog-dtrace", "slog-term", "strum", "subprocess", @@ -5691,7 +5691,7 @@ dependencies = [ "serde_json", "slog", "slog-async", - "slog-dtrace 0.3.0", + "slog-dtrace", "slog-term", "sqlformat", "sqlparser", @@ -5750,7 +5750,7 @@ dependencies = [ "schemars", "serde", "slog", - "slog-dtrace 0.3.0", + "slog-dtrace", "thiserror", "tokio", "uuid", @@ -6416,9 +6416,20 @@ name = "progenitor" version = "0.5.0" source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" dependencies = [ - "progenitor-client", - "progenitor-impl", - "progenitor-macro", + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-impl 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "progenitor-macro 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "serde_json", +] + +[[package]] +name = "progenitor" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" +dependencies = [ + "progenitor-client 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", + "progenitor-impl 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", + "progenitor-macro 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", "serde_json", ] @@ -6436,6 +6447,20 @@ dependencies = [ "serde_urlencoded", ] +[[package]] +name = "progenitor-client" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" +dependencies = [ + "bytes", + "futures-core", + "percent-encoding", + "reqwest", + "serde", + "serde_json", + "serde_urlencoded", +] + [[package]] name = "progenitor-impl" version = "0.5.0" @@ -6458,6 +6483,28 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "progenitor-impl" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" +dependencies = [ + "getopts", + "heck 0.4.1", + "http 0.2.11", + "indexmap 2.1.0", + "openapiv3", + "proc-macro2", + "quote", + "regex", + "schemars", + "serde", + "serde_json", + "syn 2.0.48", + "thiserror", + "typify", + "unicode-ident", +] + [[package]] name = "progenitor-macro" version = "0.5.0" @@ -6465,7 +6512,24 @@ source = "git+https://github.com/oxidecomputer/progenitor?branch=main#86b60220b8 dependencies = [ "openapiv3", "proc-macro2", - "progenitor-impl", + "progenitor-impl 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", + "quote", + "schemars", + "serde", + "serde_json", + "serde_tokenstream 0.2.0", + "serde_yaml", + "syn 2.0.48", +] + +[[package]] +name = "progenitor-macro" +version = "0.5.0" +source = "git+https://github.com/oxidecomputer/progenitor#86b60220b88a2ca3629fb87acf8f83ff35f63aaa" +dependencies = [ + "openapiv3", + "proc-macro2", + "progenitor-impl 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", "quote", "schemars", "serde", @@ -6478,12 
+6542,12 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "async-trait", "base64", "futures", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", "rand 0.8.5", "reqwest", "schemars", @@ -6499,7 +6563,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "anyhow", "atty", @@ -6508,7 +6572,7 @@ dependencies = [ "dropshot", "futures", "hyper 0.14.27", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor)", "propolis_types", "rand 0.8.5", "reqwest", @@ -6518,7 +6582,7 @@ dependencies = [ "slog", "slog-async", "slog-bunyan", - "slog-dtrace 0.2.3", + "slog-dtrace", "slog-term", "thiserror", "tokio", @@ -6529,7 +6593,7 @@ dependencies = [ [[package]] name = "propolis_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=1e25649e8c2ac274bd04adfe0513dd14a482058c#1e25649e8c2ac274bd04adfe0513dd14a482058c" +source = "git+https://github.com/oxidecomputer/propolis?rev=ff6c4df2e816eee6e7b2b0488777d30ef35ee217#ff6c4df2e816eee6e7b2b0488777d30ef35ee217" dependencies = [ "schemars", "serde", @@ -7990,7 +8054,7 @@ dependencies = [ "ipnetwork", "omicron-common", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", @@ -8081,20 +8145,6 @@ dependencies = [ "time", ] -[[package]] -name = "slog-dtrace" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ebb79013d51afb48c5159d62068658fa672772be3aeeadee0d2710fb3903f637" -dependencies = [ - "chrono", - "serde", - "serde_json", - "slog", - "usdt 0.3.5", - "version_check", -] - [[package]] name = "slog-dtrace" version = "0.3.0" @@ -8307,7 +8357,7 @@ dependencies = [ "omicron-workspace-hack", "serde", "slog", - "slog-dtrace 0.3.0", + "slog-dtrace", "sprockets-rot", "thiserror", "tokio", @@ -10240,7 +10290,7 @@ dependencies = [ "sha2", "sled-hardware", "slog", - "slog-dtrace 0.3.0", + "slog-dtrace", "snafu", "subprocess", "tar", @@ -10269,7 +10319,7 @@ dependencies = [ "installinator-common", "ipnetwork", "omicron-workspace-hack", - "progenitor", + "progenitor 0.5.0 (git+https://github.com/oxidecomputer/progenitor?branch=main)", "regress", "reqwest", "schemars", diff --git a/Cargo.toml b/Cargo.toml index 1192806d15..d0738f9cd3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -178,9 +178,9 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.27.0", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "e71b10d2f9f1fb52818b916bae83ba15a339548d" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = 
"e71b10d2f9f1fb52818b916bae83ba15a339548d" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "2d4bc11232d53f177c286383926fa5f8c1b2a938" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "2d4bc11232d53f177c286383926fa5f8c1b2a938" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "2d4bc11232d53f177c286383926fa5f8c1b2a938" } curve25519-dalek = "4" datatest-stable = "0.2.3" display-error-chain = "0.2.0" @@ -304,9 +304,9 @@ prettyplease = "0.2.16" proc-macro2 = "1.0" progenitor = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } progenitor-client = { git = "https://github.com/oxidecomputer/progenitor", branch = "main" } -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "1e25649e8c2ac274bd04adfe0513dd14a482058c" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "ff6c4df2e816eee6e7b2b0488777d30ef35ee217" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "ff6c4df2e816eee6e7b2b0488777d30ef35ee217" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "ff6c4df2e816eee6e7b2b0488777d30ef35ee217" } proptest = "1.4.0" quote = "1.0" rand = "0.8.5" diff --git a/package-manifest.toml b/package-manifest.toml index b08457e46c..c34b84eb9d 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -405,10 +405,10 @@ only_for_targets.image = "standard" # 3. Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "e71b10d2f9f1fb52818b916bae83ba15a339548d" +source.commit = "2d4bc11232d53f177c286383926fa5f8c1b2a938" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "030a02551e487f561bcfad47426b953d15c4430d77770765c7fc03afd8d61bd9" +source.sha256 = "88ec93657a644e8f10a32d1d22cc027db901aea81027f49ce7bee58fc4a35755" output.type = "zone" [package.crucible-pantry] @@ -416,10 +416,10 @@ service_name = "crucible_pantry" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "e71b10d2f9f1fb52818b916bae83ba15a339548d" +source.commit = "2d4bc11232d53f177c286383926fa5f8c1b2a938" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "c74e23e7f7995ba3a69a9ec3a31f1db517ec15cd3a9942c2c07621b219b743b2" +source.sha256 = "e2c3ed2d4cd6b5da3d38dd52df6d4a259280be7d45c30a363e9c71b174ecc6f8" output.type = "zone" # Refer to @@ -430,10 +430,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "1e25649e8c2ac274bd04adfe0513dd14a482058c" +source.commit = "ff6c4df2e816eee6e7b2b0488777d30ef35ee217" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "09c124315da3e434c85fe1ddb16459c36d8302e15705ff18fe6bbc7b4876f5f9" +source.sha256 = "aa10aa245a92e657fc074bd588ef6bbddaad2d9c946a8e1b91c02dce7e057561" output.type = "zone" 
[package.mg-ddm-gz] From 17153dbbfd6363b082feecb69b35f0bcf8e6077f Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:24:24 -0800 Subject: [PATCH 46/49] Update Rust crate serde_json to 1.0.113 (#4910) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- workspace-hack/Cargo.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a058462468..78dc9039ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7767,9 +7767,9 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.111" +version = "1.0.113" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "176e46fa42316f18edd598015a5166857fc835ec732f5215eac6b7bdbf0a84f4" +checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" dependencies = [ "itoa", "ryu", diff --git a/Cargo.toml b/Cargo.toml index d0738f9cd3..591cc143ac 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -332,7 +332,7 @@ semver = { version = "1.0.21", features = ["std", "serde"] } serde = { version = "1.0", default-features = false, features = [ "derive" ] } serde_derive = "1.0" serde_human_bytes = { git = "http://github.com/oxidecomputer/serde_human_bytes", branch = "main" } -serde_json = "1.0.111" +serde_json = "1.0.113" serde_path_to_error = "0.1.15" serde_tokenstream = "0.2" serde_urlencoded = "0.7.1" diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index cebd4cab36..bf01830630 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -92,7 +92,7 @@ ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } serde = { version = "1.0.195", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.113", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } @@ -200,7 +200,7 @@ ring = { version = "0.17.7", features = ["std"] } schemars = { version = "0.8.16", features = ["bytes", "chrono", "uuid1"] } semver = { version = "1.0.21", features = ["serde"] } serde = { version = "1.0.195", features = ["alloc", "derive", "rc"] } -serde_json = { version = "1.0.111", features = ["raw_value", "unbounded_depth"] } +serde_json = { version = "1.0.113", features = ["raw_value", "unbounded_depth"] } sha2 = { version = "0.10.8", features = ["oid"] } similar = { version = "2.3.0", features = ["inline", "unicode"] } slog = { version = "2.7.0", features = ["dynamic-keys", "max_level_trace", "release_max_level_debug", "release_max_level_trace"] } From 28c938f7b4bf6a24a71de65b0a94aefa8511ec5a Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:24:41 -0800 Subject: [PATCH 47/49] Update Rust crate cargo_toml to 0.19 (#4914) --- Cargo.lock | 4 ++-- dev-tools/xtask/Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 78dc9039ab..410daa8a84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -794,9 +794,9 @@ dependencies = [ [[package]] name = "cargo_toml" -version = "0.18.0" +version = "0.19.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "802b755090e39835a4b0440fb0bbee0df7495a8b337f63db21e616f7821c7e8c" +checksum = "922d6ea3081d68b9e3e09557204bff47f9b5406a4a304dc917e187f8cafd582b" dependencies = [ "serde", "toml 0.8.8", diff --git a/dev-tools/xtask/Cargo.toml b/dev-tools/xtask/Cargo.toml index bccb69a1f7..0429fcae79 100644 --- a/dev-tools/xtask/Cargo.toml +++ b/dev-tools/xtask/Cargo.toml @@ -7,6 +7,6 @@ license = "MPL-2.0" [dependencies] anyhow.workspace = true camino.workspace = true -cargo_toml = "0.18" +cargo_toml = "0.19" cargo_metadata = "0.18" clap.workspace = true From ab1ba613b5ec4d30b369543e3702dc8c4f735eaf Mon Sep 17 00:00:00 2001 From: "oxide-renovate[bot]" <146848827+oxide-renovate[bot]@users.noreply.github.com> Date: Mon, 29 Jan 2024 11:24:56 -0800 Subject: [PATCH 48/49] Update Rust crate rcgen to 0.12.1 (#4909) --- Cargo.lock | 4 ++-- Cargo.toml | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 410daa8a84..d40d2a5839 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6820,9 +6820,9 @@ dependencies = [ [[package]] name = "rcgen" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d918c80c5a4c7560db726763020bd16db179e4d5b828078842274a443addb5d" +checksum = "48406db8ac1f3cbc7dcdb56ec355343817958a356ff430259bb07baf7607e1e1" dependencies = [ "pem", "ring 0.17.7", diff --git a/Cargo.toml b/Cargo.toml index 591cc143ac..e1e4d40736 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -312,7 +312,7 @@ quote = "1.0" rand = "0.8.5" ratatui = "0.25.0" rayon = "1.8" -rcgen = "0.12.0" +rcgen = "0.12.1" reedline = "0.28.0" ref-cast = "1.0" regex = "1.10.3" From 62547d22e74c4bb134b8d65e373ace793de7529f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 30 Jan 2024 06:19:33 -0500 Subject: [PATCH 49/49] RoT staging/dev and prod/rel v1.0.5 (#4917) --- .github/buildomat/jobs/tuf-repo.sh | 4 ++-- tools/dvt_dock_version | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/buildomat/jobs/tuf-repo.sh b/.github/buildomat/jobs/tuf-repo.sh index 5e7a2d4a91..f8514e2b13 100644 --- a/.github/buildomat/jobs/tuf-repo.sh +++ b/.github/buildomat/jobs/tuf-repo.sh @@ -278,8 +278,8 @@ EOF done } # usage: SERIES ROT_DIR ROT_VERSION BOARDS... -add_hubris_artifacts rot-staging-dev staging/dev cert-staging-dev-v1.0.4 "${ALL_BOARDS[@]}" -add_hubris_artifacts rot-prod-rel prod/rel cert-prod-rel-v1.0.4 "${ALL_BOARDS[@]}" +add_hubris_artifacts rot-staging-dev staging/dev cert-staging-dev-v1.0.5 "${ALL_BOARDS[@]}" +add_hubris_artifacts rot-prod-rel prod/rel cert-prod-rel-v1.0.5 "${ALL_BOARDS[@]}" for series in "${SERIES_LIST[@]}"; do /work/tufaceous assemble --no-generate-key /work/manifest-"$series".toml /work/repo-"$series".zip diff --git a/tools/dvt_dock_version b/tools/dvt_dock_version index f7fef543f4..047065135b 100644 --- a/tools/dvt_dock_version +++ b/tools/dvt_dock_version @@ -1 +1 @@ -COMMIT=ad874c11ecd0c45bdc1e4c2ac35c2bcbe472d55f +COMMIT=e384836415e05ae0ba648810ab1c87e9093cdabb